1
0
Fork 0

Merge pull request #2935 from jderuiter/markdown-import

Convert description from Markdown when importing from Open Library
This commit is contained in:
Mouse Reeve 2023-08-06 16:14:25 -07:00 committed by GitHub
commit 861d3b1500
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 121 additions and 6 deletions

View file

@ -2,8 +2,11 @@
import re import re
from typing import Any, Optional, Union, Iterator, Iterable from typing import Any, Optional, Union, Iterator, Iterable
from markdown import markdown
from bookwyrm import models from bookwyrm import models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from bookwyrm.utils.sanitizer import clean
from .abstract_connector import AbstractConnector, Mapping, JsonDict from .abstract_connector import AbstractConnector, Mapping, JsonDict
from .abstract_connector import get_data, infer_physical_format, unique_physical_format from .abstract_connector import get_data, infer_physical_format, unique_physical_format
from .connector_manager import ConnectorException, create_edition_task from .connector_manager import ConnectorException, create_edition_task
@ -235,11 +238,22 @@ def ignore_edition(edition_data: JsonDict) -> bool:
return True return True
def get_description(description_blob: Union[JsonDict, str]) -> str:
    """Render an Open Library description as sanitized HTML.

    The description arrives either as a plain string or as a dict carrying
    the text under the "value" key. In both cases the text is rendered with
    ``markdown`` and the resulting HTML is passed through ``clean``.

    Sanitizing happens before the single-paragraph shortcut below, so the
    returned value has been through ``clean`` on every path.

    Returns the sanitized HTML. When the whole description is exactly one
    paragraph, the surrounding <p>...</p> is dropped and only the inner
    content is returned.
    """
    if isinstance(description_blob, dict):
        raw_text = description_blob.get("value")
    else:
        raw_text = description_blob

    # A missing or null description renders as an empty string instead of
    # failing inside markdown().
    description = clean(markdown(raw_text or ""))

    if (
        description.startswith("<p>")
        and description.endswith("</p>")
        and description.count("<p>") == 1
        # also require a single closing tag so a second, differently
        # written paragraph is never cut in half by the slice below
        and description.count("</p>") == 1
    ):
        # If there is just one <p> tag and it is around the text remove it
        return description[len("<p>") : -len("</p>")].strip()

    return description
def get_openlibrary_key(key: str) -> str: def get_openlibrary_key(key: str) -> str:

View file

@ -14,7 +14,7 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description
from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key
from bookwyrm.connectors.connector_manager import ConnectorException from bookwyrm.connectors.connector_manager import ConnectorException
# pylint: disable=too-many-public-methods
class Openlibrary(TestCase): class Openlibrary(TestCase):
"""test loading data from openlibrary.org""" """test loading data from openlibrary.org"""
@ -34,11 +34,15 @@ class Openlibrary(TestCase):
work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json") work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json")
edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json") edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json")
edition_md_file = pathlib.Path(__file__).parent.joinpath(
"../data/ol_edition_markdown.json"
)
edition_list_file = pathlib.Path(__file__).parent.joinpath( edition_list_file = pathlib.Path(__file__).parent.joinpath(
"../data/ol_edition_list.json" "../data/ol_edition_list.json"
) )
self.work_data = json.loads(work_file.read_bytes()) self.work_data = json.loads(work_file.read_bytes())
self.edition_data = json.loads(edition_file.read_bytes()) self.edition_data = json.loads(edition_file.read_bytes())
self.edition_md_data = json.loads(edition_md_file.read_bytes())
self.edition_list_data = json.loads(edition_list_file.read_bytes()) self.edition_list_data = json.loads(edition_list_file.read_bytes())
def test_get_remote_id_from_data(self): def test_get_remote_id_from_data(self):
@ -185,6 +189,18 @@ class Openlibrary(TestCase):
expected = "First in the Old Kingdom/Abhorsen series." expected = "First in the Old Kingdom/Abhorsen series."
self.assertEqual(description, expected) self.assertEqual(description, expected)
def test_get_description_markdown_paragraphs(self):
    """text separated by a blank line comes back as two <p> elements"""
    rendered = get_description("Paragraph 1\n\nParagraph 2")
    self.assertEqual(rendered, "<p>Paragraph 1</p>\n<p>Paragraph 2</p>")
def test_get_description_markdown_blockquote(self):
    """a leading "> " line becomes a <blockquote> followed by a paragraph"""
    rendered = get_description("> Quote\n\nParagraph 2")
    self.assertEqual(
        rendered,
        "<blockquote>\n<p>Quote</p>\n</blockquote>\n<p>Paragraph 2</p>",
    )
def test_get_openlibrary_key(self): def test_get_openlibrary_key(self):
"""extracts the uuid""" """extracts the uuid"""
key = get_openlibrary_key("/books/OL27320736M") key = get_openlibrary_key("/books/OL27320736M")
@ -218,13 +234,44 @@ class Openlibrary(TestCase):
self.assertEqual(result.parent_work, work) self.assertEqual(result.parent_work, work)
self.assertEqual(result.title, "Sabriel") self.assertEqual(result.title, "Sabriel")
self.assertEqual(result.isbn_10, "0060273224") self.assertEqual(result.isbn_10, "0060273224")
self.assertIsNotNone(result.description) self.assertEqual(result.description, self.edition_data["description"]["value"])
self.assertEqual(result.languages[0], "English") self.assertEqual(result.languages[0], "English")
self.assertEqual(result.publishers[0], "Harper Trophy") self.assertEqual(result.publishers[0], "Harper Trophy")
self.assertEqual(result.pages, 491) self.assertEqual(result.pages, 491)
self.assertEqual(result.subjects[0], "Fantasy.") self.assertEqual(result.subjects[0], "Fantasy.")
self.assertEqual(result.physical_format, "Hardcover") self.assertEqual(result.physical_format, "Hardcover")
@responses.activate
def test_create_edition_markdown_from_data(self):
    """an edition built from data with a markdown description stores it as HTML"""
    parent_work = models.Work.objects.create(title="Hello")
    responses.add(
        responses.GET,
        "https://openlibrary.org/authors/OL10183984A",
        json={"hi": "there"},
        status=200,
    )

    # the HTML that the fixture's markdown description should be stored as
    expected_description = (
        '<blockquote>\n<p>"She didn\'t choose her garden" opens this chapbook '
        "exploring Black womanhood, mental and physical health, spirituality, and "
        "ancestral roots. It is an investigation of how to locate a self amidst "
        "complex racial history and how to forge an authentic way forward. There's "
        "internal slippage as the subject weaves between the presence and spirits "
        "of others, as well as a reckoning with the toll of navigating this world "
        "as a Black woman. Yet, we also see hopefulness: a refuge in becoming part "
        "of the collective, beyond individuality. <em>The Stars With You</em> "
        "gives us a speculative yearning for what is to come and probes what is "
        "required to reach it.</p>\n</blockquote>\n<ul>\n<li><a "
        'href="https://store.cooperdillon.com/product/the-stars-with-you-by-'
        'stefani-cox">publisher</a></li>\n</ul>'
    )

    # author lookup is stubbed out to return no authors
    with patch(
        "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data",
        return_value=[],
    ):
        edition = self.connector.create_edition_from_data(
            parent_work, self.edition_md_data
        )

    self.assertEqual(edition.description, expected_description)
def test_ignore_edition(self): def test_ignore_edition(self):
"""skip editions with poor metadata""" """skip editions with poor metadata"""
self.assertFalse(ignore_edition({"isbn_13": "hi"})) self.assertFalse(ignore_edition({"isbn_13": "hi"}))

View file

@ -0,0 +1,54 @@
{
"type": {
"key": "/type/edition"
},
"authors": [
{
"key": "/authors/OL10183984A"
}
],
"languages": [
{
"key": "/languages/eng"
}
],
"publish_date": "2022",
"publishers": [
"Cooper Dillon Books"
],
"source_records": [
"bwb:9781943899159"
],
"subjects": [
"Poetry (poetic works by one author)",
"Poetry, collections"
],
"title": "The Stars with You",
"description": {
"type": "/type/text",
"value": ">\"She didn't choose her garden\" opens this chapbook exploring Black womanhood, mental and physical health, spirituality, and ancestral roots. It is an investigation of how to locate a self amidst complex racial history and how to forge an authentic way forward. There's internal slippage as the subject weaves between the presence and spirits of others, as well as a reckoning with the toll of navigating this world as a Black woman. Yet, we also see hopefulness: a refuge in becoming part of the collective, beyond individuality. *The Stars With You* gives us a speculative yearning for what is to come and probes what is required to reach it.\r\n\r\n- [publisher](https://store.cooperdillon.com/product/the-stars-with-you-by-stefani-cox)"
},
"works": [
{
"key": "/works/OL27172905W"
}
],
"key": "/books/OL36884359M",
"identifiers": {},
"isbn_13": [
"9781943899159"
],
"classifications": {},
"physical_format": "Paperback",
"number_of_pages": 36,
"latest_revision": 3,
"revision": 3,
"created": {
"type": "/type/datetime",
"value": "2022-01-28T19:20:08.156459"
},
"last_modified": {
"type": "/type/datetime",
"value": "2023-07-30T23:42:51.589566"
}
}

View file

@ -2,7 +2,7 @@
import bleach import bleach
def clean(input_text): def clean(input_text: str) -> str:
"""Run through "bleach" """ """Run through "bleach" """
return bleach.clean( return bleach.clean(
input_text, input_text,