diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 98c1b2b7c..4dc6d6ac1 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -2,8 +2,11 @@ import re from typing import Any, Optional, Union, Iterator, Iterable +from markdown import markdown + from bookwyrm import models from bookwyrm.book_search import SearchResult +from bookwyrm.utils.sanitizer import clean from .abstract_connector import AbstractConnector, Mapping, JsonDict from .abstract_connector import get_data, infer_physical_format, unique_physical_format from .connector_manager import ConnectorException, create_edition_task @@ -235,11 +238,22 @@ def ignore_edition(edition_data: JsonDict) -> bool: return True -def get_description(description_blob: Union[JsonDict, str]) -> Optional[str]: +def get_description(description_blob: Union[JsonDict, str]) -> str: """descriptions can be a string or a dict""" if isinstance(description_blob, dict): - return description_blob.get("value") - return description_blob + description = markdown(description_blob.get("value", "")) + else: + description = markdown(description_blob) + + if ( + description.startswith("
") + and description.endswith("
") + and description.count("") == 1 + ): + # If there is just one
tag and it is around the text remove it + return description[len("
") : -len("
")].strip() + + return clean(description) def get_openlibrary_key(key: str) -> str: diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index 88ab09856..70db03483 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -14,7 +14,7 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key from bookwyrm.connectors.connector_manager import ConnectorException - +# pylint: disable=too-many-public-methods class Openlibrary(TestCase): """test loading data from openlibrary.org""" @@ -34,11 +34,15 @@ class Openlibrary(TestCase): work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json") edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json") + edition_md_file = pathlib.Path(__file__).parent.joinpath( + "../data/ol_edition_markdown.json" + ) edition_list_file = pathlib.Path(__file__).parent.joinpath( "../data/ol_edition_list.json" ) self.work_data = json.loads(work_file.read_bytes()) self.edition_data = json.loads(edition_file.read_bytes()) + self.edition_md_data = json.loads(edition_md_file.read_bytes()) self.edition_list_data = json.loads(edition_list_file.read_bytes()) def test_get_remote_id_from_data(self): @@ -185,6 +189,18 @@ class Openlibrary(TestCase): expected = "First in the Old Kingdom/Abhorsen series." self.assertEqual(description, expected) + def test_get_description_markdown_paragraphs(self): + """should do some cleanup on the description data""" + description = get_description("Paragraph 1\n\nParagraph 2") + expected = "Paragraph 1
\nParagraph 2
" + self.assertEqual(description, expected) + + def test_get_description_markdown_blockquote(self): + """should do some cleanup on the description data""" + description = get_description("> Quote\n\nParagraph 2") + expected = "\n\nQuote
\n
Paragraph 2
" + self.assertEqual(description, expected) + def test_get_openlibrary_key(self): """extracts the uuid""" key = get_openlibrary_key("/books/OL27320736M") @@ -218,13 +234,44 @@ class Openlibrary(TestCase): self.assertEqual(result.parent_work, work) self.assertEqual(result.title, "Sabriel") self.assertEqual(result.isbn_10, "0060273224") - self.assertIsNotNone(result.description) + self.assertEqual(result.description, self.edition_data["description"]["value"]) self.assertEqual(result.languages[0], "English") self.assertEqual(result.publishers[0], "Harper Trophy") self.assertEqual(result.pages, 491) self.assertEqual(result.subjects[0], "Fantasy.") self.assertEqual(result.physical_format, "Hardcover") + @responses.activate + def test_create_edition_markdown_from_data(self): + """okay but can it actually create an edition with proper metadata""" + work = models.Work.objects.create(title="Hello") + responses.add( + responses.GET, + "https://openlibrary.org/authors/OL10183984A", + json={"hi": "there"}, + status=200, + ) + with patch( + "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data" + ) as mock: + mock.return_value = [] + result = self.connector.create_edition_from_data(work, self.edition_md_data) + self.assertEqual( + result.description, + '\n\n"She didn\'t choose her garden" opens this chapbook ' + "exploring Black womanhood, mental and physical health, spirituality, and " + "ancestral roots. It is an investigation of how to locate a self amidst " + "complex racial history and how to forge an authentic way forward. There's " + "internal slippage as the subject weaves between the presence and spirits " + "of others, as well as a reckoning with the toll of navigating this world " + "as a Black woman. Yet, we also see hopefulness: a refuge in becoming part " + "of the collective, beyond individuality. The Stars With You " + "gives us a speculative yearning for what is to come and probes what is " + "required to reach it.
\n