diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 08661e9c2..a3117f7cb 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -21,8 +21,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install pylint - name: Analysing the code with pylint run: | - pylint bookwyrm/ --ignore=migrations --disable=E1101,E1135,E1136,R0903,R0901,R0902,W0707,W0511,W0406,R0401,R0801 + pylint bookwyrm/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..7f92d0168 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,6 @@ +[MAIN] +ignore=migrations +load-plugins=pylint.extensions.no_self_use + +[MESSAGES CONTROL] +disable=E1101,E1135,E1136,R0903,R0901,R0902,W0707,W0511,W0406,R0401,R0801,C3001 diff --git a/Dockerfile b/Dockerfile index 349dd82b1..b3cd26e88 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ RUN mkdir /app /app/static /app/images WORKDIR /app +RUN apt-get update && apt-get install -y gettext libgettextpo-dev tidy && apt-get clean + COPY requirements.txt /app/ RUN pip install -r requirements.txt --no-cache-dir -RUN apt-get update && apt-get install -y gettext libgettextpo-dev tidy && apt-get clean diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..c4e5e9cf9 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please report security issues to `mousereeve@riseup.net` \ No newline at end of file diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py index 6bee25f62..fa1535694 100644 --- a/bookwyrm/activitypub/base_activity.py +++ b/bookwyrm/activitypub/base_activity.py @@ -1,6 +1,7 @@ """ basics for an activitypub serializer """ from dataclasses import dataclass, fields, MISSING from json import JSONEncoder +import logging from django.apps import apps from django.db import IntegrityError, transaction @@ -8,6 +9,8 @@ from django.db import IntegrityError, 
transaction from bookwyrm.connectors import ConnectorException, get_data from bookwyrm.tasks import app +logger = logging.getLogger(__name__) + class ActivitySerializerError(ValueError): """routine problems serializing activitypub json""" @@ -39,12 +42,12 @@ def naive_parse(activity_objects, activity_json, serializer=None): activity_json["type"] = "PublicKey" activity_type = activity_json.get("type") + if activity_type in ["Question", "Article"]: + return None try: serializer = activity_objects[activity_type] except KeyError as err: # we know this exists and that we can't handle it - if activity_type in ["Question"]: - return None raise ActivitySerializerError(err) return serializer(activity_objects=activity_objects, **activity_json) @@ -65,7 +68,7 @@ class ActivityObject: try: value = kwargs[field.name] if value in (None, MISSING, {}): - raise KeyError() + raise KeyError("Missing required field", field.name) try: is_subclass = issubclass(field.type, ActivityObject) except TypeError: @@ -268,9 +271,9 @@ def resolve_remote_id( try: data = get_data(remote_id) except ConnectorException: - raise ActivitySerializerError( - f"Could not connect to host for remote_id: {remote_id}" - ) + logger.exception("Could not connect to host for remote_id: %s", remote_id) + return None + # determine the model implicitly, if not provided # or if it's a model with subclasses like Status, check again if not model or hasattr(model.objects, "select_subclasses"): diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index f2dd43fb2..a90d7943b 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -298,8 +298,9 @@ def add_status_on_create_command(sender, instance, created): priority = HIGH # check if this is an old status, de-prioritize if so # (this will happen if federation is very slow, or, more expectedly, on csv import) - one_day = 60 * 60 * 24 - if (instance.created_date - instance.published_date).seconds > one_day: + if instance.published_date < 
timezone.now() - timedelta( + days=1 + ) or instance.created_date < instance.published_date - timedelta(days=1): priority = LOW add_status_task.apply_async( diff --git a/bookwyrm/book_search.py b/bookwyrm/book_search.py index e42a6d8c3..4b0a6eab9 100644 --- a/bookwyrm/book_search.py +++ b/bookwyrm/book_search.py @@ -148,8 +148,8 @@ class SearchResult: def __repr__(self): # pylint: disable=consider-using-f-string - return "<SearchResult key={!r} title={!r} author={!r}>".format( - self.key, self.title, self.author + return "<SearchResult key={!r} title={!r} author={!r} confidence={!r}>".format( + self.key, self.title, self.author, self.confidence ) def json(self): diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 56e273886..dc4be4b3d 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,9 +1,8 @@ """ functionality outline for a book data connector """ from abc import ABC, abstractmethod import imghdr -import ipaddress import logging -from urllib.parse import urlparse +import re from django.core.files.base import ContentFile from django.db import transaction @@ -11,7 +10,7 @@ import requests from requests.exceptions import RequestException from bookwyrm import activitypub, models, settings -from .connector_manager import load_more_data, ConnectorException +from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url from .format_mappings import format_mappings @@ -39,62 +38,34 @@ class AbstractMinimalConnector(ABC): for field in self_fields: setattr(self, field, getattr(info, field)) - def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT): - """free text search""" - params = {} - if min_confidence: - params["min_confidence"] = min_confidence + def get_search_url(self, query): + """format the query url""" + # Check if the query resembles an ISBN + if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "": + return f"{self.isbn_search_url}{query}" - data = self.get_search_data( -
f"{self.search_url}{query}", - params=params, - timeout=timeout, - ) - results = [] + # NOTE: previously, we tried searching isbn and if that produces no results, + # searched as free text. This, instead, only searches isbn if it's isbn-y + return f"{self.search_url}{query}" - for doc in self.parse_search_data(data)[:10]: - results.append(self.format_search_result(doc)) - return results - - def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT): - """isbn search""" - params = {} - data = self.get_search_data( - f"{self.isbn_search_url}{query}", - params=params, - timeout=timeout, - ) - results = [] - - # this shouldn't be returning mutliple results, but just in case - for doc in self.parse_isbn_search_data(data)[:10]: - results.append(self.format_isbn_search_result(doc)) - return results - - def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use - """this allows connectors to override the default behavior""" - return get_data(remote_id, **kwargs) + def process_search_response(self, query, data, min_confidence): + """Format the search results based on the format of the query""" + if maybe_isbn(query): + return list(self.parse_isbn_search_data(data))[:10] + return list(self.parse_search_data(data, min_confidence))[:10] @abstractmethod def get_or_create_book(self, remote_id): """pull up a book record by whatever means possible""" @abstractmethod - def parse_search_data(self, data): + def parse_search_data(self, data, min_confidence): """turn the result json from a search into a list""" - @abstractmethod - def format_search_result(self, search_result): - """create a SearchResult obj from json""" - @abstractmethod def parse_isbn_search_data(self, data): """turn the result json from a search into a list""" - @abstractmethod - def format_isbn_search_result(self, search_result): - """create a SearchResult obj from json""" - class AbstractConnector(AbstractMinimalConnector): """generic book data connector""" @@ -254,9 +225,6 @@ def get_data(url,
params=None, timeout=10): # check if the url is blocked raise_not_valid_url(url) - if models.FederatedServer.is_blocked(url): - raise ConnectorException(f"Attempting to load data from blocked url: {url}") - try: resp = requests.get( url, @@ -311,20 +279,6 @@ def get_image(url, timeout=10): return image_content, extension -def raise_not_valid_url(url): - """do some basic reality checks on the url""" - parsed = urlparse(url) - if not parsed.scheme in ["http", "https"]: - raise ConnectorException("Invalid scheme: ", url) - - try: - ipaddress.ip_address(parsed.netloc) - raise ConnectorException("Provided url is an IP address: ", url) - except ValueError: - # it's not an IP address, which is good - pass - - class Mapping: """associate a local database field with a field in an external dataset""" @@ -366,3 +320,9 @@ def unique_physical_format(format_text): # try a direct match, so saving this would be redundant return None return format_text + + +def maybe_isbn(query): + """check if a query looks like an isbn""" + isbn = re.sub(r"[\W_]", "", query) # removes filler characters + return len(isbn) in [10, 13] # ISBN10 or ISBN13 diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 6dcba7c31..e07a0b281 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -10,15 +10,12 @@ class Connector(AbstractMinimalConnector): def get_or_create_book(self, remote_id): return activitypub.resolve_remote_id(remote_id, model=models.Edition) - def parse_search_data(self, data): - return data - - def format_search_result(self, search_result): - search_result["connector"] = self - return SearchResult(**search_result) + def parse_search_data(self, data, min_confidence): + for search_result in data: + search_result["connector"] = self + yield SearchResult(**search_result) def parse_isbn_search_data(self, data): - return data - - def format_isbn_search_result(self, search_result): - return 
self.format_search_result(search_result) + for search_result in data: + search_result["connector"] = self + yield SearchResult(**search_result) diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index 14bb702cb..37b093aa9 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -1,17 +1,18 @@ """ interface with whatever connectors the app has """ -from datetime import datetime +import asyncio import importlib +import ipaddress import logging -import re from urllib.parse import urlparse +import aiohttp from django.dispatch import receiver from django.db.models import signals from requests import HTTPError from bookwyrm import book_search, models -from bookwyrm.settings import SEARCH_TIMEOUT +from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT from bookwyrm.tasks import app logger = logging.getLogger(__name__) @@ -21,53 +22,85 @@ class ConnectorException(HTTPError): """when the connector can't do what was asked""" +async def get_results(session, url, min_confidence, query, connector): + """try this specific connector""" + # pylint: disable=line-too-long + headers = { + "Accept": ( + 'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8' + ), + "User-Agent": USER_AGENT, + } + params = {"min_confidence": min_confidence} + try: + async with session.get(url, headers=headers, params=params) as response: + if not response.ok: + logger.info("Unable to connect to %s: %s", url, response.reason) + return + + try: + raw_data = await response.json() + except aiohttp.client_exceptions.ContentTypeError as err: + logger.exception(err) + return + + return { + "connector": connector, + "results": connector.process_search_response( + query, raw_data, min_confidence + ), + } + except asyncio.TimeoutError: + logger.info("Connection timed out for url: %s", url) + except aiohttp.ClientError as err: + 
logger.exception(err) + + +async def async_connector_search(query, items, min_confidence): + """Try a number of requests simultaneously""" + timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT) + async with aiohttp.ClientSession(timeout=timeout) as session: + tasks = [] + for url, connector in items: + tasks.append( + asyncio.ensure_future( + get_results(session, url, min_confidence, query, connector) + ) + ) + + results = await asyncio.gather(*tasks) + return results + + def search(query, min_confidence=0.1, return_first=False): """find books based on arbitary keywords""" if not query: return [] results = [] - # Have we got a ISBN ? - isbn = re.sub(r"[\W_]", "", query) - maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13 - - start_time = datetime.now() + items = [] for connector in get_connectors(): - result_set = None - if maybe_isbn and connector.isbn_search_url and connector.isbn_search_url != "": - # Search on ISBN - try: - result_set = connector.isbn_search(isbn) - except Exception as err: # pylint: disable=broad-except - logger.info(err) - # if this fails, we can still try regular search + # get the search url from the connector before sending + url = connector.get_search_url(query) + try: + raise_not_valid_url(url) + except ConnectorException: + # if this URL is invalid we should skip it and move on + logger.info("Request denied to blocked domain: %s", url) + continue + items.append((url, connector)) - # if no isbn search results, we fallback to generic search - if not result_set: - try: - result_set = connector.search(query, min_confidence=min_confidence) - except Exception as err: # pylint: disable=broad-except - # we don't want *any* error to crash the whole search page - logger.info(err) - continue - - if return_first and result_set: - # if we found anything, return it - return result_set[0] - - if result_set: - results.append( - { - "connector": connector, - "results": result_set, - } - ) - if (datetime.now() - start_time).seconds >= 
SEARCH_TIMEOUT: - break + # load as many results as we can + results = asyncio.run(async_connector_search(query, items, min_confidence)) + results = [r for r in results if r] if return_first: - return None + # find the best result from all the responses and return that + all_results = [r for con in results for r in con["results"]] + all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True) + return all_results[0] if all_results else None + # failed requests will return None, so filter those out return results @@ -119,6 +152,15 @@ def load_more_data(connector_id, book_id): connector.expand_book_data(book) +@app.task(queue="low_priority") +def create_edition_task(connector_id, work_id, data): + """separate task for each of the 10,000 editions of LoTR""" + connector_info = models.Connector.objects.get(id=connector_id) + connector = load_connector(connector_info) + work = models.Work.objects.select_subclasses().get(id=work_id) + connector.create_edition_from_data(work, data) + + def load_connector(connector_info): """instantiate the connector class""" connector = importlib.import_module( @@ -133,3 +175,20 @@ def create_connector(sender, instance, created, *args, **kwargs): """create a connector to an external bookwyrm server""" if instance.application_type == "bookwyrm": get_or_create_connector(f"https://{instance.server_name}") + + +def raise_not_valid_url(url): + """do some basic reality checks on the url""" + parsed = urlparse(url) + if not parsed.scheme in ["http", "https"]: + raise ConnectorException("Invalid scheme: ", url) + + try: + ipaddress.ip_address(parsed.netloc) + raise ConnectorException("Provided url is an IP address: ", url) + except ValueError: + # it's not an IP address, which is good + pass + + if models.FederatedServer.is_blocked(url): + raise ConnectorException(f"Attempting to load data from blocked url: {url}") diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index a9aeb94f9..3d5f913bd 100644 
--- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -5,7 +5,7 @@ from bookwyrm import models from bookwyrm.book_search import SearchResult from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import get_data -from .connector_manager import ConnectorException +from .connector_manager import ConnectorException, create_edition_task class Connector(AbstractConnector): @@ -77,53 +77,42 @@ class Connector(AbstractConnector): **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, } - def search(self, query, min_confidence=None): # pylint: disable=arguments-differ - """overrides default search function with confidence ranking""" - results = super().search(query) - if min_confidence: - # filter the search results after the fact - return [r for r in results if r.confidence >= min_confidence] - return results - - def parse_search_data(self, data): - return data.get("results") - - def format_search_result(self, search_result): - images = search_result.get("image") - cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None - # a deeply messy translation of inventaire's scores - confidence = float(search_result.get("_score", 0.1)) - confidence = 0.1 if confidence < 150 else 0.999 - return SearchResult( - title=search_result.get("label"), - key=self.get_remote_id(search_result.get("uri")), - author=search_result.get("description"), - view_link=f"{self.base_url}/entity/{search_result.get('uri')}", - cover=cover, - confidence=confidence, - connector=self, - ) + def parse_search_data(self, data, min_confidence): + for search_result in data.get("results", []): + images = search_result.get("image") + cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None + # a deeply messy translation of inventaire's scores + confidence = float(search_result.get("_score", 0.1)) + confidence = 0.1 if confidence < 150 else 0.999 + if confidence < min_confidence: + continue + yield 
SearchResult( + title=search_result.get("label"), + key=self.get_remote_id(search_result.get("uri")), + author=search_result.get("description"), + view_link=f"{self.base_url}/entity/{search_result.get('uri')}", + cover=cover, + confidence=confidence, + connector=self, + ) def parse_isbn_search_data(self, data): """got some daaaata""" results = data.get("entities") if not results: - return [] - return list(results.values()) - - def format_isbn_search_result(self, search_result): - """totally different format than a regular search result""" - title = search_result.get("claims", {}).get("wdt:P1476", []) - if not title: - return None - return SearchResult( - title=title[0], - key=self.get_remote_id(search_result.get("uri")), - author=search_result.get("description"), - view_link=f"{self.base_url}/entity/{search_result.get('uri')}", - cover=self.get_cover_url(search_result.get("image")), - connector=self, - ) + return + for search_result in list(results.values()): + title = search_result.get("claims", {}).get("wdt:P1476", []) + if not title: + continue + yield SearchResult( + title=title[0], + key=self.get_remote_id(search_result.get("uri")), + author=search_result.get("description"), + view_link=f"{self.base_url}/entity/{search_result.get('uri')}", + cover=self.get_cover_url(search_result.get("image")), + connector=self, + ) def is_work_data(self, data): return data.get("type") == "work" @@ -167,12 +156,16 @@ class Connector(AbstractConnector): for edition_uri in edition_options.get("uris"): remote_id = self.get_remote_id(edition_uri) - try: - data = self.get_book_data(remote_id) - except ConnectorException: - # who, indeed, knows - continue - self.create_edition_from_data(work, data) + create_edition_task.delay(self.connector.id, work.id, remote_id) + + def create_edition_from_data(self, work, edition_data, instance=None): + """pass in the url as data and then call the version in abstract connector""" + try: + data = self.get_book_data(edition_data) + except 
ConnectorException: + # who, indeed, knows + return + super().create_edition_from_data(work, data, instance=instance) def get_cover_url(self, cover_blob, *_): """format the relative cover url into an absolute one: diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 118222a16..0fd786660 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -5,7 +5,7 @@ from bookwyrm import models from bookwyrm.book_search import SearchResult from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import get_data, infer_physical_format, unique_physical_format -from .connector_manager import ConnectorException +from .connector_manager import ConnectorException, create_edition_task from .openlibrary_languages import languages @@ -152,39 +152,41 @@ class Connector(AbstractConnector): image_name = f"{cover_id}-{size}.jpg" return f"{self.covers_url}/b/id/{image_name}" - def parse_search_data(self, data): - return data.get("docs") + def parse_search_data(self, data, min_confidence): + for idx, search_result in enumerate(data.get("docs")): + # build the remote id from the openlibrary key + key = self.books_url + search_result["key"] + author = search_result.get("author_name") or ["Unknown"] + cover_blob = search_result.get("cover_i") + cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None - def format_search_result(self, search_result): - # build the remote id from the openlibrary key - key = self.books_url + search_result["key"] - author = search_result.get("author_name") or ["Unknown"] - cover_blob = search_result.get("cover_i") - cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None - return SearchResult( - title=search_result.get("title"), - key=key, - author=", ".join(author), - connector=self, - year=search_result.get("first_publish_year"), - cover=cover, - ) + # OL doesn't provide confidence, but it does sort by an internal ranking, so + # 
this confidence value is relative to the list position + confidence = 1 / (idx + 1) + + yield SearchResult( + title=search_result.get("title"), + key=key, + author=", ".join(author), + connector=self, + year=search_result.get("first_publish_year"), + cover=cover, + confidence=confidence, + ) def parse_isbn_search_data(self, data): - return list(data.values()) - - def format_isbn_search_result(self, search_result): - # build the remote id from the openlibrary key - key = self.books_url + search_result["key"] - authors = search_result.get("authors") or [{"name": "Unknown"}] - author_names = [author.get("name") for author in authors] - return SearchResult( - title=search_result.get("title"), - key=key, - author=", ".join(author_names), - connector=self, - year=search_result.get("publish_date"), - ) + for search_result in list(data.values()): + # build the remote id from the openlibrary key + key = self.books_url + search_result["key"] + authors = search_result.get("authors") or [{"name": "Unknown"}] + author_names = [author.get("name") for author in authors] + yield SearchResult( + title=search_result.get("title"), + key=key, + author=", ".join(author_names), + connector=self, + year=search_result.get("publish_date"), + ) def load_edition_data(self, olkey): """query openlibrary for editions of a work""" @@ -208,7 +210,7 @@ class Connector(AbstractConnector): # does this edition have ANY interesting data? 
if ignore_edition(edition_data): continue - self.create_edition_from_data(work, edition_data) + create_edition_task.delay(self.connector.id, work.id, edition_data) def ignore_edition(edition_data): diff --git a/bookwyrm/forms/forms.py b/bookwyrm/forms/forms.py index 9d8f9f392..4aa1e5758 100644 --- a/bookwyrm/forms/forms.py +++ b/bookwyrm/forms/forms.py @@ -53,7 +53,12 @@ class ReadThroughForm(CustomForm): self.add_error( "finish_date", _("Reading finish date cannot be before start date.") ) + stopped_date = cleaned_data.get("stopped_date") + if start_date and stopped_date and start_date > stopped_date: + self.add_error( + "stopped_date", _("Reading stopped date cannot be before start date.") + ) class Meta: model = models.ReadThrough - fields = ["user", "book", "start_date", "finish_date"] + fields = ["user", "book", "start_date", "finish_date", "stopped_date"] diff --git a/bookwyrm/importers/__init__.py b/bookwyrm/importers/__init__.py index dd3d62e8b..6ce50f160 100644 --- a/bookwyrm/importers/__init__.py +++ b/bookwyrm/importers/__init__.py @@ -1,6 +1,7 @@ """ import classes """ from .importer import Importer +from .calibre_import import CalibreImporter from .goodreads_import import GoodreadsImporter from .librarything_import import LibrarythingImporter from .openlibrary_import import OpenLibraryImporter diff --git a/bookwyrm/importers/calibre_import.py b/bookwyrm/importers/calibre_import.py new file mode 100644 index 000000000..7395e2f7b --- /dev/null +++ b/bookwyrm/importers/calibre_import.py @@ -0,0 +1,28 @@ +""" handle reading a csv from calibre """ +from bookwyrm.models import Shelf + +from . 
import Importer + + +class CalibreImporter(Importer): + """csv downloads from Calibre""" + + service = "Calibre" + + def __init__(self, *args, **kwargs): + # Add timestamp to row_mappings_guesses for date_added to avoid + # integrity error + row_mappings_guesses = [] + + for field, mapping in self.row_mappings_guesses: + if field in ("date_added",): + row_mappings_guesses.append((field, mapping + ["timestamp"])) + else: + row_mappings_guesses.append((field, mapping)) + + self.row_mappings_guesses = row_mappings_guesses + super().__init__(*args, **kwargs) + + def get_shelf(self, normalized_row): + # Calibre export does not indicate which shelf to use. Go with a default one for now + return Shelf.TO_READ diff --git a/bookwyrm/importers/librarything_import.py b/bookwyrm/importers/librarything_import.py index 37730dee3..c6833547d 100644 --- a/bookwyrm/importers/librarything_import.py +++ b/bookwyrm/importers/librarything_import.py @@ -1,5 +1,8 @@ """ handle reading a tsv from librarything """ import re + +from bookwyrm.models import Shelf + from . 
import Importer @@ -21,7 +24,7 @@ class LibrarythingImporter(Importer): def get_shelf(self, normalized_row): if normalized_row["date_finished"]: - return "read" + return Shelf.READ_FINISHED if normalized_row["date_started"]: - return "reading" - return "to-read" + return Shelf.READING + return Shelf.TO_READ diff --git a/bookwyrm/management/commands/generate_preview_images.py b/bookwyrm/management/commands/generate_preview_images.py index 0454e5e51..9ff16c26a 100644 --- a/bookwyrm/management/commands/generate_preview_images.py +++ b/bookwyrm/management/commands/generate_preview_images.py @@ -56,12 +56,17 @@ class Command(BaseCommand): self.stdout.write(" OK 🖼") # Books - books = models.Book.objects.select_subclasses().filter() - self.stdout.write( - " → Book preview images ({}): ".format(len(books)), ending="" + book_ids = ( + models.Book.objects.select_subclasses() + .filter() + .values_list("id", flat=True) ) - for book in books: - preview_images.generate_edition_preview_image_task.delay(book.id) + + self.stdout.write( + " → Book preview images ({}): ".format(len(book_ids)), ending="" + ) + for book_id in book_ids: + preview_images.generate_edition_preview_image_task.delay(book_id) self.stdout.write(".", ending="") self.stdout.write(" OK 🖼") diff --git a/bookwyrm/management/commands/initdb.py b/bookwyrm/management/commands/initdb.py index 160502ca0..23020a0a6 100644 --- a/bookwyrm/management/commands/initdb.py +++ b/bookwyrm/management/commands/initdb.py @@ -89,7 +89,7 @@ def init_connectors(): covers_url="https://inventaire.io", search_url="https://inventaire.io/api/search?types=works&types=works&search=", isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A", - priority=3, + priority=1, ) models.Connector.objects.create( @@ -101,7 +101,7 @@ def init_connectors(): covers_url="https://covers.openlibrary.org", search_url="https://openlibrary.org/search?q=", 
isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:", - priority=3, + priority=1, ) diff --git a/bookwyrm/migrations/0146_auto_20220316_2320.py b/bookwyrm/migrations/0146_auto_20220316_2320.py new file mode 100644 index 000000000..e50bf25ec --- /dev/null +++ b/bookwyrm/migrations/0146_auto_20220316_2320.py @@ -0,0 +1,80 @@ +# Generated by Django 3.2.12 on 2022-03-16 23:20 + +import bookwyrm.models.fields +from django.db import migrations +from bookwyrm.models import Shelf + + +def add_shelves(apps, schema_editor): + """add any superusers to the "admin" group""" + + db_alias = schema_editor.connection.alias + shelf_model = apps.get_model("bookwyrm", "Shelf") + + users = apps.get_model("bookwyrm", "User") + local_users = users.objects.using(db_alias).filter(local=True) + for user in local_users: + remote_id = f"{user.remote_id}/books/stopped" + shelf_model.objects.using(db_alias).create( + name="Stopped reading", + identifier=Shelf.STOPPED_READING, + user=user, + editable=False, + remote_id=remote_id, + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0145_sitesettings_version"), + ] + + operations = [ + migrations.AlterField( + model_name="comment", + name="reading_status", + field=bookwyrm.models.fields.CharField( + blank=True, + choices=[ + ("to-read", "To-Read"), + ("reading", "Reading"), + ("read", "Read"), + ("stopped-reading", "Stopped-Reading"), + ], + max_length=255, + null=True, + ), + ), + migrations.AlterField( + model_name="quotation", + name="reading_status", + field=bookwyrm.models.fields.CharField( + blank=True, + choices=[ + ("to-read", "To-Read"), + ("reading", "Reading"), + ("read", "Read"), + ("stopped-reading", "Stopped-Reading"), + ], + max_length=255, + null=True, + ), + ), + migrations.AlterField( + model_name="review", + name="reading_status", + field=bookwyrm.models.fields.CharField( + blank=True, + choices=[ + ("to-read", "To-Read"), + ("reading", "Reading"), + ("read", 
"Read"), + ("stopped-reading", "Stopped-Reading"), + ], + max_length=255, + null=True, + ), + ), + migrations.RunPython(add_shelves, reverse_code=migrations.RunPython.noop), + ] diff --git a/bookwyrm/migrations/0148_merge_20220326_2006.py b/bookwyrm/migrations/0148_merge_20220326_2006.py new file mode 100644 index 000000000..978662765 --- /dev/null +++ b/bookwyrm/migrations/0148_merge_20220326_2006.py @@ -0,0 +1,13 @@ +# Generated by Django 3.2.12 on 2022-03-26 20:06 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0146_auto_20220316_2320"), + ("bookwyrm", "0147_alter_user_preferred_language"), + ] + + operations = [] diff --git a/bookwyrm/migrations/0149_merge_20220526_1716.py b/bookwyrm/migrations/0149_merge_20220526_1716.py new file mode 100644 index 000000000..b42bccd3b --- /dev/null +++ b/bookwyrm/migrations/0149_merge_20220526_1716.py @@ -0,0 +1,13 @@ +# Generated by Django 3.2.13 on 2022-05-26 17:16 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0148_alter_user_preferred_language"), + ("bookwyrm", "0148_merge_20220326_2006"), + ] + + operations = [] diff --git a/bookwyrm/migrations/0150_readthrough_stopped_date.py b/bookwyrm/migrations/0150_readthrough_stopped_date.py new file mode 100644 index 000000000..6ce2f89a9 --- /dev/null +++ b/bookwyrm/migrations/0150_readthrough_stopped_date.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.13 on 2022-05-26 18:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0149_merge_20220526_1716"), + ] + + operations = [ + migrations.AddField( + model_name="readthrough", + name="stopped_date", + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 3ea8e1a8e..190046019 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -176,8 
+176,8 @@ class Book(BookDataModel): """properties of this edition, as a string""" items = [ self.physical_format if hasattr(self, "physical_format") else None, - self.languages[0] + " language" - if self.languages and self.languages[0] != "English" + f"{self.languages[0]} language" + if self.languages and self.languages[0] and self.languages[0] != "English" else None, str(self.published_date.year) if self.published_date else None, ", ".join(self.publishers) if hasattr(self, "publishers") else None, diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index bcba391b6..556f133f9 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -175,9 +175,15 @@ class ImportItem(models.Model): def date_added(self): """when the book was added to this dataset""" if self.normalized_data.get("date_added"): - return timezone.make_aware( - dateutil.parser.parse(self.normalized_data.get("date_added")) + parsed_date_added = dateutil.parser.parse( + self.normalized_data.get("date_added") ) + + if timezone.is_aware(parsed_date_added): + # Keep timezone if import already had one + return parsed_date_added + + return timezone.make_aware(parsed_date_added) return None @property diff --git a/bookwyrm/models/readthrough.py b/bookwyrm/models/readthrough.py index ceb8e0b6e..314b40a5c 100644 --- a/bookwyrm/models/readthrough.py +++ b/bookwyrm/models/readthrough.py @@ -27,6 +27,7 @@ class ReadThrough(BookWyrmModel): ) start_date = models.DateTimeField(blank=True, null=True) finish_date = models.DateTimeField(blank=True, null=True) + stopped_date = models.DateTimeField(blank=True, null=True) is_active = models.BooleanField(default=True) def save(self, *args, **kwargs): @@ -34,7 +35,7 @@ class ReadThrough(BookWyrmModel): cache.delete(f"latest_read_through-{self.user.id}-{self.book.id}") self.user.update_active_date() # an active readthrough must have an unset finish date - if self.finish_date: + if self.finish_date or self.stopped_date: 
self.is_active = False super().save(*args, **kwargs) diff --git a/bookwyrm/models/shelf.py b/bookwyrm/models/shelf.py index 8ea274ea1..3291d5653 100644 --- a/bookwyrm/models/shelf.py +++ b/bookwyrm/models/shelf.py @@ -18,8 +18,9 @@ class Shelf(OrderedCollectionMixin, BookWyrmModel): TO_READ = "to-read" READING = "reading" READ_FINISHED = "read" + STOPPED_READING = "stopped-reading" - READ_STATUS_IDENTIFIERS = (TO_READ, READING, READ_FINISHED) + READ_STATUS_IDENTIFIERS = (TO_READ, READING, READ_FINISHED, STOPPED_READING) name = fields.CharField(max_length=100) identifier = models.CharField(max_length=100) diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index 17fcd4587..3949e09a9 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -116,11 +116,8 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): def ignore_activity(cls, activity): # pylint: disable=too-many-return-statements """keep notes if they are replies to existing statuses""" if activity.type == "Announce": - try: - boosted = activitypub.resolve_remote_id( - activity.object, get_activity=True - ) - except activitypub.ActivitySerializerError: + boosted = activitypub.resolve_remote_id(activity.object, get_activity=True) + if not boosted: # if we can't load the status, definitely ignore it return True # keep the boost if we would keep the status @@ -265,7 +262,7 @@ class GeneratedNote(Status): ReadingStatusChoices = models.TextChoices( - "ReadingStatusChoices", ["to-read", "reading", "read"] + "ReadingStatusChoices", ["to-read", "reading", "read", "stopped-reading"] ) @@ -306,10 +303,17 @@ class Comment(BookStatus): @property def pure_content(self): """indicate the book in question for mastodon (or w/e) users""" - return ( - f'{self.content}

(comment on ' - f'"{self.book.title}")

' - ) + if self.progress_mode == "PG" and self.progress and (self.progress > 0): + return_value = ( + f'{self.content}

(comment on ' + f'"{self.book.title}", page {self.progress})

' + ) + else: + return_value = ( + f'{self.content}

(comment on ' + f'"{self.book.title}")

' + ) + return return_value activity_serializer = activitypub.Comment @@ -335,10 +339,17 @@ class Quotation(BookStatus): """indicate the book in question for mastodon (or w/e) users""" quote = re.sub(r"^

", '

"', self.quote) quote = re.sub(r"

$", '"

', quote) - return ( - f'{quote}

-- ' - f'"{self.book.title}"

{self.content}' - ) + if self.position_mode == "PG" and self.position and (self.position > 0): + return_value = ( + f'{quote}

-- ' + f'"{self.book.title}", page {self.position}

{self.content}' + ) + else: + return_value = ( + f'{quote}

-- ' + f'"{self.book.title}"

{self.content}' + ) + return return_value activity_serializer = activitypub.Quotation @@ -377,7 +388,7 @@ class Review(BookStatus): def save(self, *args, **kwargs): """clear rating caches""" if self.book.parent_work: - cache.delete(f"book-rating-{self.book.parent_work.id}-*") + cache.delete(f"book-rating-{self.book.parent_work.id}") super().save(*args, **kwargs) diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index be5c19922..dce74022c 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -374,6 +374,10 @@ class User(OrderedCollectionPageMixin, AbstractUser): "name": "Read", "identifier": "read", }, + { + "name": "Stopped Reading", + "identifier": "stopped-reading", + }, ] for shelf in shelves: diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 416610e49..dc0d71f30 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -11,7 +11,7 @@ from django.utils.translation import gettext_lazy as _ env = Env() env.read_env() DOMAIN = env("DOMAIN") -VERSION = "0.3.4" +VERSION = "0.4.0" RELEASE_API = env( "RELEASE_API", @@ -21,7 +21,7 @@ RELEASE_API = env( PAGE_LENGTH = env("PAGE_LENGTH", 15) DEFAULT_LANGUAGE = env("DEFAULT_LANGUAGE", "English") -JS_CACHE = "bc93172a" +JS_CACHE = "e678183b" # email EMAIL_BACKEND = env("EMAIL_BACKEND", "django.core.mail.backends.smtp.EmailBackend") @@ -212,7 +212,7 @@ STREAMS = [ # Search configuration # total time in seconds that the instance will spend searching connectors -SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 15)) +SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8)) # timeout for a query to an individual connector QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5)) diff --git a/bookwyrm/static/js/status_cache.js b/bookwyrm/static/js/status_cache.js index b19489c1d..0a9f3abc5 100644 --- a/bookwyrm/static/js/status_cache.js +++ b/bookwyrm/static/js/status_cache.js @@ -203,6 +203,8 @@ let StatusCache = new (class { .forEach((item) => (item.disabled = false)); next_identifier = next_identifier == 
"complete" ? "read" : next_identifier; + next_identifier = + next_identifier == "stopped-reading-complete" ? "stopped-reading" : next_identifier; // Disable the current state button.querySelector( diff --git a/bookwyrm/templates/about/layout.html b/bookwyrm/templates/about/layout.html index 458e4b1d1..e921fcd29 100644 --- a/bookwyrm/templates/about/layout.html +++ b/bookwyrm/templates/about/layout.html @@ -50,7 +50,7 @@ -
+
{% block about_content %}{% endblock %}
diff --git a/bookwyrm/templates/author/edit_author.html b/bookwyrm/templates/author/edit_author.html index 6f72b8700..b0727c43b 100644 --- a/bookwyrm/templates/author/edit_author.html +++ b/bookwyrm/templates/author/edit_author.html @@ -24,7 +24,7 @@ {% endif %} -
+ {% csrf_token %} diff --git a/bookwyrm/templates/book/edit/edit_book.html b/bookwyrm/templates/book/edit/edit_book.html index b088c1e87..e5b865b55 100644 --- a/bookwyrm/templates/book/edit/edit_book.html +++ b/bookwyrm/templates/book/edit/edit_book.html @@ -41,10 +41,18 @@ class="block" {% if book.id %} name="edit-book" - action="{{ book.local_path }}/{% if confirm_mode %}confirm{% else %}edit{% endif %}" + {% if confirm_mode %} + action="{% url 'edit-book-confirm' book.id %}" + {% else %} + action="{% url 'edit-book' book.id %}" + {% endif %} {% else %} name="create-book" - action="/create-book{% if confirm_mode %}/confirm{% endif %}" + {% if confirm_mode %} + action="{% url 'create-book-confirm' %}" + {% else %} + action="{% url 'create-book' %}" + {% endif %} {% endif %} method="post" enctype="multipart/form-data" diff --git a/bookwyrm/templates/get_started/book_preview.html b/bookwyrm/templates/get_started/book_preview.html index 8a20d0d77..9cfb56b00 100644 --- a/bookwyrm/templates/get_started/book_preview.html +++ b/bookwyrm/templates/get_started/book_preview.html @@ -10,6 +10,7 @@ {% if shelf.identifier == 'to-read' %}{% trans "To Read" %} {% elif shelf.identifier == 'reading' %}{% trans "Currently Reading" %} {% elif shelf.identifier == 'read' %}{% trans "Read" %} + {% elif shelf.identifier == 'stopped-reading' %}{% trans "Stopped Reading" %} {% else %}{{ shelf.name }}{% endif %} {% endfor %} diff --git a/bookwyrm/templates/import/import.html b/bookwyrm/templates/import/import.html index 6df7c0843..fc00389c5 100644 --- a/bookwyrm/templates/import/import.html +++ b/bookwyrm/templates/import/import.html @@ -32,6 +32,9 @@ + diff --git a/bookwyrm/templates/reading_progress/stop.html b/bookwyrm/templates/reading_progress/stop.html new file mode 100644 index 000000000..5811c09b6 --- /dev/null +++ b/bookwyrm/templates/reading_progress/stop.html @@ -0,0 +1,14 @@ +{% extends 'layout.html' %} +{% load i18n %} + +{% block title %} +{% blocktrans trimmed with 
book_title=book.title %} +Stop Reading "{{ book_title }}" +{% endblocktrans %} +{% endblock %} + +{% block content %} + +{% include "snippets/reading_modals/stop_reading_modal.html" with book=book active=True static=True %} + +{% endblock %} diff --git a/bookwyrm/templates/readthrough/readthrough_form.html b/bookwyrm/templates/readthrough/readthrough_form.html index 1558dada4..45c92043a 100644 --- a/bookwyrm/templates/readthrough/readthrough_form.html +++ b/bookwyrm/templates/readthrough/readthrough_form.html @@ -19,6 +19,7 @@ {% include "snippets/progress_field.html" with id=field_id %} {% endif %} +