diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 08661e9c2..a3117f7cb 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -21,8 +21,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- pip install pylint
- name: Analysing the code with pylint
run: |
- pylint bookwyrm/ --ignore=migrations --disable=E1101,E1135,E1136,R0903,R0901,R0902,W0707,W0511,W0406,R0401,R0801
+ pylint bookwyrm/
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 000000000..7f92d0168
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,6 @@
+[MAIN]
+ignore=migrations
+load-plugins=pylint.extensions.no_self_use
+
+[MESSAGES CONTROL]
+disable=E1101,E1135,E1136,R0903,R0901,R0902,W0707,W0511,W0406,R0401,R0801,C3001
diff --git a/Dockerfile b/Dockerfile
index 349dd82b1..b3cd26e88 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,7 @@ RUN mkdir /app /app/static /app/images
WORKDIR /app
+RUN apt-get update && apt-get install -y gettext libgettextpo-dev tidy && apt-get clean
+
COPY requirements.txt /app/
RUN pip install -r requirements.txt --no-cache-dir
-RUN apt-get update && apt-get install -y gettext libgettextpo-dev tidy && apt-get clean
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..c4e5e9cf9
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,5 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+Please report security issues to `mousereeve@riseup.net`
\ No newline at end of file
diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py
index 6bee25f62..fa1535694 100644
--- a/bookwyrm/activitypub/base_activity.py
+++ b/bookwyrm/activitypub/base_activity.py
@@ -1,6 +1,7 @@
""" basics for an activitypub serializer """
from dataclasses import dataclass, fields, MISSING
from json import JSONEncoder
+import logging
from django.apps import apps
from django.db import IntegrityError, transaction
@@ -8,6 +9,8 @@ from django.db import IntegrityError, transaction
from bookwyrm.connectors import ConnectorException, get_data
from bookwyrm.tasks import app
+logger = logging.getLogger(__name__)
+
class ActivitySerializerError(ValueError):
"""routine problems serializing activitypub json"""
@@ -39,12 +42,12 @@ def naive_parse(activity_objects, activity_json, serializer=None):
activity_json["type"] = "PublicKey"
activity_type = activity_json.get("type")
+ if activity_type in ["Question", "Article"]:
+ return None
try:
serializer = activity_objects[activity_type]
except KeyError as err:
# we know this exists and that we can't handle it
- if activity_type in ["Question"]:
- return None
raise ActivitySerializerError(err)
return serializer(activity_objects=activity_objects, **activity_json)
@@ -65,7 +68,7 @@ class ActivityObject:
try:
value = kwargs[field.name]
if value in (None, MISSING, {}):
- raise KeyError()
+ raise KeyError("Missing required field", field.name)
try:
is_subclass = issubclass(field.type, ActivityObject)
except TypeError:
@@ -268,9 +271,9 @@ def resolve_remote_id(
try:
data = get_data(remote_id)
except ConnectorException:
- raise ActivitySerializerError(
- f"Could not connect to host for remote_id: {remote_id}"
- )
+ logger.exception("Could not connect to host for remote_id: %s", remote_id)
+ return None
+
# determine the model implicitly, if not provided
# or if it's a model with subclasses like Status, check again
if not model or hasattr(model.objects, "select_subclasses"):
diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py
index f2dd43fb2..a90d7943b 100644
--- a/bookwyrm/activitystreams.py
+++ b/bookwyrm/activitystreams.py
@@ -298,8 +298,9 @@ def add_status_on_create_command(sender, instance, created):
priority = HIGH
# check if this is an old status, de-prioritize if so
# (this will happen if federation is very slow, or, more expectedly, on csv import)
- one_day = 60 * 60 * 24
- if (instance.created_date - instance.published_date).seconds > one_day:
+ if instance.published_date < timezone.now() - timedelta(
+ days=1
+ ) or instance.created_date < instance.published_date - timedelta(days=1):
priority = LOW
add_status_task.apply_async(
diff --git a/bookwyrm/book_search.py b/bookwyrm/book_search.py
index e42a6d8c3..4b0a6eab9 100644
--- a/bookwyrm/book_search.py
+++ b/bookwyrm/book_search.py
@@ -148,8 +148,8 @@ class SearchResult:
def __repr__(self):
# pylint: disable=consider-using-f-string
- return "<SearchResult key={!r} title={!r} author={!r}>".format(
- self.key, self.title, self.author
+ return "<SearchResult key={!r} title={!r} author={!r} confidence={}>".format(
+ self.key, self.title, self.author, self.confidence
)
def json(self):
diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py
index 56e273886..dc4be4b3d 100644
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@@ -1,9 +1,8 @@
""" functionality outline for a book data connector """
from abc import ABC, abstractmethod
import imghdr
-import ipaddress
import logging
-from urllib.parse import urlparse
+import re
from django.core.files.base import ContentFile
from django.db import transaction
@@ -11,7 +10,7 @@ import requests
from requests.exceptions import RequestException
from bookwyrm import activitypub, models, settings
-from .connector_manager import load_more_data, ConnectorException
+from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
from .format_mappings import format_mappings
@@ -39,62 +38,34 @@ class AbstractMinimalConnector(ABC):
for field in self_fields:
setattr(self, field, getattr(info, field))
- def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
- """free text search"""
- params = {}
- if min_confidence:
- params["min_confidence"] = min_confidence
+ def get_search_url(self, query):
+ """format the query url"""
+ # Check if the query resembles an ISBN
+ if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
+ return f"{self.isbn_search_url}{query}"
- data = self.get_search_data(
- f"{self.search_url}{query}",
- params=params,
- timeout=timeout,
- )
- results = []
+ # NOTE: previously, we tried searching isbn and if that produces no results,
+ # searched as free text. This, instead, only searches isbn if it's isbn-y
+ return f"{self.search_url}{query}"
- for doc in self.parse_search_data(data)[:10]:
- results.append(self.format_search_result(doc))
- return results
-
- def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
- """isbn search"""
- params = {}
- data = self.get_search_data(
- f"{self.isbn_search_url}{query}",
- params=params,
- timeout=timeout,
- )
- results = []
-
- # this shouldn't be returning mutliple results, but just in case
- for doc in self.parse_isbn_search_data(data)[:10]:
- results.append(self.format_isbn_search_result(doc))
- return results
-
- def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use
- """this allows connectors to override the default behavior"""
- return get_data(remote_id, **kwargs)
+ def process_search_response(self, query, data, min_confidence):
+ """Format the search results based on the format of the query"""
+ if maybe_isbn(query):
+ return list(self.parse_isbn_search_data(data))[:10]
+ return list(self.parse_search_data(data, min_confidence))[:10]
@abstractmethod
def get_or_create_book(self, remote_id):
"""pull up a book record by whatever means possible"""
@abstractmethod
- def parse_search_data(self, data):
+ def parse_search_data(self, data, min_confidence):
"""turn the result json from a search into a list"""
- @abstractmethod
- def format_search_result(self, search_result):
- """create a SearchResult obj from json"""
-
@abstractmethod
def parse_isbn_search_data(self, data):
"""turn the result json from a search into a list"""
- @abstractmethod
- def format_isbn_search_result(self, search_result):
- """create a SearchResult obj from json"""
-
class AbstractConnector(AbstractMinimalConnector):
"""generic book data connector"""
@@ -254,9 +225,6 @@ def get_data(url, params=None, timeout=10):
# check if the url is blocked
raise_not_valid_url(url)
- if models.FederatedServer.is_blocked(url):
- raise ConnectorException(f"Attempting to load data from blocked url: {url}")
-
try:
resp = requests.get(
url,
@@ -311,20 +279,6 @@ def get_image(url, timeout=10):
return image_content, extension
-def raise_not_valid_url(url):
- """do some basic reality checks on the url"""
- parsed = urlparse(url)
- if not parsed.scheme in ["http", "https"]:
- raise ConnectorException("Invalid scheme: ", url)
-
- try:
- ipaddress.ip_address(parsed.netloc)
- raise ConnectorException("Provided url is an IP address: ", url)
- except ValueError:
- # it's not an IP address, which is good
- pass
-
-
class Mapping:
"""associate a local database field with a field in an external dataset"""
@@ -366,3 +320,9 @@ def unique_physical_format(format_text):
# try a direct match, so saving this would be redundant
return None
return format_text
+
+
+def maybe_isbn(query):
+ """check if a query looks like an isbn"""
+ isbn = re.sub(r"[\W_]", "", query) # removes filler characters
+ return len(isbn) in [10, 13] # ISBN10 or ISBN13
diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py
index 6dcba7c31..e07a0b281 100644
--- a/bookwyrm/connectors/bookwyrm_connector.py
+++ b/bookwyrm/connectors/bookwyrm_connector.py
@@ -10,15 +10,12 @@ class Connector(AbstractMinimalConnector):
def get_or_create_book(self, remote_id):
return activitypub.resolve_remote_id(remote_id, model=models.Edition)
- def parse_search_data(self, data):
- return data
-
- def format_search_result(self, search_result):
- search_result["connector"] = self
- return SearchResult(**search_result)
+ def parse_search_data(self, data, min_confidence):
+ for search_result in data:
+ search_result["connector"] = self
+ yield SearchResult(**search_result)
def parse_isbn_search_data(self, data):
- return data
-
- def format_isbn_search_result(self, search_result):
- return self.format_search_result(search_result)
+ for search_result in data:
+ search_result["connector"] = self
+ yield SearchResult(**search_result)
diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py
index 14bb702cb..37b093aa9 100644
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@@ -1,17 +1,18 @@
""" interface with whatever connectors the app has """
-from datetime import datetime
+import asyncio
import importlib
+import ipaddress
import logging
-import re
from urllib.parse import urlparse
+import aiohttp
from django.dispatch import receiver
from django.db.models import signals
from requests import HTTPError
from bookwyrm import book_search, models
-from bookwyrm.settings import SEARCH_TIMEOUT
+from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
from bookwyrm.tasks import app
logger = logging.getLogger(__name__)
@@ -21,53 +22,85 @@ class ConnectorException(HTTPError):
"""when the connector can't do what was asked"""
+async def get_results(session, url, min_confidence, query, connector):
+ """try this specific connector"""
+ # pylint: disable=line-too-long
+ headers = {
+ "Accept": (
+ 'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
+ ),
+ "User-Agent": USER_AGENT,
+ }
+ params = {"min_confidence": min_confidence}
+ try:
+ async with session.get(url, headers=headers, params=params) as response:
+ if not response.ok:
+ logger.info("Unable to connect to %s: %s", url, response.reason)
+ return
+
+ try:
+ raw_data = await response.json()
+ except aiohttp.client_exceptions.ContentTypeError as err:
+ logger.exception(err)
+ return
+
+ return {
+ "connector": connector,
+ "results": connector.process_search_response(
+ query, raw_data, min_confidence
+ ),
+ }
+ except asyncio.TimeoutError:
+ logger.info("Connection timed out for url: %s", url)
+ except aiohttp.ClientError as err:
+ logger.exception(err)
+
+
+async def async_connector_search(query, items, min_confidence):
+ """Try a number of requests simultaneously"""
+ timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
+ async with aiohttp.ClientSession(timeout=timeout) as session:
+ tasks = []
+ for url, connector in items:
+ tasks.append(
+ asyncio.ensure_future(
+ get_results(session, url, min_confidence, query, connector)
+ )
+ )
+
+ results = await asyncio.gather(*tasks)
+ return results
+
+
def search(query, min_confidence=0.1, return_first=False):
"""find books based on arbitary keywords"""
if not query:
return []
results = []
- # Have we got a ISBN ?
- isbn = re.sub(r"[\W_]", "", query)
- maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
-
- start_time = datetime.now()
+ items = []
for connector in get_connectors():
- result_set = None
- if maybe_isbn and connector.isbn_search_url and connector.isbn_search_url != "":
- # Search on ISBN
- try:
- result_set = connector.isbn_search(isbn)
- except Exception as err: # pylint: disable=broad-except
- logger.info(err)
- # if this fails, we can still try regular search
+ # get the search url from the connector before sending
+ url = connector.get_search_url(query)
+ try:
+ raise_not_valid_url(url)
+ except ConnectorException:
+ # if this URL is invalid we should skip it and move on
+ logger.info("Request denied to blocked domain: %s", url)
+ continue
+ items.append((url, connector))
- # if no isbn search results, we fallback to generic search
- if not result_set:
- try:
- result_set = connector.search(query, min_confidence=min_confidence)
- except Exception as err: # pylint: disable=broad-except
- # we don't want *any* error to crash the whole search page
- logger.info(err)
- continue
-
- if return_first and result_set:
- # if we found anything, return it
- return result_set[0]
-
- if result_set:
- results.append(
- {
- "connector": connector,
- "results": result_set,
- }
- )
- if (datetime.now() - start_time).seconds >= SEARCH_TIMEOUT:
- break
+ # load as many results as we can
+ results = asyncio.run(async_connector_search(query, items, min_confidence))
+ results = [r for r in results if r]
if return_first:
- return None
+ # find the best result from all the responses and return that
+ all_results = [r for con in results for r in con["results"]]
+ all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True)
+ return all_results[0] if all_results else None
+ # failed requests will return None, so filter those out
return results
@@ -119,6 +152,15 @@ def load_more_data(connector_id, book_id):
connector.expand_book_data(book)
+@app.task(queue="low_priority")
+def create_edition_task(connector_id, work_id, data):
+ """separate task for each of the 10,000 editions of LoTR"""
+ connector_info = models.Connector.objects.get(id=connector_id)
+ connector = load_connector(connector_info)
+ work = models.Work.objects.select_subclasses().get(id=work_id)
+ connector.create_edition_from_data(work, data)
+
+
def load_connector(connector_info):
"""instantiate the connector class"""
connector = importlib.import_module(
@@ -133,3 +175,20 @@ def create_connector(sender, instance, created, *args, **kwargs):
"""create a connector to an external bookwyrm server"""
if instance.application_type == "bookwyrm":
get_or_create_connector(f"https://{instance.server_name}")
+
+
+def raise_not_valid_url(url):
+ """do some basic reality checks on the url"""
+ parsed = urlparse(url)
+ if not parsed.scheme in ["http", "https"]:
+ raise ConnectorException("Invalid scheme: ", url)
+
+ try:
+ ipaddress.ip_address(parsed.netloc)
+ raise ConnectorException("Provided url is an IP address: ", url)
+ except ValueError:
+ # it's not an IP address, which is good
+ pass
+
+ if models.FederatedServer.is_blocked(url):
+ raise ConnectorException(f"Attempting to load data from blocked url: {url}")
diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py
index a9aeb94f9..3d5f913bd 100644
--- a/bookwyrm/connectors/inventaire.py
+++ b/bookwyrm/connectors/inventaire.py
@@ -5,7 +5,7 @@ from bookwyrm import models
from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping
from .abstract_connector import get_data
-from .connector_manager import ConnectorException
+from .connector_manager import ConnectorException, create_edition_task
class Connector(AbstractConnector):
@@ -77,53 +77,42 @@ class Connector(AbstractConnector):
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
}
- def search(self, query, min_confidence=None): # pylint: disable=arguments-differ
- """overrides default search function with confidence ranking"""
- results = super().search(query)
- if min_confidence:
- # filter the search results after the fact
- return [r for r in results if r.confidence >= min_confidence]
- return results
-
- def parse_search_data(self, data):
- return data.get("results")
-
- def format_search_result(self, search_result):
- images = search_result.get("image")
- cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
- # a deeply messy translation of inventaire's scores
- confidence = float(search_result.get("_score", 0.1))
- confidence = 0.1 if confidence < 150 else 0.999
- return SearchResult(
- title=search_result.get("label"),
- key=self.get_remote_id(search_result.get("uri")),
- author=search_result.get("description"),
- view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
- cover=cover,
- confidence=confidence,
- connector=self,
- )
+ def parse_search_data(self, data, min_confidence):
+ for search_result in data.get("results", []):
+ images = search_result.get("image")
+ cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
+ # a deeply messy translation of inventaire's scores
+ confidence = float(search_result.get("_score", 0.1))
+ confidence = 0.1 if confidence < 150 else 0.999
+ if confidence < min_confidence:
+ continue
+ yield SearchResult(
+ title=search_result.get("label"),
+ key=self.get_remote_id(search_result.get("uri")),
+ author=search_result.get("description"),
+ view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
+ cover=cover,
+ confidence=confidence,
+ connector=self,
+ )
def parse_isbn_search_data(self, data):
"""got some daaaata"""
results = data.get("entities")
if not results:
- return []
- return list(results.values())
-
- def format_isbn_search_result(self, search_result):
- """totally different format than a regular search result"""
- title = search_result.get("claims", {}).get("wdt:P1476", [])
- if not title:
- return None
- return SearchResult(
- title=title[0],
- key=self.get_remote_id(search_result.get("uri")),
- author=search_result.get("description"),
- view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
- cover=self.get_cover_url(search_result.get("image")),
- connector=self,
- )
+ return
+ for search_result in list(results.values()):
+ title = search_result.get("claims", {}).get("wdt:P1476", [])
+ if not title:
+ continue
+ yield SearchResult(
+ title=title[0],
+ key=self.get_remote_id(search_result.get("uri")),
+ author=search_result.get("description"),
+ view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
+ cover=self.get_cover_url(search_result.get("image")),
+ connector=self,
+ )
def is_work_data(self, data):
return data.get("type") == "work"
@@ -167,12 +156,16 @@ class Connector(AbstractConnector):
for edition_uri in edition_options.get("uris"):
remote_id = self.get_remote_id(edition_uri)
- try:
- data = self.get_book_data(remote_id)
- except ConnectorException:
- # who, indeed, knows
- continue
- self.create_edition_from_data(work, data)
+ create_edition_task.delay(self.connector.id, work.id, remote_id)
+
+ def create_edition_from_data(self, work, edition_data, instance=None):
+ """pass in the url as data and then call the version in abstract connector"""
+ try:
+ data = self.get_book_data(edition_data)
+ except ConnectorException:
+ # who, indeed, knows
+ return
+ super().create_edition_from_data(work, data, instance=instance)
def get_cover_url(self, cover_blob, *_):
"""format the relative cover url into an absolute one:
diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py
index 118222a16..0fd786660 100644
--- a/bookwyrm/connectors/openlibrary.py
+++ b/bookwyrm/connectors/openlibrary.py
@@ -5,7 +5,7 @@ from bookwyrm import models
from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping
from .abstract_connector import get_data, infer_physical_format, unique_physical_format
-from .connector_manager import ConnectorException
+from .connector_manager import ConnectorException, create_edition_task
from .openlibrary_languages import languages
@@ -152,39 +152,41 @@ class Connector(AbstractConnector):
image_name = f"{cover_id}-{size}.jpg"
return f"{self.covers_url}/b/id/{image_name}"
- def parse_search_data(self, data):
- return data.get("docs")
+ def parse_search_data(self, data, min_confidence):
+ for idx, search_result in enumerate(data.get("docs")):
+ # build the remote id from the openlibrary key
+ key = self.books_url + search_result["key"]
+ author = search_result.get("author_name") or ["Unknown"]
+ cover_blob = search_result.get("cover_i")
+ cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
- def format_search_result(self, search_result):
- # build the remote id from the openlibrary key
- key = self.books_url + search_result["key"]
- author = search_result.get("author_name") or ["Unknown"]
- cover_blob = search_result.get("cover_i")
- cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
- return SearchResult(
- title=search_result.get("title"),
- key=key,
- author=", ".join(author),
- connector=self,
- year=search_result.get("first_publish_year"),
- cover=cover,
- )
+ # OL doesn't provide confidence, but it does sort by an internal ranking, so
+ # this confidence value is relative to the list position
+ confidence = 1 / (idx + 1)
+
+ yield SearchResult(
+ title=search_result.get("title"),
+ key=key,
+ author=", ".join(author),
+ connector=self,
+ year=search_result.get("first_publish_year"),
+ cover=cover,
+ confidence=confidence,
+ )
def parse_isbn_search_data(self, data):
- return list(data.values())
-
- def format_isbn_search_result(self, search_result):
- # build the remote id from the openlibrary key
- key = self.books_url + search_result["key"]
- authors = search_result.get("authors") or [{"name": "Unknown"}]
- author_names = [author.get("name") for author in authors]
- return SearchResult(
- title=search_result.get("title"),
- key=key,
- author=", ".join(author_names),
- connector=self,
- year=search_result.get("publish_date"),
- )
+ for search_result in list(data.values()):
+ # build the remote id from the openlibrary key
+ key = self.books_url + search_result["key"]
+ authors = search_result.get("authors") or [{"name": "Unknown"}]
+ author_names = [author.get("name") for author in authors]
+ yield SearchResult(
+ title=search_result.get("title"),
+ key=key,
+ author=", ".join(author_names),
+ connector=self,
+ year=search_result.get("publish_date"),
+ )
def load_edition_data(self, olkey):
"""query openlibrary for editions of a work"""
@@ -208,7 +210,7 @@ class Connector(AbstractConnector):
# does this edition have ANY interesting data?
if ignore_edition(edition_data):
continue
- self.create_edition_from_data(work, edition_data)
+ create_edition_task.delay(self.connector.id, work.id, edition_data)
def ignore_edition(edition_data):
diff --git a/bookwyrm/forms/forms.py b/bookwyrm/forms/forms.py
index 9d8f9f392..4aa1e5758 100644
--- a/bookwyrm/forms/forms.py
+++ b/bookwyrm/forms/forms.py
@@ -53,7 +53,12 @@ class ReadThroughForm(CustomForm):
self.add_error(
"finish_date", _("Reading finish date cannot be before start date.")
)
+ stopped_date = cleaned_data.get("stopped_date")
+ if start_date and stopped_date and start_date > stopped_date:
+ self.add_error(
+ "stopped_date", _("Reading stopped date cannot be before start date.")
+ )
class Meta:
model = models.ReadThrough
- fields = ["user", "book", "start_date", "finish_date"]
+ fields = ["user", "book", "start_date", "finish_date", "stopped_date"]
diff --git a/bookwyrm/importers/__init__.py b/bookwyrm/importers/__init__.py
index dd3d62e8b..6ce50f160 100644
--- a/bookwyrm/importers/__init__.py
+++ b/bookwyrm/importers/__init__.py
@@ -1,6 +1,7 @@
""" import classes """
from .importer import Importer
+from .calibre_import import CalibreImporter
from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter
from .openlibrary_import import OpenLibraryImporter
diff --git a/bookwyrm/importers/calibre_import.py b/bookwyrm/importers/calibre_import.py
new file mode 100644
index 000000000..7395e2f7b
--- /dev/null
+++ b/bookwyrm/importers/calibre_import.py
@@ -0,0 +1,28 @@
+""" handle reading a csv from calibre """
+from bookwyrm.models import Shelf
+
+from . import Importer
+
+
+class CalibreImporter(Importer):
+ """csv downloads from Calibre"""
+
+ service = "Calibre"
+
+ def __init__(self, *args, **kwargs):
+ # Add timestamp to row_mappings_guesses for date_added to avoid
+ # integrity error
+ row_mappings_guesses = []
+
+ for field, mapping in self.row_mappings_guesses:
+ if field in ("date_added",):
+ row_mappings_guesses.append((field, mapping + ["timestamp"]))
+ else:
+ row_mappings_guesses.append((field, mapping))
+
+ self.row_mappings_guesses = row_mappings_guesses
+ super().__init__(*args, **kwargs)
+
+ def get_shelf(self, normalized_row):
+ # Calibre export does not indicate which shelf to use. Go with a default one for now
+ return Shelf.TO_READ
diff --git a/bookwyrm/importers/librarything_import.py b/bookwyrm/importers/librarything_import.py
index 37730dee3..c6833547d 100644
--- a/bookwyrm/importers/librarything_import.py
+++ b/bookwyrm/importers/librarything_import.py
@@ -1,5 +1,8 @@
""" handle reading a tsv from librarything """
import re
+
+from bookwyrm.models import Shelf
+
from . import Importer
@@ -21,7 +24,7 @@ class LibrarythingImporter(Importer):
def get_shelf(self, normalized_row):
if normalized_row["date_finished"]:
- return "read"
+ return Shelf.READ_FINISHED
if normalized_row["date_started"]:
- return "reading"
- return "to-read"
+ return Shelf.READING
+ return Shelf.TO_READ
diff --git a/bookwyrm/management/commands/generate_preview_images.py b/bookwyrm/management/commands/generate_preview_images.py
index 0454e5e51..9ff16c26a 100644
--- a/bookwyrm/management/commands/generate_preview_images.py
+++ b/bookwyrm/management/commands/generate_preview_images.py
@@ -56,12 +56,17 @@ class Command(BaseCommand):
self.stdout.write(" OK 🖼")
# Books
- books = models.Book.objects.select_subclasses().filter()
- self.stdout.write(
- " → Book preview images ({}): ".format(len(books)), ending=""
+ book_ids = (
+ models.Book.objects.select_subclasses()
+ .filter()
+ .values_list("id", flat=True)
)
- for book in books:
- preview_images.generate_edition_preview_image_task.delay(book.id)
+
+ self.stdout.write(
+ " → Book preview images ({}): ".format(len(book_ids)), ending=""
+ )
+ for book_id in book_ids:
+ preview_images.generate_edition_preview_image_task.delay(book_id)
self.stdout.write(".", ending="")
self.stdout.write(" OK 🖼")
diff --git a/bookwyrm/management/commands/initdb.py b/bookwyrm/management/commands/initdb.py
index 160502ca0..23020a0a6 100644
--- a/bookwyrm/management/commands/initdb.py
+++ b/bookwyrm/management/commands/initdb.py
@@ -89,7 +89,7 @@ def init_connectors():
covers_url="https://inventaire.io",
search_url="https://inventaire.io/api/search?types=works&types=works&search=",
isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A",
- priority=3,
+ priority=1,
)
models.Connector.objects.create(
@@ -101,7 +101,7 @@ def init_connectors():
covers_url="https://covers.openlibrary.org",
search_url="https://openlibrary.org/search?q=",
isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:",
- priority=3,
+ priority=1,
)
diff --git a/bookwyrm/migrations/0146_auto_20220316_2320.py b/bookwyrm/migrations/0146_auto_20220316_2320.py
new file mode 100644
index 000000000..e50bf25ec
--- /dev/null
+++ b/bookwyrm/migrations/0146_auto_20220316_2320.py
@@ -0,0 +1,80 @@
+# Generated by Django 3.2.12 on 2022-03-16 23:20
+
+import bookwyrm.models.fields
+from django.db import migrations
+from bookwyrm.models import Shelf
+
+
+def add_shelves(apps, schema_editor):
+ """create a "Stopped reading" shelf for every local user"""
+
+ db_alias = schema_editor.connection.alias
+ shelf_model = apps.get_model("bookwyrm", "Shelf")
+
+ users = apps.get_model("bookwyrm", "User")
+ local_users = users.objects.using(db_alias).filter(local=True)
+ for user in local_users:
+ remote_id = f"{user.remote_id}/books/stopped"
+ shelf_model.objects.using(db_alias).create(
+ name="Stopped reading",
+ identifier=Shelf.STOPPED_READING,
+ user=user,
+ editable=False,
+ remote_id=remote_id,
+ )
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("bookwyrm", "0145_sitesettings_version"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="comment",
+ name="reading_status",
+ field=bookwyrm.models.fields.CharField(
+ blank=True,
+ choices=[
+ ("to-read", "To-Read"),
+ ("reading", "Reading"),
+ ("read", "Read"),
+ ("stopped-reading", "Stopped-Reading"),
+ ],
+ max_length=255,
+ null=True,
+ ),
+ ),
+ migrations.AlterField(
+ model_name="quotation",
+ name="reading_status",
+ field=bookwyrm.models.fields.CharField(
+ blank=True,
+ choices=[
+ ("to-read", "To-Read"),
+ ("reading", "Reading"),
+ ("read", "Read"),
+ ("stopped-reading", "Stopped-Reading"),
+ ],
+ max_length=255,
+ null=True,
+ ),
+ ),
+ migrations.AlterField(
+ model_name="review",
+ name="reading_status",
+ field=bookwyrm.models.fields.CharField(
+ blank=True,
+ choices=[
+ ("to-read", "To-Read"),
+ ("reading", "Reading"),
+ ("read", "Read"),
+ ("stopped-reading", "Stopped-Reading"),
+ ],
+ max_length=255,
+ null=True,
+ ),
+ ),
+ migrations.RunPython(add_shelves, reverse_code=migrations.RunPython.noop),
+ ]
diff --git a/bookwyrm/migrations/0148_merge_20220326_2006.py b/bookwyrm/migrations/0148_merge_20220326_2006.py
new file mode 100644
index 000000000..978662765
--- /dev/null
+++ b/bookwyrm/migrations/0148_merge_20220326_2006.py
@@ -0,0 +1,13 @@
+# Generated by Django 3.2.12 on 2022-03-26 20:06
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("bookwyrm", "0146_auto_20220316_2320"),
+ ("bookwyrm", "0147_alter_user_preferred_language"),
+ ]
+
+ operations = []
diff --git a/bookwyrm/migrations/0149_merge_20220526_1716.py b/bookwyrm/migrations/0149_merge_20220526_1716.py
new file mode 100644
index 000000000..b42bccd3b
--- /dev/null
+++ b/bookwyrm/migrations/0149_merge_20220526_1716.py
@@ -0,0 +1,13 @@
+# Generated by Django 3.2.13 on 2022-05-26 17:16
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("bookwyrm", "0148_alter_user_preferred_language"),
+ ("bookwyrm", "0148_merge_20220326_2006"),
+ ]
+
+ operations = []
diff --git a/bookwyrm/migrations/0150_readthrough_stopped_date.py b/bookwyrm/migrations/0150_readthrough_stopped_date.py
new file mode 100644
index 000000000..6ce2f89a9
--- /dev/null
+++ b/bookwyrm/migrations/0150_readthrough_stopped_date.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.13 on 2022-05-26 18:33
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("bookwyrm", "0149_merge_20220526_1716"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="readthrough",
+ name="stopped_date",
+ field=models.DateTimeField(blank=True, null=True),
+ ),
+ ]
diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py
index 3ea8e1a8e..190046019 100644
--- a/bookwyrm/models/book.py
+++ b/bookwyrm/models/book.py
@@ -176,8 +176,8 @@ class Book(BookDataModel):
"""properties of this edition, as a string"""
items = [
self.physical_format if hasattr(self, "physical_format") else None,
- self.languages[0] + " language"
- if self.languages and self.languages[0] != "English"
+ f"{self.languages[0]} language"
+ if self.languages and self.languages[0] and self.languages[0] != "English"
else None,
str(self.published_date.year) if self.published_date else None,
", ".join(self.publishers) if hasattr(self, "publishers") else None,
diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py
index bcba391b6..556f133f9 100644
--- a/bookwyrm/models/import_job.py
+++ b/bookwyrm/models/import_job.py
@@ -175,9 +175,15 @@ class ImportItem(models.Model):
def date_added(self):
"""when the book was added to this dataset"""
if self.normalized_data.get("date_added"):
- return timezone.make_aware(
- dateutil.parser.parse(self.normalized_data.get("date_added"))
+ parsed_date_added = dateutil.parser.parse(
+ self.normalized_data.get("date_added")
)
+
+ if timezone.is_aware(parsed_date_added):
+ # Keep timezone if import already had one
+ return parsed_date_added
+
+ return timezone.make_aware(parsed_date_added)
return None
@property
diff --git a/bookwyrm/models/readthrough.py b/bookwyrm/models/readthrough.py
index ceb8e0b6e..314b40a5c 100644
--- a/bookwyrm/models/readthrough.py
+++ b/bookwyrm/models/readthrough.py
@@ -27,6 +27,7 @@ class ReadThrough(BookWyrmModel):
)
start_date = models.DateTimeField(blank=True, null=True)
finish_date = models.DateTimeField(blank=True, null=True)
+ stopped_date = models.DateTimeField(blank=True, null=True)
is_active = models.BooleanField(default=True)
def save(self, *args, **kwargs):
@@ -34,7 +35,7 @@ class ReadThrough(BookWyrmModel):
cache.delete(f"latest_read_through-{self.user.id}-{self.book.id}")
self.user.update_active_date()
# an active readthrough must have an unset finish date
- if self.finish_date:
+ if self.finish_date or self.stopped_date:
self.is_active = False
super().save(*args, **kwargs)
diff --git a/bookwyrm/models/shelf.py b/bookwyrm/models/shelf.py
index 8ea274ea1..3291d5653 100644
--- a/bookwyrm/models/shelf.py
+++ b/bookwyrm/models/shelf.py
@@ -18,8 +18,9 @@ class Shelf(OrderedCollectionMixin, BookWyrmModel):
TO_READ = "to-read"
READING = "reading"
READ_FINISHED = "read"
+ STOPPED_READING = "stopped-reading"
- READ_STATUS_IDENTIFIERS = (TO_READ, READING, READ_FINISHED)
+ READ_STATUS_IDENTIFIERS = (TO_READ, READING, READ_FINISHED, STOPPED_READING)
name = fields.CharField(max_length=100)
identifier = models.CharField(max_length=100)
diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py
index 17fcd4587..3949e09a9 100644
--- a/bookwyrm/models/status.py
+++ b/bookwyrm/models/status.py
@@ -116,11 +116,8 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
def ignore_activity(cls, activity): # pylint: disable=too-many-return-statements
"""keep notes if they are replies to existing statuses"""
if activity.type == "Announce":
- try:
- boosted = activitypub.resolve_remote_id(
- activity.object, get_activity=True
- )
- except activitypub.ActivitySerializerError:
+ boosted = activitypub.resolve_remote_id(activity.object, get_activity=True)
+ if not boosted:
# if we can't load the status, definitely ignore it
return True
# keep the boost if we would keep the status
@@ -265,7 +262,7 @@ class GeneratedNote(Status):
ReadingStatusChoices = models.TextChoices(
- "ReadingStatusChoices", ["to-read", "reading", "read"]
+ "ReadingStatusChoices", ["to-read", "reading", "read", "stopped-reading"]
)
@@ -306,10 +303,17 @@ class Comment(BookStatus):
@property
def pure_content(self):
"""indicate the book in question for mastodon (or w/e) users"""
-        return (
-            f'{self.content}<p>(comment on <a href="{self.book.remote_id}">'
-            f'"{self.book.title}"</a>)</p>'
-        )
+        if self.progress_mode == "PG" and self.progress and (self.progress > 0):
+            return_value = (
+                f'{self.content}<p>(comment on <a href="{self.book.remote_id}">'
+                f'"{self.book.title}"</a>, page {self.progress})</p>'
+            )
+        else:
+            return_value = (
+                f'{self.content}<p>(comment on <a href="{self.book.remote_id}">'
+                f'"{self.book.title}"</a>)</p>'
+            )
+        return return_value
activity_serializer = activitypub.Comment
@@ -335,10 +339,17 @@ class Quotation(BookStatus):
"""indicate the book in question for mastodon (or w/e) users"""
         quote = re.sub(r"^<p>", '<p>"', self.quote)
         quote = re.sub(r"</p>$", '"</p>', quote)
-        return (
-            f'{quote} <p>-- <a href="{self.book.remote_id}">'
-            f'"{self.book.title}"</a></p>{self.content}'
-        )
+        if self.position_mode == "PG" and self.position and (self.position > 0):
+            return_value = (
+                f'{quote} <p>-- <a href="{self.book.remote_id}">'
+                f'"{self.book.title}"</a>, page {self.position}</p>{self.content}'
+            )
+        else:
+            return_value = (
+                f'{quote} <p>-- <a href="{self.book.remote_id}">'
+                f'"{self.book.title}"</a></p>{self.content}'
+            )
+        return return_value
activity_serializer = activitypub.Quotation
@@ -377,7 +388,7 @@ class Review(BookStatus):
def save(self, *args, **kwargs):
"""clear rating caches"""
if self.book.parent_work:
- cache.delete(f"book-rating-{self.book.parent_work.id}-*")
+ cache.delete(f"book-rating-{self.book.parent_work.id}")
super().save(*args, **kwargs)
diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py
index be5c19922..dce74022c 100644
--- a/bookwyrm/models/user.py
+++ b/bookwyrm/models/user.py
@@ -374,6 +374,10 @@ class User(OrderedCollectionPageMixin, AbstractUser):
"name": "Read",
"identifier": "read",
},
+ {
+ "name": "Stopped Reading",
+ "identifier": "stopped-reading",
+ },
]
for shelf in shelves:
diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py
index 416610e49..dc0d71f30 100644
--- a/bookwyrm/settings.py
+++ b/bookwyrm/settings.py
@@ -11,7 +11,7 @@ from django.utils.translation import gettext_lazy as _
env = Env()
env.read_env()
DOMAIN = env("DOMAIN")
-VERSION = "0.3.4"
+VERSION = "0.4.0"
RELEASE_API = env(
"RELEASE_API",
@@ -21,7 +21,7 @@ RELEASE_API = env(
PAGE_LENGTH = env("PAGE_LENGTH", 15)
DEFAULT_LANGUAGE = env("DEFAULT_LANGUAGE", "English")
-JS_CACHE = "bc93172a"
+JS_CACHE = "e678183b"
# email
EMAIL_BACKEND = env("EMAIL_BACKEND", "django.core.mail.backends.smtp.EmailBackend")
@@ -212,7 +212,7 @@ STREAMS = [
# Search configuration
# total time in seconds that the instance will spend searching connectors
-SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 15))
+SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8))
# timeout for a query to an individual connector
QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5))
diff --git a/bookwyrm/static/js/status_cache.js b/bookwyrm/static/js/status_cache.js
index b19489c1d..0a9f3abc5 100644
--- a/bookwyrm/static/js/status_cache.js
+++ b/bookwyrm/static/js/status_cache.js
@@ -203,6 +203,8 @@ let StatusCache = new (class {
.forEach((item) => (item.disabled = false));
next_identifier = next_identifier == "complete" ? "read" : next_identifier;
+ next_identifier =
+ next_identifier == "stopped-reading-complete" ? "stopped-reading" : next_identifier;
// Disable the current state
button.querySelector(
diff --git a/bookwyrm/templates/about/layout.html b/bookwyrm/templates/about/layout.html
index 458e4b1d1..e921fcd29 100644
--- a/bookwyrm/templates/about/layout.html
+++ b/bookwyrm/templates/about/layout.html
@@ -50,7 +50,7 @@
-
+
{% block about_content %}{% endblock %}
diff --git a/bookwyrm/templates/author/edit_author.html b/bookwyrm/templates/author/edit_author.html
index 6f72b8700..b0727c43b 100644
--- a/bookwyrm/templates/author/edit_author.html
+++ b/bookwyrm/templates/author/edit_author.html
@@ -24,7 +24,7 @@
{% endif %}
-