
Merge branch 'main' into 2678

Zach Flanders 2023-04-25 19:47:07 -05:00 committed by GitHub
commit 1985c2d284
47 changed files with 992 additions and 737 deletions

View file

@@ -4,13 +4,16 @@ from urllib.parse import quote_plus
 import imghdr
 import logging
 import re
+import asyncio
+import requests
+from requests.exceptions import RequestException
+import aiohttp
 from django.core.files.base import ContentFile
 from django.db import transaction
-import requests
-from requests.exceptions import RequestException
 from bookwyrm import activitypub, models, settings
+from bookwyrm.settings import USER_AGENT
 from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
 from .format_mappings import format_mappings
@@ -57,6 +60,39 @@ class AbstractMinimalConnector(ABC):
             return list(self.parse_isbn_search_data(data))[:10]
         return list(self.parse_search_data(data, min_confidence))[:10]
 
+    async def get_results(self, session, url, min_confidence, query):
+        """try this specific connector"""
+        # pylint: disable=line-too-long
+        headers = {
+            "Accept": (
+                'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
+            ),
+            "User-Agent": USER_AGENT,
+        }
+        params = {"min_confidence": min_confidence}
+        try:
+            async with session.get(url, headers=headers, params=params) as response:
+                if not response.ok:
+                    logger.info("Unable to connect to %s: %s", url, response.reason)
+                    return
+                try:
+                    raw_data = await response.json()
+                except aiohttp.client_exceptions.ContentTypeError as err:
+                    logger.exception(err)
+                    return
+                return {
+                    "connector": self,
+                    "results": self.process_search_response(
+                        query, raw_data, min_confidence
+                    ),
+                }
+        except asyncio.TimeoutError:
+            logger.info("Connection timed out for url: %s", url)
+        except aiohttp.ClientError as err:
+            logger.info(err)
+
     @abstractmethod
     def get_or_create_book(self, remote_id):
         """pull up a book record by whatever means possible"""

View file

@@ -12,7 +12,7 @@ from django.db.models import signals
 from requests import HTTPError
 
 from bookwyrm import book_search, models
-from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
+from bookwyrm.settings import SEARCH_TIMEOUT
 from bookwyrm.tasks import app, LOW
 
 logger = logging.getLogger(__name__)
@@ -22,40 +22,6 @@ class ConnectorException(HTTPError):
     """when the connector can't do what was asked"""
 
 
-async def get_results(session, url, min_confidence, query, connector):
-    """try this specific connector"""
-    # pylint: disable=line-too-long
-    headers = {
-        "Accept": (
-            'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
-        ),
-        "User-Agent": USER_AGENT,
-    }
-    params = {"min_confidence": min_confidence}
-    try:
-        async with session.get(url, headers=headers, params=params) as response:
-            if not response.ok:
-                logger.info("Unable to connect to %s: %s", url, response.reason)
-                return
-            try:
-                raw_data = await response.json()
-            except aiohttp.client_exceptions.ContentTypeError as err:
-                logger.exception(err)
-                return
-            return {
-                "connector": connector,
-                "results": connector.process_search_response(
-                    query, raw_data, min_confidence
-                ),
-            }
-    except asyncio.TimeoutError:
-        logger.info("Connection timed out for url: %s", url)
-    except aiohttp.ClientError as err:
-        logger.info(err)
-
-
 async def async_connector_search(query, items, min_confidence):
     """Try a number of requests simultaneously"""
     timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
@@ -64,7 +30,7 @@ async def async_connector_search(query, items, min_confidence):
         for url, connector in items:
             tasks.append(
                 asyncio.ensure_future(
-                    get_results(session, url, min_confidence, query, connector)
+                    connector.get_results(session, url, min_confidence, query)
                 )
             )
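The rest of async_connector_search falls outside this hunk. Assuming it follows the usual pattern for ensure_future tasks, the futures are awaited together and empty results are dropped, roughly like this sketch:

    import asyncio

    async def gather_connector_results(tasks):
        # Sketch only: get_results returns None on timeouts and client errors,
        # so connectors that failed are filtered out of the final list.
        results = await asyncio.gather(*tasks)
        return [result for result in results if result]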

View file

@@ -19,7 +19,7 @@ class LibrarythingImporter(Importer):
         normalized = {k: remove_brackets(entry.get(v)) for k, v in mappings.items()}
         isbn_13 = normalized.get("isbn_13")
         isbn_13 = isbn_13.split(", ") if isbn_13 else []
-        normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 0 else None
+        normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 1 else None
         return normalized
 
     def get_shelf(self, normalized_row):
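The guard change matters because the field arrives as a comma-separated pair where index [1] is the ISBN-13, so at least two items must be present before indexing. An illustrative trace (sample values invented):

    isbn_13 = "0441172717, 9780441172719".split(", ")
    isbn_13[1] if len(isbn_13) > 1 else None  # -> "9780441172719"

    isbn_13 = "0441172717".split(", ")  # a row with only one value
    isbn_13[1] if len(isbn_13) > 1 else None  # -> None; the old "> 0" check raised IndexError here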

View file

@@ -3,38 +3,7 @@ merge book data objects """
 from django.core.management.base import BaseCommand
 from django.db.models import Count
 from bookwyrm import models
-
-
-def update_related(canonical, obj):
-    """update all the models with fk to the object being removed"""
-    # move related models to canonical
-    related_models = [
-        (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
-    ]
-    for (related_field, related_model) in related_models:
-        related_objs = related_model.objects.filter(**{related_field: obj})
-        for related_obj in related_objs:
-            print("replacing in", related_model.__name__, related_field, related_obj.id)
-            try:
-                setattr(related_obj, related_field, canonical)
-                related_obj.save()
-            except TypeError:
-                getattr(related_obj, related_field).add(canonical)
-                getattr(related_obj, related_field).remove(obj)
-
-
-def copy_data(canonical, obj):
-    """try to get the most data possible"""
-    for data_field in obj._meta.get_fields():
-        if not hasattr(data_field, "activitypub_field"):
-            continue
-        data_value = getattr(obj, data_field.name)
-        if not data_value:
-            continue
-        if not getattr(canonical, data_field.name):
-            print("setting data field", data_field.name, data_value)
-            setattr(canonical, data_field.name, data_value)
-    canonical.save()
+from bookwyrm.management.merge import merge_objects
 
 
 def dedupe_model(model):
@@ -61,10 +30,7 @@ def dedupe_model(model):
             print("keeping", canonical.remote_id)
             for obj in objs[1:]:
                 print(obj.remote_id)
-                copy_data(canonical, obj)
-                update_related(canonical, obj)
-                # remove the outdated entry
-                obj.delete()
+                merge_objects(canonical, obj)
 
 
 class Command(BaseCommand):

View file

@@ -0,0 +1,12 @@
+""" PROCEED WITH CAUTION: uses deduplication fields to permanently
+merge author data objects """
+from bookwyrm import models
+from bookwyrm.management.merge_command import MergeCommand
+
+
+class Command(MergeCommand):
+    """merges two authors by ID"""
+
+    help = "merges specified authors into one"
+
+    MODEL = models.Author

View file

@@ -0,0 +1,12 @@
+""" PROCEED WITH CAUTION: uses deduplication fields to permanently
+merge edition data objects """
+from bookwyrm import models
+from bookwyrm.management.merge_command import MergeCommand
+
+
+class Command(MergeCommand):
+    """merges two editions by ID"""
+
+    help = "merges specified editions into one"
+
+    MODEL = models.Edition

View file

@@ -0,0 +1,50 @@
+from django.db.models import ManyToManyField
+
+
+def update_related(canonical, obj):
+    """update all the models with fk to the object being removed"""
+    # move related models to canonical
+    related_models = [
+        (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
+    ]
+    for (related_field, related_model) in related_models:
+        # Skip the ManyToMany fields that aren't auto-created. These
+        # should have a corresponding OneToMany field in the model for
+        # the linking table anyway. If we update it through that model
+        # instead then we won't lose the extra fields in the linking
+        # table.
+        related_field_obj = related_model._meta.get_field(related_field)
+        if isinstance(related_field_obj, ManyToManyField):
+            through = related_field_obj.remote_field.through
+            if not through._meta.auto_created:
+                continue
+        related_objs = related_model.objects.filter(**{related_field: obj})
+        for related_obj in related_objs:
+            print("replacing in", related_model.__name__, related_field, related_obj.id)
+            try:
+                setattr(related_obj, related_field, canonical)
+                related_obj.save()
+            except TypeError:
+                getattr(related_obj, related_field).add(canonical)
+                getattr(related_obj, related_field).remove(obj)
+
+
+def copy_data(canonical, obj):
+    """try to get the most data possible"""
+    for data_field in obj._meta.get_fields():
+        if not hasattr(data_field, "activitypub_field"):
+            continue
+        data_value = getattr(obj, data_field.name)
+        if not data_value:
+            continue
+        if not getattr(canonical, data_field.name):
+            print("setting data field", data_field.name, data_value)
+            setattr(canonical, data_field.name, data_value)
+    canonical.save()
+
+
+def merge_objects(canonical, obj):
+    copy_data(canonical, obj)
+    update_related(canonical, obj)
+    # remove the outdated entry
+    obj.delete()
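A hedged sketch of calling merge_objects directly from a Django shell; the model choice and IDs below are placeholders, and note the caller decides which record survives:

    from bookwyrm import models
    from bookwyrm.management.merge import merge_objects

    canonical = models.Edition.objects.get(id=1)  # placeholder ID: the record to keep
    duplicate = models.Edition.objects.get(id=2)  # placeholder ID: the record to fold in
    # copies missing fields onto canonical, repoints related rows, deletes the duplicate
    merge_objects(canonical, duplicate)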

View file

@@ -0,0 +1,29 @@
+from bookwyrm.management.merge import merge_objects
+from django.core.management.base import BaseCommand
+
+
+class MergeCommand(BaseCommand):
+    """base class for merge commands"""
+
+    def add_arguments(self, parser):
+        """add the arguments for this command"""
+        parser.add_argument("--canonical", type=int, required=True)
+        parser.add_argument("--other", type=int, required=True)
+
+    # pylint: disable=no-self-use,unused-argument
+    def handle(self, *args, **options):
+        """merge the two objects"""
+        model = self.MODEL
+        try:
+            canonical = model.objects.get(id=options["canonical"])
+        except model.DoesNotExist:
+            print("canonical book doesn't exist!")
+            return
+
+        try:
+            other = model.objects.get(id=options["other"])
+        except model.DoesNotExist:
+            print("other book doesn't exist!")
+            return
+
+        merge_objects(canonical, other)
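Assuming the two new Command classes are registered as management commands named merge_authors and merge_editions (the usual Django convention of naming the command after its file), they could be invoked like this:

    from django.core.management import call_command

    # Equivalent to: python manage.py merge_editions --canonical 100 --other 101
    call_command("merge_editions", canonical=100, other=101)  # placeholder IDs
    call_command("merge_authors", canonical=7, other=8)       # placeholder IDs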

View file

@@ -252,9 +252,12 @@ class ImportItem(models.Model):
     @property
     def rating(self):
         """x/5 star rating for a book"""
-        if self.normalized_data.get("rating"):
+        if not self.normalized_data.get("rating"):
+            return None
+        try:
             return float(self.normalized_data.get("rating"))
-        return None
+        except ValueError:
+            return None
 
     @property
     def date_added(self):
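The rewritten property now fails soft on both a missing rating and an unparseable one. A hypothetical standalone mirror of its logic, with illustrative inputs:

    def parse_rating(normalized_data):
        # Hypothetical function mirroring the property above, for illustration only
        if not normalized_data.get("rating"):
            return None
        try:
            return float(normalized_data.get("rating"))
        except ValueError:
            return None

    assert parse_rating({"rating": "4.5"}) == 4.5
    assert parse_rating({"rating": "it was ok"}) is None  # ValueError is caught
    assert parse_rating({}) is None                       # falsy guard short-circuits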

View file

@@ -12,7 +12,7 @@ from django.core.exceptions import ImproperlyConfigured
 env = Env()
 env.read_env()
 DOMAIN = env("DOMAIN")
-VERSION = "0.6.1"
+VERSION = "0.6.2"
 
 RELEASE_API = env(
     "RELEASE_API",
@@ -22,7 +22,7 @@ RELEASE_API = env(
 PAGE_LENGTH = env.int("PAGE_LENGTH", 15)
 DEFAULT_LANGUAGE = env("DEFAULT_LANGUAGE", "English")
 
-JS_CACHE = "a7d4e720"
+JS_CACHE = "ea91d7df"
 
 # email
 EMAIL_BACKEND = env("EMAIL_BACKEND", "django.core.mail.backends.smtp.EmailBackend")