Merge branch 'main' into 2678
commit 1985c2d284
47 changed files with 992 additions and 737 deletions
bookwyrm/connectors/abstract_connector.py
@@ -4,13 +4,16 @@ from urllib.parse import quote_plus
 import imghdr
 import logging
 import re
+import asyncio
+import requests
+from requests.exceptions import RequestException
+import aiohttp

 from django.core.files.base import ContentFile
 from django.db import transaction
-import requests
-from requests.exceptions import RequestException

 from bookwyrm import activitypub, models, settings
+from bookwyrm.settings import USER_AGENT
 from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
 from .format_mappings import format_mappings

@@ -57,6 +60,39 @@ class AbstractMinimalConnector(ABC):
             return list(self.parse_isbn_search_data(data))[:10]
         return list(self.parse_search_data(data, min_confidence))[:10]

+    async def get_results(self, session, url, min_confidence, query):
+        """try this specific connector"""
+        # pylint: disable=line-too-long
+        headers = {
+            "Accept": (
+                'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
+            ),
+            "User-Agent": USER_AGENT,
+        }
+        params = {"min_confidence": min_confidence}
+        try:
+            async with session.get(url, headers=headers, params=params) as response:
+                if not response.ok:
+                    logger.info("Unable to connect to %s: %s", url, response.reason)
+                    return
+
+                try:
+                    raw_data = await response.json()
+                except aiohttp.client_exceptions.ContentTypeError as err:
+                    logger.exception(err)
+                    return
+
+                return {
+                    "connector": self,
+                    "results": self.process_search_response(
+                        query, raw_data, min_confidence
+                    ),
+                }
+        except asyncio.TimeoutError:
+            logger.info("Connection timed out for url: %s", url)
+        except aiohttp.ClientError as err:
+            logger.info(err)
+
     @abstractmethod
     def get_or_create_book(self, remote_id):
         """pull up a book record by whatever means possible"""
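The hunk above, together with the connector_manager changes below, moves the per-connector search request from a free function into a method on AbstractMinimalConnector, so each connector fetches and formats its own results. A minimal sketch of driving the new method outside of async_connector_search, assuming a concrete connector instance and an illustrative URL (the helper name run_one and the timeout value are hypothetical, not from this diff):

    import asyncio
    import aiohttp

    async def run_one(connector, url, query, min_confidence=0.1):
        """await a single connector search; returns the dict built by
        get_results, or None if the request failed"""
        timeout = aiohttp.ClientTimeout(total=15)  # assumed timeout, not from this diff
        async with aiohttp.ClientSession(timeout=timeout) as session:
            return await connector.get_results(session, url, min_confidence, query)

    # results = asyncio.run(run_one(connector, search_url, "parable of the sower"))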
bookwyrm/connectors/connector_manager.py
@@ -12,7 +12,7 @@ from django.db.models import signals
 from requests import HTTPError

 from bookwyrm import book_search, models
-from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
+from bookwyrm.settings import SEARCH_TIMEOUT
 from bookwyrm.tasks import app, LOW

 logger = logging.getLogger(__name__)
@@ -22,40 +22,6 @@ class ConnectorException(HTTPError):
     """when the connector can't do what was asked"""


-async def get_results(session, url, min_confidence, query, connector):
-    """try this specific connector"""
-    # pylint: disable=line-too-long
-    headers = {
-        "Accept": (
-            'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
-        ),
-        "User-Agent": USER_AGENT,
-    }
-    params = {"min_confidence": min_confidence}
-    try:
-        async with session.get(url, headers=headers, params=params) as response:
-            if not response.ok:
-                logger.info("Unable to connect to %s: %s", url, response.reason)
-                return
-
-            try:
-                raw_data = await response.json()
-            except aiohttp.client_exceptions.ContentTypeError as err:
-                logger.exception(err)
-                return
-
-            return {
-                "connector": connector,
-                "results": connector.process_search_response(
-                    query, raw_data, min_confidence
-                ),
-            }
-    except asyncio.TimeoutError:
-        logger.info("Connection timed out for url: %s", url)
-    except aiohttp.ClientError as err:
-        logger.info(err)
-
-
 async def async_connector_search(query, items, min_confidence):
     """Try a number of requests simultaneously"""
     timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
@@ -64,7 +30,7 @@ async def async_connector_search(query, items, min_confidence):
         for url, connector in items:
             tasks.append(
                 asyncio.ensure_future(
-                    get_results(session, url, min_confidence, query, connector)
+                    connector.get_results(session, url, min_confidence, query)
                 )
             )

bookwyrm/importers/librarything_import.py
@@ -19,7 +19,7 @@ class LibrarythingImporter(Importer):
         normalized = {k: remove_brackets(entry.get(v)) for k, v in mappings.items()}
         isbn_13 = normalized.get("isbn_13")
         isbn_13 = isbn_13.split(", ") if isbn_13 else []
-        normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 0 else None
+        normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 1 else None
         return normalized

     def get_shelf(self, normalized_row):
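This one-character fix closes an off-by-one crash: the expression indexes isbn_13[1], so a LibraryThing row whose ISBN column splits into a single element passed the old len(isbn_13) > 0 guard and raised IndexError. Illustrative values (the ISBNs are made up):

    isbn_13 = "0123456789, 9780123456786".split(", ")
    isbn_13[1] if len(isbn_13) > 1 else None   # "9780123456786"

    isbn_13 = "9780123456786".split(", ")      # no comma pair: one element
    isbn_13[1] if len(isbn_13) > 1 else None   # None; the old "> 0" guard hit IndexError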
bookwyrm/management/commands/deduplicate_book_data.py
@@ -3,38 +3,7 @@ merge book data objects """
 from django.core.management.base import BaseCommand
 from django.db.models import Count
 from bookwyrm import models
-
-
-def update_related(canonical, obj):
-    """update all the models with fk to the object being removed"""
-    # move related models to canonical
-    related_models = [
-        (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
-    ]
-    for (related_field, related_model) in related_models:
-        related_objs = related_model.objects.filter(**{related_field: obj})
-        for related_obj in related_objs:
-            print("replacing in", related_model.__name__, related_field, related_obj.id)
-            try:
-                setattr(related_obj, related_field, canonical)
-                related_obj.save()
-            except TypeError:
-                getattr(related_obj, related_field).add(canonical)
-                getattr(related_obj, related_field).remove(obj)
-
-
-def copy_data(canonical, obj):
-    """try to get the most data possible"""
-    for data_field in obj._meta.get_fields():
-        if not hasattr(data_field, "activitypub_field"):
-            continue
-        data_value = getattr(obj, data_field.name)
-        if not data_value:
-            continue
-        if not getattr(canonical, data_field.name):
-            print("setting data field", data_field.name, data_value)
-            setattr(canonical, data_field.name, data_value)
-    canonical.save()
+from bookwyrm.management.merge import merge_objects


 def dedupe_model(model):
@@ -61,10 +30,7 @@ def dedupe_model(model):
             print("keeping", canonical.remote_id)
             for obj in objs[1:]:
                 print(obj.remote_id)
-                copy_data(canonical, obj)
-                update_related(canonical, obj)
-                # remove the outdated entry
-                obj.delete()
+                merge_objects(canonical, obj)


 class Command(BaseCommand):
bookwyrm/management/commands/merge_authors.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+""" PROCEED WITH CAUTION: uses deduplication fields to permanently
+merge author data objects """
+from bookwyrm import models
+from bookwyrm.management.merge_command import MergeCommand
+
+
+class Command(MergeCommand):
+    """merges two authors by ID"""
+
+    help = "merges specified authors into one"
+
+    MODEL = models.Author
bookwyrm/management/commands/merge_editions.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+""" PROCEED WITH CAUTION: uses deduplication fields to permanently
+merge edition data objects """
+from bookwyrm import models
+from bookwyrm.management.merge_command import MergeCommand
+
+
+class Command(MergeCommand):
+    """merges two editions by ID"""
+
+    help = "merges specified editions into one"
+
+    MODEL = models.Edition
bookwyrm/management/merge.py (new file, 50 lines)
@@ -0,0 +1,50 @@
+from django.db.models import ManyToManyField
+
+
+def update_related(canonical, obj):
+    """update all the models with fk to the object being removed"""
+    # move related models to canonical
+    related_models = [
+        (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
+    ]
+    for (related_field, related_model) in related_models:
+        # Skip the ManyToMany fields that aren’t auto-created. These
+        # should have a corresponding OneToMany field in the model for
+        # the linking table anyway. If we update it through that model
+        # instead then we won’t lose the extra fields in the linking
+        # table.
+        related_field_obj = related_model._meta.get_field(related_field)
+        if isinstance(related_field_obj, ManyToManyField):
+            through = related_field_obj.remote_field.through
+            if not through._meta.auto_created:
+                continue
+        related_objs = related_model.objects.filter(**{related_field: obj})
+        for related_obj in related_objs:
+            print("replacing in", related_model.__name__, related_field, related_obj.id)
+            try:
+                setattr(related_obj, related_field, canonical)
+                related_obj.save()
+            except TypeError:
+                getattr(related_obj, related_field).add(canonical)
+                getattr(related_obj, related_field).remove(obj)
+
+
+def copy_data(canonical, obj):
+    """try to get the most data possible"""
+    for data_field in obj._meta.get_fields():
+        if not hasattr(data_field, "activitypub_field"):
+            continue
+        data_value = getattr(obj, data_field.name)
+        if not data_value:
+            continue
+        if not getattr(canonical, data_field.name):
+            print("setting data field", data_field.name, data_value)
+            setattr(canonical, data_field.name, data_value)
+    canonical.save()
+
+
+def merge_objects(canonical, obj):
+    copy_data(canonical, obj)
+    update_related(canonical, obj)
+    # remove the outdated entry
+    obj.delete()
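Compared with the copy that previously lived in the deduplicate command, update_related gains the isinstance(..., ManyToManyField) check: a many-to-many relation backed by an explicit through model is skipped, because repointing it via the M2M manager would recreate linking rows without their extra columns, while the through model's own foreign keys are already handled as ordinary related objects. A sketch of the shape being protected, with illustrative models (BookWyrm's Shelf/ShelfBook pair is the kind of case meant, but these definitions are not from this diff):

    from django.db import models

    class Book(models.Model):
        title = models.CharField(max_length=100)

    class Shelf(models.Model):
        # explicit through model: through._meta.auto_created is False,
        # so update_related skips this relation...
        books = models.ManyToManyField(Book, through="ShelfBook")

    class ShelfBook(models.Model):
        # ...and instead repoints these FKs, preserving shelved_date
        shelf = models.ForeignKey(Shelf, on_delete=models.CASCADE)
        book = models.ForeignKey(Book, on_delete=models.CASCADE)
        shelved_date = models.DateTimeField(auto_now_add=True)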
bookwyrm/management/merge_command.py (new file, 29 lines)
@@ -0,0 +1,29 @@
+from bookwyrm.management.merge import merge_objects
+from django.core.management.base import BaseCommand
+
+
+class MergeCommand(BaseCommand):
+    """base class for merge commands"""
+
+    def add_arguments(self, parser):
+        """add the arguments for this command"""
+        parser.add_argument("--canonical", type=int, required=True)
+        parser.add_argument("--other", type=int, required=True)
+
+    # pylint: disable=no-self-use,unused-argument
+    def handle(self, *args, **options):
+        """merge the two objects"""
+        model = self.MODEL
+
+        try:
+            canonical = model.objects.get(id=options["canonical"])
+        except model.DoesNotExist:
+            print("canonical book doesn’t exist!")
+            return
+        try:
+            other = model.objects.get(id=options["other"])
+        except model.DoesNotExist:
+            print("other book doesn’t exist!")
+            return
+
+        merge_objects(canonical, other)
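MergeCommand leaves only the MODEL attribute for a subclass to supply, as the two new commands above show. Assuming the commands are registered under their module names (standard Django behavior), a merge can be run from the shell or from code; the IDs here are placeholders:

    # shell: python manage.py merge_editions --canonical 123 --other 456
    from django.core.management import call_command

    call_command("merge_editions", canonical=123, other=456)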
bookwyrm/models/import_job.py
@@ -252,9 +252,12 @@ class ImportItem(models.Model):
     @property
     def rating(self):
         """x/5 star rating for a book"""
-        if self.normalized_data.get("rating"):
+        if not self.normalized_data.get("rating"):
+            return None
+        try:
             return float(self.normalized_data.get("rating"))
-        return None
+        except ValueError:
+            return None

     @property
     def date_added(self):
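The rewritten rating property now survives non-numeric values as well as missing ones: previously an import row carrying a rating like "five stars" raised an uncaught ValueError from float(). For example (illustrative values):

    float("4.5")         # 4.5
    float("five stars")  # ValueError -> the property now returns None instead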
bookwyrm/settings.py
@@ -12,7 +12,7 @@ from django.core.exceptions import ImproperlyConfigured
 env = Env()
 env.read_env()
 DOMAIN = env("DOMAIN")
-VERSION = "0.6.1"
+VERSION = "0.6.2"

 RELEASE_API = env(
     "RELEASE_API",
@@ -22,7 +22,7 @@ RELEASE_API = env(
 PAGE_LENGTH = env.int("PAGE_LENGTH", 15)
 DEFAULT_LANGUAGE = env("DEFAULT_LANGUAGE", "English")

-JS_CACHE = "a7d4e720"
+JS_CACHE = "ea91d7df"

 # email
 EMAIL_BACKEND = env("EMAIL_BACKEND", "django.core.mail.backends.smtp.EmailBackend")