Runs black
This commit is contained in:
parent
a07f955781
commit
70296e760b
198 changed files with 10239 additions and 8572 deletions
|
@ -1,4 +1,4 @@
|
|||
''' openlibrary data connector '''
|
||||
""" openlibrary data connector """
|
||||
import re
|
||||
|
||||
from bookwyrm import models
|
||||
|
@ -9,148 +9,134 @@ from .openlibrary_languages import languages
|
|||
|
||||
|
||||
class Connector(AbstractConnector):
|
||||
''' instantiate a connector for OL '''
|
||||
""" instantiate a connector for OL """
|
||||
|
||||
def __init__(self, identifier):
|
||||
super().__init__(identifier)
|
||||
|
||||
get_first = lambda a: a[0]
|
||||
get_remote_id = lambda a: self.base_url + a
|
||||
self.book_mappings = [
|
||||
Mapping('title'),
|
||||
Mapping('id', remote_field='key', formatter=get_remote_id),
|
||||
Mapping("title"),
|
||||
Mapping("id", remote_field="key", formatter=get_remote_id),
|
||||
Mapping("cover", remote_field="covers", formatter=self.get_cover_url),
|
||||
Mapping("sortTitle", remote_field="sort_title"),
|
||||
Mapping("subtitle"),
|
||||
Mapping("description", formatter=get_description),
|
||||
Mapping("languages", formatter=get_languages),
|
||||
Mapping("series", formatter=get_first),
|
||||
Mapping("seriesNumber", remote_field="series_number"),
|
||||
Mapping("subjects"),
|
||||
Mapping("subjectPlaces", remote_field="subject_places"),
|
||||
Mapping("isbn13", remote_field="isbn_13", formatter=get_first),
|
||||
Mapping("isbn10", remote_field="isbn_10", formatter=get_first),
|
||||
Mapping("lccn", formatter=get_first),
|
||||
Mapping("oclcNumber", remote_field="oclc_numbers", formatter=get_first),
|
||||
Mapping(
|
||||
'cover', remote_field='covers', formatter=self.get_cover_url),
|
||||
Mapping('sortTitle', remote_field='sort_title'),
|
||||
Mapping('subtitle'),
|
||||
Mapping('description', formatter=get_description),
|
||||
Mapping('languages', formatter=get_languages),
|
||||
Mapping('series', formatter=get_first),
|
||||
Mapping('seriesNumber', remote_field='series_number'),
|
||||
Mapping('subjects'),
|
||||
Mapping('subjectPlaces', remote_field='subject_places'),
|
||||
Mapping('isbn13', remote_field='isbn_13', formatter=get_first),
|
||||
Mapping('isbn10', remote_field='isbn_10', formatter=get_first),
|
||||
Mapping('lccn', formatter=get_first),
|
||||
Mapping(
|
||||
'oclcNumber', remote_field='oclc_numbers',
|
||||
formatter=get_first
|
||||
"openlibraryKey", remote_field="key", formatter=get_openlibrary_key
|
||||
),
|
||||
Mapping("goodreadsKey", remote_field="goodreads_key"),
|
||||
Mapping("asin"),
|
||||
Mapping(
|
||||
'openlibraryKey', remote_field='key',
|
||||
formatter=get_openlibrary_key
|
||||
"firstPublishedDate",
|
||||
remote_field="first_publish_date",
|
||||
),
|
||||
Mapping('goodreadsKey', remote_field='goodreads_key'),
|
||||
Mapping('asin'),
|
||||
Mapping(
|
||||
'firstPublishedDate', remote_field='first_publish_date',
|
||||
),
|
||||
Mapping('publishedDate', remote_field='publish_date'),
|
||||
Mapping('pages', remote_field='number_of_pages'),
|
||||
Mapping('physicalFormat', remote_field='physical_format'),
|
||||
Mapping('publishers'),
|
||||
Mapping("publishedDate", remote_field="publish_date"),
|
||||
Mapping("pages", remote_field="number_of_pages"),
|
||||
Mapping("physicalFormat", remote_field="physical_format"),
|
||||
Mapping("publishers"),
|
||||
]
|
||||
|
||||
self.author_mappings = [
|
||||
Mapping('id', remote_field='key', formatter=get_remote_id),
|
||||
Mapping('name'),
|
||||
Mapping("id", remote_field="key", formatter=get_remote_id),
|
||||
Mapping("name"),
|
||||
Mapping(
|
||||
'openlibraryKey', remote_field='key',
|
||||
formatter=get_openlibrary_key
|
||||
"openlibraryKey", remote_field="key", formatter=get_openlibrary_key
|
||||
),
|
||||
Mapping('born', remote_field='birth_date'),
|
||||
Mapping('died', remote_field='death_date'),
|
||||
Mapping('bio', formatter=get_description),
|
||||
Mapping("born", remote_field="birth_date"),
|
||||
Mapping("died", remote_field="death_date"),
|
||||
Mapping("bio", formatter=get_description),
|
||||
]
|
||||
|
||||
|
||||
def get_remote_id_from_data(self, data):
|
||||
''' format a url from an openlibrary id field '''
|
||||
""" format a url from an openlibrary id field """
|
||||
try:
|
||||
key = data['key']
|
||||
key = data["key"]
|
||||
except KeyError:
|
||||
raise ConnectorException('Invalid book data')
|
||||
return '%s%s' % (self.books_url, key)
|
||||
|
||||
raise ConnectorException("Invalid book data")
|
||||
return "%s%s" % (self.books_url, key)
|
||||
|
||||
def is_work_data(self, data):
|
||||
return bool(re.match(r'^[\/\w]+OL\d+W$', data['key']))
|
||||
|
||||
return bool(re.match(r"^[\/\w]+OL\d+W$", data["key"]))
|
||||
|
||||
def get_edition_from_work_data(self, data):
|
||||
try:
|
||||
key = data['key']
|
||||
key = data["key"]
|
||||
except KeyError:
|
||||
raise ConnectorException('Invalid book data')
|
||||
url = '%s%s/editions' % (self.books_url, key)
|
||||
raise ConnectorException("Invalid book data")
|
||||
url = "%s%s/editions" % (self.books_url, key)
|
||||
data = get_data(url)
|
||||
return pick_default_edition(data['entries'])
|
||||
|
||||
return pick_default_edition(data["entries"])
|
||||
|
||||
def get_work_from_edition_data(self, data):
|
||||
try:
|
||||
key = data['works'][0]['key']
|
||||
key = data["works"][0]["key"]
|
||||
except (IndexError, KeyError):
|
||||
raise ConnectorException('No work found for edition')
|
||||
url = '%s%s' % (self.books_url, key)
|
||||
raise ConnectorException("No work found for edition")
|
||||
url = "%s%s" % (self.books_url, key)
|
||||
return get_data(url)
|
||||
|
||||
|
||||
def get_authors_from_data(self, data):
|
||||
''' parse author json and load or create authors '''
|
||||
for author_blob in data.get('authors', []):
|
||||
author_blob = author_blob.get('author', author_blob)
|
||||
""" parse author json and load or create authors """
|
||||
for author_blob in data.get("authors", []):
|
||||
author_blob = author_blob.get("author", author_blob)
|
||||
# this id is "/authors/OL1234567A"
|
||||
author_id = author_blob['key']
|
||||
url = '%s%s' % (self.base_url, author_id)
|
||||
author_id = author_blob["key"]
|
||||
url = "%s%s" % (self.base_url, author_id)
|
||||
yield self.get_or_create_author(url)
|
||||
|
||||
|
||||
def get_cover_url(self, cover_blob):
|
||||
''' ask openlibrary for the cover '''
|
||||
""" ask openlibrary for the cover """
|
||||
cover_id = cover_blob[0]
|
||||
image_name = '%s-L.jpg' % cover_id
|
||||
return '%s/b/id/%s' % (self.covers_url, image_name)
|
||||
|
||||
image_name = "%s-L.jpg" % cover_id
|
||||
return "%s/b/id/%s" % (self.covers_url, image_name)
|
||||
|
||||
def parse_search_data(self, data):
|
||||
return data.get('docs')
|
||||
|
||||
return data.get("docs")
|
||||
|
||||
def format_search_result(self, search_result):
|
||||
# build the remote id from the openlibrary key
|
||||
key = self.books_url + search_result['key']
|
||||
author = search_result.get('author_name') or ['Unknown']
|
||||
key = self.books_url + search_result["key"]
|
||||
author = search_result.get("author_name") or ["Unknown"]
|
||||
return SearchResult(
|
||||
title=search_result.get('title'),
|
||||
title=search_result.get("title"),
|
||||
key=key,
|
||||
author=', '.join(author),
|
||||
author=", ".join(author),
|
||||
connector=self,
|
||||
year=search_result.get('first_publish_year'),
|
||||
year=search_result.get("first_publish_year"),
|
||||
)
|
||||
|
||||
|
||||
def parse_isbn_search_data(self, data):
|
||||
return list(data.values())
|
||||
|
||||
def format_isbn_search_result(self, search_result):
|
||||
# build the remote id from the openlibrary key
|
||||
key = self.books_url + search_result['key']
|
||||
authors = search_result.get('authors') or [{'name': 'Unknown'}]
|
||||
author_names = [ author.get('name') for author in authors]
|
||||
key = self.books_url + search_result["key"]
|
||||
authors = search_result.get("authors") or [{"name": "Unknown"}]
|
||||
author_names = [author.get("name") for author in authors]
|
||||
return SearchResult(
|
||||
title=search_result.get('title'),
|
||||
title=search_result.get("title"),
|
||||
key=key,
|
||||
author=', '.join(author_names),
|
||||
author=", ".join(author_names),
|
||||
connector=self,
|
||||
year=search_result.get('publish_date'),
|
||||
year=search_result.get("publish_date"),
|
||||
)
|
||||
|
||||
def load_edition_data(self, olkey):
|
||||
''' query openlibrary for editions of a work '''
|
||||
url = '%s/works/%s/editions' % (self.books_url, olkey)
|
||||
""" query openlibrary for editions of a work """
|
||||
url = "%s/works/%s/editions" % (self.books_url, olkey)
|
||||
return get_data(url)
|
||||
|
||||
|
||||
def expand_book_data(self, book):
|
||||
work = book
|
||||
# go from the edition to the work, if necessary
|
||||
|
@ -164,7 +150,7 @@ class Connector(AbstractConnector):
|
|||
# who knows, man
|
||||
return
|
||||
|
||||
for edition_data in edition_options.get('entries'):
|
||||
for edition_data in edition_options.get("entries"):
|
||||
# does this edition have ANY interesting data?
|
||||
if ignore_edition(edition_data):
|
||||
continue
|
||||
|
@ -172,62 +158,63 @@ class Connector(AbstractConnector):
|
|||
|
||||
|
||||
def ignore_edition(edition_data):
|
||||
''' don't load a million editions that have no metadata '''
|
||||
""" don't load a million editions that have no metadata """
|
||||
# an isbn, we love to see it
|
||||
if edition_data.get('isbn_13') or edition_data.get('isbn_10'):
|
||||
print(edition_data.get('isbn_10'))
|
||||
if edition_data.get("isbn_13") or edition_data.get("isbn_10"):
|
||||
print(edition_data.get("isbn_10"))
|
||||
return False
|
||||
# grudgingly, oclc can stay
|
||||
if edition_data.get('oclc_numbers'):
|
||||
print(edition_data.get('oclc_numbers'))
|
||||
if edition_data.get("oclc_numbers"):
|
||||
print(edition_data.get("oclc_numbers"))
|
||||
return False
|
||||
# if it has a cover it can stay
|
||||
if edition_data.get('covers'):
|
||||
print(edition_data.get('covers'))
|
||||
if edition_data.get("covers"):
|
||||
print(edition_data.get("covers"))
|
||||
return False
|
||||
# keep non-english editions
|
||||
if edition_data.get('languages') and \
|
||||
'languages/eng' not in str(edition_data.get('languages')):
|
||||
print(edition_data.get('languages'))
|
||||
if edition_data.get("languages") and "languages/eng" not in str(
|
||||
edition_data.get("languages")
|
||||
):
|
||||
print(edition_data.get("languages"))
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def get_description(description_blob):
|
||||
''' descriptions can be a string or a dict '''
|
||||
""" descriptions can be a string or a dict """
|
||||
if isinstance(description_blob, dict):
|
||||
return description_blob.get('value')
|
||||
return description_blob.get("value")
|
||||
return description_blob
|
||||
|
||||
|
||||
def get_openlibrary_key(key):
|
||||
''' convert /books/OL27320736M into OL27320736M '''
|
||||
return key.split('/')[-1]
|
||||
""" convert /books/OL27320736M into OL27320736M """
|
||||
return key.split("/")[-1]
|
||||
|
||||
|
||||
def get_languages(language_blob):
|
||||
''' /language/eng -> English '''
|
||||
""" /language/eng -> English """
|
||||
langs = []
|
||||
for lang in language_blob:
|
||||
langs.append(
|
||||
languages.get(lang.get('key', ''), None)
|
||||
)
|
||||
langs.append(languages.get(lang.get("key", ""), None))
|
||||
return langs
|
||||
|
||||
|
||||
def pick_default_edition(options):
|
||||
''' favor physical copies with covers in english '''
|
||||
""" favor physical copies with covers in english """
|
||||
if not options:
|
||||
return None
|
||||
if len(options) == 1:
|
||||
return options[0]
|
||||
|
||||
options = [e for e in options if e.get('covers')] or options
|
||||
options = [e for e in options if \
|
||||
'/languages/eng' in str(e.get('languages'))] or options
|
||||
formats = ['paperback', 'hardcover', 'mass market paperback']
|
||||
options = [e for e in options if \
|
||||
str(e.get('physical_format')).lower() in formats] or options
|
||||
options = [e for e in options if e.get('isbn_13')] or options
|
||||
options = [e for e in options if e.get('ocaid')] or options
|
||||
options = [e for e in options if e.get("covers")] or options
|
||||
options = [
|
||||
e for e in options if "/languages/eng" in str(e.get("languages"))
|
||||
] or options
|
||||
formats = ["paperback", "hardcover", "mass market paperback"]
|
||||
options = [
|
||||
e for e in options if str(e.get("physical_format")).lower() in formats
|
||||
] or options
|
||||
options = [e for e in options if e.get("isbn_13")] or options
|
||||
options = [e for e in options if e.get("ocaid")] or options
|
||||
return options[0]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue