From ce446d57fc7f2d6014b8545cb4c22699d3daafae Mon Sep 17 00:00:00 2001
From: Adam Kelly
Date: Wed, 25 Mar 2020 12:29:21 +0000
Subject: [PATCH] Handle uploaded CSV and match to openlibrary titles.

---
 fedireads/connectors/openlibrary.py     |  1 +
 fedireads/goodreads_import.py           | 76 +++++++++++++++++++++++++
 fedireads/templates/import_results.html |  3 +-
 fedireads/view_actions.py               | 49 +++-----------------
 4 files changed, 85 insertions(+), 44 deletions(-)
 create mode 100644 fedireads/goodreads_import.py

diff --git a/fedireads/connectors/openlibrary.py b/fedireads/connectors/openlibrary.py
index 2c3b458f7..1beedb08b 100644
--- a/fedireads/connectors/openlibrary.py
+++ b/fedireads/connectors/openlibrary.py
@@ -31,6 +31,7 @@ class OpenLibraryConnector(AbstractConnector):
                 key,
                 author[0],
                 doc.get('first_publish_year'),
+                doc
             ))
         return results
 
diff --git a/fedireads/goodreads_import.py b/fedireads/goodreads_import.py
new file mode 100644
index 000000000..1ba17faf2
--- /dev/null
+++ b/fedireads/goodreads_import.py
@@ -0,0 +1,76 @@
+''' handle reading a csv from goodreads '''
+import re
+import csv
+import itertools
+from requests import HTTPError
+
+from fedireads import books_manager
+
+def unquote_string(text):
+    ''' return the inside of a leading ="..." wrapper, or the text unchanged '''
+    match = re.match(r'="([^"]*)"', text)
+    if match:
+        return match.group(1)
+    return text
+
+def construct_search_term(title, author):
+    ''' build a search query from a title and an author name '''
+    # Strip brackets (usually series title) from search term
+    title = re.sub(r'\s*\([^)]*\)\s*', '', title)
+    # Open library doesn't like including author initials in search term.
+    author = re.sub(r'(\w\.)+\s*', '', author)
+
+    return ' '.join([title, author])
+
+class GoodreadsCsv(object):
+    ''' iterate over the rows of a csv file as GoodreadsItems '''
+    def __init__(self, csv_file, start=20, stop=30):
+        # NOTE(review): the defaults only read rows 20-29, which looks like a
+        # development limit; pass start=0, stop=None to read every row
+        self.reader = csv.DictReader(csv_file)
+        self.start = start
+        self.stop = stop
+
+    def __iter__(self):
+        for line in itertools.islice(self.reader, self.start, self.stop):
+            entry = GoodreadsItem(line)
+            try:
+                entry.resolve()
+            except HTTPError:
+                # best effort: a failed lookup leaves entry.book as None
+                pass
+            yield entry
+
+class GoodreadsItem(object):
+    ''' a csv row and the book it was matched to, if any '''
+    def __init__(self, line):
+        self.line = line
+        self.book = None
+
+    def resolve(self):
+        ''' look the book up by isbn, then fall back to title and author '''
+        self.book = self.get_book_from_isbn()
+        if not self.book:
+            self.book = self.get_book_from_title_author()
+
+    def get_book_from_isbn(self):
+        ''' book for the first search result on the ISBN13 column, or None '''
+        isbn = unquote_string(self.line['ISBN13'])
+        if not isbn:
+            # nothing to search for; let the title/author lookup handle it
+            return None
+        search_results = books_manager.search(isbn)
+        if search_results:
+            return books_manager.get_or_create_book(search_results[0].key)
+        return None
+
+    def get_book_from_title_author(self):
+        ''' book for the first search result on Title and Author, or None '''
+        search_term = construct_search_term(self.line['Title'], self.line['Author'])
+        search_results = books_manager.search(search_term)
+        if search_results:
+            return books_manager.get_or_create_book(search_results[0].key)
+        return None
+
+    def __repr__(self):
+        return "<GoodreadsItem {!r}>".format(self.line['Title'])
diff --git a/fedireads/templates/import_results.html b/fedireads/templates/import_results.html
index 73996cfa8..3082878f6 100644
--- a/fedireads/templates/import_results.html
+++ b/fedireads/templates/import_results.html
@@ -18,8 +18,7 @@
diff --git a/fedireads/view_actions.py b/fedireads/view_actions.py
index a3e2c2e59..dae69559a 100644
--- a/fedireads/view_actions.py
+++ b/fedireads/view_actions.py
@@ -1,15 +1,16 @@
 ''' views for actions you can take in the application '''
+from io import TextIOWrapper
+
 from django.contrib.auth import authenticate, login, logout
 from django.contrib.auth.decorators import login_required
 from django.http import HttpResponseBadRequest
 from django.shortcuts import redirect
 from django.template.response import TemplateResponse
-import re
-import csv
 
 from fedireads import forms, models, books_manager, outgoing
 from fedireads.settings import DOMAIN
 from fedireads.views import get_user_from_username
+from fedireads.goodreads_import import GoodreadsCsv
 
 
 def user_login(request):
@@ -289,54 +290,18 @@ def delete_follow_request(request):
     outgoing.handle_outgoing_reject(requester, request.user, follow_request)
     return redirect('/user/%s' % request.user.localname)
 
-
-def unquote_string(text):
-    match = re.match(r'="([^"]*)"', text)
-    if match:
-        return match.group(1)
-    else:
-        return text
-
-def construct_search_term(title, author):
-    # Strip brackets (usually series title from search term)
-    title = re.sub(r'\s*\([^)]*\)\s*', '', title)
-    # Open library doesn't like including author initials in search term.
-    author = re.sub(r'(\w\.)+\s*', '', author)
-
-    return ' '.join([title, author])
 
-import itertools
-from io import TextIOWrapper
-from requests import HTTPError
-
 @login_required
 def import_data(request):
     form = forms.ImportForm(request.POST, request.FILES)
     if form.is_valid():
-        reader = csv.DictReader(TextIOWrapper(request.FILES['csv_file'], encoding=request.encoding))
         results = []
         failures = []
-        for line in itertools.islice(reader, 20):
-            isbn = unquote_string(line['ISBN13'])
-            print(line['Title'], isbn, line['Exclusive Shelf'])
-            search_results = books_manager.search(isbn)
-            if search_results:
-                book = books_manager.get_or_create_book(search_results[0].key)
-                print(book)
-                results.append(book)
+        for item in GoodreadsCsv(TextIOWrapper(request.FILES['csv_file'], encoding=request.encoding)):
+            if item.book:
+                results.append(item.book)
             else:
-                try:
-                    search_term = construct_search_term(line['Title'], line['Author'])
-                    print("Search term: ", search_term)
-                    search_results = books_manager.search(search_term)
-                    if search_results:
-                        book = books_manager.get_or_create_book(search_results[0].key)
-                        print(book)
-                        results.append(book)
-                    else:
-                        failures.append(line)
-                except HTTPError:
-                    failures.append(line) #
+                failures.append(item)
         return TemplateResponse(request, 'import_results.html', {
             'results': results,
             'failures': failures