From 8f0f3e6ace98cf8812473b1569fc2f0b8204e437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Sun, 15 Oct 2023 19:43:51 -0300 Subject: [PATCH 1/4] ImportItem: preserve parsed timezones in date_started, date_read This is a follow-up to b564e514f ("Handle parsed dates that already have a timezone on import"), which was applied to `date_added` only. (Apart from consistency, this will make it easier to apply future parsing fixes.) --- bookwyrm/models/import_job.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 8929e9037..8329ee955 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -259,12 +259,10 @@ class ImportItem(models.Model): except ValueError: return None - @property - def date_added(self): - """when the book was added to this dataset""" - if self.normalized_data.get("date_added"): + def _parse_datefield(self, field, /): + if self.normalized_data.get(field): parsed_date_added = dateutil.parser.parse( - self.normalized_data.get("date_added") + self.normalized_data.get(field) ) if timezone.is_aware(parsed_date_added): @@ -274,23 +272,20 @@ class ImportItem(models.Model): return timezone.make_aware(parsed_date_added) return None + @property + def date_added(self): + """when the book was added to this dataset""" + return self._parse_datefield("date_added") + @property def date_started(self): """when the book was started""" - if self.normalized_data.get("date_started"): - return timezone.make_aware( - dateutil.parser.parse(self.normalized_data.get("date_started")) - ) - return None + return self._parse_datefield("date_started") @property def date_read(self): """the date a book was completed""" - if self.normalized_data.get("date_finished"): - return timezone.make_aware( - dateutil.parser.parse(self.normalized_data.get("date_finished")) - ) - return None + return 
self._parse_datefield("date_finished") @property def reads(self): From c066d11eb1c62021a980ec1535b7c8fde0bbf9a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Sun, 15 Oct 2023 19:49:00 -0300 Subject: [PATCH 2/4] Bugfix: default missing date components to 1, not today's Fixes: #2660. --- bookwyrm/models/import_job.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 8329ee955..f5d86ad2e 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -1,4 +1,5 @@ """ track progress of goodreads imports """ +from datetime import datetime import math import re import dateutil.parser @@ -260,17 +261,14 @@ class ImportItem(models.Model): return None def _parse_datefield(self, field, /): - if self.normalized_data.get(field): - parsed_date_added = dateutil.parser.parse( - self.normalized_data.get(field) - ) + if not (date := self.normalized_data.get(field)): + return None - if timezone.is_aware(parsed_date_added): - # Keep timezone if import already had one - return parsed_date_added + defaults = datetime(1970, 1, 1) # "2022-10" => "2022-10-01" + parsed = dateutil.parser.parse(date, default=defaults) - return timezone.make_aware(parsed_date_added) - return None + # Keep timezone if import already had one, else use default. 
+ return parsed if timezone.is_aware(parsed) else timezone.make_aware(parsed) @property def date_added(self): From c02306a66b23feb11e9436c0084a194c3485fda5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Sun, 15 Oct 2023 19:58:51 -0300 Subject: [PATCH 3/4] Default to Jan 1st too on incomplete dates received from ActivityPub --- bookwyrm/models/fields.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bookwyrm/models/fields.py b/bookwyrm/models/fields.py index d21c9363d..28effaf9b 100644 --- a/bookwyrm/models/fields.py +++ b/bookwyrm/models/fields.py @@ -1,5 +1,6 @@ """ activitypub-aware django model fields """ from dataclasses import MISSING +from datetime import datetime import re from uuid import uuid4 from urllib.parse import urljoin @@ -534,8 +535,10 @@ class DateTimeField(ActivitypubFieldMixin, models.DateTimeField): return value.isoformat() def field_from_activity(self, value, allow_external_connections=True): + missing_fields = datetime(1970, 1, 1) # "2022-10" => "2022-10-01" try: - date_value = dateutil.parser.parse(value) + # TODO(dato): investigate `ignoretz=True` wrt bookwyrm#3028. 
+ date_value = dateutil.parser.parse(value, default=missing_fields) try: return timezone.make_aware(date_value) except ValueError: From 8afcb9b6d3e4ab46d38afed7e5bdc9cfd9c890f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Sun, 15 Oct 2023 22:03:45 -0300 Subject: [PATCH 4/4] Fix tests warning: ImportJob.updated_date received a naive datetime --- bookwyrm/tests/views/imports/test_import.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bookwyrm/tests/views/imports/test_import.py b/bookwyrm/tests/views/imports/test_import.py index ea88b197d..7dd87d4c2 100644 --- a/bookwyrm/tests/views/imports/test_import.py +++ b/bookwyrm/tests/views/imports/test_import.py @@ -7,6 +7,7 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.template.response import TemplateResponse from django.test import TestCase from django.test.client import RequestFactory +from django.utils import timezone from bookwyrm import forms, models, views from bookwyrm.tests.validate_html import validate_html @@ -128,7 +129,7 @@ class ImportViews(TestCase): def test_get_average_import_time_with_data(self): """Now, with data""" - now = datetime.datetime.now() + now = timezone.now() two_hours_ago = now - datetime.timedelta(hours=2) four_hours_ago = now - datetime.timedelta(hours=4) models.ImportJob.objects.create( @@ -152,7 +153,7 @@ class ImportViews(TestCase): def test_get_average_import_time_ignore_stopped(self): """Don't include stopped, do include no status""" - now = datetime.datetime.now() + now = timezone.now() two_hours_ago = now - datetime.timedelta(hours=2) four_hours_ago = now - datetime.timedelta(hours=4) models.ImportJob.objects.create(