1
0
Fork 0

Merge from main into 'better-fmt-patch-calls'

Conflicts:
	bookwyrm/tests/test_book_search.py
This commit is contained in:
Adeodato Simó 2024-03-27 17:12:56 -03:00
commit 3133a47b7c
No known key found for this signature in database
GPG key ID: 98EF291323013F6E
85 changed files with 8356 additions and 2387 deletions

View file

@ -1,4 +1,5 @@
"""Do further startup configuration and initialization"""
import os
import urllib
import logging
@ -14,16 +15,16 @@ def download_file(url, destination):
"""Downloads a file to the given path"""
try:
# Ensure our destination directory exists
os.makedirs(os.path.dirname(destination))
os.makedirs(os.path.dirname(destination), exist_ok=True)
with urllib.request.urlopen(url) as stream:
with open(destination, "b+w") as outfile:
outfile.write(stream.read())
except (urllib.error.HTTPError, urllib.error.URLError):
logger.info("Failed to download file %s", url)
except OSError:
logger.info("Couldn't open font file %s for writing", destination)
except: # pylint: disable=bare-except
logger.info("Unknown error in file download")
except (urllib.error.HTTPError, urllib.error.URLError) as err:
logger.error("Failed to download file %s: %s", url, err)
except OSError as err:
logger.error("Couldn't open font file %s for writing: %s", destination, err)
except Exception as err: # pylint:disable=broad-except
logger.error("Unknown error in file download: %s", err)
class BookwyrmConfig(AppConfig):

View file

@ -15,6 +15,7 @@ class AuthorForm(CustomForm):
"aliases",
"bio",
"wikipedia_link",
"wikidata",
"website",
"born",
"died",
@ -32,6 +33,7 @@ class AuthorForm(CustomForm):
"wikipedia_link": forms.TextInput(
attrs={"aria-describedby": "desc_wikipedia_link"}
),
"wikidata": forms.TextInput(attrs={"aria-describedby": "desc_wikidata"}),
"website": forms.TextInput(attrs={"aria-describedby": "desc_website"}),
"born": forms.SelectDateWidget(attrs={"aria-describedby": "desc_born"}),
"died": forms.SelectDateWidget(attrs={"aria-describedby": "desc_died"}),

View file

@ -0,0 +1,16 @@
# Generated by Django 3.2.20 on 2023-11-24 17:11
from django.db import migrations
# Schema migration that removes the index named "bookwyrm_au_search__b050a8_gin"
# from the "author" model. It must run after migration 0188_theme_loads.
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0188_theme_loads"),
]
operations = [
# The index is identified by its exact database name.
migrations.RemoveIndex(
model_name="author",
name="bookwyrm_au_search__b050a8_gin",
),
]

View file

@ -0,0 +1,76 @@
# Generated by Django 3.2.20 on 2023-11-25 00:47
from importlib import import_module
import re
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations
# Migration module names start with a digit, so the module cannot be named in a
# regular `import` statement; import_module() loads it from its dotted path.
trigger_migration = import_module("bookwyrm.migrations.0077_auto_20210623_2155")
# it's _very_ convenient for development that this migration be reversible
# The two operations picked here (by position) supply the `reverse_sql` of the
# RunSQL steps below that drop the old triggers.
search_vector_trigger = trigger_migration.Migration.operations[4]
author_search_vector_trigger = trigger_migration.Migration.operations[5]
# Import-time sanity checks: make sure the positional lookups above really
# selected the operations whose SQL creates the two expected triggers.
assert re.search(r"\bCREATE TRIGGER search_vector_trigger\b", search_vector_trigger.sql)
assert re.search(
r"\bCREATE TRIGGER author_search_vector_trigger\b",
author_search_vector_trigger.sql,
)
# Replaces the raw-SQL search vector triggers with pgtrigger-managed ones:
# adds the two new triggers, drops the old triggers and their functions, and
# finally forces a recalculation of every book's search vector.
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0190_book_search_updates"),
]
operations = [
# BEFORE INSERT/UPDATE trigger on bookwyrm_book: assigns new.search_vector
# from the title (weight A), subtitle (B), names of the linked authors (C)
# and series (D).
pgtrigger.migrations.AddTrigger(
model_name="book",
trigger=pgtrigger.compiler.Trigger(
name="update_search_vector_on_book_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="new.search_vector := setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D');RETURN NEW;",
hash="77d6399497c0a89b0bf09d296e33c396da63705c",
operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
table="bookwyrm_book",
when="BEFORE",
),
),
),
# AFTER UPDATE OF "name" trigger on bookwyrm_author: writes an empty
# search_vector to every book linked to the author. "search_vector" is in
# the book trigger's UPDATE OF list above, so that write makes the book
# trigger recompute the value.
pgtrigger.migrations.AddTrigger(
model_name="author",
trigger=pgtrigger.compiler.Trigger(
name="reset_search_vector_on_author_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
hash="e7bbf08711ff3724c58f4d92fb7a082ffb3d7826",
operation='UPDATE OF "name"',
pgid="pgtrigger_reset_search_vector_on_author_edit_a447c",
table="bookwyrm_author",
when="AFTER",
),
),
),
# Drop the old book trigger and its function; reversing this step re-runs
# the SQL of the operation taken from migration 0077.
migrations.RunSQL(
sql="""DROP TRIGGER IF EXISTS search_vector_trigger ON bookwyrm_book;
DROP FUNCTION IF EXISTS book_trigger;
""",
reverse_sql=search_vector_trigger.sql,
),
# Same as above, for the old author trigger and its function.
migrations.RunSQL(
sql="""DROP TRIGGER IF EXISTS author_search_vector_trigger ON bookwyrm_author;
DROP FUNCTION IF EXISTS author_trigger;
""",
reverse_sql=author_search_vector_trigger.sql,
),
migrations.RunSQL(
# Recalculate book search vector for any missed author name changes
# due to bug in JOIN in the old trigger.
# (Writing to search_vector fires the book trigger added above; nothing
# needs to be undone when the migration is reversed.)
sql="UPDATE bookwyrm_book SET search_vector = NULL;",
reverse_sql=migrations.RunSQL.noop,
),
]

View file

@ -0,0 +1,13 @@
# Generated by Django 3.2.23 on 2024-03-18 00:48
from django.db import migrations
# Merge migration: joins the two migration branches listed in `dependencies`
# into a single history. It performs no schema or data changes of its own.
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0191_migrate_search_vec_triggers_to_pgtriggers"),
("bookwyrm", "0195_alter_user_preferred_language"),
]
# Intentionally empty: only the dependency graph is affected.
operations = []

View file

@ -0,0 +1,41 @@
# Generated by Django 3.2.25 on 2024-03-20 15:15
import django.contrib.postgres.indexes
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations
# Sets up full-text search for authors: adds a GIN index on search_vector, adds
# the trigger that maintains the column, and populates it for existing rows.
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0196_merge_pr3134_into_main"),
]
operations = [
# GIN index over the author's search_vector column.
migrations.AddIndex(
model_name="author",
index=django.contrib.postgres.indexes.GinIndex(
fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
),
),
# BEFORE INSERT/UPDATE trigger on bookwyrm_author: assigns
# new.search_vector from the name (weight A) and the aliases joined with
# spaces (weight B), both parsed with the 'simple' configuration.
pgtrigger.migrations.AddTrigger(
model_name="author",
trigger=pgtrigger.compiler.Trigger(
name="update_search_vector_on_author_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="new.search_vector := setweight(to_tsvector('simple', new.name), 'A') || setweight(to_tsvector('simple', coalesce(array_to_string(new.aliases, ' '), '')), 'B');RETURN NEW;",
hash="b97919016236d74d0ade51a0769a173ea269da64",
operation='INSERT OR UPDATE OF "name", "aliases", "search_vector"',
pgid="pgtrigger_update_search_vector_on_author_edit_c61cb",
table="bookwyrm_author",
when="BEFORE",
),
),
),
migrations.RunSQL(
# Calculate search vector for all Authors.
# (The written value is irrelevant: "search_vector" is in the trigger's
# UPDATE OF list, so the trigger above overwrites it on every row.)
sql="UPDATE bookwyrm_author SET search_vector = NULL;",
reverse_sql="UPDATE bookwyrm_author SET search_vector = NULL;",
),
]

View file

@ -0,0 +1,57 @@
# Generated by Django 3.2.25 on 2024-03-20 15:52
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations
# Makes author aliases part of the book search vector: the author "reset"
# trigger is replaced by one that also fires on alias changes, the book trigger
# is replaced by one that indexes aliases, and all books are recalculated.
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0197_author_search_vector"),
]
operations = [
# Remove the two existing triggers before adding their replacements.
pgtrigger.migrations.RemoveTrigger(
model_name="author",
name="reset_search_vector_on_author_edit",
),
pgtrigger.migrations.RemoveTrigger(
model_name="book",
name="update_search_vector_on_book_edit",
),
# AFTER UPDATE OF "name", "aliases" trigger on bookwyrm_author: writes an
# empty search_vector to every book linked to the author, which in turn
# fires the book trigger below ("search_vector" is in its UPDATE OF list).
pgtrigger.migrations.AddTrigger(
model_name="author",
trigger=pgtrigger.compiler.Trigger(
name="reset_book_search_vector_on_author_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
hash="68422c0f29879c5802b82159dde45297eff53e73",
operation='UPDATE OF "name", "aliases"',
pgid="pgtrigger_reset_book_search_vector_on_author_edit_a50c7",
table="bookwyrm_author",
when="AFTER",
),
),
),
# BEFORE INSERT/UPDATE trigger on bookwyrm_book: assigns new.search_vector
# from the title (weight A), subtitle (B), the names and aliases of the
# linked authors (C) and the series (D).
pgtrigger.migrations.AddTrigger(
model_name="book",
trigger=pgtrigger.compiler.Trigger(
name="update_search_vector_on_book_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="WITH author_names AS (SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') AS name_and_aliases FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) SELECT setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(name_and_aliases), ' '), '')), 'C') FROM author_names) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D') INTO new.search_vector;RETURN NEW;",
hash="9324f5ca76a6f5e63931881d62d11da11f595b2c",
operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
table="bookwyrm_book",
when="BEFORE",
),
),
),
migrations.RunSQL(
# Recalculate search vector for all Books because it now includes
# Author aliases.
# (The same statement is used in reverse so the vectors are rebuilt by
# whichever book trigger is installed at that point.)
sql="UPDATE bookwyrm_book SET search_vector = NULL;",
reverse_sql="UPDATE bookwyrm_book SET search_vector = NULL;",
),
]

View file

@ -2,11 +2,13 @@
import re
from typing import Tuple, Any
from django.contrib.postgres.indexes import GinIndex
from django.db import models
from django.contrib.postgres.indexes import GinIndex
import pgtrigger
from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN
from bookwyrm.utils.db import format_trigger
from .book import BookDataModel
from . import fields
@ -67,9 +69,46 @@ class Author(BookDataModel):
"""editions and works both use "book" instead of model_name"""
return f"https://{DOMAIN}/author/{self.id}"
activity_serializer = activitypub.Author
class Meta:
"""sets up postgres GIN index field"""
"""sets up indexes and triggers"""
# pylint: disable=line-too-long
indexes = (GinIndex(fields=["search_vector"]),)
triggers = [
pgtrigger.Trigger(
name="update_search_vector_on_author_edit",
when=pgtrigger.Before,
operation=pgtrigger.Insert
| pgtrigger.UpdateOf("name", "aliases", "search_vector"),
func=format_trigger(
"""new.search_vector :=
-- author name, with priority A
setweight(to_tsvector('simple', new.name), 'A') ||
-- author aliases, with priority B
setweight(to_tsvector('simple', coalesce(array_to_string(new.aliases, ' '), '')), 'B');
RETURN new;
"""
),
),
pgtrigger.Trigger(
name="reset_book_search_vector_on_author_edit",
when=pgtrigger.After,
operation=pgtrigger.UpdateOf("name", "aliases"),
func=format_trigger(
"""WITH updated_books AS (
SELECT book_id
FROM bookwyrm_book_authors
WHERE author_id = new.id
)
UPDATE bookwyrm_book
SET search_vector = ''
FROM updated_books
WHERE id = updated_books.book_id;
RETURN new;
"""
),
),
]
activity_serializer = activitypub.Author

View file

@ -13,6 +13,7 @@ from django.utils.translation import gettext_lazy as _
from model_utils import FieldTracker
from model_utils.managers import InheritanceManager
from imagekit.models import ImageSpecField
import pgtrigger
from bookwyrm import activitypub
from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator
@ -24,6 +25,7 @@ from bookwyrm.settings import (
ENABLE_PREVIEW_IMAGES,
ENABLE_THUMBNAIL_GENERATION,
)
from bookwyrm.utils.db import format_trigger
from .activitypub_mixin import OrderedCollectionPageMixin, ObjectMixin
from .base_model import BookWyrmModel
@ -232,9 +234,49 @@ class Book(BookDataModel):
)
class Meta:
"""sets up postgres GIN index field"""
"""set up indexes and triggers"""
# pylint: disable=line-too-long
indexes = (GinIndex(fields=["search_vector"]),)
triggers = [
pgtrigger.Trigger(
name="update_search_vector_on_book_edit",
when=pgtrigger.Before,
operation=pgtrigger.Insert
| pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"),
func=format_trigger(
"""
WITH author_names AS (
SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') AS name_and_aliases
FROM bookwyrm_author
LEFT JOIN bookwyrm_book_authors
ON bookwyrm_author.id = bookwyrm_book_authors.author_id
WHERE bookwyrm_book_authors.book_id = new.id
)
SELECT
-- title, with priority A (parse in English, default to simple if empty)
setweight(COALESCE(nullif(
to_tsvector('english', new.title), ''),
to_tsvector('simple', new.title)), 'A') ||
-- subtitle, with priority B (always in English?)
setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
-- list of authors names and aliases (with priority C)
(SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(name_and_aliases), ' '), '')), 'C')
FROM author_names
) ||
--- last: series name, with lowest priority
setweight(to_tsvector('english', COALESCE(new.series, '')), 'D')
INTO new.search_vector;
RETURN new;
"""
),
)
]
class Work(OrderedCollectionPageMixin, Book):

View file

@ -260,12 +260,12 @@ class PrivacyField(ActivitypubFieldMixin, models.CharField):
if to == [self.public]:
setattr(instance, self.name, "public")
elif self.public in cc:
setattr(instance, self.name, "unlisted")
elif to == [user.followers_url]:
setattr(instance, self.name, "followers")
elif cc == []:
setattr(instance, self.name, "direct")
elif self.public in cc:
setattr(instance, self.name, "unlisted")
else:
setattr(instance, self.name, "followers")
return original == getattr(instance, self.name)

View file

@ -1,4 +1,5 @@
""" Generate social media preview images for twitter/mastodon/etc """
import math
import os
import textwrap
@ -42,8 +43,8 @@ def get_imagefont(name, size):
return ImageFont.truetype(path, size)
except KeyError:
logger.error("Font %s not found in config", name)
except OSError:
logger.error("Could not load font %s from file", name)
except OSError as err:
logger.error("Could not load font %s from file: %s", name, err)
return ImageFont.load_default()
@ -59,7 +60,7 @@ def get_font(weight, size=28):
font.set_variation_by_name("Bold")
if weight == "regular":
font.set_variation_by_name("Regular")
except AttributeError:
except OSError:
pass
return font

View file

@ -108,6 +108,7 @@ INSTALLED_APPS = [
"celery",
"django_celery_beat",
"imagekit",
"pgtrigger",
"storages",
]

View file

@ -8,7 +8,7 @@
<h1 class="title">{% trans "File too large" %}</h1>
<p class="content">{% trans "The file you are uploading is too large." %}</p>
<p class="content">
{% blocktrans %}
{% blocktrans trimmed %}
You can try using a smaller file, or ask your BookWyrm server administrator to increase the <code>DATA_UPLOAD_MAX_MEMORY_SIZE</code> setting.
{% endblocktrans %}
</p>

View file

@ -55,6 +55,8 @@
<p class="field"><label class="label" for="id_wikipedia_link">{% trans "Wikipedia link:" %}</label> {{ form.wikipedia_link }}</p>
<p class="field"><label class="label" for="id_wikidata">{% trans "Wikidata:" %}</label> {{ form.wikidata }}</p>
{% include 'snippets/form_errors.html' with errors_list=form.wikipedia_link.errors id="desc_wikipedia_link" %}
<p class="field"><label class="label" for="id_website">{% trans "Website:" %}</label> {{ form.website }}</p>

View file

@ -45,18 +45,22 @@
{% endif %}
{% if book.series %}
<meta itemprop="position" content="{{ book.series_number }}">
{% spaceless %}
<span itemprop="isPartOf" itemscope itemtype="https://schema.org/BookSeries">
{% if book.authors.exists %}
<a href="{% url 'book-series-by' book.authors.first.id %}?series_name={{ book.series | urlencode }}"
itemprop="url">
{% endif %}
<span itemprop="name">{{ book.series }}</span>
{% if book.series_number %} #{{ book.series_number }}{% endif %}
{% if book.authors.exists %}
</a>
{% endif %}
</span>
{% if book.series_number %}
<span>, #</span>
<span itemprop="position">{{ book.series_number }}</span>
{% endif %}
{% endspaceless %}
{% endif %}
</p>
{% endif %}

View file

@ -109,7 +109,7 @@
<p class="block">
{% if request.user.is_authenticated %}
{% if not remote %}
<a href="{{ request.path }}?q={{ query }}&type=book&remote=true" id="tour-load-from-other-catalogues">
<a href="{{ request.path }}?q={{ query|urlencode }}&type=book&remote=true" id="tour-load-from-other-catalogues">
{% trans "Load results from other catalogues" %}
</a>
{% else %}

View file

@ -41,18 +41,18 @@
<nav class="tabs">
<ul>
<li{% if type == "book" %} class="is-active"{% endif %}>
<a href="{% url 'search' %}?q={{ query }}&type=book">{% trans "Books" %}</a>
<a href="{% url 'search' %}?q={{ query|urlencode }}&type=book">{% trans "Books" %}</a>
</li>
<li{% if type == "author" %} class="is-active"{% endif %}>
<a href="{% url 'search' %}?q={{ query }}&type=author">{% trans "Authors" %}</a>
<a href="{% url 'search' %}?q={{ query|urlencode }}&type=author">{% trans "Authors" %}</a>
</li>
{% if request.user.is_authenticated %}
<li{% if type == "user" %} class="is-active"{% endif %}>
<a href="{% url 'search' %}?q={{ query }}&type=user">{% trans "Users" %}</a>
<a href="{% url 'search' %}?q={{ query|urlencode }}&type=user">{% trans "Users" %}</a>
</li>
{% endif %}
<li{% if type == "list" %} class="is-active"{% endif %}>
<a href="{% url 'search' %}?q={{ query }}&type=list">{% trans "Lists" %}</a>
<a href="{% url 'search' %}?q={{ query|urlencode }}&type=list">{% trans "Lists" %}</a>
</li>
</ul>
</nav>

View file

@ -0,0 +1,3 @@
{% if book.series %}
({{book.series}}{%if book.series_number %}, #{{book.series_number}}{% endif %})
{% endif %}

View file

@ -9,12 +9,15 @@
{% if book.authors.exists %}
{% blocktrans trimmed with path=book.local_path title=book|book_title %}
<a href="{{ path }}">{{ title }}</a> by
<a href="{{ path }}">{{ title }}</a>
by
{% endblocktrans %}&nbsp;{% include 'snippets/authors.html' with book=book limit=3 %}
{% else %}
<a href="{{ book.local_path }}">{{ book|book_title }}</a>
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endcache %}
{% endspaceless %}

View file

@ -17,4 +17,7 @@ commented on <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endwith %}

View file

@ -17,4 +17,7 @@ quoted <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endwith %}

View file

@ -19,4 +19,7 @@ finished reading <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endspaceless %}

View file

@ -19,4 +19,7 @@ started reading <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endspaceless %}

View file

@ -17,4 +17,7 @@ reviewed <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endwith %}

View file

@ -19,5 +19,8 @@ stopped reading <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endspaceless %}

View file

@ -19,4 +19,7 @@ wants to read <a href="{{ book_path }}">{{ book }}</a>
{% endblocktrans %}
{% endif %}
{% include 'snippets/book_series.html' with book=book %}
{% endspaceless %}

View file

@ -136,6 +136,7 @@
],
"bio": "<p>American political scientist and anthropologist</p>",
"wikipediaLink": "https://en.wikipedia.org/wiki/James_C._Scott",
"wikidata": "Q3025403",
"website": "",
"@context": "https://www.w3.org/ns/activitystreams"
}
@ -320,6 +321,7 @@
"aliases": [],
"bio": "",
"wikipediaLink": "",
"wikidata": "",
"website": "",
"@context": "https://www.w3.org/ns/activitystreams"
}
@ -396,4 +398,4 @@
"https://your.domain.here/user/rat"
],
"blocks": ["https://your.domain.here/user/badger"]
}
}

View file

@ -0,0 +1,87 @@
""" test searching for authors """
from django.test import TestCase
from django.contrib.postgres.search import SearchRank, SearchQuery
from django.db.models import F
from bookwyrm import models
class AuthorSearch(TestCase):
    """Find authors through their stored search vector."""

    @classmethod
    def setUpTestData(cls):
        """Create two authors; "Alice" is a name for one and an alias for the other."""
        bob_aliases = ["Robertus", "Alice"]
        cls.bob = models.Author.objects.create(name="Bob", aliases=bob_aliases)
        cls.alice = models.Author.objects.create(name="Alice")

    def test_search(self):
        """An author is found by name."""
        matches = self._search("Bob")
        self.assertEqual(len(matches), 1)
        self.assertEqual(matches[0], self.bob)

    def test_alias_priority(self):
        """Aliases match too, but rank below a match on the name."""
        matches = self._search("Alice")
        self.assertEqual(len(matches), 2)
        self.assertEqual(matches[0], self.alice)

    def _search_first(self, query):
        """Return only the best-ranked result of ``_search`` for the query."""
        return self._search(query, return_first=True)

    @staticmethod
    def _search(query, *, return_first=False):
        """Run a ranked author search.

        Returns the queryset of matching authors ordered by descending rank,
        or just its first element when ``return_first`` is true.
        """
        min_confidence = 0
        search_query = SearchQuery(query, config="simple")
        rank = SearchRank(F("search_vector"), search_query)

        matching = models.Author.objects.filter(search_vector=search_query)
        ranked = matching.annotate(rank=rank).filter(rank__gt=min_confidence)
        ordered = ranked.order_by("-rank")

        return ordered.first() if return_first else ordered
class SearchVectorTest(TestCase):
    """Verify the search_vector value stored for authors."""

    def test_search_vector_simple(self):
        """An author with only a name gets a one-lexeme vector."""
        mary = self._create_author("Mary")
        self.assertEqual(mary.search_vector, "'mary':1A")

    def test_search_vector_aliases(self):
        """Aliases are part of the vector, with a lower weight than the name."""
        mary = self._create_author("Mary", aliases=["Maria", "Example"])
        self.assertEqual(mary.search_vector, "'example':3B 'maria':2B 'mary':1A")

    def test_search_vector_parse_author(self):
        """Neither name nor aliases are stemmed or filtered for stop words."""
        writer = self._create_author("Writes", aliases=["Reads"])
        self.assertEqual(writer.search_vector, "'reads':2B 'writes':1A")

    def test_search_vector_on_update(self):
        """Renaming an author refreshes the stored vector."""
        renamed = self._create_author("Mary")
        self.assertEqual(renamed.search_vector, "'mary':1A")

        renamed.name = "Example"
        renamed.save(broadcast=False)
        renamed.refresh_from_db()

        self.assertEqual(renamed.search_vector, "'example':1A")

    @staticmethod
    def _create_author(name, /, *, aliases=None):
        """Create an author and reload it so database-computed fields are visible."""
        alias_list = aliases or []
        created = models.Author.objects.create(name=name, aliases=alias_list)
        created.refresh_from_db()
        return created

View file

@ -1,5 +1,6 @@
""" test searching for books """
import datetime
from django.db import connection
from django.test import TestCase
from django.utils import timezone
@ -13,6 +14,13 @@ class BookSearch(TestCase):
@classmethod
def setUpTestData(cls):
"""we need basic test data and mocks"""
cls.first_author = models.Author.objects.create(
name="Author One", aliases=["The First"]
)
cls.second_author = models.Author.objects.create(
name="Author Two", aliases=["The Second"]
)
cls.work = models.Work.objects.create(title="Example Work")
cls.first_edition = models.Edition.objects.create(
@ -22,6 +30,8 @@ class BookSearch(TestCase):
physical_format="Paperback",
published_date=datetime.datetime(2019, 4, 9, 0, 0, tzinfo=timezone.utc),
)
cls.first_edition.authors.add(cls.first_author)
cls.second_edition = models.Edition.objects.create(
title="Another Edition",
parent_work=cls.work,
@ -29,19 +39,34 @@ class BookSearch(TestCase):
openlibrary_key="hello",
pages=150,
)
cls.second_edition.authors.add(cls.first_author)
cls.second_edition.authors.add(cls.second_author)
cls.third_edition = models.Edition.objects.create(
title="Another Edition with annoying ISBN",
parent_work=cls.work,
isbn_10="022222222X",
)
cls.third_edition.authors.add(cls.first_author)
cls.third_edition.authors.add(cls.second_author)
def test_search(self):
"""search for a book in the db"""
# title/author
# title
results = book_search.search("Example")
self.assertEqual(len(results), 1)
self.assertEqual(results[0], self.first_edition)
# author
results = book_search.search("One")
self.assertEqual(len(results), 1)
self.assertEqual(results[0], self.first_edition)
# author alias
results = book_search.search("First")
self.assertEqual(len(results), 1)
self.assertEqual(results[0], self.first_edition)
# isbn
results = book_search.search("0000000000")
self.assertEqual(len(results), 1)
@ -140,3 +165,265 @@ class BookSearch(TestCase):
# there's really not much to test here, it's just a dataclass
self.assertEqual(result.confidence, 1)
self.assertEqual(result.title, "Title")
# Unit-level checks of the exact search_vector text stored for books.
# Expected values use PostgreSQL's tsvector text form, 'lexeme':<position><weight>;
# a position without a letter carries the default weight D.
class SearchVectorTest(TestCase):
"""check search_vector is computed correctly"""
def test_search_vector_simple(self):
"""simplest search vector"""
book = self._create_book("Book", "Mary")
self.assertEqual(book.search_vector, "'book':1A 'mary':2C") # A > C (priority)
def test_search_vector_all_parts(self):
"""search vector with subtitle and series"""
# for a book like this we call `to_tsvector("Book Long Mary Bunch")`, hence the
# indexes in the search vector. (priority "D" is the default, and never shown.)
book = self._create_book(
"Book",
"Mary",
subtitle="Long",
series="Bunch",
author_alias=["Maria", "Mary Ann"],
)
self.assertEqual(
book.search_vector,
"'ann':6C 'book':1A 'bunch':7 'long':2B 'maria':4C 'mary':3C,5C",
)
def test_search_vector_parse_book(self):
"""book parts are parsed in english"""
# FIXME: at some point this should stop being the default.
book = self._create_book(
"Edition", "Editor", series="Castle", subtitle="Writing"
)
self.assertEqual(
book.search_vector, "'castl':4 'edit':1A 'editor':3C 'write':2B"
)
def test_search_vector_parse_author(self):
"""author name is not stem'd or affected by stop words"""
book = self._create_book("Writing", "Writes", author_alias=["Reads"])
self.assertEqual(book.search_vector, "'reads':3C 'write':1A 'writes':2C")
book = self._create_book("She Is Writing", "She Writes")
self.assertEqual(book.search_vector, "'she':4C 'write':3A 'writes':5C")
def test_search_vector_parse_title_empty(self):
"""empty parse in English retried as simple title"""
# first title: every word is dropped by the English parse, so the title
# shows up unstemmed; second title: "come" and "hear" survive it
book = self._create_book("Here We", "John")
self.assertEqual(book.search_vector, "'here':1A 'john':3C 'we':2A")
book = self._create_book("Hear We Come", "John")
self.assertEqual(book.search_vector, "'come':3A 'hear':1A 'john':4C")
book = self._create_book("there there", "the")
self.assertEqual(book.search_vector, "'the':3C 'there':1A,2A")
def test_search_vector_no_author(self):
"""book with no authors gets processed normally"""
book = self._create_book("Book", None, series="Bunch")
self.assertEqual(book.search_vector, "'book':1A 'bunch':2")
book = self._create_book("there there", None)
self.assertEqual(book.search_vector, "'there':1A,2A")
# n.b.: the following originally from test_postgres.py
def test_search_vector_on_update(self):
"""make sure that search_vector is being set correctly on edit"""
book = self._create_book("The Long Goodbye", None)
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
book.title = "The Even Longer Goodbye"
book.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A")
def test_search_vector_on_author_update(self):
"""update search when an author name changes"""
book = self._create_book("The Long Goodbye", "The Rays")
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A 'rays':5C 'the':4C")
author = models.Author.objects.get(name="The Rays")
author.name = "Jeremy"
author.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
# changing only the aliases must refresh the book's vector as well
author.aliases = ["Example"]
author.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(
book.search_vector, "'example':5C 'goodby':3A 'jeremy':4C 'long':2A"
)
def test_search_vector_on_author_delete(self):
"""update search when an author is deleted"""
book = self._create_book("The Long Goodbye", "The Rays")
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A 'rays':5C 'the':4C")
author = models.Author.objects.get(name="The Rays")
# the author is unlinked from the book here; the Author row itself stays
book.authors.remove(author)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
def test_search_vector_fields(self):
"""language field irrelevant for search_vector"""
author = models.Author.objects.create(name="The Rays")
book = models.Edition.objects.create(
title="The Long Goodbye",
subtitle="wow cool",
series="series name",
languages=["irrelevant"],
)
book.authors.add(author)
book.refresh_from_db()
self.assertEqual(
book.search_vector,
# pylint: disable-next=line-too-long
"'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B",
)
# Shared fixture helper. title and author_name are positional-only, the rest
# keyword-only; pass author_name=None to create a book without any author.
@staticmethod
def _create_book(
title, author_name, /, *, subtitle="", series="", author_alias=None
):
"""quickly create a book"""
work = models.Work.objects.create(title="work")
edition = models.Edition.objects.create(
title=title,
# empty strings are stored as NULL
series=series or None,
subtitle=subtitle or None,
isbn_10="0000000000",
parent_work=work,
)
if author_name is not None:
author = models.Author.objects.create(
name=author_name, aliases=author_alias or []
)
edition.authors.add(author)
# NOTE(review): presumably this extra save re-fires the book trigger so the
# vector includes the author linked just above -- confirm
edition.save(broadcast=False)
# reload so the returned object carries the database-computed search_vector
edition.refresh_from_db()
return edition
class SearchVectorUpdates(TestCase):
"""look for books as they change""" # functional tests of the above
def setUp(self):
"""we need basic test data and mocks"""
self.work = models.Work.objects.create(title="This Work")
self.author = models.Author.objects.create(name="Name", aliases=["Alias"])
self.edition = models.Edition.objects.create(
title="First Edition of Work",
subtitle="Some Extra Words Are Good",
series="A Fabulous Sequence of Items",
parent_work=self.work,
isbn_10="0000000000",
)
self.edition.authors.add(self.author)
self.edition.save(broadcast=False)
@classmethod
def setUpTestData(cls):
"""create conditions that trigger known old bugs"""
with connection.cursor() as cursor:
cursor.execute(
"""
ALTER SEQUENCE bookwyrm_author_id_seq RESTART WITH 20;
ALTER SEQUENCE bookwyrm_book_authors_id_seq RESTART WITH 300;
"""
)
def test_search_after_changed_metadata(self):
"""book found after updating metadata"""
self.assertEqual(self.edition, self._search_first("First")) # title
self.assertEqual(self.edition, self._search_first("Good")) # subtitle
self.assertEqual(self.edition, self._search_first("Sequence")) # series
self.edition.title = "Second Title of Work"
self.edition.subtitle = "Fewer Words Is Better"
self.edition.series = "A Wondrous Bunch"
self.edition.save(broadcast=False)
self.assertEqual(self.edition, self._search_first("Second")) # title new
self.assertEqual(self.edition, self._search_first("Fewer")) # subtitle new
self.assertEqual(self.edition, self._search_first("Wondrous")) # series new
self.assertFalse(self._search_first("First")) # title old
self.assertFalse(self._search_first("Good")) # subtitle old
self.assertFalse(self._search_first("Sequence")) # series old
def test_search_after_author_remove(self):
"""book not found via removed author"""
self.assertEqual(self.edition, self._search_first("Name"))
self.edition.authors.set([])
self.edition.save(broadcast=False)
self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Edition"))
def test_search_after_author_add(self):
"""book found by newly-added author"""
new_author = models.Author.objects.create(name="Mozilla")
self.assertFalse(self._search("Mozilla"))
self.edition.authors.add(new_author)
self.edition.save(broadcast=False)
self.assertEqual(self.edition, self._search_first("Mozilla"))
self.assertEqual(self.edition, self._search_first("Name"))
def test_search_after_author_add_remove_sql(self):
"""add/remove author through SQL to ensure execution of book_authors trigger"""
# Tests calling edition.save(), above, pass even if the trigger in
# bookwyrm_book_authors is removed (probably because they trigger the one
# in bookwyrm_book directly). Here we make sure to exercise the former.
new_author = models.Author.objects.create(name="Mozilla")
with connection.cursor() as cursor:
cursor.execute(
"DELETE FROM bookwyrm_book_authors WHERE book_id = %s",
[self.edition.id],
)
self.assertFalse(self._search("Name"))
self.assertFalse(self._search("Mozilla"))
with connection.cursor() as cursor:
cursor.execute(
"INSERT INTO bookwyrm_book_authors (book_id,author_id) VALUES (%s,%s)",
[self.edition.id, new_author.id],
)
self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Mozilla"))
def test_search_after_updated_author_name(self):
    """book found under the new author name and alias, not under the old ones"""
    # Before the change: "Name" and "Alias" match, the future values do not.
    self.assertEqual(self.edition, self._search_first("Name"))
    self.assertEqual(self.edition, self._search_first("Alias"))
    self.assertFalse(self._search("Identifier"))
    self.assertFalse(self._search("Another"))

    # Replace both the name and the aliases, saving without broadcasting.
    self.author.name = "Identifier"
    self.author.aliases = ["Another"]
    self.author.save(broadcast=False)

    # After the change: the old terms must stop matching. The alias check
    # queries the exact "Alias" term asserted to match before the change, so
    # it demonstrates that the old alias was dropped rather than probing a
    # term that was never shown to match in the first place.
    self.assertFalse(self._search("Name"))
    self.assertFalse(self._search("Alias"))
    self.assertEqual(self.edition, self._search_first("Identifier"))
    self.assertEqual(self.edition, self._search_first("Another"))
    # The query "Work" still returns the edition after the author update.
    self.assertEqual(self.edition, self._search_first("Work"))
def _search_first(self, query):
    """Run ``_search`` for ``query`` with ``return_first=True``."""
    first_match = self._search(query, return_first=True)
    return first_match
@staticmethod
def _search(query, *, return_first=False):
    """Call ``book_search.search_title_author`` with ``min_confidence=0``.

    ``return_first`` is forwarded unchanged to the underlying search.
    """
    search_options = {"min_confidence": 0, "return_first": return_first}
    return book_search.search_title_author(query, **search_options)

View file

@ -1,77 +0,0 @@
""" django configuration of postgres """
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import models
@patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async")
class PostgresTriggers(TestCase):
    """Checks the ``search_vector`` value read back from the database.

    The class-level patch replaces ``broadcast_task.apply_async`` with a mock,
    which every test method receives as its unused second argument.
    """

    def test_search_vector_on_create(self, _):
        """a newly created edition has its search_vector populated"""
        edition = models.Edition.objects.create(title="The Long Goodbye")
        edition.refresh_from_db()
        expected = "'goodby':3A 'long':2A"
        self.assertEqual(edition.search_vector, expected)

    def test_search_vector_on_update(self, _):
        """changing the title and saving refreshes the search_vector"""
        edition = models.Edition.objects.create(title="The Long Goodbye")
        edition.title = "The Even Longer Goodbye"
        edition.save(broadcast=False)
        edition.refresh_from_db()
        expected = "'even':2A 'goodby':4A 'longer':3A"
        self.assertEqual(edition.search_vector, expected)

    def test_search_vector_fields(self, _):
        """title, subtitle, series and author name all end up in the vector"""
        rays = models.Author.objects.create(name="The Rays")
        edition_fields = {
            "title": "The Long Goodbye",
            "subtitle": "wow cool",
            "series": "series name",
            "languages": ["irrelevant"],
        }
        edition = models.Edition.objects.create(**edition_fields)
        edition.authors.add(rays)
        edition.refresh_from_db()
        # pylint: disable=line-too-long
        expected = "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B"
        self.assertEqual(edition.search_vector, expected)

    def test_search_vector_on_author_update(self, _):
        """renaming an author is reflected in the book's search_vector"""
        writer = models.Author.objects.create(name="The Rays")
        edition = models.Edition.objects.create(title="The Long Goodbye")
        edition.authors.add(writer)

        writer.name = "Jeremy"
        writer.save(broadcast=False)
        edition.refresh_from_db()

        expected = "'goodby':3A 'jeremy':4C 'long':2A"
        self.assertEqual(edition.search_vector, expected)

    def test_search_vector_on_author_delete(self, _):
        """removing an author from a book drops the name from the vector"""
        writer = models.Author.objects.create(name="Jeremy")
        edition = models.Edition.objects.create(title="The Long Goodbye")

        # With the author attached, the name is part of the vector.
        edition.authors.add(writer)
        edition.refresh_from_db()
        with_author = "'goodby':3A 'jeremy':4C 'long':2A"
        self.assertEqual(edition.search_vector, with_author)

        # Once the author is removed, only the title terms remain.
        edition.authors.remove(writer)
        edition.refresh_from_db()
        without_author = "'goodby':3A 'long':2A"
        self.assertEqual(edition.search_vector, without_author)

    def test_search_vector_stop_word_fallback(self, _):
        """use a fallback when removing stop words leads to an empty vector"""
        edition = models.Edition.objects.create(title="there there")
        edition.refresh_from_db()
        expected = "'there':1A,2A"
        self.assertEqual(edition.search_vector, expected)

23
bookwyrm/utils/db.py Normal file
View file

@ -0,0 +1,23 @@
""" Database utilities """
from typing import cast
import sqlparse # type: ignore
def format_trigger(sql: str) -> str:
    """Normalize the text of an SQL trigger before it is stored.

    Comments and superfluous whitespace are stripped, operators are
    surrounded by spaces, keywords are upper-cased and identifiers are
    lower-cased, so that purely cosmetic edits to the SQL do not cause new
    migrations.
    """
    format_options = {
        "strip_comments": True,
        "strip_whitespace": True,
        "use_space_around_operators": True,
        "keyword_case": "upper",
        "identifier_case": "lower",
    }
    normalized = sqlparse.format(sql, **format_options)
    # sqlparse is imported untyped, so narrow the result for the type checker.
    return cast(str, normalized)

View file

@ -3,6 +3,7 @@ from functools import reduce
import operator
from django.contrib.auth.decorators import login_required
from django.core.cache import cache as django_cache
from django.core.paginator import Paginator
from django.db import transaction
from django.db.models import Q
@ -104,6 +105,13 @@ def switch_edition(request):
readthrough.book = new_edition
readthrough.save()
django_cache.delete_many(
[
f"active_shelf-{request.user.id}-{book_id}"
for book_id in new_edition.parent_work.editions.values_list("id", flat=True)
]
)
reviews = models.Review.objects.filter(
book__parent_work=new_edition.parent_work, user=request.user
)

View file

@ -2,8 +2,9 @@
import re
from django.contrib.postgres.search import TrigramSimilarity
from django.contrib.postgres.search import TrigramSimilarity, SearchRank, SearchQuery
from django.core.paginator import Paginator
from django.db.models import F
from django.db.models.functions import Greatest
from django.http import JsonResponse
from django.template.response import TemplateResponse
@ -94,26 +95,28 @@ def book_search(request):
def author_search(request):
"""search for an author

Reads the "q" and "page" GET parameters, paginates the matching
``models.Author`` rows with ``PAGE_LENGTH`` and renders
"search/author.html" with the keys "type", "query", "results" and
"page_range".
"""
# NOTE(review): the statements below look like two revisions of this view
# interleaved (a TrigramSimilarity lookup and a search_vector/SearchRank
# lookup inside one expression, and ``data`` built twice) -- confirm which
# revision is current before changing any code here.
# NOTE(review): request.GET.get("q") yields None when the parameter is
# absent, so calling .strip() on it would raise AttributeError -- confirm
# that callers always supply "q".
query = request.GET.get("q")
query = query.strip()
data = {"type": "author", "query": query}
query = request.GET.get("q").strip()
# Full-text query built with the "simple" text search configuration.
search_query = SearchQuery(query, config="simple")
min_confidence = 0
results = (
# Trigram variant: keep authors whose name similarity exceeds 0.1,
# most similar first.
models.Author.objects.annotate(
similarity=TrigramSimilarity("name", query),
)
.filter(
similarity__gt=0.1,
)
.order_by("-similarity")
# Search-vector variant: match on search_vector, keep ranks above
# min_confidence, highest rank first.
models.Author.objects.filter(search_vector=search_query)
.annotate(rank=SearchRank(F("search_vector"), search_query))
.filter(rank__gt=min_confidence)
.order_by("-rank")
)
paginated = Paginator(results, PAGE_LENGTH)
page = paginated.get_page(request.GET.get("page"))
data["results"] = page
data["page_range"] = paginated.get_elided_page_range(
page.number, on_each_side=2, on_ends=1
)
data = {
"type": "author",
"query": query,
"results": page,
"page_range": paginated.get_elided_page_range(
page.number, on_each_side=2, on_ends=1
),
}
return TemplateResponse(request, "search/author.html", data)