diff --git a/bookwyrm/activitypub/note.py b/bookwyrm/activitypub/note.py index dee05efc8..6a081058c 100644 --- a/bookwyrm/activitypub/note.py +++ b/bookwyrm/activitypub/note.py @@ -65,6 +65,7 @@ class Note(ActivityObject): rf'({hashtag.name})', rf"\1{hashtag.remote_id}\2", instance.content, + flags=re.IGNORECASE, ) if instance.content != updated_content: instance.content = updated_content diff --git a/bookwyrm/tests/activitypub/test_note.py b/bookwyrm/tests/activitypub/test_note.py index 465341c0a..c4db2d9b1 100644 --- a/bookwyrm/tests/activitypub/test_note.py +++ b/bookwyrm/tests/activitypub/test_note.py @@ -34,7 +34,7 @@ class Note(TestCase): inReplyToBook=self.book.remote_id, content="

This is interesting " + '' - + "#BookClub

", + + "#bookclub

", published="2023-02-17T23:12:59.398030+00:00", to=[], cc=[], @@ -60,5 +60,5 @@ class Note(TestCase): instance.content, "

This is interesting " + f'' - + "#BookClub

", + + "#bookclub

", ) diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py index f1f3c0d8d..d02c71374 100644 --- a/bookwyrm/tests/views/test_status.py +++ b/bookwyrm/tests/views/test_status.py @@ -6,7 +6,7 @@ from django.test import TestCase, TransactionTestCase from django.test.client import RequestFactory from bookwyrm import forms, models, views -from bookwyrm.views.status import find_mentions, find_hashtags +from bookwyrm.views.status import find_mentions, find_or_create_hashtags from bookwyrm.settings import DOMAIN from bookwyrm.tests.validate_html import validate_html @@ -339,7 +339,8 @@ class StatusViews(TestCase): view = views.CreateStatus.as_view() form = forms.CommentForm( { - "content": "this is an #existing hashtag, this one is #new.", + "content": "this is an #EXISTING hashtag but all uppercase, " + + "this one is #NewTag.", "user": self.local_user.id, "book": self.book.id, "privacy": "public", @@ -356,44 +357,45 @@ class StatusViews(TestCase): self.assertEqual(list(status.mention_hashtags.all()), list(hashtags)) hashtag_exising = models.Hashtag.objects.filter(name="#existing").first() - hashtag_new = models.Hashtag.objects.filter(name="#new").first() + hashtag_new = models.Hashtag.objects.filter(name="#NewTag").first() self.assertEqual( status.content, "

this is an " + f'' - + "#existing hashtag, this one is " - + f'#new.

', + + "#EXISTING hashtag but all uppercase, this one is " + + f'' + + "#NewTag.

", ) - def test_find_hashtags(self, *_): + def test_find_or_create_hashtags(self, *_): """detect and look up #hashtags""" - result = find_hashtags("no hashtag to be found here") + result = find_or_create_hashtags("no hashtag to be found here") self.assertEqual(result, {}) - result = find_hashtags("#existing") + result = find_or_create_hashtags("#existing") self.assertEqual(result["#existing"], self.existing_hashtag) - result = find_hashtags("leading text #existing") + result = find_or_create_hashtags("leading text #existing") self.assertEqual(result["#existing"], self.existing_hashtag) - result = find_hashtags("leading #existing trailing") + result = find_or_create_hashtags("leading #existing trailing") self.assertEqual(result["#existing"], self.existing_hashtag) self.assertIsNone(models.Hashtag.objects.filter(name="new").first()) - result = find_hashtags("leading #new trailing") + result = find_or_create_hashtags("leading #new trailing") new_hashtag = models.Hashtag.objects.filter(name="#new").first() self.assertIsNotNone(new_hashtag) self.assertEqual(result["#new"], new_hashtag) - result = find_hashtags("leading #existing #new trailing") + result = find_or_create_hashtags("leading #existing #new trailing") self.assertEqual(result["#existing"], self.existing_hashtag) self.assertEqual(result["#new"], new_hashtag) - result = find_hashtags("#Braunbär") + result = find_or_create_hashtags("#Braunbär") hashtag = models.Hashtag.objects.filter(name="#Braunbär").first() self.assertEqual(result["#Braunbär"], hashtag) - result = find_hashtags("#ひぐま") + result = find_or_create_hashtags("#ひぐま") hashtag = models.Hashtag.objects.filter(name="#ひぐま").first() self.assertEqual(result["#ひぐま"], hashtag) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index ef4c33249..498a8b6ba 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -116,7 +116,7 @@ class CreateStatus(View): status.mention_users.add(status.reply_parent.user) # inspect the text for hashtags - for (mention_text, mention_hashtag) in find_hashtags(content).items(): + for (mention_text, mention_hashtag) in find_or_create_hashtags(content).items(): # add them to status mentions fk status.mention_hashtags.add(mention_hashtag) @@ -250,25 +250,36 @@ def find_mentions(user, content): return username_dict -def find_hashtags(content): - """detect #hashtags in raw status content""" +def find_or_create_hashtags(content): + """detect #hashtags in raw status content + + it stores hashtags case-sensitive, but ensures that an existing + hashtag with different case are found and re-used. for example, + an existing #BookWyrm hashtag will be found and used even if the + status content is using #bookwyrm. + """ if not content: return {} - hashtags = re.findall(regex.HASHTAG, content) - if len(hashtags) == 0: + found_hashtags = {t.lower(): t for t in re.findall(regex.HASHTAG, content)} + if len(found_hashtags) == 0: return {} - known_tags = models.Hashtag.objects.filter(Q(name__in=hashtags)).distinct() - hashtag_dict = {t.name: t for t in known_tags} + known_hashtags = { + t.name.lower(): t + for t in models.Hashtag.objects.filter( + Q(name__in=found_hashtags.keys()) + ).distinct() + } - not_found = set(hashtags) - set(hashtag_dict.keys()) - for tag_name in not_found: + not_found = found_hashtags.keys() - known_hashtags.keys() + for lower_name in not_found: + tag_name = found_hashtags[lower_name] mention_hashtag = models.Hashtag(name=tag_name) mention_hashtag.save() - hashtag_dict[mention_hashtag.name] = mention_hashtag + known_hashtags[lower_name] = mention_hashtag - return hashtag_dict + return {found_hashtags[k]: v for k, v in known_hashtags.items()} def format_links(content):