1
0
Fork 0

Enable finding existing hashtags case-insensitive

We should store hashtags case-sensitive, but ensures that an existing
hashtag with different case are found and re-used. for example,
an existing #BookWyrm hashtag will be found and used even if the
status content is using #bookwyrm.
This commit is contained in:
Christof Dorner 2023-02-21 17:17:53 +01:00
parent f3334b1550
commit 9ca9883e0b
4 changed files with 41 additions and 27 deletions

View file

@ -116,7 +116,7 @@ class CreateStatus(View):
status.mention_users.add(status.reply_parent.user)
# inspect the text for hashtags
for (mention_text, mention_hashtag) in find_hashtags(content).items():
for (mention_text, mention_hashtag) in find_or_create_hashtags(content).items():
# add them to status mentions fk
status.mention_hashtags.add(mention_hashtag)
@ -250,25 +250,36 @@ def find_mentions(user, content):
return username_dict
def find_hashtags(content):
"""detect #hashtags in raw status content"""
def find_or_create_hashtags(content):
"""detect #hashtags in raw status content
it stores hashtags case-sensitive, but ensures that an existing
hashtag with different case are found and re-used. for example,
an existing #BookWyrm hashtag will be found and used even if the
status content is using #bookwyrm.
"""
if not content:
return {}
hashtags = re.findall(regex.HASHTAG, content)
if len(hashtags) == 0:
found_hashtags = {t.lower(): t for t in re.findall(regex.HASHTAG, content)}
if len(found_hashtags) == 0:
return {}
known_tags = models.Hashtag.objects.filter(Q(name__in=hashtags)).distinct()
hashtag_dict = {t.name: t for t in known_tags}
known_hashtags = {
t.name.lower(): t
for t in models.Hashtag.objects.filter(
Q(name__in=found_hashtags.keys())
).distinct()
}
not_found = set(hashtags) - set(hashtag_dict.keys())
for tag_name in not_found:
not_found = found_hashtags.keys() - known_hashtags.keys()
for lower_name in not_found:
tag_name = found_hashtags[lower_name]
mention_hashtag = models.Hashtag(name=tag_name)
mention_hashtag.save()
hashtag_dict[mention_hashtag.name] = mention_hashtag
known_hashtags[lower_name] = mention_hashtag
return hashtag_dict
return {found_hashtags[k]: v for k, v in known_hashtags.items()}
def format_links(content):