From 0b0b1dabdfa2051d0b1d1ad846892da4801ca588 Mon Sep 17 00:00:00 2001
From: tusooa <tusooa@kazv.moe>
Date: Fri, 21 Jul 2023 13:54:10 -0400
Subject: [PATCH] Fix parsing non-ascii tags

---
 changelog.d/nonascii-tags.fix                    | 1 +
 src/services/matcher/matcher.service.js          | 7 +++++--
 test/unit/specs/services/matcher/matcher.spec.js | 6 ++++++
 3 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/nonascii-tags.fix

diff --git a/changelog.d/nonascii-tags.fix b/changelog.d/nonascii-tags.fix
new file mode 100644
index 000000000..e4c6dc82d
--- /dev/null
+++ b/changelog.d/nonascii-tags.fix
@@ -0,0 +1 @@
+Fix parsing non-ascii tags
diff --git a/src/services/matcher/matcher.service.js b/src/services/matcher/matcher.service.js
index b6c4e9099..54f02d312 100644
--- a/src/services/matcher/matcher.service.js
+++ b/src/services/matcher/matcher.service.js
@@ -14,8 +14,11 @@ export const mentionMatchesUrl = (attention, url) => {
  * @param {string} url
  */
 export const extractTagFromUrl = (url) => {
-  const regex = /tag[s]*\/(\w+)$/g
-  const result = regex.exec(url)
+  const decoded = decodeURI(url)
+  // https://git.pleroma.social/pleroma/elixir-libraries/linkify/-/blob/master/lib/linkify/parser.ex
+  // https://www.pcre.org/original/doc/html/pcrepattern.html
+  const regex = /tag[s]*\/([\p{L}\p{N}_]*[\p{Alphabetic}_·\u{200c}][\p{L}\p{N}_·\p{M}\u{200c}]*)$/ug
+  const result = regex.exec(decoded)
   if (!result) {
     return false
   }
diff --git a/test/unit/specs/services/matcher/matcher.spec.js b/test/unit/specs/services/matcher/matcher.spec.js
index 7a2494f07..c6e9719d3 100644
--- a/test/unit/specs/services/matcher/matcher.spec.js
+++ b/test/unit/specs/services/matcher/matcher.spec.js
@@ -78,5 +78,11 @@ describe('MatcherService', () => {
 
       expect(MatcherService.extractTagFromUrl(url)).to.eql(false)
     })
+
+    it('should return tag name from non-ascii tags', () => {
+      const url = encodeURI('https://website.com/tag/喵喵喵') // encodeURI percent-encodes the non-ASCII characters
+
+      expect(MatcherService.extractTagFromUrl(url)).to.eql('喵喵喵') // expects the decoded tag, not the percent-encoded form
+    })
   })
 })
-- 
GitLab