This repository has been archived on 2024-07-31. You can view files and clone it, but cannot push or open issues or pull requests.
2022-07-13 09:03:28 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# Reduces a hashtag string to a canonical form by running it through a fixed
# pipeline of text transformations (see #normalize for the order).
class HashtagNormalizer
  # Normalizes +str+ by applying, in order: Unicode NFKC normalization,
  # lowercasing, ASCII folding, and removal of characters matched by
  # Tag::HASHTAG_INVALID_CHARS_RE.
  #
  # @param str [String] the raw hashtag text
  # @return [String] a new, normalized string
  def normalize(str)
    # The order matters: width/compatibility forms are unified first so the
    # later steps operate on the canonical characters.
    remove_invalid_characters(ascii_folding(lowercase(cjk_width(str))))
  end

  private

  # Deletes every substring matched by Tag::HASHTAG_INVALID_CHARS_RE.
  def remove_invalid_characters(str)
    str.gsub(Tag::HASHTAG_INVALID_CHARS_RE, '')
  end

  # Delegates to ASCIIFolding#fold; presumably maps accented characters to
  # their ASCII equivalents -- confirm against ASCIIFolding.
  def ascii_folding(str)
    ASCIIFolding.new.fold(str)
  end

  # Lowercases the string. Native String#downcase performs full Unicode case
  # mapping, so no ActiveSupport multibyte wrapper is needed.
  def lowercase(str)
    str.downcase
  end

  # Applies Unicode NFKC normalization, which replaces compatibility
  # characters (such as full-width Latin letters) with their canonical forms.
  def cjk_width(str)
    str.unicode_normalize(:nfkc)
  end
end
|