423 lines
20 KiB
Python
423 lines
20 KiB
Python
|
from whoosh.compat import u
|
||
|
|
||
|
class RussianStemmer(object):
    """
    The Russian Snowball stemmer.

    Words containing Cyrillic letters are first transliterated into an
    ASCII scheme (for example ``i^a`` for the letter "ya", ``shch`` for
    "shcha", ``'`` for the soft sign), stemmed in that form, and
    transliterated back before being returned.  All suffix tables below
    are therefore written in the transliterated form.

    The order of the entries in every suffix tuple is significant: the
    tuples are scanned front to back and the first entry that matches
    (and satisfies its precondition, if any) is the one removed.

    :cvar __perfective_gerund_suffixes: Suffixes to be deleted.
    :type __perfective_gerund_suffixes: tuple
    :cvar __adjectival_suffixes: Suffixes to be deleted.
    :type __adjectival_suffixes: tuple
    :cvar __adjectival_group1_suffixes: The adjectival suffixes that are
           only deleted when preceded by "a" or "i^a".
    :type __adjectival_group1_suffixes: tuple
    :cvar __reflexive_suffixes: Suffixes to be deleted.
    :type __reflexive_suffixes: tuple
    :cvar __verb_suffixes: Suffixes to be deleted.
    :type __verb_suffixes: tuple
    :cvar __verb_group1_suffixes: The verb suffixes that are only
           deleted when preceded by "a" or "i^a".
    :type __verb_group1_suffixes: tuple
    :cvar __noun_suffixes: Suffixes to be deleted.
    :type __noun_suffixes: tuple
    :cvar __superlative_suffixes: Suffixes to be deleted.
    :type __superlative_suffixes: tuple
    :cvar __derivational_suffixes: Suffixes to be deleted.
    :type __derivational_suffixes: tuple
    :note: A detailed description of the Russian
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/russian/stemmer.html

    """

    __perfective_gerund_suffixes = ("ivshis'", "yvshis'", "vshis'",
                                    "ivshi", "yvshi", "vshi", "iv",
                                    "yv", "v")

    # Perfective gerund suffixes that require a preceding "a" or "i^a".
    __perfective_gerund_group1_suffixes = ("v", "vshi", "vshis'")

    # NOTE: 'ui^ushchai^a' used to be spelled 'ui^ushchaia', which can never
    # occur in a transliterated word ("ya" is always written 'i^a').
    __adjectival_suffixes = ('ui^ushchi^ui^u', 'ui^ushchi^ai^a',
                             'ui^ushchimi', 'ui^ushchymi', 'ui^ushchego',
                             'ui^ushchogo', 'ui^ushchemu', 'ui^ushchomu',
                             'ui^ushchikh', 'ui^ushchykh',
                             'ui^ushchui^u', 'ui^ushchai^a',
                             'ui^ushchoi^u', 'ui^ushchei^u',
                             'i^ushchi^ui^u', 'i^ushchi^ai^a',
                             'ui^ushchee', 'ui^ushchie',
                             'ui^ushchye', 'ui^ushchoe', 'ui^ushchei`',
                             'ui^ushchii`', 'ui^ushchyi`',
                             'ui^ushchoi`', 'ui^ushchem', 'ui^ushchim',
                             'ui^ushchym', 'ui^ushchom', 'i^ushchimi',
                             'i^ushchymi', 'i^ushchego', 'i^ushchogo',
                             'i^ushchemu', 'i^ushchomu', 'i^ushchikh',
                             'i^ushchykh', 'i^ushchui^u', 'i^ushchai^a',
                             'i^ushchoi^u', 'i^ushchei^u', 'i^ushchee',
                             'i^ushchie', 'i^ushchye', 'i^ushchoe',
                             'i^ushchei`', 'i^ushchii`',
                             'i^ushchyi`', 'i^ushchoi`', 'i^ushchem',
                             'i^ushchim', 'i^ushchym', 'i^ushchom',
                             'shchi^ui^u', 'shchi^ai^a', 'ivshi^ui^u',
                             'ivshi^ai^a', 'yvshi^ui^u', 'yvshi^ai^a',
                             'shchimi', 'shchymi', 'shchego', 'shchogo',
                             'shchemu', 'shchomu', 'shchikh', 'shchykh',
                             'shchui^u', 'shchai^a', 'shchoi^u',
                             'shchei^u', 'ivshimi', 'ivshymi',
                             'ivshego', 'ivshogo', 'ivshemu', 'ivshomu',
                             'ivshikh', 'ivshykh', 'ivshui^u',
                             'ivshai^a', 'ivshoi^u', 'ivshei^u',
                             'yvshimi', 'yvshymi', 'yvshego', 'yvshogo',
                             'yvshemu', 'yvshomu', 'yvshikh', 'yvshykh',
                             'yvshui^u', 'yvshai^a', 'yvshoi^u',
                             'yvshei^u', 'vshi^ui^u', 'vshi^ai^a',
                             'shchee', 'shchie', 'shchye', 'shchoe',
                             'shchei`', 'shchii`', 'shchyi`', 'shchoi`',
                             'shchem', 'shchim', 'shchym', 'shchom',
                             'ivshee', 'ivshie', 'ivshye', 'ivshoe',
                             'ivshei`', 'ivshii`', 'ivshyi`',
                             'ivshoi`', 'ivshem', 'ivshim', 'ivshym',
                             'ivshom', 'yvshee', 'yvshie', 'yvshye',
                             'yvshoe', 'yvshei`', 'yvshii`',
                             'yvshyi`', 'yvshoi`', 'yvshem',
                             'yvshim', 'yvshym', 'yvshom', 'vshimi',
                             'vshymi', 'vshego', 'vshogo', 'vshemu',
                             'vshomu', 'vshikh', 'vshykh', 'vshui^u',
                             'vshai^a', 'vshoi^u', 'vshei^u',
                             'emi^ui^u', 'emi^ai^a', 'nni^ui^u',
                             'nni^ai^a', 'vshee',
                             'vshie', 'vshye', 'vshoe', 'vshei`',
                             'vshii`', 'vshyi`', 'vshoi`',
                             'vshem', 'vshim', 'vshym', 'vshom',
                             'emimi', 'emymi', 'emego', 'emogo',
                             'ememu', 'emomu', 'emikh', 'emykh',
                             'emui^u', 'emai^a', 'emoi^u', 'emei^u',
                             'nnimi', 'nnymi', 'nnego', 'nnogo',
                             'nnemu', 'nnomu', 'nnikh', 'nnykh',
                             'nnui^u', 'nnai^a', 'nnoi^u', 'nnei^u',
                             'emee', 'emie', 'emye', 'emoe',
                             'emei`', 'emii`', 'emyi`',
                             'emoi`', 'emem', 'emim', 'emym',
                             'emom', 'nnee', 'nnie', 'nnye', 'nnoe',
                             'nnei`', 'nnii`', 'nnyi`',
                             'nnoi`', 'nnem', 'nnim', 'nnym',
                             'nnom', 'i^ui^u', 'i^ai^a', 'imi', 'ymi',
                             'ego', 'ogo', 'emu', 'omu', 'ikh',
                             'ykh', 'ui^u', 'ai^a', 'oi^u', 'ei^u',
                             'ee', 'ie', 'ye', 'oe', 'ei`',
                             'ii`', 'yi`', 'oi`', 'em',
                             'im', 'ym', 'om')

    # Adjectival suffixes that require a preceding "a" or "i^a".
    __adjectival_group1_suffixes = (
        'i^ushchi^ui^u', 'i^ushchi^ai^a',
        'i^ushchui^u', 'i^ushchai^a', 'i^ushchoi^u',
        'i^ushchei^u', 'i^ushchimi', 'i^ushchymi',
        'i^ushchego', 'i^ushchogo', 'i^ushchemu',
        'i^ushchomu', 'i^ushchikh', 'i^ushchykh',
        'shchi^ui^u', 'shchi^ai^a', 'i^ushchee',
        'i^ushchie', 'i^ushchye', 'i^ushchoe',
        'i^ushchei`', 'i^ushchii`', 'i^ushchyi`',
        'i^ushchoi`', 'i^ushchem', 'i^ushchim',
        'i^ushchym', 'i^ushchom', 'vshi^ui^u',
        'vshi^ai^a', 'shchui^u', 'shchai^a',
        'shchoi^u', 'shchei^u', 'emi^ui^u',
        'emi^ai^a', 'nni^ui^u', 'nni^ai^a',
        'shchimi', 'shchymi', 'shchego', 'shchogo',
        'shchemu', 'shchomu', 'shchikh', 'shchykh',
        'vshui^u', 'vshai^a', 'vshoi^u', 'vshei^u',
        'shchee', 'shchie', 'shchye', 'shchoe',
        'shchei`', 'shchii`', 'shchyi`', 'shchoi`',
        'shchem', 'shchim', 'shchym', 'shchom',
        'vshimi', 'vshymi', 'vshego', 'vshogo',
        'vshemu', 'vshomu', 'vshikh', 'vshykh',
        'emui^u', 'emai^a', 'emoi^u', 'emei^u',
        'nnui^u', 'nnai^a', 'nnoi^u', 'nnei^u',
        'vshee', 'vshie', 'vshye', 'vshoe',
        'vshei`', 'vshii`', 'vshyi`', 'vshoi`',
        'vshem', 'vshim', 'vshym', 'vshom',
        'emimi', 'emymi', 'emego', 'emogo',
        'ememu', 'emomu', 'emikh', 'emykh',
        'nnimi', 'nnymi', 'nnego', 'nnogo',
        'nnemu', 'nnomu', 'nnikh', 'nnykh',
        'emee', 'emie', 'emye', 'emoe', 'emei`',
        'emii`', 'emyi`', 'emoi`', 'emem', 'emim',
        'emym', 'emom', 'nnee', 'nnie', 'nnye',
        'nnoe', 'nnei`', 'nnii`', 'nnyi`', 'nnoi`',
        'nnem', 'nnim', 'nnym', 'nnom')

    __reflexive_suffixes = ("si^a", "s'")

    __verb_suffixes = ("esh'", 'ei`te', 'ui`te', 'ui^ut',
                       "ish'", 'ete', 'i`te', 'i^ut', 'nno',
                       'ila', 'yla', 'ena', 'ite', 'ili', 'yli',
                       'ilo', 'ylo', 'eno', 'i^at', 'uet', 'eny',
                       "it'", "yt'", 'ui^u', 'la', 'na', 'li',
                       'em', 'lo', 'no', 'et', 'ny', "t'",
                       'ei`', 'ui`', 'il', 'yl', 'im',
                       'ym', 'en', 'it', 'yt', 'i^u', 'i`',
                       'l', 'n')

    # Verb suffixes that require a preceding "a" or "i^a".
    __verb_group1_suffixes = ("la", "na", "ete", "i`te", "li",
                              "i`", "l", "em", "n", "lo", "no",
                              "et", "i^ut", "ny", "t'", "esh'",
                              "nno")

    __noun_suffixes = ('ii^ami', 'ii^akh', 'i^ami', 'ii^am', 'i^akh',
                       'ami', 'iei`', 'i^am', 'iem', 'akh',
                       'ii^u', "'i^u", 'ii^a', "'i^a", 'ev', 'ov',
                       'ie', "'e", 'ei', 'ii', 'ei`',
                       'oi`', 'ii`', 'em', 'am', 'om',
                       'i^u', 'i^a', 'a', 'e', 'i', 'i`',
                       'o', 'u', 'y', "'")

    __superlative_suffixes = ("ei`she", "ei`sh")

    __derivational_suffixes = ("ost'", "ost")

    @staticmethod
    def __follows_a(rv, suffix):
        """
        Tell whether *suffix*, at the end of *rv*, is preceded by "a"
        or "i^a".

        :param rv: The RV region of the word; must end with *suffix*.
        :type rv: str or unicode
        :param suffix: The suffix found at the end of *rv*.
        :type suffix: str or unicode
        :return: True if the text right before the suffix is "i^a" or "a".
        :rtype: bool

        """
        cut = len(suffix)
        return rv[-cut - 3:-cut] == "i^a" or rv[-cut - 1:-cut] == "a"

    def stem(self, word):
        """
        Stem a Russian word and return the stemmed form.

        If the word contains any character with a code point above 255
        it is transliterated to the ASCII scheme before stemming and
        transliterated back to (lower-case) Cyrillic afterwards.
        Otherwise the word is assumed to be in the transliterated scheme
        already and is stemmed and returned as such.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        """
        chr_exceeded = any(ord(char) > 255 for char in word)
        if chr_exceeded:
            word = self.__cyrillic_to_roman(word)

        step1_success = False
        adjectival_removed = False
        verb_removed = False
        undouble_success = False
        superlative_removed = False

        rv, r2 = self.__regions_russian(word)

        # Step 1: try a perfective gerund ending first.  A group-1 ending
        # that lacks the required preceding "a"/"i^a" is skipped and the
        # scan continues with the remaining (shorter) endings.
        for suffix in self.__perfective_gerund_suffixes:
            if rv.endswith(suffix):
                if (suffix not in self.__perfective_gerund_group1_suffixes
                        or self.__follows_a(rv, suffix)):
                    word = word[:-len(suffix)]
                    r2 = r2[:-len(suffix)]
                    rv = rv[:-len(suffix)]
                    step1_success = True
                    break

        if not step1_success:
            # No gerund: drop a reflexive ending if present, then try
            # adjectival, verb and noun endings in that order.
            for suffix in self.__reflexive_suffixes:
                if rv.endswith(suffix):
                    word = word[:-len(suffix)]
                    r2 = r2[:-len(suffix)]
                    rv = rv[:-len(suffix)]
                    break

            for suffix in self.__adjectival_suffixes:
                if rv.endswith(suffix):
                    if (suffix not in self.__adjectival_group1_suffixes
                            or self.__follows_a(rv, suffix)):
                        word = word[:-len(suffix)]
                        r2 = r2[:-len(suffix)]
                        rv = rv[:-len(suffix)]
                        adjectival_removed = True
                        break

            if not adjectival_removed:
                for suffix in self.__verb_suffixes:
                    if rv.endswith(suffix):
                        if (suffix not in self.__verb_group1_suffixes
                                or self.__follows_a(rv, suffix)):
                            word = word[:-len(suffix)]
                            r2 = r2[:-len(suffix)]
                            rv = rv[:-len(suffix)]
                            verb_removed = True
                            break

            if not adjectival_removed and not verb_removed:
                for suffix in self.__noun_suffixes:
                    if rv.endswith(suffix):
                        word = word[:-len(suffix)]
                        r2 = r2[:-len(suffix)]
                        rv = rv[:-len(suffix)]
                        break

        # Step 2: remove a trailing "i" that lies in RV.
        if rv.endswith("i"):
            word = word[:-1]
            r2 = r2[:-1]

        # Step 3: remove a derivational ending that lies in R2.
        for suffix in self.__derivational_suffixes:
            if r2.endswith(suffix):
                word = word[:-len(suffix)]
                break

        # Step 4: undouble "nn", or remove a superlative ending (and then
        # undouble "nn"), or remove a trailing soft sign.
        if word.endswith("nn"):
            word = word[:-1]
            undouble_success = True

        if not undouble_success:
            for suffix in self.__superlative_suffixes:
                if word.endswith(suffix):
                    word = word[:-len(suffix)]
                    superlative_removed = True
                    break
            if word.endswith("nn"):
                word = word[:-1]

        if not undouble_success and not superlative_removed:
            if word.endswith("'"):
                word = word[:-1]

        if chr_exceeded:
            word = self.__roman_to_cyrillic(word)
        return word

    def __regions_russian(self, word):
        """
        Return the regions RV and R2 which are used by the Russian stemmer.

        In any word, RV is the region after the first vowel,
        or the end of the word if it contains no vowel.

        R2 is the region after the first non-vowel following
        a vowel in R1, or the end of the word if there is no such non-vowel.

        R1 is the region after the first non-vowel following a vowel,
        or the end of the word if there is no such non-vowel.

        :param word: The transliterated Russian word whose regions RV
                     and R2 are determined.
        :type word: str or unicode
        :return: the regions RV and R2 for the respective Russian word.
        :rtype: tuple
        :note: This helper method is invoked by the stem method.
               It is not to be invoked directly!

        """
        r1 = ""
        r2 = ""
        rv = ""

        # The multi-character vowels are collapsed to single upper-case
        # placeholders so that the scans below can work per character.
        vowels = ("A", "U", "E", "a", "e", "i", "o", "u", "y")
        word = (word.replace("i^a", "A")
                    .replace("i^u", "U")
                    .replace("e`", "E"))

        for i in range(1, len(word)):
            if word[i] not in vowels and word[i - 1] in vowels:
                r1 = word[i + 1:]
                break

        for i in range(1, len(r1)):
            if r1[i] not in vowels and r1[i - 1] in vowels:
                r2 = r1[i + 1:]
                break

        for i in range(len(word)):
            if word[i] in vowels:
                rv = word[i + 1:]
                break

        # Expand the placeholders again so the regions can be compared
        # against the transliterated suffix tables.
        r2 = (r2.replace("A", "i^a")
                .replace("U", "i^u")
                .replace("E", "e`"))
        rv = (rv.replace("A", "i^a")
                .replace("U", "i^u")
                .replace("E", "e`"))
        return (rv, r2)

    def __cyrillic_to_roman(self, word):
        """
        Transliterate a Russian word into the Roman alphabet.

        A Russian word whose letters consist of the Cyrillic
        alphabet are transliterated into the Roman alphabet
        in order to ease the forthcoming stemming process.
        Upper-case and lower-case Cyrillic letters map to the same
        lower-case Roman representation.

        :param word: The word that is transliterated.
        :type word: unicode
        :return: the transliterated word.
        :rtype: unicode
        :note: This helper method is invoked by the stem method.
               It is not to be invoked directly!

        """
        word = (word.replace(u("\u0410"), "a").replace(u("\u0430"), "a")
                    .replace(u("\u0411"), "b").replace(u("\u0431"), "b")
                    .replace(u("\u0412"), "v").replace(u("\u0432"), "v")
                    .replace(u("\u0413"), "g").replace(u("\u0433"), "g")
                    .replace(u("\u0414"), "d").replace(u("\u0434"), "d")
                    .replace(u("\u0415"), "e").replace(u("\u0435"), "e")
                    .replace(u("\u0401"), "e").replace(u("\u0451"), "e")
                    .replace(u("\u0416"), "zh").replace(u("\u0436"), "zh")
                    .replace(u("\u0417"), "z").replace(u("\u0437"), "z")
                    .replace(u("\u0418"), "i").replace(u("\u0438"), "i")
                    .replace(u("\u0419"), "i`").replace(u("\u0439"), "i`")
                    .replace(u("\u041A"), "k").replace(u("\u043A"), "k")
                    .replace(u("\u041B"), "l").replace(u("\u043B"), "l")
                    .replace(u("\u041C"), "m").replace(u("\u043C"), "m")
                    .replace(u("\u041D"), "n").replace(u("\u043D"), "n")
                    .replace(u("\u041E"), "o").replace(u("\u043E"), "o")
                    .replace(u("\u041F"), "p").replace(u("\u043F"), "p")
                    .replace(u("\u0420"), "r").replace(u("\u0440"), "r")
                    .replace(u("\u0421"), "s").replace(u("\u0441"), "s")
                    .replace(u("\u0422"), "t").replace(u("\u0442"), "t")
                    .replace(u("\u0423"), "u").replace(u("\u0443"), "u")
                    .replace(u("\u0424"), "f").replace(u("\u0444"), "f")
                    .replace(u("\u0425"), "kh").replace(u("\u0445"), "kh")
                    .replace(u("\u0426"), "t^s").replace(u("\u0446"), "t^s")
                    .replace(u("\u0427"), "ch").replace(u("\u0447"), "ch")
                    .replace(u("\u0428"), "sh").replace(u("\u0448"), "sh")
                    .replace(u("\u0429"), "shch").replace(u("\u0449"), "shch")
                    .replace(u("\u042A"), "''").replace(u("\u044A"), "''")
                    .replace(u("\u042B"), "y").replace(u("\u044B"), "y")
                    .replace(u("\u042C"), "'").replace(u("\u044C"), "'")
                    .replace(u("\u042D"), "e`").replace(u("\u044D"), "e`")
                    .replace(u("\u042E"), "i^u").replace(u("\u044E"), "i^u")
                    .replace(u("\u042F"), "i^a").replace(u("\u044F"), "i^a"))
        return word

    def __roman_to_cyrillic(self, word):
        """
        Transliterate a Russian word back into the Cyrillic alphabet.

        A Russian word formerly transliterated into the Roman alphabet
        in order to ease the stemming process, is transliterated back
        into the Cyrillic alphabet. Only lower-case Cyrillic letters
        are produced.

        :param word: The word that is transliterated.
        :type word: str or unicode
        :return: word, the transliterated word.
        :rtype: unicode
        :note: This helper method is invoked by the stem method.
               It is not to be invoked directly!

        """
        # The order of the replacements matters: multi-character sequences
        # must be consumed before the single characters they contain
        # (e.g. "shch" before "sh", "i^u" before "i" and "u",
        # "''" before "'").
        word = (word.replace("i^u", u("\u044E")).replace("i^a", u("\u044F"))
                    .replace("shch", u("\u0449")).replace("kh", u("\u0445"))
                    .replace("t^s", u("\u0446")).replace("ch", u("\u0447"))
                    .replace("e`", u("\u044D")).replace("i`", u("\u0439"))
                    .replace("sh", u("\u0448")).replace("k", u("\u043A"))
                    .replace("e", u("\u0435")).replace("zh", u("\u0436"))
                    .replace("a", u("\u0430")).replace("b", u("\u0431"))
                    .replace("v", u("\u0432")).replace("g", u("\u0433"))
                    .replace("d", u("\u0434"))
                    .replace("z", u("\u0437")).replace("i", u("\u0438"))
                    .replace("l", u("\u043B")).replace("m", u("\u043C"))
                    .replace("n", u("\u043D")).replace("o", u("\u043E"))
                    .replace("p", u("\u043F")).replace("r", u("\u0440"))
                    .replace("s", u("\u0441")).replace("t", u("\u0442"))
                    .replace("u", u("\u0443")).replace("f", u("\u0444"))
                    .replace("''", u("\u044A")).replace("y", u("\u044B"))
                    .replace("'", u("\u044C")))
        return word
|