class i18n_be_generic(i18n):
    """Street-name handling for Belarusian, based on the Russian rules.

    ``user_readable_street`` expands abbreviated status words (street,
    square, lane, ...) and moves a leading status word, optionally
    preceded by an ordinal such as ``2-і``, behind the proper name so
    that entries can be sorted by that name.
    """

    # Each entry: (full status word, [abbreviations written without '.']).
    STATUS_PARTS = [
        (u"вуліца", [u"вул"]),
        (u"плошча", [u"пл"]),
        (u"завулак", [u"зав", u"зав-к"]),
        (u"праезд", [u"пр-д"]),
        (u"шаша", [u"ш"]),
        (u"бульвар", [u"бул", u"б-р"]),
        (u"тупік", [u"туп"]),
        (u"набярэжная", [u"наб"]),
        (u"праспект", [u"праспект", u"пр-кт", u"пр-т"]),
        (u"алея", []),
        (u"мост", []),
        (u"парк", []),
        (u"тракт", [u"тр-т", u"тр"]),
        (u"раён", [u"р-н"]),
        (u"мікрараён", [u"мкр-н", u"мк-н", u"мкр", u"мкрн"]),
        (u"пасёлак", [u"пас"]),
        (u"вёска", [u"в"]),
        (u"квартал", [u"кв-л", u"кв"]),
    ]

    # matches one or more whitespace characters
    SPACE_REDUCE = re.compile(r"\s+")
    # mapping from status abbreviations (w/o '.') to full status names
    STATUS_PARTS_ABBREV_MAPPING = dict(
        (f, t) for t, ff in STATUS_PARTS for f in ff)
    # set of full (not abbreviated) status parts
    STATUS_PARTS_FULL = set(x[0] for x in STATUS_PARTS)
    # matches any abbreviated status part with optional '.'
    # Alternatives are ordered longest first: '-' satisfies the trailing
    # (?=\W|$) lookahead, so a short abbreviation listed before a longer
    # hyphenated one with the same stem would win and leave the rest of
    # the longer abbreviation behind in the text.
    STATUS_ABBREV_REGEXP = re.compile(
        r"\b(%s)\.?(?=\W|$)" % u"|".join(
            sorted((f for t, ff in STATUS_PARTS for f in ff),
                   key=len, reverse=True)),
        re.IGNORECASE | re.UNICODE)
    # matches status prefixes at start of name used to move prefixes to the end
    # Groups: num_prefix = ordinal such as "2-і", prefix = full status word
    # (with optional '.'), name = everything that follows.
    # The \b after the status word keeps it from matching the first
    # letters of a longer word.
    # Written as a u"" literal with doubled backslashes because the
    # ur"" prefix is a syntax error on Python 3.
    PREFIX_REGEXP = re.compile(
        u"^(?P<num_prefix>\\d+-?(і|ы|я))?\\s*"
        u"(?P<prefix>(%s)\\b\\.?)?\\s*(?P<name>.+)?"
        % (u"|".join(f for f, t in STATUS_PARTS)),
        re.IGNORECASE | re.UNICODE)

    def __init__(self, language, locale_path):
        """Remember the language code and install its translations.

        :param language: language identifier; stored as ``str(language)``
            and passed on to ``_install_language``.
        :param locale_path: passed on to ``_install_language``.
        """
        self.language = str(language)
        _install_language(language, locale_path)

    def upper_unaccent_string(self, s):
        """Return ``s`` upper-cased; no accent stripping is performed."""
        return s.upper()

    def language_code(self):
        """Return the language code stored by the constructor."""
        return self.language

    @staticmethod
    def _rewrite_street_parts(matches):
        """Substitution callback for ``PREFIX_REGEXP``.

        :param matches: match object exposing the groups ``num_prefix``,
            ``prefix`` and ``name``.
        :returns: ``"<name>, <num_prefix> <prefix>"`` with the moved parts
            lower-cased, or the matched text unchanged when nothing
            should be moved.
        """
        num_prefix, prefix, name = matches.group(
            'num_prefix', 'prefix', 'name')
        # Nothing follows the prefixes, so there is nothing to reorder.
        if name is None:
            return matches.group(0)
        # Without an ordinal, keep the text as it is when there is no
        # status prefix either, or when the remainder is itself a full
        # status word.
        if num_prefix is None and (
                prefix is None
                or name in i18n_be_generic.STATUS_PARTS_FULL):
            return matches.group(0)
        moved = " ".join(
            s.lower() for s in (num_prefix, prefix) if s is not None)
        return ", ".join((name, moved))

    def user_readable_street(self, name):
        """Return ``name`` normalised for display and sorting.

        Steps: strip and collapse whitespace, expand known status
        abbreviations, then move a leading ordinal/status word behind
        the proper name.

        :param name: street name as a text string.
        :returns: the normalised street name.
        """
        name = self.SPACE_REDUCE.sub(" ", name.strip())
        # Normalize abbreviations.  The dictionary lookup is
        # case-sensitive, so a match that differs in case from the
        # listed abbreviation is left as it was written.
        abbreviations = self.STATUS_PARTS_ABBREV_MAPPING
        name = self.STATUS_ABBREV_REGEXP.sub(
            lambda m: abbreviations.get(m.group(1), m.group(0)), name)
        # Move prefixed status parts to the end for sorting
        name = self.PREFIX_REGEXP.sub(self._rewrite_street_parts, name)
        # TODO: move "малая", "большая" after name but before status
        return name

    def first_letter_equal(self, a, b):
        """Return True when ``a`` and ``b`` are equal after upper-casing."""
        return self.upper_unaccent_string(a) == self.upper_unaccent_string(b)
class i18n_be_generic_test(unittest.TestCase):
    """Checks street-name rewriting of the Belarusian i18n class."""

    def setUp(self):
        # A fresh 'be' instance with an empty locale path for each test.
        self.r = i18n.i18n_be_generic('be', '')

    def test_readable_street(self):
        """Each raw street name must be rewritten to its expected form."""
        # (raw street name, expected user-readable form)
        cases = (
            (u"праспект Незалежнасці", u"Незалежнасці, праспект"),
            (u"Кастрычніцкая вуліца", u"Кастрычніцкая вуліца"),
            (u"вуліца Янкі Купалы", u"Янкі Купалы, вуліца"),
            (u"вуліца Раманаўская Слабада", u"Раманаўская Слабада, вуліца"),
            (u"Аляксандраўскі сквер", u"Аляксандраўскі сквер"),
            (u"Музычны завулак", u"Музычны завулак"),
            (u"Парк Цівалі", u"Цівалі, парк"),
            (u"вуліца 60 год БССР", u"60 год БССР, вуліца"),
            (u"вул. 8 сакавіка", u"8 сакавіка, вуліца"),
            (u"завулак Баўмана", u"Баўмана, завулак"),
            (u"плошча 17 Верасня", u"17 Верасня, плошча"),
            (u"пл. 17 Верасня", u"17 Верасня, плошча"),
            (u"2-і завулак Цімашэнкі", u"Цімашэнкі, 2-і завулак"),
        )
        for raw, expected in cases:
            actual = self.r.user_readable_street(raw)
            self.assertEqual(expected, actual)


if __name__ == '__main__':
    unittest.main()