Implement Belarusian language support. Closes #11710.

Contributed by Viktar Palstsiuk <vipals@gmail.com>. Signed-off-by: Maxime Petazzoni <maxime.petazzoni@bulix.org>
2013-08-28 09:44:35 -07:00 · 2013-08-28 09:44:35 -07:00 · a2783739e9
commit a2783739e9
--- a/ocitysmap/i18n.py
+++ b/ocitysmap/i18n.py
@ -556,6 +556,87 @@ class i18n_ru_generic(i18n):
    def first_letter_equal(self, a, b):
        return self.upper_unaccent_string(a) == self.upper_unaccent_string(b)

+class i18n_be_generic(i18n):
+    """Street-name handling for Belarusian, based on the Russian class.
+
+    user_readable_street() expands abbreviated status words and moves a
+    leading status word (optionally preceded by an ordinal) behind the
+    proper name, so that index entries sort by the name itself.
+    """
+
+    # (full status word, [abbreviations accepted for it, written w/o '.'])
+    STATUS_PARTS = [
+        (u"вуліца", [u"вул"]),
+        (u"плошча", [u"пл"]),
+        (u"завулак", [u"зав", u"зав-к"]),
+        (u"праезд", [u"пр-д"]),
+        (u"шаша", [u"ш"]),
+        (u"бульвар", [u"бул", u"б-р"]),
+        (u"тупік", [u"туп"]),
+        (u"набярэжная", [u"наб"]),
+        (u"праспект", [u"праспект", u"пр-кт", u"пр-т"]),
+        (u"алея", []),
+        (u"мост", []),
+        (u"парк", []),
+        (u"тракт", [u"тр-т", u"тр"]),
+        (u"раён", [u"р-н"]),
+        (u"мікрараён", [u"мкр-н", u"мк-н", u"мкр", u"мкрн"]),
+        (u"пасёлак", [u"пас"]),
+        (u"вёска", [ u"в"]),
+        (u"квартал", [u"кв-л", u"кв"]),
+    ]
+
+    # matches any run of whitespace
+    SPACE_REDUCE = re.compile(r"\s+")
+    # mapping from status abbreviations (w/o '.') to full status names
+    STATUS_PARTS_ABBREV_MAPPING = dict((f, t) for t, ff in STATUS_PARTS for f in ff)
+    # set of full (not abbreviated) status parts
+    STATUS_PARTS_FULL = set((x[0] for x in STATUS_PARTS))
+    # matches any abbreviated status part with optional '.'
+    # Alternatives are ordered longest first: the trailing lookahead accepts
+    # '-', so u"зав" would otherwise win over u"зав-к" and leave a stray
+    # u"-к" behind the expanded word.
+    STATUS_ABBREV_REGEXP = re.compile(r"\b(%s)\.?(?=\W|$)" % u"|".join(
+        sorted((f for t, ff in STATUS_PARTS for f in ff),
+               key=len, reverse=True)), re.IGNORECASE | re.UNICODE)
+    # matches status prefixes at start of name used to move prefixes to the end
+    # The \b after the status word keeps names that merely begin with one
+    # (u"Паркавая", u"Мостовая", ...) from being cut in the middle.
+    PREFIX_REGEXP = re.compile(
+        ur"^(?P<num_prefix>\d+-?(і|ы|я))?\s*(?P<prefix>(%s)\b\.?)?\s*(?P<name>.+)?" %
+        (u"|".join(f for f,t in STATUS_PARTS)), re.IGNORECASE | re.UNICODE)
+
+    def __init__(self, language, locale_path):
+        """Store the language code and call _install_language() for it."""
+        self.language = str(language)
+        _install_language(language, locale_path)
+
+    def upper_unaccent_string(self, s):
+        """Return s upper-cased; no accent folding is applied."""
+        return s.upper()
+
+    def language_code(self):
+        """Return the language code given to the constructor."""
+        return self.language
+
+    @staticmethod
+    def _rewrite_street_parts(matches):
+        """Substitution callback for PREFIX_REGEXP.
+
+        Returns "<name>, <ordinal> <status>" (ordinal and status lower-cased,
+        whichever of the two are present).  The matched text is returned
+        unchanged when there is no name, when neither an ordinal nor a
+        status word leads the string, or when a lone status word is followed
+        by another full status word.
+        """
+        num_prefix, prefix, name = matches.group('num_prefix', 'prefix', 'name')
+        if name is None:
+            # Nothing to move the prefix behind.
+            return matches.group(0)
+        if num_prefix is None:
+            if prefix is None:
+                # Plain name without any leading status part.
+                return matches.group(0)
+            if name in i18n_be_generic.STATUS_PARTS_FULL:
+                # "<status> <status>": keep the original order.
+                return matches.group(0)
+        moved = " ".join(s.lower()
+                         for s in (num_prefix, prefix)
+                         if s is not None)
+        return ", ".join((name, moved))
+
+    def user_readable_street(self, name):
+        """Return the street name in the form used for the street index.
+
+        Whitespace is trimmed and collapsed, abbreviated status words are
+        expanded, and a leading status word (with optional ordinal) is moved
+        behind the name.
+        """
+        name = name.strip()
+        name = self.SPACE_REDUCE.sub(" ", name)
+        # Normalize abbreviations
+        # The dictionary lookup is case-sensitive although the regexp is not,
+        # so capitalised matches (e.g. an initial such as u"В.") fall through
+        # to the default and are left untouched.
+        name = self.STATUS_ABBREV_REGEXP.sub(lambda m:
+                self.STATUS_PARTS_ABBREV_MAPPING.get(
+                    m.group(0).replace('.', ''), m.group(0)),
+            name)
+        # Move prefixed status parts to the end for sorting
+        name = self.PREFIX_REGEXP.sub(self._rewrite_street_parts, name)
+        # TODO: move "малая", "большая" after name but before status
+        return name
+
+    def first_letter_equal(self, a, b):
+        """Return True when a and b are equal after upper_unaccent_string()."""
+        return self.upper_unaccent_string(a) == self.upper_unaccent_string(b)
+
 class i18n_nl_generic(i18n):
    #
    # Dutch streets are often named after people and include a title.
@ -1031,6 +1112,7 @@ language_class_map = {
    'tr_TR.UTF-8': i18n_tr_generic,
    'ast_ES.UTF-8': i18n_ast_generic,
    'sk_SK.UTF-8': i18n_generic,
+    'be_BY.UTF-8': i18n_be_generic,
 }

 def install_translation(locale_name, locale_path):
--- a/ocitysmap/i18n_test.py
+++ b/ocitysmap/i18n_test.py
@ -68,5 +68,28 @@ class i18n_ru_generic_test(unittest.TestCase):
        for fr, to in conversions:
            self.assertEqual(to, self.r.user_readable_street(fr))

+class i18n_be_generic_test(unittest.TestCase):
+    """Checks street-name rewriting done by i18n.i18n_be_generic."""
+
+    def setUp(self):
+        # A fresh instance for every test, built with an empty locale path.
+        self.r = i18n.i18n_be_generic('be', '')
+
+    def test_readable_street(self):
+        # Pairs of (street name as given, expected user_readable_street result).
+        expectations = (
+            (u"праспект Незалежнасці", u"Незалежнасці, праспект"),
+            (u"Кастрычніцкая вуліца", u"Кастрычніцкая вуліца"),
+            (u"вуліца Янкі Купалы", u"Янкі Купалы, вуліца"),
+            (u"вуліца Раманаўская Слабада", u"Раманаўская Слабада, вуліца"),
+            (u"Аляксандраўскі сквер", u"Аляксандраўскі сквер"),
+            (u"Музычны завулак", u"Музычны завулак"),
+            (u"Парк Цівалі", u"Цівалі, парк"),
+            (u"вуліца 60 год БССР", u"60 год БССР, вуліца"),
+            (u"вул. 8 сакавіка", u"8 сакавіка, вуліца"),
+            (u"завулак Баўмана", u"Баўмана, завулак"),
+            (u"плошча 17 Верасня", u"17 Верасня, плошча"),
+            (u"пл. 17 Верасня", u"17 Верасня, плошча"),
+            (u"2-і завулак Цімашэнкі", u"Цімашэнкі, 2-і завулак"),
+        )
+        for raw, wanted in expectations:
+            self.assertEqual(wanted, self.r.user_readable_street(raw))
+
 if __name__ == '__main__':
    unittest.main()