Allow some Arabic vowels (#3501)

* Update font_variant.py

start allowing vowels
use category to recognise punctuation

* Update font_variant.py

* Update font_variant.py

make it cleaner
pull/3504/head dev-build-claudine-waw_with_hamza_is_non_connecting
Claudine Peyrat 2025-02-12 19:16:57 +01:00 zatwierdzone przez GitHub
rodzic 9ac55934fa
commit 097cf52573
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
1 zmienionych plików z 23 dodań i 15 usunięć

Wyświetl plik

@ -5,7 +5,7 @@
import os
from collections import defaultdict
from unicodedata import normalize
from unicodedata import normalize, category
import inkex
@ -139,18 +139,22 @@ class FontVariant(object):
glyph_selection = [glyph_name for glyph_name, glyph_layer in self.glyphs.items() if glyph_name.startswith(character)]
return sorted(glyph_selection, key=lambda glyph: (len(glyph.split('.')[0]), len(glyph)), reverse=True)
def isbinding(self, character):
def is_binding(self, character):
    """Tell whether ``character`` connects to the letter that follows it.

    After a non-binding letter, the next letter can only take its isolated
    or final shape, so callers use this to choose the shape of the next
    glyph.

    :param character: a single (NFKC-normalized) character of the word
    :return: False if ``character`` is one of the known non-binding Arabic
        letters, True otherwise
    """
    # Alef in all its forms (including alef with hamza below, U+0625),
    # dal, dhal, ra, zay and waw never join to the following letter.
    non_binding_char = ('ا', 'أ', 'إ', 'آ', 'ٱ', 'د', 'ذ', 'ر', 'ز', 'و')
    # Normalize the reference letters the same way the text is normalized,
    # so the comparison is done on identical code points.
    normalized_non_binding_char = tuple(normalize('NFKC', letter) for letter in non_binding_char)
    return character not in normalized_non_binding_char
def ispunctuation(self, character):
    """Tell whether ``character`` is one of the recognised punctuation signs.

    Punctuation signs are not considered part of the word; they only have
    one shape.

    :param character: the character to test
    :return: True if it equals the NFKC form of a listed sign, else False
    """
    punctuation_signs = ['؟', '،', '.', ',', ';', '.', '!', ':', '؛']
    for sign in punctuation_signs:
        if character == normalize('NFKC', sign):
            return True
    return False
def is_mark(self, character):
    """Tell whether ``character`` is a Unicode mark.

    The mark categories (general category starting with ``M``) include all
    the combining diacritics.

    :param character: a single character
    :return: True if its Unicode general category starts with ``M``
    """
    general_category = category(character)
    return general_category.startswith('M')
def is_letter(self, character):
    """Tell whether ``character`` is a Unicode letter.

    :param character: a single character
    :return: True if its Unicode general category starts with ``L``
    """
    general_category = category(character)
    return general_category.startswith('L')
def get_glyph(self, character, word):
"""
@ -169,26 +173,27 @@ class FontVariant(object):
# in arabic each letter (or ligature) may have up to 4 different shapes, hence 4 glyphs
# this computes the shape of the glyph that represents word[starting:ending+1]
# punctuation is not really part of the word
# punctuation or a combining accent is not really part of the word
# they may appear at the beginning or end of words
# computes where the actual word begins and ends up
last_char_index = len(word)-1
first_char_index = 0
while self.ispunctuation(word[last_char_index]):
while not self.is_letter(word[last_char_index]):
last_char_index = last_char_index - 1
while self.ispunctuation(word[first_char_index]):
while not self.is_letter(word[first_char_index]):
first_char_index = first_char_index + 1
# first glyph is eithher isol or init depending wether it is also the last glyph of the actual word
# first glyph is either isol or init depending if it is also the last glyph of the actual word
if starting == first_char_index:
if not self.isbinding(word[ending]) or len(word) == 1:
if not self.is_binding(word[ending]) or len(word) == 1:
shape = 'isol'
else:
shape = 'init'
# last glyph is final if previous is binding, isol otherwise
# a non binding glyph behaves like the last glyph
elif ending == last_char_index or not self.isbinding(word[ending]):
elif ending == last_char_index or not self.is_binding(word[ending]):
if previous_is_binding:
shape = 'fina'
else:
@ -211,14 +216,17 @@ class FontVariant(object):
for glyph in glyph_selection:
glyph_name = glyph.split('.')
if len(glyph_name) == 2 and glyph_name[1] in ['isol', 'init', 'medi', 'fina']:
is_binding = self.isbinding(glyph_name[0][-1])
is_binding = self.is_binding(glyph_name[0][-1])
if len(word) < i + len(glyph_name[0]):
continue
shape = self.get_next_glyph_shape(word, i, i + len(glyph_name[0]) - 1, previous_is_binding)
if glyph_name[1] == shape and word[i:].startswith(glyph_name[0]):
return self.glyphs[glyph], len(glyph_name[0]), is_binding
elif word[i:].startswith(glyph):
return self.glyphs[glyph], len(glyph), True
if self.is_mark(word[i]):
return self.glyphs[glyph], len(glyph), previous_is_binding
else:
return self.glyphs[glyph], len(glyph), True
# nothing was found
return self.glyphs.get(self.default_glyph, None), 1, True