Dual indices.

2019-09-08 12:55:09 +01:00 · 2019-09-08 12:55:09 +01:00 · dfb80d975d
commit dfb80d975d
--- a/FONT_TO_PY.md
+++ b/FONT_TO_PY.md
@ -5,12 +5,12 @@ is to save RAM on resource-limited targets: the font file may be incorporated
 into a firmware build such that it occupies flash memory rather than scarce
 RAM. Python code built into firmware is known as frozen bytecode.

-## V0.27/0.28 notes
+## V0.3 notes

-7 Sept 2019
+8 Sept 2019

-Remove redundancy from index file: significantly reduces file size for sparse
-fonts. Add a comment field in the output file showing creation command line.
+Remove redundancy from index file. Emit extra index for sparse fonts, reducing
+code size. Add comment field in the output file showing creation command line.
 Repo includes the file `extended`. This facilitates creating fonts comprising
 the printable ASCII set plus `°μπωϕθαβγδλΩ`. Improvements to `font_test.py`.

@ -207,15 +207,19 @@ With a font of height 20 pixels RAM saving was an order of magnitude. The
 saving will be greater if larger fonts are used as RAM usage is independent of
 the array sizes.

-# Appendix 2: room for improvement
+# Appendix 2: Recent improvements

-The representation of non-contiguous character sets having large gaps (such as
-the `extended` set) is not very efficient. This is because the index table
-becomes sparse. This matters little if the font is to be frozen as bytecode
-because the index is located in Flash rather than RAM.
+The representation of non-contiguous character sets such as the `extended` set
+presents a challenge because the ordinal values of the Unicode characters can
+be expected to span a range much greater than the number of characters in the
+set. Using an index of the type used for the ASCII set would be inefficient as
+most of the elements would be null (pointing to the default character).

-I have implemented a change which removes redundancy in the index file. Further
-improvements would require a further level of indirection which would have the
-drawback of increasing the size of small contiguous character sets - or
-emitting two file formats with the same API. The latter does not appeal from a
-support perspective.
+The code now behaves as follows. If the character set contains no more than 95
+characters (including the default) the emitted Python file is as before. This
+keeps the code small and efficient for the common (default) case.
+
+Larger character sets are assumed to be sparse. Characters with ordinal values
+which place them in the first 95 characters are looked up using the normal
+index. Those above use an index optimised for sparse values and a binary search
+algorithm.
--- a/font_to_py.py
+++ b/font_to_py.py
@ -34,6 +34,11 @@ import sys
 import os
 import freetype

+MINCHAR = 32  # Ordinal values of default printable ASCII set
+MAXCHAR = 126  # 95 chars (ordinals 32..126 inclusive)
+# By default there will be 95 ASCII characters + the default char in element[0]
+ASSUME_SPARSE = MAXCHAR - MINCHAR + 1
+
 # UTILITIES FOR WRITING PYTHON SOURCECODE TO A FILE

 # ByteWriter takes as input a variable name and data values and writes
@ -42,7 +47,6 @@ import freetype

 # Lines are broken with \ for readability.

-
 class ByteWriter(object):
    bytes_per_line = 16

@ -265,6 +269,7 @@ class Font(dict):
        # .def_charset is requested charset or '' if -c was not specified
        self.def_charset = charset
        # .charset has all defined characters with '' for those in range but undefined.
+        # Sort order is increasing ordinal value of the character whether defined or not.
        if defchar is None: # Binary font
            self.charset = [chr(char) for char in range(minchar, maxchar + 1)]
        elif charset == '':
@ -273,7 +278,7 @@ class Font(dict):
            n = sorted([ord(x) for x in chr(defchar) + charset])
            self.minchar = n[0]
            self.maxchar = n[-1]
-            self.charset = [chr(defchar)] + [chr(char) if chr(char) in charset else '' for char in range(n[0], n[-1] + 1)]
+            self.charset = [chr(defchar)] + [chr(ordch) if chr(ordch) in charset else '' for ordch in range(n[0], n[-1] + 1)]
        # .pop_charset has only the defined characters
        self.pop_charset = [c for c in self.charset if c]
        self.max_width = self.get_dimensions(size)
@ -344,17 +349,31 @@ class Font(dict):

    def build_arrays(self, hmap, reverse):
        data = bytearray()
-        index = bytearray() #((0, 0))
-        for char in self.charset:
-            if char == '':
-                index += bytearray((0, 0))
-            else:
-                index += (len(data)).to_bytes(2, byteorder='little')  # Start
-                width = self[char][1]
-                data += (width).to_bytes(2, byteorder='little')
-                data += bytearray(self.stream_char(char, hmap, reverse))
+        index = bytearray()
+        sparse = bytearray()
+        def append_data(data, char):
+            width = self[char][1]
+            data += (width).to_bytes(2, byteorder='little')
+            data += bytearray(self.stream_char(char, hmap, reverse))
+
+        for n, char in enumerate(self.charset):
+            # n = 1 + ord(char) - ord(smallest char in set)
+            # Build normal index for default char + 1st 95 ordinal positions. Efficient for
+            # ASCII set.
+            if n <= ASSUME_SPARSE:
+                if char == '':
+                    index += bytearray((0, 0))
+                else:
+                    index += (len(data)).to_bytes(2, byteorder='little')  # Start
+                    append_data(data, char)
+            elif char != '':
+                # Build sparse index. Entries are 4 bytes but only populated if
+                # the char is in the charset.
+                sparse += ord(char).to_bytes(2, byteorder='little')
+                sparse += (len(data)).to_bytes(2, byteorder='little')  # Start
+                append_data(data, char)
        index += (len(data)).to_bytes(2, byteorder='little')  # End
-        return data, index
+        return data, index, sparse

    def build_binary_array(self, hmap, reverse, sig):
        data = bytearray((0x3f + sig, 0xe7, self.max_width, self.height))
@ -371,10 +390,11 @@ class Font(dict):
 STR01 = """# Code generated by font-to-py.py.
 # Font: {}{}
 # Cmd: {}
-version = '0.28'
+version = '0.3'

 """

+# Code emitted for charsets comprising <= 95 chars (including default)
 STR02 = """_mvfont = memoryview(_font)

 def get_ch(ch):
@ -387,6 +407,30 @@ def get_ch(ch):
    width = int.from_bytes(_font[offset:offset + 2], 'little')
 """

+# Code emitted for large charsets, assumed by build_arrays() to be sparse
+STRSP = """_mvfont = memoryview(_font)
+_mvsp = memoryview(_sparse)
+
+def bins(lst, val):
+    n = len(lst) // 4
+    if n == 1:
+        v = int.from_bytes(lst[: 2], 'little')
+        return int.from_bytes(lst[2 : 4], 'little') if v == val else 0
+    sp = (n // 2) * 4
+    res = bins(lst[: sp], val)
+    return res if res else bins(lst[sp :], val)
+
+def get_ch(ch):
+    ordch = ord(ch)
+    if ordch < {1}:
+        idx_offs = 2 * (ordch - {0} + 1) if ordch >= {0} else 0
+        offset = int.from_bytes(_index[idx_offs : idx_offs + 2], 'little')
+    else:
+        offset = bins(_mvsp, ordch)
+    width = int.from_bytes(_font[offset : offset + 2], 'little')
+"""
+
+
 STR02H ="""
    next_offs = offset + 2 + ((width - 1)//8 + 1) * {0}
    return _mvfont[offset + 2:next_offs], {0}, width
@ -402,8 +446,6 @@ STR02V ="""
 def write_func(stream, name, arg):
    stream.write('def {}():\n    return {}\n\n'.format(name, arg))

-# filename, size, minchar=32, maxchar=126, monospaced=False, defchar=ord('?'):
-
 def write_font(op_path, font_path, height, monospaced, hmap, reverse, minchar, maxchar, defchar, charset, iterate):
    try:
        fnt = Font(font_path, height, minchar, maxchar, monospaced, defchar, charset)
@ -443,14 +485,20 @@ def write_data(stream, fnt, font_path, hmap, reverse, iterate):
    write_func(stream, 'max_ch', maxchar)
    if iterate:
        stream.write(STR03.format(''.join(fnt.pop_charset)))
-    data, index = fnt.build_arrays(hmap, reverse)
+    data, index, sparse = fnt.build_arrays(hmap, reverse)
    bw_font = ByteWriter(stream, '_font')
    bw_font.odata(data)
    bw_font.eot()
    bw_index = ByteWriter(stream, '_index')
    bw_index.odata(index)
    bw_index.eot()
-    stream.write(STR02.format(minchar, maxchar, minchar))
+    if sparse:  # build_arrays() has returned a sparse index
+        bw_sparse = ByteWriter(stream, '_sparse')
+        bw_sparse.odata(sparse)
+        bw_sparse.eot()
+        stream.write(STRSP.format(minchar, minchar + ASSUME_SPARSE, len(sparse)))
+    else:
+        stream.write(STR02.format(minchar, maxchar))
    if hmap:
        stream.write(STR02H.format(height))
    else:
@ -525,13 +573,13 @@ if __name__ == "__main__":

    parser.add_argument('-s', '--smallest',
                        type = int,
-                        default = 32,
+                        default = MINCHAR,
                        help = 'Ordinal value of smallest character default %(default)i')

    parser.add_argument('-l', '--largest',
                        type = int,
                        help = 'Ordinal value of largest character default %(default)i',
-                        default = 126)
+                        default = MAXCHAR)

    parser.add_argument('-e', '--errchar',
                        type = int,