Dual indices.

2019-09-08 12:55:09 +01:00 · 2019-09-08 12:55:09 +01:00 · dfb80d975d
commit dfb80d975d
--- a/FONT_TO_PY.md
+++ b/FONT_TO_PY.md
@ -5,12 +5,12 @@ is to save RAM on resource-limited targets: the font file may be incorporated
 into a firmware build such that it occupies flash memory rather than scarce
 RAM. Python code built into firmware is known as frozen bytecode.
-## V0.27/0.28 notes
+## V0.3 notes
-7 Sept 2019
+8 Sept 2019
-Remove redundancy from index file: significantly reduces file size for sparse
+Remove redundancy from index file. Emit extra index for sparse fonts, reducing
-fonts. Add a comment field in the output file showing creation command line.
+code size. Add comment field in the output file showing creation command line.
 Repo includes the file `extended`. This facilitates creating fonts comprising
 the printable ASCII set plus `°μπωϕθαβγδλΩ`. Improvements to `font_test.py`.
@ -207,15 +207,19 @@ With a font of height 20 pixels RAM saving was an order of magnitude. The
 saving will be greater if larger fonts are used as RAM usage is independent of
 the array sizes.
-# Appendix 2: room for improvement
+# Appendix 2: Recent improvements
-The representation of non-contiguous character sets having large gaps (such as
+The representation of non-contiguous character sets such as the `extended` set
-the `extended` set) is not very efficient. This is because the index table
+presents a challenge because the ordinal values of the Unicode characters can
-becomes sparse. This matters little if the font is to be frozen as bytecode
+be expected to span a range much greater than the number of characters in the
-because the index is located in Flash rather than RAM.
+set. Using an index of the type used for the ASCII set would be inefficient as
 most of the elements would be null (pointing to the default character).
-I have implemented a change which removes redundancy in the index file. Further
+The code now behaves as follows. If the character set contains no more than 95
-improvements would require a further level of indirection which would have the
+characters (including the default) the emitted Python file is as before. This
-drawback of increasing the size of small contiguous character sets - or
+keeps the code small and efficient for the common (default) case.
-emitting two file formats with the same API. The latter does not appeal from a
+
-support perspective.
+Larger character sets are assumed to be sparse. Characters with ordinal values
 which place them in the first 95 characters are looked up using the normal
 index. Those above use an index optimised for sparse values and a binary search
 algorithm.
--- a/font_to_py.py
+++ b/font_to_py.py
@ -34,6 +34,11 @@ import sys
 import os
 import freetype
 MINCHAR = 32  # Ordinal values of default printable ASCII set
 MAXCHAR = 126  # 95 chars (ordinals 32..126 inclusive)
 # By default there will be 95 ASCII characters + the default char in element[0]
 ASSUME_SPARSE = MAXCHAR - MINCHAR + 1
 # UTILITIES FOR WRITING PYTHON SOURCECODE TO A FILE
 # ByteWriter takes as input a variable name and data values and writes
@ -42,7 +47,6 @@ import freetype
 # Lines are broken with \ for readability.
 class ByteWriter(object):
    bytes_per_line = 16
@ -265,6 +269,7 @@ class Font(dict):
        # .def_charset is requested charset or '' if -c was not specified
        self.def_charset = charset
        # .charset has all defined characters with '' for those in range but undefined.
        # Sort order is increasing ordinal value of the character whether defined or not.
        if defchar is None: # Binary font
            self.charset = [chr(char) for char in range(minchar, maxchar + 1)]
        elif charset == '':
@ -273,7 +278,7 @@ class Font(dict):
            n = sorted([ord(x) for x in chr(defchar) + charset])
            self.minchar = n[0]
            self.maxchar = n[-1]
-            self.charset = [chr(defchar)] + [chr(char) if chr(char) in charset else '' for char in range(n[0], n[-1] + 1)]
+            self.charset = [chr(defchar)] + [chr(ordch) if chr(ordch) in charset else '' for ordch in range(n[0], n[-1] + 1)]
        # .pop_charset has only the defined characters
        self.pop_charset = [c for c in self.charset if c]
        self.max_width = self.get_dimensions(size)
@ -344,17 +349,31 @@ class Font(dict):
    def build_arrays(self, hmap, reverse):
        data = bytearray()
-        index = bytearray() #((0, 0))
+        index = bytearray()
-        for char in self.charset:
+        sparse = bytearray()
-            if char == '':
+        def append_data(data, char):
-                index += bytearray((0, 0))
+            width = self[char][1]
-            else:
+            data += (width).to_bytes(2, byteorder='little')
-                index += (len(data)).to_bytes(2, byteorder='little')  # Start
+            data += bytearray(self.stream_char(char, hmap, reverse))
-                width = self[char][1]
+
-                data += (width).to_bytes(2, byteorder='little')
+        for n, char in enumerate(self.charset):
-                data += bytearray(self.stream_char(char, hmap, reverse))
+            # n = 1 + ord(char) - ord(smallest char in set)
            # Build normal index for default char + 1st 95 chars. Efficient for
            # ASCII set.
            if n <= ASSUME_SPARSE:
                if char == '':
                    index += bytearray((0, 0))
                else:
                    index += (len(data)).to_bytes(2, byteorder='little')  # Start
                    append_data(data, char)
            elif char != '':
                # Build sparse index. Entries are 4 bytes but only populated if
                # the char is in the charset.
                sparse += ord(char).to_bytes(2, byteorder='little')
                sparse += (len(data)).to_bytes(2, byteorder='little')  # Start
                append_data(data, char)
        index += (len(data)).to_bytes(2, byteorder='little')  # End
-        return data, index
+        return data, index, sparse
    def build_binary_array(self, hmap, reverse, sig):
        data = bytearray((0x3f + sig, 0xe7, self.max_width, self.height))
@ -371,10 +390,11 @@ class Font(dict):
 STR01 = """# Code generated by font-to-py.py.
 # Font: {}{}
 # Cmd: {}
-version = '0.28'
+version = '0.3'
 """
 # Code emitted for charsets comprising <= 95 chars (including default)
 STR02 = """_mvfont = memoryview(_font)
 def get_ch(ch):
@ -387,6 +407,30 @@ def get_ch(ch):
    width = int.from_bytes(_font[offset:offset + 2], 'little')
 """
 # Code emitted for large charsets, assumed by build_arrays() to be sparse
 STRSP = """_mvfont = memoryview(_font)
 _mvsp = memoryview(_sparse)
 def bins(lst, val):
    n = len(lst) // 4
    if n == 1:
        v = int.from_bytes(lst[: 2], 'little')
        return int.from_bytes(lst[2 : 4], 'little') if v == val else 0
    sp = (n // 2) * 4
    res = bins(lst[: sp], val)
    return res if res else bins(lst[sp :], val)
 def get_ch(ch):
    ordch = ord(ch)
    if ordch < {1}:
        idx_offs = 2 * (ordch - {0} + 1) if ordch >= {0} else 0
        offset = int.from_bytes(_index[idx_offs : idx_offs + 2], 'little')
    else:
        offset = bins(_mvsp, ordch)
    width = int.from_bytes(_font[offset : offset + 2], 'little')
 """
 STR02H ="""
    next_offs = offset + 2 + ((width - 1)//8 + 1) * {0}
    return _mvfont[offset + 2:next_offs], {0}, width
@ -402,8 +446,6 @@ STR02V ="""
 def write_func(stream, name, arg):
    stream.write('def {}():\n    return {}\n\n'.format(name, arg))
 # filename, size, minchar=32, maxchar=126, monospaced=False, defchar=ord('?'):
 def write_font(op_path, font_path, height, monospaced, hmap, reverse, minchar, maxchar, defchar, charset, iterate):
    try:
        fnt = Font(font_path, height, minchar, maxchar, monospaced, defchar, charset)
@ -443,14 +485,20 @@ def write_data(stream, fnt, font_path, hmap, reverse, iterate):
    write_func(stream, 'max_ch', maxchar)
    if iterate:
        stream.write(STR03.format(''.join(fnt.pop_charset)))
-    data, index = fnt.build_arrays(hmap, reverse)
+    data, index, sparse = fnt.build_arrays(hmap, reverse)
    bw_font = ByteWriter(stream, '_font')
    bw_font.odata(data)
    bw_font.eot()
    bw_index = ByteWriter(stream, '_index')
    bw_index.odata(index)
    bw_index.eot()
-    stream.write(STR02.format(minchar, maxchar, minchar))
+    if sparse:  # build_arrays() has returned a sparse index
        bw_sparse = ByteWriter(stream, '_sparse')
        bw_sparse.odata(sparse)
        bw_sparse.eot()
        stream.write(STRSP.format(minchar, minchar + ASSUME_SPARSE, len(sparse)))
    else:
        stream.write(STR02.format(minchar, maxchar))
    if hmap:
        stream.write(STR02H.format(height))
    else:
@ -525,13 +573,13 @@ if __name__ == "__main__":
    parser.add_argument('-s', '--smallest',
                        type = int,
-                        default = 32,
+                        default = MINCHAR,
                        help = 'Ordinal value of smallest character default %(default)i')
    parser.add_argument('-l', '--largest',
                        type = int,
                        help = 'Ordinal value of largest character default %(default)i',
-                        default = 126)
+                        default = MAXCHAR)
    parser.add_argument('-e', '--errchar',
                        type = int,