""" Process raw qstr file and output qstr data with length, hash and data bytes. This script works with Python 2.6, 2.7, 3.3 and 3.4. """ from __future__ import print_function import re import sys # Python 2/3 compatibility: # - iterating through bytes is different # - codepoint2name lives in a different module import platform if platform.python_version_tuple()[0] == '2': bytes_cons = lambda val, enc=None: bytearray(val) from htmlentitydefs import codepoint2name elif platform.python_version_tuple()[0] == '3': bytes_cons = bytes from html.entities import codepoint2name # end compatibility code codepoint2name[ord('-')] = 'hyphen'; # add some custom names to map characters that aren't in HTML codepoint2name[ord(' ')] = 'space' codepoint2name[ord('\'')] = 'squot' codepoint2name[ord(',')] = 'comma' codepoint2name[ord('.')] = 'dot' codepoint2name[ord(':')] = 'colon' codepoint2name[ord(';')] = 'semicolon' codepoint2name[ord('/')] = 'slash' codepoint2name[ord('%')] = 'percent' codepoint2name[ord('#')] = 'hash' codepoint2name[ord('(')] = 'paren_open' codepoint2name[ord(')')] = 'paren_close' codepoint2name[ord('[')] = 'bracket_open' codepoint2name[ord(']')] = 'bracket_close' codepoint2name[ord('{')] = 'brace_open' codepoint2name[ord('}')] = 'brace_close' codepoint2name[ord('*')] = 'star' codepoint2name[ord('!')] = 'bang' codepoint2name[ord('\\')] = 'backslash' codepoint2name[ord('+')] = 'plus' codepoint2name[ord('$')] = 'dollar' codepoint2name[ord('=')] = 'equals' codepoint2name[ord('?')] = 'question' codepoint2name[ord('@')] = 'at_sign' codepoint2name[ord('^')] = 'caret' codepoint2name[ord('|')] = 'pipe' codepoint2name[ord('~')] = 'tilde' # this must match the equivalent function in qstr.c def compute_hash(qstr, bytes_hash): hash = 5381 for b in qstr: hash = (hash * 33) ^ b # Make sure that valid hash is never zero, zero means "hash not computed" return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1 def qstr_escape(qst): def esc_char(m): c = ord(m.group(0)) try: name = codepoint2name[c] except KeyError: name = '0x%02x' % c return "_" + name + '_' return re.sub(r'[^A-Za-z0-9_]', esc_char, qst) def parse_input_headers(infiles): # read the qstrs in from the input files qcfgs = {} qstrs = {} for infile in infiles: with open(infile, 'rt') as f: for line in f: line = line.strip() # is this a config line? match = re.match(r'^QCFG\((.+), (.+)\)', line) if match: value = match.group(2) if value[0] == '(' and value[-1] == ')': # strip parenthesis from config value value = value[1:-1] qcfgs[match.group(1)] = value continue # is this a QSTR line? match = re.match(r'^Q\((.*)\)$', line) if not match: continue # get the qstr value qstr = match.group(1) # special case to specify control characters if qstr == '\\n': qstr = '\n' # work out the corresponding qstr name ident = qstr_escape(qstr) # don't add duplicates if ident in qstrs: continue # add the qstr to the list, with order number to retain original order in file order = len(qstrs) # but put special method names like __add__ at the top of list, so # that their id's fit into a byte if ident == "": # Sort empty qstr above all still order = -200000 elif ident == "__dir__": # Put __dir__ after empty qstr for builtin dir() to work order = -190000 elif ident.startswith("__"): order -= 100000 qstrs[ident] = (order, ident, qstr) if not qcfgs: sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") sys.exit(1) return qcfgs, qstrs def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): qbytes = bytes_cons(qstr, 'utf8') qlen = len(qbytes) qhash = compute_hash(qbytes, cfg_bytes_hash) if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr): # qstr is all printable ASCII so render it as-is (for easier debugging) qdata = qstr else: # qstr contains non-printable codes so render entire thing as hex pairs qdata = ''.join(('\\x%02x' % b) for b in qbytes) if qlen >= (1 << (8 * cfg_bytes_len)): print('qstr is too long:', qstr) assert False qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len)) qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash)) return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata) def print_qstr_data(qcfgs, qstrs): # get config variables cfg_bytes_len = int(qcfgs['BYTES_IN_LEN']) cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH']) # print out the starter of the generated C header file print('// This file was automatically generated by makeqstrdata.py') print('') # add NULL qstr with no hash or data print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len)) # go through each qstr and print it out for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) print('QDEF(MP_QSTR_%s, %s)' % (ident, qbytes)) def do_work(infiles): qcfgs, qstrs = parse_input_headers(infiles) print_qstr_data(qcfgs, qstrs) if __name__ == "__main__": do_work(sys.argv[1:])