# Forked from xuos/xiuos.
# Web-viewer metadata for this file: 370 lines, 8.9 KiB, Python.
"""
Process raw qstr file and output qstr data with length, hash and data bytes.

This script works with Python 2.6, 2.7, 3.3 and 3.4.
"""

from __future__ import print_function

import platform
import re
import sys

# Python 2/3 compatibility:
#   - iterating through bytes is different
#   - codepoint2name lives in a different module

if platform.python_version_tuple()[0] == "2":

    def bytes_cons(val, enc=None):
        # The encoding argument is accepted for call compatibility with the
        # Python 3 ``bytes`` constructor but is not used here.
        return bytearray(val)

    from htmlentitydefs import codepoint2name
else:
    # Any non-2 interpreter takes the Python 3 path, so bytes_cons and
    # codepoint2name are always defined before they are used below.
    bytes_cons = bytes
    from html.entities import codepoint2name
# end compatibility code

# add some custom names to map characters that aren't in HTML
# (applied in this order; each entry is (character, name))
for _char, _name in (
    ("-", "hyphen"),
    (" ", "space"),
    ("'", "squot"),
    (",", "comma"),
    (".", "dot"),
    (":", "colon"),
    (";", "semicolon"),
    ("/", "slash"),
    ("%", "percent"),
    ("#", "hash"),
    ("(", "paren_open"),
    (")", "paren_close"),
    ("[", "bracket_open"),
    ("]", "bracket_close"),
    ("{", "brace_open"),
    ("}", "brace_close"),
    ("*", "star"),
    ("!", "bang"),
    ("\\", "backslash"),
    ("+", "plus"),
    ("$", "dollar"),
    ("=", "equals"),
    ("?", "question"),
    ("@", "at_sign"),
    ("^", "caret"),
    ("|", "pipe"),
    ("~", "tilde"),
):
    codepoint2name[ord(_char)] = _name

# static qstrs, should be sorted
# parse_input_headers() numbers these entries in list order, so the order
# here is the order in which they are emitted.

static_qstr_list = [
    "",
    "__dir__",  # Put __dir__ after empty qstr for builtin dir() to work
    "\n",
    " ",
    "*",
    "/",
    "<module>",
    "_",
    "__call__",
    "__class__",
    "__delitem__",
    "__enter__",
    "__exit__",
    "__getattr__",
    "__getitem__",
    "__hash__",
    "__init__",
    "__int__",
    "__iter__",
    "__len__",
    "__main__",
    "__module__",
    "__name__",
    "__new__",
    "__next__",
    "__qualname__",
    "__repr__",
    "__setitem__",
    "__str__",
    "ArithmeticError",
    "AssertionError",
    "AttributeError",
    "BaseException",
    "EOFError",
    "Ellipsis",
    "Exception",
    "GeneratorExit",
    "ImportError",
    "IndentationError",
    "IndexError",
    "KeyError",
    "KeyboardInterrupt",
    "LookupError",
    "MemoryError",
    "NameError",
    "NoneType",
    "NotImplementedError",
    "OSError",
    "OverflowError",
    "RuntimeError",
    "StopIteration",
    "SyntaxError",
    "SystemExit",
    "TypeError",
    "ValueError",
    "ZeroDivisionError",
    "abs",
    "all",
    "any",
    "append",
    "args",
    "bool",
    "builtins",
    "bytearray",
    "bytecode",
    "bytes",
    "callable",
    "chr",
    "classmethod",
    "clear",
    "close",
    "const",
    "copy",
    "count",
    "dict",
    "dir",
    "divmod",
    "end",
    "endswith",
    "eval",
    "exec",
    "extend",
    "find",
    "format",
    "from_bytes",
    "get",
    "getattr",
    "globals",
    "hasattr",
    "hash",
    "id",
    "index",
    "insert",
    "int",
    "isalpha",
    "isdigit",
    "isinstance",
    "islower",
    "isspace",
    "issubclass",
    "isupper",
    "items",
    "iter",
    "join",
    "key",
    "keys",
    "len",
    "list",
    "little",
    "locals",
    "lower",
    "lstrip",
    "main",
    "map",
    "micropython",
    "next",
    "object",
    "open",
    "ord",
    "pop",
    "popitem",
    "pow",
    "print",
    "range",
    "read",
    "readinto",
    "readline",
    "remove",
    "replace",
    "repr",
    "reverse",
    "rfind",
    "rindex",
    "round",
    "rsplit",
    "rstrip",
    "self",
    "send",
    "sep",
    "set",
    "setattr",
    "setdefault",
    "sort",
    "sorted",
    "split",
    "start",
    "startswith",
    "staticmethod",
    "step",
    "stop",
    "str",
    "strip",
    "sum",
    "super",
    "throw",
    "to_bytes",
    "tuple",
    "type",
    "update",
    "upper",
    "utf-8",
    "value",
    "values",
    "write",
    "zip",
]

# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
    """Return the hash of ``qstr`` truncated to ``bytes_hash`` bytes.

    qstr: an iterable of integer byte values (for example ``bytes`` on
        Python 3, or a ``bytearray``).
    bytes_hash: how many low-order bytes of the hash to keep.

    The hash starts at 5381 and, for each byte, is multiplied by 33 and
    XOR-ed with that byte. The returned value is never 0: a masked result
    of 0 is replaced by 1.
    """
    hash_val = 5381
    for b in qstr:
        hash_val = (hash_val * 33) ^ b
    # Make sure that valid hash is never zero, zero means "hash not computed"
    return (hash_val & ((1 << (8 * bytes_hash)) - 1)) or 1


def qstr_escape(qst):
    """Return ``qst`` with every character outside ``[A-Za-z0-9_]`` escaped.

    Each such character is replaced by ``_<name>_``, where ``<name>`` is its
    entry in ``codepoint2name`` or, when it has none, its code point
    formatted as ``0x%02x``. The result is the identifier that
    print_qstr_data() appends to ``MP_QSTR_``.
    """

    def esc_char(m):
        code = ord(m.group(0))
        name = codepoint2name.get(code)
        if name is None:
            # no symbolic name known: fall back to the hex code point
            name = "0x%02x" % code
        return "_" + name + "_"

    return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)


def parse_input_headers(infiles):
    """Collect config values and qstrs from the given input files.

    infiles: iterable of paths to text files containing ``QCFG(name, value)``
        and ``Q(text)`` lines; all other lines are ignored.

    Returns a ``(qcfgs, qstrs)`` tuple:
      - ``qcfgs`` maps each config name to its value string (one pair of
        enclosing parentheses is stripped from the value);
      - ``qstrs`` maps each escaped identifier to ``(order, ident, qstr)``,
        where ``order`` is the sort key used when the data is printed.

    Writes an error to stderr and exits with status 1 if no ``QCFG`` line
    was found in any input file.
    """
    qcfgs = {}
    qstrs = {}

    # add static qstrs
    for qstr in static_qstr_list:
        # work out the corresponding qstr name
        ident = qstr_escape(qstr)

        # don't add duplicates
        assert ident not in qstrs

        # add the qstr to the list, with order number to retain original order in file
        order = len(qstrs) - 300000
        qstrs[ident] = (order, ident, qstr)

    # the patterns do not change per line, so compile them once
    qcfg_re = re.compile(r"^QCFG\((.+), (.+)\)")
    qstr_re = re.compile(r"^Q\((.*)\)$")

    # read the qstrs in from the input files
    for infile in infiles:
        with open(infile, "rt") as f:
            for line in f:
                line = line.strip()

                # is this a config line?
                match = qcfg_re.match(line)
                if match:
                    value = match.group(2)
                    if value[0] == "(" and value[-1] == ")":
                        # strip parenthesis from config value
                        value = value[1:-1]
                    qcfgs[match.group(1)] = value
                    continue

                # is this a QSTR line?
                match = qstr_re.match(line)
                if not match:
                    continue

                # get the qstr value
                qstr = match.group(1)

                # special cases to specify control characters
                if qstr == "\\n":
                    qstr = "\n"
                elif qstr == "\\r\\n":
                    qstr = "\r\n"

                # work out the corresponding qstr name
                ident = qstr_escape(qstr)

                # don't add duplicates
                if ident in qstrs:
                    continue

                # add the qstr to the list, with order number to retain original order in file
                order = len(qstrs)
                # but put special method names like __add__ at the top of list, so
                # that their id's fit into a byte
                # NOTE: "" and "__dir__" are in static_qstr_list, so they are
                # skipped as duplicates above and the first two branches only
                # run if those entries are ever removed from that list.
                if ident == "":
                    # Sort empty qstr above all still
                    order = -200000
                elif ident == "__dir__":
                    # Put __dir__ after empty qstr for builtin dir() to work
                    order = -190000
                elif ident.startswith("__"):
                    order -= 100000
                qstrs[ident] = (order, ident, qstr)

    if not qcfgs:
        sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
        sys.exit(1)

    return qcfgs, qstrs


def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Render one qstr as the C expression used for its data.

    cfg_bytes_len: number of bytes used to store the qstr length.
    cfg_bytes_hash: number of bytes used to store the qstr hash.
    qstr: the string to render.

    Returns ``(const byte*)"<hash><len>" "<data>"``, where hash and length
    are written as ``\\xNN`` escapes, least significant byte first. The data
    is the string itself when it is all printable ASCII without a backslash
    or double quote; otherwise every encoded byte is written as ``\\xNN``.

    Raises AssertionError when the encoded length does not fit in
    ``cfg_bytes_len`` bytes.
    """
    qbytes = bytes_cons(qstr, "utf8")
    qlen = len(qbytes)
    if qlen >= (1 << (8 * cfg_bytes_len)):
        print("qstr is too long:", qstr)
        # Raised explicitly rather than with ``assert False`` so the check
        # still fires when Python runs with assertions disabled (-O).
        raise AssertionError("qstr is too long: %r" % (qstr,))
    qhash = compute_hash(qbytes, cfg_bytes_hash)
    if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr):
        # qstr is all printable ASCII so render it as-is (for easier debugging)
        qdata = qstr
    else:
        # qstr contains non-printable codes so render entire thing as hex pairs
        qdata = "".join(("\\x%02x" % b) for b in qbytes)
    qlen_str = ("\\x%02x" * cfg_bytes_len) % tuple(
        ((qlen >> (8 * i)) & 0xFF) for i in range(cfg_bytes_len)
    )
    qhash_str = ("\\x%02x" * cfg_bytes_hash) % tuple(
        ((qhash >> (8 * i)) & 0xFF) for i in range(cfg_bytes_hash)
    )
    return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)


def print_qstr_data(qcfgs, qstrs):
    """Print the generated C header contents to stdout.

    qcfgs: config mapping; ``BYTES_IN_LEN`` and ``BYTES_IN_HASH`` are read
        from it and converted with ``int`` (a missing key raises KeyError).
    qstrs: mapping whose values are ``(order, ident, qstr)`` tuples.

    Output is a comment line, a blank line, a ``QDEF`` entry for
    ``MP_QSTRnull`` with zeroed hash and length bytes, then one
    ``QDEF(MP_QSTR_<ident>, ...)`` line per qstr in ascending ``order``.
    """
    # get config variables
    cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
    cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])

    # print out the starter of the generated C header file
    print("// This file was automatically generated by makeqstrdata.py")
    print("")

    # add NULL qstr with no hash or data
    print(
        'QDEF(MP_QSTRnull, (const byte*)"%s%s" "")'
        % ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len)
    )

    # go through each qstr and print it out
    for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
        qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
        print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))


def do_work(infiles):
    """Parse the given input files and print the resulting qstr data."""
    qcfgs, qstrs = parse_input_headers(infiles)
    print_qstr_data(qcfgs, qstrs)


if __name__ == "__main__":
    # every command-line argument is treated as an input file path
    do_work(sys.argv[1:])