#!/usr/bin/env python

# A check script that just works at the time of writing...
#
# It also builds a structure tree for further reference.
#
# The input file format must be similar to the headers generated by regtool,
# otherwise this script makes no sense at all.
#
# Known limitations:
# 1. won't accept `/* ... */ /* ... */`: behaves badly with several block comments on one line
# 2. won't accept multiple expressions within the same line (but will print an INFO about it)
# 3. won't accept single line struct/union definitions
#
# Check list:
# 1. a structure should not contain bitfield members alongside nested struct/union
# 2. the bitfield sum in a struct should be 32 (means being well padded)
# 3. each bitfield type should be uint32_t
# 4. expecting union to be `union { struct {xxx}; uint32_t val; }` and complain if it is not an u32 val (but not fail)
# 5. typedef volatile struct xxx{}: xxx must exist
#
# Anything else does not fail the check but produces a warning.

import os
import re
import sys
from typing import Any, Optional


class MemberField:
    """Leaf node of the reference tree: one struct/union member.

    ``member_type`` is the C type name as written in the header and
    ``bitfield`` is the bit width, or ``None`` for a non-bitfield member.
    """

    # class-level defaults; __init__ always overwrites them per instance
    member_type = ''
    bitfield = None

    def __init__(self, m_type: str, m_bits: Optional[int] = None) -> None:
        self.member_type = m_type
        self.bitfield = m_bits

    def __unicode__(self) -> str:
        return self.__str__()

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        if self.bitfield is None:
            return '"Field type={}"'.format(self.member_type)
        return '"Field type={} bit={}"'.format(self.member_type, self.bitfield)


class SoCStructureHeaderChecker:
    """Line-oriented checker for register-structure C headers.

    ``check()`` parses one file, prints ERROR/WARN/INFO diagnostics and returns
    0 when no error was found, a negative value otherwise.  While parsing, a
    tree of the declared types is collected; ``get_ref_tree()`` returns the
    entries created for root-level ``extern <type> <name>;`` declarations.
    """

    # capture: typedef, volatile, struct name
    __REGEXP_MATCH_STRUCTURE_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+struct[\s]+([\w]+)?[\s\S]*$'
    # capture: typedef, volatile, union name
    __REGEXP_MATCH_UNION_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+union[\s]+([\w]+)?[\s\S]*$'
    # capture: type_var_name
    __REGEXP_MATCH_STRUCT_UNION_END_NAME = r'^[\s]*}[\s]*([\w\[\]\*]*)[\s]*;[\s\S]*$'
    # capture: type, name, bitfield
    __REGEXP_MATCH_BITFIELD_MEMBER = (r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w\*]+)[\s]+([\w\*]+(?:(?:\[[\s\S]*\])|(?:)))'
                                      r'[\s]*(?:(?:[\s]*;)|(?::[\s]*([\d]+)[\s]*;))[\s\S]*$')
    # should be useless and can be safely deleted
    __REGEXP_MATCH_MULTILINE_COMMENT = r'^[\s]*[\/]{0,2}\*[\/]?[\s\S]*$'
    __REGEX_MATCH_SIMPLE_VAL_FIELD = r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w]+)[\s]+([\w\[\]\*]+)[\s]*;[\s]*$'
    # capture: type, name
    __REGEX_MATCH_ROOT_EXTERNAL = r'^[\s]*extern[\s]+([\w]+)[\s]+([\w]+)[\s]*;[\s]*$'

    __linecount = 0
    __fd = None  # type: Any
    __is_eof = False

    # NOTE(review): the two dicts below are class attributes, so they are
    # shared by every instance of this class.  Kept as-is because the original
    # comment calls the second one "shared"; confirm before making them
    # per-instance.

    # generated reference tree
    __ref_tree = dict()  # type: dict
    # intermediate result of the generated tree, shared
    # named typedef, or named struct/union. referred to but never deleted
    __temp_ref_types = dict()  # type: dict

    def __expand_type(self, member_type: str, bitfield: Optional[int] = None) -> Any:
        """Resolve a member type to a tree node.

        Returns a ``MemberField`` for ``uint32_t``, the previously recorded
        sub-tree for a known typedef, or ``None`` for anything else (a
        non-u32 type carrying a bitfield additionally prints an ERROR).
        """
        if member_type == 'uint32_t':
            return MemberField(member_type, bitfield)
        if bitfield is not None:
            print('\033[0;31mERROR\033[0m: non-u32 type with bitfield')
            return None
        if member_type in self.__temp_ref_types:
            return self.__temp_ref_types[member_type]
        return None

    def __getline(self, incomment: bool = False) -> Any:
        """Return the next non-empty line with comments stripped.

        ``incomment`` tells the reader that it starts inside an unterminated
        ``/* ... */`` comment.  Returns ``None`` and sets the EOF flag when the
        file is exhausted.  Every physical line read increments the line
        counter, including skipped ones.

        Implemented as a loop (the earlier version recursed once per skipped
        line and could hit the interpreter recursion limit on long comment or
        blank-line runs).
        """
        while True:
            rawline = self.__fd.readline()
            if not rawline:
                self.__is_eof = True
                return None
            self.__linecount += 1

            if incomment:
                pos = rawline.find('*/')
                if pos == -1:
                    # still inside the multi-line comment
                    continue
                # keep only what follows the end of the comment
                rawline = rawline[pos + 2:]

            # preprocess: remove '// comment'
            # NOTE(review): `[^(\/\/)]` is a character class (no '(', '/' or ')'
            # before the comment), not a group; left untouched to keep the
            # exact set of lines that get stripped.
            match_obj = re.match(r'^([^(\/\/)]*)\/\/[\s\S]*$', rawline)
            if match_obj is not None:
                rawline = match_obj.groups()[0]

            # preprocess: remove '/* comment'
            match_obj = re.match(r'^([^(\/\*)]*)\/\*([\s\S]*)$', rawline)
            if match_obj is not None:
                before_comment, after_open = match_obj.groups()
                # check whether the comment is closed on the same line
                pos = after_open.find('*/')
                if pos == -1:
                    # comment continues on the following lines
                    incomment = True
                    continue
                # keep the text on both sides of the comment
                rawline = before_comment + after_open[pos + 2:]

            if re.match(r'^[\s]*$', rawline):
                # skip empty line; we are outside of any comment here
                incomment = False
                continue

            if rawline.count(';') > 1:
                print('\033[0;34mINFO\033[0m: line: {}: possibily multiple expression within same line'.format(self.__linecount))
                print(rawline)
            return rawline

    def __process_structure(self, name: Optional[str], is_typedef: bool, is_volatile: bool) -> Any:
        """Parse a struct body up to its closing ``} xxx;`` line.

        Called right after the opening line was consumed.  Returns a tuple
        ``(ret_val, node_tree)``: ``ret_val`` is 0 on success, -1 for an
        anonymous ``typedef volatile struct``, -2 for a bad bitfield sum or a
        failing nested struct/union, -3 for a typedef without a valid name.
        ``node_tree`` maps member names to nodes, or is ``None`` for a named
        member without any recorded field.
        """
        ret_val = 0
        # first check for anonymous register structs
        if is_typedef and is_volatile and name is None:
            print('\033[0;31mERROR\033[0m: line {}: annoymous struct'.format(self.__linecount))
            ret_val = -1
        node_tree = dict()  # type: Any
        bitcount = 0
        has_nested_struct_union = False
        has_non_bitfield_member = False
        parsed_varname = ''
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break

            # check for nested structure
            match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
            if match_obj is not None:
                has_nested_struct_union = True
                ret, inherited_node_tree = self.__process_structure(
                    match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue

            # check for nested union
            match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
            if match_obj is not None:
                has_nested_struct_union = True
                ret, inherited_node_tree = self.__process_union(
                    match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue

            # check if end of struct
            match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
            if match_obj is not None:
                end_name = match_obj.groups()[0]
                # a struct either has no bitfield at all or is padded to 32 bits
                if bitcount not in (0, 32):
                    ret_val = -2
                    if is_typedef:
                        print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}'.format(self.__linecount, bitcount, end_name))
                    else:
                        print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}, varname "{}"'
                              .format(self.__linecount, bitcount, name, end_name))
                parsed_varname = end_name
                if is_typedef:
                    # is a typedef
                    if end_name == '' or end_name.find('[') != -1:
                        # should be c error
                        print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
                        ret_val = -3
                    if end_name in self.__temp_ref_types:
                        # duplication, script bug: we are putting all types into same namespace
                        print('script run into bug...')
                    # record a shallow copy of the members under the type name
                    self.__temp_ref_types[end_name] = dict(node_tree)
                elif name is not None:
                    # currently this kind of expression doesn't exist
                    print('!!!!!!UNDEALED CONDITION!!!!!')
                elif end_name != '':
                    # named member, wrap and overwrite
                    if len(node_tree) == 0:
                        node_tree = None
                    else:
                        array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', end_name)
                        if array_match is not None:
                            node_tree = {array_match.groups()[0] + '[]': node_tree}
                        else:
                            node_tree = {end_name: node_tree}
                else:
                    # not a type, no member name, treat its fields as its parent's
                    pass
                break

            # check member
            match_obj = re.match(self.__REGEXP_MATCH_BITFIELD_MEMBER, rawline)
            if match_obj is not None:
                member_type, member_name, member_bits = match_obj.groups()
                field_bit = None
                if member_bits is not None:
                    field_bit = int(member_bits)
                    bitcount += field_bit
                    # bitfield should be u32
                    if member_type != 'uint32_t':
                        print('\033[0;33mWARN\033[0m: line: {}: {} has type {}'.format(self.__linecount, member_name, member_type))
                else:
                    has_non_bitfield_member = True
                # append to node tree
                member_node = self.__expand_type(member_type, field_bit)
                if member_node is not None:
                    array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', member_name)
                    if array_match is not None:
                        node_tree[array_match.groups()[0] + '[]'] = member_node
                    else:
                        node_tree[member_name] = member_node
                else:
                    if '*' not in member_type:
                        print('\033[0;33mWARN\033[0m: line {}: unknown type {}'.format(self.__linecount, member_type))
                    else:
                        print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, member_type))
                continue

            # check comments
            match_obj = re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline)
            if match_obj is not None:
                # code comments
                continue

            # dump out unmatched condition
            print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))

        if bitcount != 0 and has_nested_struct_union:
            print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and nested structure/union'.format(self.__linecount))
        if bitcount != 0 and has_non_bitfield_member:
            print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and non-bitfield member'.format(self.__linecount))
        if is_typedef and is_volatile and name is None:
            if parsed_varname != '':
                print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
        if name is not None and is_typedef and is_volatile and parsed_varname.rstrip('t') != name.rstrip('s'):
            print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))
        return ret_val, node_tree

    def __process_union(self, name: Optional[str], is_typedef: bool, is_volatile: bool) -> Any:
        """Parse a union body up to its closing ``} xxx;`` line.

        Called right after the opening line was consumed.  Returns a tuple
        ``(ret_val, node_tree)``: ``ret_val`` is 0 on success, -1 for an
        anonymous ``typedef volatile union``, -2 when a nested struct/union
        failed, -3 for a typedef without a name.  ``node_tree`` is ``None``
        for a typedef (its members are recorded under the type name instead)
        and for a named member without any recorded field.
        """
        ret_val = 0
        # first check for anonymous register structs
        if is_typedef and is_volatile and name is None:
            print('\033[0;31mERROR\033[0m: line {}: annoymous union'.format(self.__linecount))
            ret_val = -1
        node_tree = dict()  # type: Any
        has_struct_count = 0
        has_val_field_count = 0
        # must exist even when EOF is reached before the closing brace,
        # because it is read after the loop
        parsed_varname = ''
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break

            # check for nested structure
            match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
            if match_obj is not None:
                has_struct_count += 1
                ret, inherited_node_tree = self.__process_structure(
                    match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue

            # check for nested union (counted together with nested structs)
            match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
            if match_obj is not None:
                has_struct_count += 1
                ret, inherited_node_tree = self.__process_union(
                    match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue

            # check if end of union
            match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
            if match_obj is not None:
                end_name = match_obj.groups()[0]
                parsed_varname = end_name
                if is_typedef:
                    # is a typedef
                    if end_name == '':
                        # should be c error
                        print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
                        ret_val = -3
                    if end_name in self.__temp_ref_types:
                        # duplication, script bug: we are putting all types into same namespace
                        print('script run into bug...')
                    # record a shallow copy of the members under the type name
                    self.__temp_ref_types[end_name] = dict(node_tree)
                    node_tree = None
                elif name is not None:
                    # currently this kind of expression doesn't exist
                    print('!!!!!!UNDEALED CONDITION!!!!!')
                elif end_name != '':
                    # named member, wrap and overwrite
                    if len(node_tree) == 0:
                        node_tree = None
                    else:
                        array_match = re.match(r'^([\w]*)\[[\s\S]*\]$', end_name)
                        if array_match is not None:
                            node_tree = {array_match.groups()[0] + '[]': node_tree}
                        else:
                            node_tree = {end_name: node_tree}
                else:
                    # not a type, no member name, treat its fields as its parent's
                    pass
                break

            match_obj = re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline)
            if match_obj is not None:
                # code comments
                continue

            match_obj = re.match(self.__REGEX_MATCH_SIMPLE_VAL_FIELD, rawline)
            if match_obj is not None:
                member_type, member_name = match_obj.groups()
                # expecting to see 'uint32_t val;'
                if member_type != 'uint32_t' or member_name != 'val':
                    print(('\033[0;33mWARN\033[0m: unexpected union member at {}: {}'.format(self.__linecount, rawline)).replace('\n', ''))
                else:
                    has_val_field_count += 1
                # append to node tree
                member_node = self.__expand_type(member_type, None)
                if member_node is not None:
                    node_tree[member_name] = member_node
                else:
                    if '*' not in member_type:
                        print('\033[0;31mERROR\033[0m: line {}: unknown type {}'.format(self.__linecount, member_type))
                    else:
                        print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, member_type))
                continue

            # dump out unmatched condition
            print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))

        if not (has_struct_count == 1 and has_val_field_count == 1):
            print('\033[0;34mINFO\033[0m: line: {}: not a typical union: {} nested structures, {} u32 val member'
                  .format(self.__linecount, has_struct_count, has_val_field_count))
        if is_typedef and is_volatile and name is None:
            if parsed_varname != '':
                print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
        if name is not None and is_typedef and is_volatile and parsed_varname.rstrip('t') != name.rstrip('s'):
            print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))
        return ret_val, node_tree

    def __process_root(self) -> int:
        """Parse the file at root level.

        Dispatches every struct/union opening line to the matching parser and
        records ``extern <type> <name>;`` declarations in the reference tree.
        Returns 0, or -2 when any root-level struct/union reported an error.
        """
        ret_val = 0
        node_tree = dict()  # type: dict
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break

            # start checking by finding any of structure or union
            match_obj = re.match(self.__REGEXP_MATCH_STRUCTURE_BEGIN, rawline)
            if match_obj is not None:
                ret, inherited_node_tree = self.__process_structure(
                    match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue

            match_obj = re.match(self.__REGEXP_MATCH_UNION_BEGIN, rawline)
            if match_obj is not None:
                ret, inherited_node_tree = self.__process_union(
                    match_obj.groups()[2], match_obj.groups()[0] == 'typedef', match_obj.groups()[1] == 'volatile')
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue

            # processing root level external declaration
            match_obj = re.match(self.__REGEX_MATCH_ROOT_EXTERNAL, rawline)
            if match_obj is not None:
                self.__ref_tree[match_obj.groups()[1]] = self.__expand_type(match_obj.groups()[0])
                continue
        return ret_val

    def check(self, file: str) -> int:
        """Check one header file and print a PASSED/FAILED summary line.

        Returns 0 when the file passed, a non-zero value otherwise.
        """
        self.__linecount = 0
        self.__is_eof = False
        # the context manager closes the file even if parsing raises
        with open(file, 'r', encoding='utf8') as header:
            self.__fd = header
            ret_val = self.__process_root()
        if ret_val != 0:
            print('\033[0;31mCHECK FAILED\033[0m:\t{}'.format(file))
        else:
            print('\033[0;32mCHECK PASSED\033[0m:\t{}'.format(file))
        return ret_val

    def get_ref_tree(self) -> Any:
        """Return the reference tree built from root-level extern declarations."""
        return self.__ref_tree


def main() -> None:
    """Command line entry: ``<script> <header> [print]``.

    Exits with the result of the check; the optional second argument
    ``print`` additionally dumps the reference tree.
    """
    if len(sys.argv) <= 1 or not os.path.isfile(sys.argv[1]):
        print('file not exist')
        # sys.exit instead of the site-provided exit(), which is not always defined
        sys.exit(-1)
    checker = SoCStructureHeaderChecker()
    print('CHECKING:\t{}'.format(sys.argv[1]))
    ret = checker.check(sys.argv[1])
    if len(sys.argv) == 3 and sys.argv[2] == 'print':
        print(checker.get_ref_tree())
    sys.exit(ret)


if __name__ == '__main__':
    main()