import os #Element type definitions. Used in the parse process. ELEMENT_TYPE_UNKNOWN = -1 ELEMENT_TYPE_DEFINE = 1 ELEMENT_TYPE_INCLUDE = 2 ELEMENT_TYPE_UNDEF = 3 ELEMENT_TYPE_IFDEF = 4 ELEMENT_TYPE_IFNDEF = 5 ELEMENT_TYPE_IF = 6 ELEMENT_TYPE_ELSE = 7 ELEMENT_TYPE_ELIF = 8 ELEMENT_TYPE_ENDIF = 9 ELEMENT_TYPE_ERROR = 10 ELEMENT_TYPE_PRAGMA = 11 ELEMENT_TYPE_COMMENT_START = 20 ELEMENT_TYPE_COMMENT_MULTILINE = 21 ELEMENT_TYPE_COMMENT_END = 22 #Keyword : Element type dictionary, for read C-header line. hdr_keywords = {'/*': ELEMENT_TYPE_COMMENT_START, '*': ELEMENT_TYPE_COMMENT_MULTILINE, '*/': ELEMENT_TYPE_COMMENT_END, '#define': ELEMENT_TYPE_DEFINE, '#include': ELEMENT_TYPE_INCLUDE, '#undef': ELEMENT_TYPE_UNDEF, '#ifdef': ELEMENT_TYPE_IFDEF, '#ifndef': ELEMENT_TYPE_IFNDEF, '#if': ELEMENT_TYPE_IF, '#else': ELEMENT_TYPE_ELSE, '#elif': ELEMENT_TYPE_ELIF, '#endif': ELEMENT_TYPE_ENDIF, '#error': ELEMENT_TYPE_ERROR, '#pragma': ELEMENT_TYPE_PRAGMA} #Element type : keyword, for assembly include output file. inc_keywords = {ELEMENT_TYPE_COMMENT_START: ';', ELEMENT_TYPE_COMMENT_MULTILINE: ';', ELEMENT_TYPE_COMMENT_END: '', ELEMENT_TYPE_DEFINE: '%define', ELEMENT_TYPE_INCLUDE: '%include', ELEMENT_TYPE_UNDEF: '%undef', ELEMENT_TYPE_IFDEF: '%ifdef', ELEMENT_TYPE_IFNDEF: '%ifndef', ELEMENT_TYPE_IF: '%if', ELEMENT_TYPE_ELSE: '%else', ELEMENT_TYPE_ELIF: '%elif', ELEMENT_TYPE_ENDIF: '%endif', ELEMENT_TYPE_ERROR: '%error', ELEMENT_TYPE_PRAGMA: '%pragma'} class H2INC: def __init__(self): self.filelist = [] self.folderlist = [] self.sourcedir = "/usr/include" self.destdir = "/data/include" self.filecnt = 0 self.foldercnt = 0 self.tupline = [] self.tupfile = [] def srcfilecnt(self, sourcedir): ### Return the number of files, ending with '.h', in sourcedir - including subdirectories ### for folderName, subfolders, files in os.walk(self.sourcedir): for file in files: if file.lower().endswith('.h'): self.filecnt += 1 self.filelist += [folderName+'/'+file] if self.filecnt > 0: return True else: return False def srcfoldercnt(self, src): ### Return the number of folders, if it contains '*.h' files, in sourcedir - including subdirectories ### for folderName, subfolders, files in os.walk(src): if subfolders: for subfolder in subfolders: self.srcfoldercnt(subfolder) tempf = [file for file in files if file.lower().endswith('.h')] if tempf: self.foldercnt = self.foldercnt+1 self.folderlist += [folderName] if self.foldercnt > 0: return True else: return False def read_file(self, fn): outfile = '' inputfile = fn passes = 0 tempfile = [] templine = [] encodings = ['utf-8', 'latin-1', 'windows-1250', 'windows-1252', 'ascii', 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 'iso2022-jp-3', 'iso2022-jp-ext', 'iso2022-kr', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 'iso8859-11', 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'johab', 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'ptcp154', 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 'utf-32', 'utf-32-be', 'utf-32-le', 'utf-16', 'utf-16-be', 'utf-16-le', 'utf-7', 'utf-8-sig'] for e in encodings: try: fh = open(fn, 'r', encoding=e) fh.readlines() fh.seek(0) except UnicodeDecodeError: print('got unicode error with %s , trying different encoding' % e) else: break self.tupfile = [] for lines in fh: #outfile = outfile+lines #Initial phase self.tupline = [] analyzed_line = self.analyzer(lines) self.tupfile.append(analyzed_line) #self.tupfile.append(lines) passes += 1 fh.close() for l in self.tupfile: if len(l) == 0: continue if l[0] == ELEMENT_TYPE_INCLUDE: templine.append('%include'+' '+str(self.parseinclude(l[1]))) #outputfile = os.path.splitext(inputfile)[0]+'.inc' #outputfile = str(outputfile).replace(self.sourcedir, self.destdir) #print(outputfile) #print(os.path.dirname(outputfile)) #self.write_file(outputfile,outfile) def write_file(self, fn, data): if not os.path.exists(os.path.dirname(fn)): try: os.makedirs(os.path.dirname(fn)) except OSError as exc: # Guard against race condition if exc.errno != errno.EEXIST: raise newfile = open(fn, "w") newfile.write(data) newfile.close() def parseinclude(self, data): tempstr = str(data) if tempstr.startswith('<'): tempstr = tempstr.replace('<', '"') tempstr = tempstr.replace('.h>', '.inc"') if tempstr.endswith('.h'): tempstr = '"'+tempstr tempstr = tempstr.replace('.h', '.inc"') return tempstr def analyzer(self, ln): word = [w for w in ln.split()] for w in word: if w in hdr_keywords: v = hdr_keywords[w] self.tupline.append(v) else: self.tupline.append(w) return self.tupline if __name__ == "__main__": app = H2INC() if app.srcfilecnt(app.sourcedir) == True: print(app.filecnt) #print(app.filelist) if app.srcfoldercnt(app.sourcedir) == True: print(app.foldercnt) #print(app.folderlist) #for f in app.filelist: #app.read_file(f) app.read_file("gtk.h") #testfile for comments and header includes print(app.tupfile)