From 92adc95e66ea24afd20808f77667d0970c3dec51 Mon Sep 17 00:00:00 2001
From: Lerking <33354709+Lerking@users.noreply.github.com>
Date: Thu, 19 Jul 2018 11:11:15 +0200
Subject: [PATCH] Add files via upload

---
 parser.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 parser.py

diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..ffc1fc6
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,155 @@
+# (c) 2018 Jan Lerking
+# Python parser for C header files.
+# Used for creating corresponding NASM include files.
+
+import errno
+import io
+import keywords
+import os
+import sys
+import multiprocessing as mp
+
+keywords.init()
+num_cores = mp.cpu_count()
+
+test_folder = ""
+filelist = []
+folderlist = []
+
+class worker:
+    # Placeholder; worker logic has not been implemented yet.
+    pass
+
+class parser:
+    def __init__(self, file):
+        # keywords.preprocessor_directives and keywords.regular are dicts
+        # mapping a keyword to its token type.
+        self.prep = keywords.preprocessor_directives
+        self.reg = keywords.regular
+        self.file = file
+        self.blockcomment = False
+
+    def get_token(self, keyword):
+        token = ""
+        if keyword in self.prep:
+            token = self.prep[keyword]
+        if keyword in self.reg:
+            token = self.reg[keyword]
+        return token
+
+    def parse_preprocess(self, token):
+        return False
+
+    # Converts a word into a 'key : value' pair.
+    def tokenize_word(self, word):
+        token = {}
+        if word in self.prep:
+            token[word] = self.prep[word]
+        if word in self.reg:
+            token[word] = self.reg[word]
+        return token
+
+    # Creates a list of tokens per line in the file; the caller can add
+    # this list as a single element in a global tuple.
+    def parseline(self, line):
+        tupline = []
+        words = line.split()
+        for w in words:
+            token = self.tokenize_word(w)
+            if token.get(w) == 'PREPROCESS':
+                self.parse_preprocess(token)
+            tupline.append(token)
+        return tupline
+
+def get_script_path():
+    return sys.path[0]
+
+def sourcedir_filecnt(sourcedir):
+    ### Return the number of files, ending with '.h', in sourcedir - including subdirectories ###
+    cnt = 0
+    global filelist
+    for folderName, subfolders, files in os.walk(sourcedir):
+        for file in files:
+            if file.lower().endswith('.h'):
+                cnt += 1
+                filelist += [os.path.join(folderName, file)]
+                #print(os.path.join(folderName, file))
+    #print(filelist)
+    return cnt
+
+def sourcedir_foldercnt(sourcedir):
+    ### Return the number of folders containing '*.h' files in sourcedir - including subdirectories ###
+    global folderlist
+    cnt = 0
+    # os.walk already descends into subfolders, so no explicit recursion is needed.
+    for folderName, subfolders, files in os.walk(sourcedir):
+        tempf = [file for file in files if file.lower().endswith('.h')]
+        if tempf:
+            cnt += 1
+            #print(folderName)
+            folderlist += [folderName]
+    #print(folderlist)
+    #print(len(folderlist))
+    return cnt
+
+def process_files(gui, source, dest):
+    global sourcedir
+    global destdir
+    sourcedir = source
+    destdir = dest
+    pool = mp.Pool(processes=num_cores)
+    pool.map(process_file, filelist)
+    pool.close()
+    pool.join()
+
+def process_file(data):
+    outfile = ''
+    inputfile = data
+    encodings = ['utf-8', 'latin-1', 'windows-1250', 'windows-1252', 'ascii',
+                 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500',
+                 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856',
+                 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865',
+                 'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006',
+                 'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254',
+                 'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc-jp', 'euc-jis-2004',
+                 'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022-jp',
+                 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 'iso2022-jp-3',
+                 'iso2022-jp-ext', 'iso2022-kr', 'iso8859-2', 'iso8859-3', 'iso8859-4',
+                 'iso8859-5', 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
+                 'iso8859-11', 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'johab',
+                 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'mac-cyrillic', 'mac-greek',
+                 'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'ptcp154',
+                 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 'utf-32', 'utf-32-be',
+                 'utf-32-le', 'utf-16', 'utf-16-be', 'utf-16-le', 'utf-7', 'utf-8-sig']
+    # Try each encoding until the whole file can be read without a decode error.
+    for e in encodings:
+        try:
+            fh = io.open(data, 'r', encoding=e)
+            fh.readlines()
+            fh.seek(0)
+        except UnicodeDecodeError:
+            print('got unicode error with %s , trying different encoding' % e)
+        else:
+            #print('opening the file with encoding: %s ' % e)
+            break
+    #print(os.path.basename(data))
+    for lines in fh:
+        outfile = outfile + lines
+    fh.close()
+    outputfile = os.path.splitext(inputfile)[0] + '.inc'
+    outputfile = str(outputfile).replace(sourcedir, destdir)
+    #print(outputfile)
+    if not os.path.exists(os.path.dirname(outputfile)):
+        try:
+            os.makedirs(os.path.dirname(outputfile))
+        except OSError as exc:  # Guard against race condition
+            if exc.errno != errno.EEXIST:
+                raise
+    newfile = open(outputfile, "w")
+    newfile.write(outfile)
+    newfile.close()
+
+test_folder = get_script_path()
+print(test_folder)
+print('Number of *.h files in directory: ', sourcedir_filecnt(test_folder))
+print(num_cores)
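Note: parser.py imports a keywords module that is not part of this patch; only the names keywords.init(), keywords.preprocessor_directives and keywords.regular are visible above. A minimal sketch of the shape the parser assumes, purely for illustration (the real module's contents are unknown, and the sample entries below are invented):

    # keywords.py - hypothetical sketch, not included in the patch.
    # The parser treats both tables as dicts mapping a C keyword to a token type.
    preprocessor_directives = {}
    regular = {}

    def init():
        # Populate the lookup tables; the entries here are illustrative only.
        preprocessor_directives.update({
            '#define': 'PREPROCESS',
            '#include': 'PREPROCESS',
            '#ifdef': 'PREPROCESS',
        })
        regular.update({
            'int': 'TYPE',
            'struct': 'KEYWORD',
        })

With a module of that shape on the path, parser('file.h').parseline('#define FOO 1') would return one token dict per word, e.g. [{'#define': 'PREPROCESS'}, {}, {}], with unknown words yielding empty dicts.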