From 92adc95e66ea24afd20808f77667d0970c3dec51 Mon Sep 17 00:00:00 2001
From: Lerking <33354709+Lerking@users.noreply.github.com>
Date: Thu, 19 Jul 2018 11:11:15 +0200
Subject: [PATCH] Add files via upload

---
 parser.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 parser.py

diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..ffc1fc6
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,155 @@
+# (c) 2018 Jan Lerking
+# Python parser for C header files.
+# Used for creating corresponding NASM include files.
+
+import errno
+import io
+import keywords
+import os
+import sys
+import multiprocessing as mp
+
+keywords.init()
+num_cores = mp.cpu_count()
+
+test_folder = ""
+filelist = []
+folderlist = []
+
+class worker:
+    # Placeholder; worker logic has not been implemented yet.
+    pass
+
+class parser:
+    def __init__(self, file):
+        # keywords.preprocessor_directives and keywords.regular are dicts
+        # mapping a keyword to its token type.
+        self.prep = keywords.preprocessor_directives
+        self.reg = keywords.regular
+        self.file = file
+        self.blockcomment = False
+
+    def get_token(self, keyword):
+        token = ""
+        if keyword in self.prep:
+            token = self.prep[keyword]
+        if keyword in self.reg:
+            token = self.reg[keyword]
+        return token
+
+    def parse_preprocess(self, token):
+        return False
+
+    # Converts a word into a 'key : value' pair.
+    def tokenize_word(self, word):
+        token = {}
+        if word in self.prep:
+            token[word] = self.prep[word]
+        if word in self.reg:
+            token[word] = self.reg[word]
+        return token
+
+    # Creates a list of tokens per line in the file; the caller can add
+    # this list as a single element in a global tuple.
+    def parseline(self, line):
+        tupline = []
+        words = line.split()
+        for w in words:
+            token = self.tokenize_word(w)
+            if token.get(w) == 'PREPROCESS':
+                self.parse_preprocess(token)
+            tupline.append(token)
+        return tupline
+
+def get_script_path():
+    return sys.path[0]
+
+def sourcedir_filecnt(sourcedir):
+    ### Return the number of files, ending with '.h', in sourcedir - including subdirectories ###
+    cnt = 0
+    global filelist
+    for folderName, subfolders, files in os.walk(sourcedir):
+        for file in files:
+            if file.lower().endswith('.h'):
+                cnt += 1
+                filelist += [os.path.join(folderName, file)]
+                #print(os.path.join(folderName, file))
+    #print(filelist)
+    return cnt
+
+def sourcedir_foldercnt(sourcedir):
+    ### Return the number of folders containing '*.h' files in sourcedir - including subdirectories ###
+    global folderlist
+    cnt = 0
+    # os.walk already descends into subfolders, so no explicit recursion is needed.
+    for folderName, subfolders, files in os.walk(sourcedir):
+        tempf = [file for file in files if file.lower().endswith('.h')]
+        if tempf:
+            cnt += 1
+            #print(folderName)
+            folderlist += [folderName]
+    #print(folderlist)
+    #print(len(folderlist))
+    return cnt
+
+def process_files(gui, source, dest):
+    global sourcedir
+    global destdir
+    sourcedir = source
+    destdir = dest
+    pool = mp.Pool(processes=num_cores)
+    pool.map(process_file, filelist)
+    pool.close()
+    pool.join()
+
+def process_file(data):
+    outfile = ''
+    inputfile = data
+    encodings = ['utf-8', 'latin-1', 'windows-1250', 'windows-1252', 'ascii',
+                 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500',
+                 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856',
+                 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865',
+                 'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006',
+                 'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254',
+                 'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc-jp', 'euc-jis-2004',
+                 'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022-jp',
+                 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 'iso2022-jp-3',
+                 'iso2022-jp-ext', 'iso2022-kr', 'iso8859-2', 'iso8859-3', 'iso8859-4',
+                 'iso8859-5', 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
+                 'iso8859-11', 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'johab',
+                 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'mac-cyrillic', 'mac-greek',
+                 'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'ptcp154',
+                 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 'utf-32', 'utf-32-be',
+                 'utf-32-le', 'utf-16', 'utf-16-be', 'utf-16-le', 'utf-7', 'utf-8-sig']
+    # Try each encoding until the whole file can be read without a decode error.
+    for e in encodings:
+        try:
+            fh = io.open(data, 'r', encoding=e)
+            fh.readlines()
+            fh.seek(0)
+        except UnicodeDecodeError:
+            print('got unicode error with %s , trying different encoding' % e)
+        else:
+            #print('opening the file with encoding: %s ' % e)
+            break
+    #print(os.path.basename(data))
+    for lines in fh:
+        outfile = outfile + lines
+    fh.close()
+    outputfile = os.path.splitext(inputfile)[0] + '.inc'
+    outputfile = str(outputfile).replace(sourcedir, destdir)
+    #print(outputfile)
+    if not os.path.exists(os.path.dirname(outputfile)):
+        try:
+            os.makedirs(os.path.dirname(outputfile))
+        except OSError as exc:  # Guard against race condition
+            if exc.errno != errno.EEXIST:
+                raise
+    newfile = open(outputfile, "w")
+    newfile.write(outfile)
+    newfile.close()
+
+test_folder = get_script_path()
+print(test_folder)
+print('Number of *.h files in directory: ', sourcedir_filecnt(test_folder))
+print(num_cores)
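Note: parser.py imports a keywords module that is not part of this patch; only the names keywords.init(), keywords.preprocessor_directives and keywords.regular are visible above. A minimal sketch of the shape the parser assumes, purely for illustration (the real module's contents are unknown, and the sample entries below are invented):

    # keywords.py - hypothetical sketch, not included in the patch.
    # The parser treats both tables as dicts mapping a C keyword to a token type.
    preprocessor_directives = {}
    regular = {}

    def init():
        # Populate the lookup tables; the entries here are illustrative only.
        preprocessor_directives.update({
            '#define': 'PREPROCESS',
            '#include': 'PREPROCESS',
            '#ifdef': 'PREPROCESS',
        })
        regular.update({
            'int': 'TYPE',
            'struct': 'KEYWORD',
        })

With a module of that shape on the path, parser('file.h').parseline('#define FOO 1') would return one token dict per word, e.g. [{'#define': 'PREPROCESS'}, {}, {}], with unknown words yielding empty dicts.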