From e7d46ddbb1991b9991226414734534b9af518518 Mon Sep 17 00:00:00 2001
From: Sebastian Parborg
Date: Thu, 9 Jun 2011 00:23:37 +0200
Subject: Converted the output data from the makefiles to something useful

---
 TODO                    |   1 +
 filetypes/__init__.py   |   0
 filetypes/ctypefiles.py | 183 +++++++++++++++++++++++++++++++++++
 filetypes/makefiles.py  | 247 +++++++++++++++++++++++++++++++++++++++++++++++
 makefiles.py            | 252 ------------------------------------------------
 scanfiles.py            | 214 ++++++----------------------------------
 6 files changed, 458 insertions(+), 439 deletions(-)
 create mode 100644 filetypes/__init__.py
 create mode 100644 filetypes/ctypefiles.py
 create mode 100644 filetypes/makefiles.py
 delete mode 100644 makefiles.py

diff --git a/TODO b/TODO
index fcc776f..16db5fa 100644
--- a/TODO
+++ b/TODO
@@ -4,4 +4,5 @@ Handle "# include"
 implement ifndef
 #include "file.h" doesn't restict it to be a local include, add checks!
 Handle dlopen() "includes"
+Clean up the code so that stuff is more organized
 Perhaps multithread some stuff so the rest of the program doesn't have to wait for the parser to finish
diff --git a/filetypes/__init__.py b/filetypes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/filetypes/ctypefiles.py b/filetypes/ctypefiles.py
new file mode 100644
index 0000000..97e8561
--- /dev/null
+++ b/filetypes/ctypefiles.py
@@ -0,0 +1,183 @@
+import glob
+from ply import lex
+from ply import yacc
+
+#lex stuff begins here
+
+def scanincludes(string,inclst,curdir):
+    tokens = (
+        "GINCLUDE",
+        "LINCLUDE",
+        "BUNDLEINC",
+        "IFDEF",
+        "ENDIF",
+        )
+
+    states = (
+        ("com","exclusive"), #comment
+        ("ifdef","inclusive"),
+        )
+
+    t_ANY_ignore = " \t"
+
+    def t_begin_com(t):
+        r"/\*"
+        t.lexer.push_state("com")
+
+    def t_com_end(t):
+        r"\*/"
+        t.lexer.pop_state()
+        pass
+
+    def t_line_com(t):
+        r"//.*"
+        pass
+
+    def t_ANY_begin_if0(t):
+        r"\#if[ \t]+0"
+        t.lexer.push_state("com")
+
+    def t_com_endif(t):
+        r"\#endif"
+        t.lexer.pop_state()
+        pass
+
+    def t_com_ifdef(t):
+        r"\#ifdef"
+        t.lexer.push_state("com")
+
+    def t_IFDEF(t):
+        r"\#ifdef[ \t]+[a-zA-Z_][a-zA-Z0-9_]*"
+        t.value = t.value[6:].strip() #return the ifdef name
+        t.lexer.push_state("ifdef")
+        return t
+
+    def t_ifdef_ENDIF(t):
+        r"\#endif"
+        t.lexer.pop_state()
+        return t
+
+    def t_GINCLUDE(t):
+        r"\#[Ii][Nn][Cc][Ll][Uu][Dd][Ee][ \t]+<.*\.h>"
+        t.value = t.value[8:].strip().strip("<>")
+        return t
+
+    def t_LINCLUDE(t):
+        r"\#[Ii][Nn][Cc][Ll][Uu][Dd][Ee][ \t]+\".*\.h\""
+        t.value = t.value[8:].strip().strip('""')
+        return t
+
+    def t_BUNDLEINC(t):
+        r"\#[Ii][Nn][Cc][Ll][Uu][Dd][Ee][ \t]+<.*>"
+        pass
+
+    def t_ANY_error(t):
+        #print("Illegal character '%s'" % t.value[0])
+        t.lexer.skip(1)
+
+    lexer = lex.lex()
+
+    #lexer.input(string)
+    #
+    #for tok in lexer:
+    #    print(tok)
+    #
+    #YACC stuff here
+
+    def p_includes2(p):
+        """
+        includes : includes ginc
+        """
+        p[1][0].add(p[2])
+        p[0] = p[1]
+
+    def p_lincludes(p):
+        """
+        includes : includes linc
+        """
+        if islocalinc(p[2],curdir):
+            p[1][1].add(p[2])
+        else:
+            p[1][0].add(p[2])
+        p[0] = p[1]
+
+    def p_ifdef(p):
+        """
+        includes : includes IFDEF includes ENDIF
+                 | IFDEF includes ENDIF
+        """
+        if len(p) == 5:
+            p[1][2] = addnewifdefs(p[1][2],{p[2] : p[3]})
+            p[0] = p[1]
+        else:
+            ifdef = {}
+            ifdef[p[1]] = p[2]
+            p[0] = [set(),set(),ifdef]
+
+    def p_ginc(p):
+        "includes : ginc"
+        globinc = set()
+        globinc.add(p[1])
+        p[0] = [globinc,set(),{}]
+
+    def p_linc(p):
+        "includes : linc"
+        locinc = set()
+        locinc.add(p[1])
+        if islocalinc(p[1], curdir):
+            p[0] = 
[set(),locinc,{}] + else: + p[0] = [locinc,set(),{}] + + def p_ginclude(p): + "ginc : GINCLUDE" + p[0] = p[1] + + def p_linclude(p): + "linc : LINCLUDE" + p[0] = p[1] + + def p_error(p): + #print("syntax error at '%s'" % p.type) + pass + + yacc.yacc() + + newinclst = yacc.parse(string) + if newinclst == None: + #Check if the file didn't have any includes + return(inclst) + newinclst = addnewincludes(newinclst,inclst) + return(newinclst) + +def islocalinc(inc, curdir): + print(inc) + if glob.glob(curdir + inc) == []: + return False + else: + return True + + +def addnewincludes(inclist1,inclist2): + #come up with better names!! + inclist1[0] = inclist1[0] | inclist2[0] + inclist1[1] = inclist1[1] | inclist2[1] + inclist1[2] = addnewifdefs(inclist1[2],inclist2[2]) + return(inclist1) + +def addnewifdefs(dict1,dict2): + if dict1 == {} and dict2 == {}: + #we are done here + return(dict()) + dups = dict1.keys() & dict2.keys() + if dups == set(): + #no duplicates, empty set() + for name in dict2: + dict1[name] = dict2[name] + return(dict1) + + for name in dups: + dict1[name][0] = dict1[name][0] | dict2[name][0] + dict1[name][1] = dict1[name][1] | dict2[name][1] + dict1[name][2] = addnewifdefs(dict1[name][2],dict2[name][2]) + return(dict1) diff --git a/filetypes/makefiles.py b/filetypes/makefiles.py new file mode 100644 index 0000000..7448f5d --- /dev/null +++ b/filetypes/makefiles.py @@ -0,0 +1,247 @@ +from ply import lex +from ply import yacc + +def scanmakefile(makefile): + tokens = ( + "VAR", + "COLON", + "PERCENT", + "TEXT", + "DOLLAR", + "LPAR", + "RPAR", + "END", + "EQUAL", + "ENDTAB", + "LESS", + ) + + states = ( + ("com", "exclusive"), + ) + + def t_begin_com(t): + r"\#" + t.lexer.push_state("com") + + def t_com_newline(t): + r".*\\[ \t]*\n" + pass + + def t_com_END(t): + r"\n" + t.lexer.pop_state() + return t + + def t_VAR(t): + r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=" + t.value = t.value.split()[0].rstrip("=") #get the name of the var + return t + + def t_TEXT(t): + #make sure it grabs "file-name" and "-flags" + r"-*\.*[a-zA-Z_][-|a-zA-Z0-9_]*" + return t + + def t_LESS(t): + r"\$<" + pass + + def t_DOLLAR(t): + r"\$" + return t + + def t_COLON(t): + r"\:" + return t + + def t_EQUAL(t): + r"\=" + return t + + def t_LPAR(t): + r"\(" + return t + + def t_RPAR(t): + r"\)" + return t + + def t_PERCENT(t): + r"\%" + return t + + def t_contline(t): + r"\\\n" + pass + + def t_ENDTAB(t): + r"\n\t" + return t + + def t_END(t): + r"[\n]+" + return t + + def t_ANY_error(t): + t.lexer.skip(1) + + lexer = lex.lex() + + #lexer.input(makefile) + #for tok in lexer: + # print(tok) + + + #YACC begins here + + #a dict with values of defined variables + variables = {} + targets = [] #buildtargets, [[target,deps,options],[target2,.... 
+ + def p_target(p): + """ + var : var textlst COLON textlst end + | textlst COLON textlst end + | var textlst COLON textlst options end + | textlst COLON textlst options end + """ + if len(p) == 6: + if p[3] == ":": + targets.append([p[2][0],p[4],[]]) + else: + targets.append([p[1][0],p[3],p[4]]) + elif len(p) == 5: + targets.append([p[1][0],p[3],[]]) + else: + targets.append([p[2][0],p[4],p[5]]) + + def p_lonetarget(p): + """ + var : var textlst COLON options end + | textlst COLON options end + """ + if len(p) == 6: + targets.append([p[2][0],[],p[4]]) + else: + targets.append([p[1][0],[],p[3]]) + + def p_depconv(p): + """ + var : var command COLON command end + | var command COLON command options end + """ + if len(p) == 6: + options = [] + else: + options = p[5] + + if p[2][0] == p[4][0] == "%": + for target in targets: + for dep in target[1]: + if p[2][1] in dep: + targets.append([dep,[(dep.replace(p[2][1],p[4][1]))],options]) + else: + print("Unknown command") + + def p_var(p): + """ + var : VAR textlst end + | VAR end + | var VAR textlst end + | var VAR end + """ + if isinstance(p[2],list): + variables[p[1]] = p[2] + elif len(p) == 5: + variables[p[2]] = p[3] + elif len(p) == 3: + variables[p[1]] = [] + else: + variables[p[2]] = [] + + def p_endtab(p): + """ + options : ENDTAB textlst + | options ENDTAB textlst + """ + if len(p) == 3: + p[0] = p[2] + else: + p[0] = p[1] + p[3] + + def p_usecom(p): + """ + textlst : DOLLAR LPAR textlst COLON command RPAR + | textlst DOLLAR LPAR textlst COLON command RPAR + """ + if len(p) == 8: + o = 1 #offset + else: + o = 0 + p[3+o] = variables[p[3+o][0]] + p[0] = [] + if p[5][0] == "replace": + for text in p[3+o]: + p[0] += [text.replace(p[5+o][1],p[5+o][2])] + else: + for text in p[3+o]: + p[0] += [text + p[5+o][1]] + + def p_textlst(p): + """ + textlst : textlst TEXT + | TEXT + | DOLLAR LPAR textlst RPAR + | textlst DOLLAR LPAR textlst RPAR + """ + if len(p) == 2: + p[0] = [p[1]] + elif len(p) == 3: + p[0] = p[1] + [p[2]] + elif len(p) == 5: + if p[3][0] in variables: + var = variables[p[3][0]] + p[0] = var + else: + p[0] = ["not defined"] + else: + if p[4][0] in variables: + var = variables[p[4][0]] + p[0] = p[1] + var + else: + p[0] = ["not defined"] + + def p_command(p): + """ + command : TEXT EQUAL TEXT + | PERCENT EQUAL PERCENT TEXT + | PERCENT TEXT + """ + if len(p) == 4: + p[0] = ["replace", p[1], p[3]] + elif len(p) == 5: + p[0] = ["append", p[4]] + else: + p[0] = [p[1],p[2]] + + def p_end(p): + """ + end : end END + | END + """ + + def p_error(p): + print("syntax error at '%s'" % p.type,p.lexpos) + pass + + yacc.yacc() + + yacc.parse(makefile) + + #for target in targets: + # print(target) + #print(variables) + + return targets + diff --git a/makefiles.py b/makefiles.py deleted file mode 100644 index 43f6de5..0000000 --- a/makefiles.py +++ /dev/null @@ -1,252 +0,0 @@ -import os -from ply import lex -from ply import yacc - -def scanmakefile(makefile): - tokens = ( - "VAR", - "COLON", - "PERCENT", - "TEXT", - "DOLLAR", - "LPAR", - "RPAR", - "END", - "EQUAL", - "ENDTAB", - "LESS", - ) - - states = ( - ("com", "exclusive"), - ) - - def t_begin_com(t): - r"\#" - t.lexer.push_state("com") - - def t_com_newline(t): - r".*\\[ \t]*\n" - pass - - def t_com_END(t): - r"\n" - t.lexer.pop_state() - return t - - def t_VAR(t): - r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=" - t.value = t.value.split()[0].rstrip("=") #get the name of the var - return t - - def t_TEXT(t): - #make sure it grabs "file-name" and "-flags" - r"-*\.*[a-zA-Z_][-|a-zA-Z0-9_]*" - return t - - 
def t_LESS(t): - r"\$<" - pass - - def t_DOLLAR(t): - r"\$" - return t - - def t_COLON(t): - r"\:" - return t - - def t_EQUAL(t): - r"\=" - return t - - def t_LPAR(t): - r"\(" - return t - - def t_RPAR(t): - r"\)" - return t - - def t_PERCENT(t): - r"\%" - return t - - def t_contline(t): - r"\\\n" - pass - - def t_ENDTAB(t): - r"\n\t" - return t - - def t_END(t): - r"[\n]+" - return t - - def t_ANY_error(t): - t.lexer.skip(1) - - lexer = lex.lex() - - lexer.input(makefile) - for tok in lexer: - print(tok) - - - #YACC begins here - - #a dict with values of defined variables - variables = {} - targets = [] #buildtargets, [[target,deps,options],[target2,.... - - def p_target(p): - """ - var : var textlst COLON textlst end - | textlst COLON textlst end - | var textlst COLON textlst options end - | textlst COLON textlst options end - """ - if len(p) == 6: - if p[3] == ":": - targets.append([p[2][0],p[4],[]]) - else: - targets.append([p[1][0],p[3],p[4]]) - elif len(p) == 5: - targets.append([p[1][0],p[3],[]]) - else: - targets.append([p[2][0],p[4],p[5]]) - - def p_lonetarget(p): - """ - var : var textlst COLON options end - | textlst COLON options end - """ - if len(p) == 6: - targets.append([p[2][0],[],p[4]]) - else: - targets.append([p[1][0],[],p[3]]) - - def p_depconv(p): - """ - var : var command COLON command end - | var command COLON command options end - """ - if len(p) == 6: - options = [] - else: - options = p[5] - - if p[2][0] == p[4][0] == "%": - for target in targets: - for dep in target[1]: - if p[2][1] in dep: - targets.append([dep,[(dep.replace(p[2][1],p[4][1]))],options]) - else: - print("Unknown command") - - def p_var(p): - """ - var : VAR textlst end - | VAR end - | var VAR textlst end - | var VAR end - """ - if isinstance(p[2],list): - variables[p[1]] = p[2] - elif len(p) == 5: - variables[p[2]] = p[3] - elif len(p) == 3: - variables[p[1]] = [] - else: - variables[p[2]] = [] - - def p_endtab(p): - """ - options : ENDTAB textlst - | options ENDTAB textlst - """ - if len(p) == 3: - p[0] = p[2] - else: - p[0] = p[1] + p[3] - - def p_usecom(p): - """ - textlst : DOLLAR LPAR textlst COLON command RPAR - | textlst DOLLAR LPAR textlst COLON command RPAR - """ - if len(p) == 8: - o = 1 #offset - else: - o = 0 - p[3+o] = variables[p[3+o][0]] - p[0] = [] - if p[5][0] == "replace": - for text in p[3+o]: - p[0] += [text.replace(p[5+o][1],p[5+o][2])] - else: - for text in p[3+o]: - p[0] += [text + p[5+o][1]] - - def p_textlst(p): - """ - textlst : textlst TEXT - | TEXT - | DOLLAR LPAR textlst RPAR - | textlst DOLLAR LPAR textlst RPAR - """ - if len(p) == 2: - p[0] = [p[1]] - elif len(p) == 3: - p[0] = p[1] + [p[2]] - elif len(p) == 5: - if p[3][0] in variables: - var = variables[p[3][0]] - p[0] = var - else: - p[0] = ["not defined"] - else: - if p[4][0] in variables: - var = variables[p[4][0]] - p[0] = p[1] + var - else: - p[0] = ["not defined"] - - def p_command(p): - """ - command : TEXT EQUAL TEXT - | PERCENT EQUAL PERCENT TEXT - | PERCENT TEXT - """ - if len(p) == 4: - p[0] = ["replace", p[1], p[3]] - elif len(p) == 5: - p[0] = ["append", p[4]] - else: - p[0] = [p[1],p[2]] - - def p_end(p): - """ - end : end END - | END - """ - - def p_error(p): - print("syntax error at '%s'" % p.type,p.lexpos) - pass - - yacc.yacc() - - yacc.parse(makefile) - - for target in targets: - print(target) - #print(variables) - - -file = "/usr/portage/distfiles/svn-src/doneyet-read-only/trunk/Makefile" - -with open(file, encoding="utf-8", errors="replace") as inputfile: - scanmakefile(inputfile.read()) - 
diff --git a/scanfiles.py b/scanfiles.py index 14f233b..e59084f 100644 --- a/scanfiles.py +++ b/scanfiles.py @@ -1,9 +1,9 @@ import os import glob -from ply import lex -from ply import yacc +from filetypes.ctypefiles import scanincludes +from filetypes.makefiles import scanmakefile -def scandir(dir, filetypes): +def scandirfor(dir, filetypes): files = [] dirs = [f for f in os.listdir(dir) if os.path.isdir(os.path.join(dir, f))] @@ -13,196 +13,36 @@ def scandir(dir, filetypes): files += glob.glob(dir + "/*" + filetype) return files -#lex stuff begins here - -def scanincludes(string,inclst,curdir): - tokens = ( - "GINCLUDE", - "LINCLUDE", - "BUNDLEINC", - "IFDEF", - "ENDIF", - ) - - states = ( - ("com","exclusive"), #comment - ("ifdef","inclusive"), - ) - - t_ANY_ignore = " \t" - - def t_begin_com(t): - r"/\*" - t.lexer.push_state("com") - - def t_com_end(t): - r"\*/" - t.lexer.pop_state() - pass - - def t_line_com(t): - r"//.*" - pass - - def t_ANY_begin_if0(t): - r"\#if[ \t]+0" - t.lexer.push_state("com") - - def t_com_endif(t): - r"\#endif" - t.lexer.pop_state() - pass - - def t_com_ifdef(t): - r"\#ifdef" - t.lexer.push_state("com") - - def t_IFDEF(t): - r"\#ifdef[ \t]+[a-zA-Z_][a-zA-Z0-9_]*" - t.value = t.value[6:].strip() #return the ifdef name - t.lexer.push_state("ifdef") - return t - - def t_ifdef_ENDIF(t): - r"\#endif" - t.lexer.pop_state() - return t - - def t_GINCLUDE(t): - r"\#[Ii][Nn][Cc][Ll][Uu][Dd][Ee][ \t]+<.*\.h>" - t.value = t.value[8:].strip().strip("<>") - return t - - def t_LINCLUDE(t): - r"\#[Ii][Nn][Cc][Ll][Uu][Dd][Ee][ \t]+\".*\.h\"" - t.value = t.value[8:].strip().strip('""') - return t - - def t_BUNDLEINC(t): - r"\#[Ii][Nn][Cc][Ll][Uu][Dd][Ee][ \t]+<.*>" - pass - - def t_ANY_error(t): - #print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - lexer = lex.lex() - - #lexer.input(string) - # - #for tok in lexer: - # print(tok) - # - #YACC stuff here - - def p_includes2(p): - """ - includes : includes ginc - """ - p[1][0].add(p[2]) - p[0] = p[1] - - def p_lincludes(p): - """ - includes : includes linc - """ - if islocalinc(p[2],curdir): - p[1][1].add(p[2]) - else: - p[1][0].add(p[2]) - p[0] = p[1] - - def p_ifdef(p): - """ - includes : includes IFDEF includes ENDIF - | IFDEF includes ENDIF - """ - if len(p) == 5: - p[1][2] = addnewifdefs(p[1][2],{p[2] : p[3]}) - p[0] = p[1] - else: - ifdef = {} - ifdef[p[1]] = p[2] - p[0] = [set(),set(),ifdef] - - def p_ginc(p): - "includes : ginc" - globinc = set() - globinc.add(p[1]) - p[0] = [globinc,set(),{}] - - def p_linc(p): - "includes : linc" - locinc = set() - locinc.add(p[1]) - if islocalinc(p[1], curdir): - p[0] = [set(),locinc,{}] - else: - p[0] = [locinc,set(),{}] - - def p_ginclude(p): - "ginc : GINCLUDE" - p[0] = p[1] - - def p_linclude(p): - "linc : LINCLUDE" - p[0] = p[1] - - def p_error(p): - #print("syntax error at '%s'" % p.type) - pass - - yacc.yacc() - - newinclst = yacc.parse(string) - if newinclst == None: - #Check if the file didn't have any includes - return(inclst) - newinclst = addnewincludes(newinclst,inclst) - return(newinclst) - -def islocalinc(inc, curdir): - print(inc) - if glob.glob(curdir + inc) == []: - return False - else: - return True - - -def addnewincludes(inclist1,inclist2): - #come up with better names!! 
- inclist1[0] = inclist1[0] | inclist2[0] - inclist1[1] = inclist1[1] | inclist2[1] - inclist1[2] = addnewifdefs(inclist1[2],inclist2[2]) - return(inclist1) - -def addnewifdefs(dict1,dict2): - if dict1 == {} and dict2 == {}: - #we are done here - return(dict()) - dups = dict1.keys() & dict2.keys() - if dups == set(): - #no duplicates, empty set() - for name in dict2: - dict1[name] = dict2[name] - return(dict1) - - for name in dups: - dict1[name][0] = dict1[name][0] | dict2[name][0] - dict1[name][1] = dict1[name][1] | dict2[name][1] - dict1[name][2] = addnewifdefs(dict1[name][2],dict2[name][2]) - return(dict1) - -def startscan(dir,filetypes): +def scanmakefiledeps(makefile): + filestoscan = [] + impfiles = [] #look for these files + targets = scanmakefile(makefile) + deps = targets[0][1] #Use first make target + while deps != []: + newdeps = [] + for dep in deps: + for target in targets: + if target[0] == dep: + newdeps += target[1] + if ".o" in dep or dep in impfiles: + impfiles += target[1] + deps = newdeps + + #impfiles.sort() + #print(impfiles) + return impfiles + +def scanfilelist(filelist): global_hfiles = set() local_hfiles = set() inclst = [global_hfiles,local_hfiles,{}] - for file in scandir(dir, filetypes): - print(file) - + for file in filelist: with open(file, encoding="utf-8", errors="replace") as inputfile: inclst = scanincludes(inputfile.read(),inclst,os.path.split(file)[0]) return(inclst) +fle = "/usr/portage/distfiles/svn-src/doneyet-read-only/trunk/Makefile" +with open(fle, encoding="utf-8", errors="replace") as inputfile: + scanmakefiledeps(inputfile.read()) -- cgit v1.2.3-65-gdbad
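Note (not part of the patch): for anyone trying out the refactored layout, the sketch below shows one way the new entry points in scanfiles.py could be wired together. The project path is a placeholder, and it assumes the hard-coded test block at the bottom of scanfiles.py is removed or guarded with an if __name__ == "__main__": check, since that block currently runs as soon as the module is imported.

    import os
    from scanfiles import scanmakefiledeps, scanfilelist

    # Placeholder: a source checkout that contains a Makefile.
    srcdir = "/path/to/project"

    # Resolve the first Makefile target down to the source files it depends on.
    with open(os.path.join(srcdir, "Makefile"), encoding="utf-8", errors="replace") as f:
        impfiles = scanmakefiledeps(f.read())

    # Feed those files to the C include scanner; the result is a list of
    # [global includes, local includes, {ifdef name: nested includes}].
    inclst = scanfilelist([os.path.join(srcdir, name) for name in impfiles])
    print(inclst)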