summary | refs | log | tree | commit | diff
path: root/filter_disasm.py
diff options
context:
space:
mode:
author Guillaume Knispel <gknispel@avencall.com> 2012-07-11 14:46:59 +0200
committer Guillaume Knispel <gknispel@avencall.com> 2012-07-11 14:46:59 +0200
commit 4b1bbd979b08f88cccfad99f8e2b360276269511 (patch)
tree bc3b5e401c8425d527e0ebeb9443bce54172a5ca /filter_disasm.py
parent 2ad8f6a34d9545ff3ce830a36d6f56dba76cb631 (diff)
add script to filter out some addresses from asm, and sort funcs
Diffstat (limited to 'filter_disasm.py')
-rwxr-xr-x filter_disasm.py 119
1 files changed, 119 insertions, 0 deletions
diff --git a/filter_disasm.py b/filter_disasm.py
new file mode 100755
index 0000000..96c6abc
--- /dev/null
+++ b/filter_disasm.py
@@ -0,0 +1,119 @@
+#!/usr/bin/python
+
+import re
+import sys
+# from difflib import SequenceMatcher
+from optparse import OptionParser
+
+
# Prefix tokens that split_instr() glues onto the mnemonic that follows them,
# so that e.g. "rep stos ..." is reported as the single instruction "rep stos".
INSTRUCTION_PREFIXES = ['rep', 'repe', 'repne']


def split_instr(full_instr):
    """
    Given a line of assembly language (symbolic part from objdump -d),
    return (instr, operands). operands can be an empty string.

    When the first token is one of INSTRUCTION_PREFIXES, the token after it
    is appended to it (separated by one space) to form instr. A prefixed
    instruction without operands, such as "rep movsb", yields
    ("rep movsb", '').

    Raises ValueError if full_instr is empty or only whitespace.
    """
    parts = full_instr.split(None, 1)
    if not parts:
        # Explicit check instead of an assert, which vanishes under "python -O".
        raise ValueError("empty instruction: %r" % (full_instr,))
    if len(parts) == 1:
        return (parts[0], '')
    mnemonic, operands = parts
    if mnemonic in INSTRUCTION_PREFIXES:
        # operands holds at least one token here, but possibly only one:
        # do not assume that something follows the prefixed mnemonic.
        rest = operands.split(None, 1)
        mnemonic += " " + rest[0]
        operands = rest[1] if len(rest) > 1 else ''
    return (mnemonic, operands)
+
+
def relative_only(operand):
    """
    Strip a leading hexadecimal address from an operand.

    An operand of the form "<hex digits> <rest" is reduced to the part
    starting at the "<"; any other operand is returned unchanged.
    """
    found = re.match(r"[0-9A-Fa-f]+ (<.*)$", operand)
    if found is None:
        return operand
    return found.group(1)
+
+
def split_filter_instr(full_instr):
    """
    Split an instruction with split_instr(), then pass its operands
    through relative_only().

    Returns the (mnemonic, filtered operands) pair.
    """
    pieces = split_instr(full_instr)
    return pieces[0], relative_only(pieces[1])
+
+
class Func(object):
    """
    One function of a disassembly listing.

    Attributes:
      name          name of the function
      addr          address of the function, as an integer
      instructions  list of (mnemonic, operands) tuples, in listing order
    """

    # "<hex addr>:<TAB><raw bytes><TAB><instruction text>"
    _INSTR_LINE = re.compile(r"([0-9A-Fa-f]+):[\t]([^\t]+)[\t](.+)$")
    # "<hex addr>:<TAB><raw bytes>" with no instruction text -- presumably the
    # continuation of a long instruction's byte dump; such lines are skipped.
    _BYTES_LINE = re.compile(r"([0-9A-Fa-f]+):[\t]([^\t]+)$")

    def __init__(self, name, addr):
        self.name = name
        self.addr = addr
        self.instructions = []

    def push_line(self, line):
        """
        Feed one line of the listing to this function.

        Returns True if the line was consumed (an instruction was recorded,
        or a bytes-only line was skipped). Otherwise the function is closed
        and False is returned; the line itself is not recorded.
        """
        line = line.strip()
        mo = self._INSTR_LINE.match(line)
        if mo:
            # Only the instruction text matters; address and raw bytes are
            # dropped so that listings can be compared regardless of layout.
            self.instructions.append(split_filter_instr(mo.group(3)))
            return True
        if self._BYTES_LINE.match(line):
            return True
        self.close()
        return False

    def close(self):
        """Remove the operand-less 'nop' instructions at the end, if any."""
        while self.instructions and self.instructions[-1] == ('nop', ''):
            del self.instructions[-1]

    def dump(self, f):
        """
        Write the function to the file-like object f: a "name address"
        header line, one tab-separated line per instruction, and a final
        empty line.
        """
        f.write("%s %08x\n" % (self.name, self.addr))
        for instr in self.instructions:
            f.write("\t%s\t%s\n" % (instr[0], instr[1]))
        f.write("\n")
+
+
def match_func_header(line):
    """
    Recognize a function header line of the form "<hex addr> <name>:".

    Returns the (address string, name) tuple, or None if the line (with
    surrounding whitespace removed) is not a header. The address may have
    any number of hex digits, so both 8-digit and 16-digit addresses are
    accepted.
    """
    mo = re.match(r"([0-9A-Fa-f]+) <([^>]+)>:$", line.strip())
    if mo is None:
        return None
    return mo.groups()
+
+
def load_disasm(filename):
    """
    Parse the disassembly listing in filename.

    Returns a dict mapping each function name to its Func. If a name
    appears several times, the last occurrence wins. Lines found outside
    of any function that are not function headers are reported on stderr
    as "Unused: ...".
    """
    funcs = {}  # name: Func()
    current = None
    with open(filename) as f:
        for line in f:
            if current is not None and current.push_line(line):
                continue
            # Either we were outside of any function, or push_line() has
            # just rejected the line (and closed the function itself).
            ended_func = current is not None
            current = None
            fh = match_func_header(line)
            if fh is not None:
                # Checked even for a line that terminated the previous
                # function, so that a header directly following a function
                # body is not lost.
                current = Func(fh[1], int(fh[0], 16))
                funcs[current.name] = current
            elif not ended_func:
                # sys.stderr.write() rather than the Python 2-only
                # "print >>" statement; the output is the same.
                sys.stderr.write("Unused: %s\n" % line.rstrip())
    if current is not None:
        current.close()
    return funcs
+
+
def dump_disasm(filename, funcs):
    """
    Write every function of funcs to filename, ordered by function name.

    funcs maps names to objects with a dump(f) method; filename is
    created or overwritten.
    """
    # sorted() works on Python 2 and 3 alike, whereas keys() only returns
    # a sortable list on Python 2.
    func_names = sorted(funcs)
    with open(filename, "w") as f:
        for name in func_names:
            funcs[name].dump(f)
+
+
def main():
    """
    Command line entry point.

    Expects exactly one positional argument, the listing to read; otherwise
    prints the help and exits with status 1. The result is written next to
    the input, in a file named like the input plus ".dump".
    """
    parser = OptionParser(usage="usage: %prog [options] file.disasm")
    _options, positional = parser.parse_args()
    if len(positional) != 1:
        parser.print_help()
        sys.exit(1)
    (source,) = positional
    loaded = load_disasm(source)
    # now we are trying to be extramely lazy:
    # just dump the damn files sorted and stripped, so we can try
    # to diff them externally
    dump_disasm(source + ".dump", loaded)
+
+
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
+
+
+# PS: i like "extramely"