#!/usr/bin/python
"""Normalise an ``objdump -d`` listing so two builds can be diffed externally.

The input file is parsed into functions; each function's instructions are
reduced to ``(mnemonic, operands)`` pairs with absolute branch targets
stripped, and the result is written, sorted by function name, to
``<input>.dump``.
"""

import re
import sys
# from difflib import SequenceMatcher
from optparse import OptionParser

# Prefixes that objdump prints as a separate word in front of the real
# mnemonic; they are glued onto the mnemonic by split_instr().
INSTRUCTION_PREFIXES = ['rep', 'repe', 'repne']

# "<hex addr> <symbol+off>" operand: only the "<...>" part is kept.
_ABSOLUTE_TARGET_RE = re.compile(r"[0-9A-Fa-f]+ (<.*)$")
# "addr:<TAB>raw bytes<TAB>instruction text"
_INSTR_LINE_RE = re.compile(r"([0-9A-Fa-f]+):[\t]([^\t]+)[\t](.+)$")
# "addr:<TAB>raw bytes" with no instruction text.
_BYTES_ONLY_LINE_RE = re.compile(r"([0-9A-Fa-f]+):[\t]([^\t]+)$")
# "XXXXXXXX <name>:" -- exactly eight hex digits are required.
_FUNC_HEADER_RE = re.compile(r"([0-9A-Fa-f]{8}) <([^>]+)>:$")


def split_instr(full_instr):
    """
    Given a line of assembly language (symbolic part from objdump -d),
    return (instr, operands).

    operands can be an empty string.  When the first word is one of
    INSTRUCTION_PREFIXES the following word is appended to it, so that
    e.g. "rep stos ..." yields the mnemonic "rep stos".

    full_instr must contain at least one non-whitespace character.
    """
    assert full_instr.strip()
    parts = full_instr.split(None, 1)
    if len(parts) == 1:
        return (parts[0], '')
    mnemonic, operands = parts
    if mnemonic in INSTRUCTION_PREFIXES:
        # The prefixed instruction may have no operands at all
        # ("rep movsb"); unpacking into two names would raise ValueError
        # in that case, so inspect the pieces explicitly.
        prefixed = operands.split(None, 1)
        mnemonic += " " + prefixed[0]
        operands = prefixed[1] if len(prefixed) > 1 else ''
    return (mnemonic, operands)


def relative_only(operand):
    """
    Strip a leading hexadecimal address from an operand of the form
    "<hex> <symbol...>", returning only the "<symbol...>" part.

    Any operand that does not have that form is returned unchanged.
    """
    mo = _ABSOLUTE_TARGET_RE.match(operand)
    return mo.group(1) if mo else operand


def split_filter_instr(full_instr):
    """
    Split an instruction with split_instr() and pass its operands through
    relative_only().  Returns (mnemonic, operands).
    """
    mnemonic, operands = split_instr(full_instr)
    return mnemonic, relative_only(operands)


class Func(object):
    """
    One function of the disassembly: its name, its start address and the
    list of (mnemonic, operands) tuples collected by push_line().
    """

    def __init__(self, name, addr):
        self.name = name
        self.addr = addr
        self.instructions = []

    def push_line(self, line):
        """
        Feed one line of the listing to this function.

        Returns True when the line belongs to the function (an instruction
        line, or an address line carrying raw bytes only, which is accepted
        but not recorded).  Returns False for any other line; the function
        is then closed and the caller should stop feeding it.
        """
        line = line.strip()
        mo = _INSTR_LINE_RE.match(line)
        if not mo:
            if _BYTES_ONLY_LINE_RE.match(line):
                return True
            self.close()
            return False
        # Only the symbolic text is kept; address and raw bytes are ignored.
        self.instructions.append(split_filter_instr(mo.group(3)))
        return True

    def close(self):
        """
        Drop trailing operand-less "nop" instructions (presumably alignment
        padding after the function body).
        """
        while self.instructions and self.instructions[-1] == ('nop', ''):
            del self.instructions[-1]

    def dump(self, f):
        """
        Write the function to the file-like object f: a "name address"
        header line, one tab-indented line per instruction, then a blank
        line.
        """
        f.write("%s %08x\n" % (self.name, self.addr))
        for instr in self.instructions:
            f.write("\t%s\t%s\n" % (instr[0], instr[1]))
        f.write("\n")


def match_func_header(line):
    """
    Recognise a function header line of the form "XXXXXXXX <name>:".

    Returns the tuple (address_hex, name) of strings, or None when the
    line is not a header.  The address must be exactly eight hex digits.
    """
    mo = _FUNC_HEADER_RE.match(line.strip())
    return None if mo is None else mo.groups()


def load_disasm(filename):
    """
    Parse the disassembly listing in filename.

    Returns a dict mapping function name to Func.  If the same name occurs
    more than once, the last occurrence replaces the earlier ones.  Lines
    found outside any function that are not function headers are reported
    on stderr with an "Unused:" prefix.
    """
    funcs = {}  # name: Func()
    current = None
    with open(filename) as f:
        for line in f:
            if current:
                if current.push_line(line):
                    continue
                current = None
                # The line that ended the function was not consumed by it.
                # If it is itself a header (no blank separator line), start
                # the next function instead of silently losing it.
                fh = match_func_header(line)
                if fh:
                    current = Func(fh[1], int(fh[0], 16))
                    funcs[current.name] = current
            else:
                fh = match_func_header(line)
                if fh:
                    current = Func(fh[1], int(fh[0], 16))
                    funcs[current.name] = current
                else:
                    # Same output as the old "print >> sys.stderr" form, but
                    # valid on both Python 2 and Python 3.
                    sys.stderr.write("Unused: %s\n" % line.rstrip())
    if current:
        current.close()
    return funcs


def dump_disasm(filename, funcs):
    """
    Write every Func in funcs to filename, ordered by function name.
    """
    # sorted() works on Python 2 and 3; dict.keys() has no .sort() on 3.
    with open(filename, "w") as f:
        for name in sorted(funcs):
            funcs[name].dump(f)


def main():
    """
    Command-line entry point: expects exactly one argument, the
    disassembly file, and writes its normalised form to "<file>.dump".
    Prints the usage text and exits with status 1 otherwise.
    """
    parser = OptionParser(usage="usage: %prog [options] file.disasm")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    filename = args[0]
    funcs = load_disasm(filename)
    # now we are trying to be extramely lazy:
    # just dump the damn files sorted and stripped, so we can try
    # to diff them externally
    dump_disasm(filename + ".dump", funcs)


if __name__ == '__main__':
    main()

# PS: i like "extramely"