summaryrefslogtreecommitdiff
path: root/filter_disasm.py
blob: 96c6abc3adc36c834e7d94203b75224d93d0ec89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/python

import re
import sys
# from difflib import SequenceMatcher
from optparse import OptionParser


# Prefix tokens that split_instr() glues onto the mnemonic that follows them,
# so that e.g. "rep stos ..." is reported as the single instruction "rep stos".
INSTRUCTION_PREFIXES = ['rep', 'repe', 'repne']


def split_instr(full_instr):
    """
    Given a line of assembly language (symbolic part from objdump -d),
    return (instr, operands). operands can be an empty string.

    If the first token is listed in INSTRUCTION_PREFIXES, the prefix and the
    next token are joined with a single space and returned together as instr:
    "rep stos %eax,(%edi)" -> ("rep stos", "%eax,(%edi)").
    A prefixed instruction that has no operands ("rep movsb") yields
    ("rep movsb", "") instead of failing.

    full_instr must contain at least one non-whitespace character.
    """
    assert full_instr.strip()
    parts = full_instr.split(None, 1)
    if len(parts) == 1:
        return (parts[0], '')
    mnemonic, operands = parts
    if mnemonic in INSTRUCTION_PREFIXES:
        # operands is non-empty and starts with a non-blank character here,
        # so the split always yields the real mnemonic as its first item;
        # a second item exists only when that mnemonic has operands.
        rest = operands.split(None, 1)
        mnemonic += " " + rest[0]
        operands = rest[1] if len(rest) > 1 else ''
    return (mnemonic, operands)


def relative_only(operand):
    """
    Drop a leading hexadecimal number from an operand of the form
    "<hex> <...>", keeping only the part that starts at "<".
    Operands that do not have this shape are returned unchanged.
    """
    found = re.match(r"[0-9A-Fa-f]+ (<.*)$", operand)
    if found is None:
        return operand
    return found.group(1)


def split_filter_instr(full_instr):
    """
    Split full_instr with split_instr() and pass the operands part through
    relative_only(). Returns the pair (mnemonic, filtered operands).
    """
    pieces = split_instr(full_instr)
    instr_name, raw_operands = pieces
    filtered = relative_only(raw_operands)
    return instr_name, filtered


class Func(object):
    """
    One function of the disassembly: its name, its address and the list of
    (mnemonic, operands) pairs collected line by line through push_line().
    """

    def __init__(self, name, addr):
        self.name = name
        self.addr = addr
        self.instructions = []

    def push_line(self, line):
        """
        Feed one line of text to this function.

        A line shaped like "ADDR:<TAB>BYTES<TAB>TEXT" adds the instruction
        parsed from TEXT. A line shaped like "ADDR:<TAB>BYTES" (no text
        part) is accepted but contributes nothing. Any other line ends the
        function: close() is called and False is returned.

        Returns True while the line still belongs to this function.
        """
        text = line.strip()
        with_symbolic = re.match(r"([0-9A-Fa-f]+):[\t]([^\t]+)[\t](.+)$", text)
        if with_symbolic is not None:
            self.instructions.append(split_filter_instr(with_symbolic.group(3)))
            return True
        if re.match(r"([0-9A-Fa-f]+):[\t]([^\t]+)$", text):
            return True
        self.close()
        return False

    def close(self):
        """Remove every operand-less 'nop' from the end of the instruction list."""
        padding = ('nop', '')
        keep = len(self.instructions)
        while keep and self.instructions[keep - 1] == padding:
            keep -= 1
        # Trim in place so that other references to the list see the change.
        del self.instructions[keep:]

    def dump(self, f):
        """
        Write the function to the file-like object f: a "name address" header
        line, one tab-separated line per instruction, then an empty line.
        """
        f.write("%s %08x\n" % (self.name, self.addr))
        for entry in self.instructions:
            f.write("\t%s\t%s\n" % (entry[0], entry[1]))
        f.write("\n")


def match_func_header(line):
    """
    Recognise a function header line of the form "ADDRESS <name>:".

    ADDRESS must be exactly 8 or exactly 16 hexadecimal digits, so headers
    with 32-bit and with 64-bit wide addresses are both accepted.
    Surrounding whitespace on the line is ignored.

    Returns the tuple (address_text, name), or None when the line is not a
    function header.
    """
    mo = re.match(
        r"([0-9A-Fa-f]{8}(?:[0-9A-Fa-f]{8})?) <([^>]+)>:$", line.strip())
    return None if mo is None else mo.groups()


def load_disasm(filename):
    """
    Parse the disassembly listing stored in filename.

    Lines are handed to the function currently being collected until its
    push_line() reports that the function ended; that terminating line is
    consumed and not examined again. While no function is open, a line is
    either a function header (starting a new Func) or it is reported on
    stderr as "Unused: <line>".

    Returns a dict mapping function name to its Func. If the same name
    occurs more than once, the last occurrence replaces the earlier ones.
    """
    funcs = {}  # name: Func()
    current = None
    with open(filename) as f:
        for line in f:
            if current:
                if not current.push_line(line):
                    current = None
                continue
            fh = match_func_header(line)
            if fh:
                current = Func(fh[1], int(fh[0], 16))
                funcs[current.name] = current
            else:
                # Written through sys.stderr.write rather than the Python 2
                # "print >>" statement so it also works on Python 3; the
                # emitted text is the same.
                sys.stderr.write("Unused: %s\n" % line.rstrip())
        if current:
            # The file ended while a function was still open.
            current.close()
    return funcs


def dump_disasm(filename, funcs):
    """
    Write every function in funcs to the text file filename, ordered by
    function name, by calling each value's dump(f) method in turn.

    funcs maps a name to an object that provides dump(f).
    The file is created, or overwritten if it already exists.
    """
    # sorted() works on both the list returned by Python 2's dict.keys() and
    # the view returned by Python 3's, which has no .sort() method.
    func_names = sorted(funcs)
    with open(filename, "w") as f:
        for name in func_names:
            funcs[name].dump(f)


def main():
    """
    Command-line entry point.

    Expects exactly one positional argument, the path of a disassembly file,
    loads it and writes the result to "<path>.dump". With any other number
    of positional arguments the help text is printed and the process exits
    with status 1.
    """
    parser = OptionParser(usage="usage: %prog [options] file.disasm")
    _options, positional = parser.parse_args()
    if len(positional) != 1:
        parser.print_help()
        sys.exit(1)
    source_path = positional[0]
    # now we are trying to be extramely lazy:
    # just dump the damn files sorted and stripped, so we can try
    # to diff them externally
    dump_disasm(source_path + ".dump", load_disasm(source_path))


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()


# PS: i like "extramely"