Python-逆向实用工具

PEfile

解析头部数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import hashlib,pefile

# 计算得到数据长度,自动使用推荐大小
def NumberOfBytesHumanRepresentation(value):
if value <= 1024:
return '%s bytes' % value
elif value < 1024 * 1024:
return '%.1f KB' % (float(value) / 1024.0)
elif value < 1024 * 1024 * 1024:
return '%.1f MB' % (float(value) / 1024.0 / 1024.0)
else:
return '%.1f GB' % (float(value) / 1024.0 / 1024.0 / 1024.0)

# 获取PE头部基本信息
def GetHeader(pe):
raw = pe.write()
# 扫描基本信息
if (hex(pe.FILE_HEADER.Machine) == "0x14c"):
print("程序位数: {}".format("x86"))
if (hex(pe.FILE_HEADER.Machine) == "0x8664"):
print("程序位数: {}".format("x64"))
if (hex(pe.FILE_HEADER.Characteristics) == "0x102"):
print("程序类型: Executable")
elif (hex(pe.FILE_HEADER.Characteristics) == "0x2102"):
print("程序类型: Dynamic link library")
if pe.OPTIONAL_HEADER.AddressOfEntryPoint:
oep = pe.OPTIONAL_HEADER.AddressOfEntryPoint + pe.OPTIONAL_HEADER.ImageBase
print("实际入口: {}".format(hex(oep)))
print("映像基址: {}".format(hex(pe.OPTIONAL_HEADER.ImageBase)))
print("虚拟入口: {}".format(hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint)))
print("映像大小: {}".format(hex(pe.OPTIONAL_HEADER.SizeOfImage)))
print("区段对齐: {}".format(hex(pe.OPTIONAL_HEADER.SectionAlignment)))
print("文件对齐: {}".format(hex(pe.OPTIONAL_HEADER.FileAlignment)))
print("区块数量: {}".format(int(pe.FILE_HEADER.NumberOfSections + 1)))
print('熵值比例: %f (Min=0.0, Max=8.0)' % pe.sections[0].entropy_H(raw))
# 计算压缩数据
print('MD5 : %s' % hashlib.md5(raw).hexdigest())
print('SHA-1 : %s' % hashlib.sha1(raw).hexdigest())
print('SHA-256 : %s' % hashlib.sha256(raw).hexdigest())
print('SHA-512 : %s' % hashlib.sha512(raw).hexdigest())
# 扫描文件末尾是否存在附加数据
overlayOffset = pe.get_overlay_data_start_offset()
if overlayOffset != None:
print("起始文件位置: 0x%08x"%overlayOffset)
overlaySize = len(raw[overlayOffset:])
print("长度: 0x%08x %s %.2f%%"%(overlaySize, NumberOfBytesHumanRepresentation(overlaySize), float(overlaySize) / float(len(raw)) * 100.0))
print("MD5: %s" %hashlib.md5(raw[overlayOffset:]).hexdigest())
print("SHA-256: %s" %hashlib.sha256(raw[overlayOffset:]).hexdigest())

if __name__ == "__main__":
pe = pefile.PE("d://lyshark.exe")
print(pe.FILE_HEADER.dump())
print(pe.dump_dict())
GetHeader(pe)

解析节表数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import hashlib,pefile

# 计算得到数据长度,自动使用推荐大小
def NumberOfBytesHumanRepresentation(value):
if value <= 1024:
return '%s bytes' % value
elif value < 1024 * 1024:
return '%.1f KB' % (float(value) / 1024.0)
elif value < 1024 * 1024 * 1024:
return '%.1f MB' % (float(value) / 1024.0 / 1024.0)
else:
return '%.1f GB' % (float(value) / 1024.0 / 1024.0 / 1024.0)

# 输出所有的节
def ScanSection(pe):
print("{:10s}{:10s}{:10s}{:10s}{:10s}{:10s}{:10s}{:10s}".format("序号","节区名称","虚拟偏移","虚拟大小","实际偏移","实际大小","节区属性","熵值"))
section_count = int(pe.FILE_HEADER.NumberOfSections + 1)
for count,item in zip(range(1,section_count),pe.sections):
print("%d\t\t\t%-10s\t0x%.8X\t0x%.8X\t0x%.8X\t0x%.8X\t0x%.8X\t%f"%(count,(item.Name).decode("utf-8"),item.VirtualAddress,item.Misc_VirtualSize,item.PointerToRawData,item.SizeOfRawData,item.Characteristics,item.get_entropy()))

# 计算所有节的MD5
def CheckSection(pe):
print("序号\t\t节名称\t\t文件偏移\t\t大小\t\tMD5\t\t\t\t\t\t\t\t\t\t节大小")
# 读取PE文件到内存
image_data = pe.get_memory_mapped_image()
section_count = int(pe.FILE_HEADER.NumberOfSections + 1)
for count,item in zip(range(1,section_count),pe.sections):
section_data = image_data[item.PointerToRawData: item.PointerToRawData + item.SizeOfRawData - 1]
data_size = NumberOfBytesHumanRepresentation(len(section_data))
hash_value = hashlib.md5(section_data).hexdigest()
print("{}\t{:10s}\t{:10X}\t{:10X}\t{:30s}\t{}".format(count,(item.Name).decode("utf-8"),item.PointerToRawData,item.SizeOfRawData,hash_value,data_size))

if __name__ == "__main__":
pe = pefile.PE("d://lyshark.exe")
ScanSection(pe)
CheckSection(pe)

节区RVA与FOA互转

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pefile

# 将RVA转换为FOA的函数
def RVAToFOA(pe,rva):
for item in pe.sections:
Section_Start = item.VirtualAddress
Section_Ends = item.VirtualAddress + item.SizeOfRawData
if rva >= Section_Start and rva < Section_Ends:
return rva - item.VirtualAddress + item.PointerToRawData
return -1

# 将FOA文件偏移转换为RVA相对地址
def FOAToRVA(pe,foa):
ImageBase = pe.OPTIONAL_HEADER.ImageBase
NumberOfSectionsCount = pe.FILE_HEADER.NumberOfSections
for index in range(0,NumberOfSectionsCount):
PointerRawStart = pe.sections[index].PointerToRawData
PointerRawEnds = pe.sections[index].PointerToRawData + pe.sections[index].SizeOfRawData
if foa >= PointerRawStart and foa <= PointerRawEnds:
rva = pe.sections[index].VirtualAddress + (foa - pe.sections[index].PointerToRawData)
return rva
return -1

# 内部功能实现FOA->RVA互转
def inside(pe):
# 从FOA获取RVA 传入十进制
rva = pe.get_rva_from_offset(3952)
print("对应内存RVA: {}".format(hex(rva)))
# 从RVA获取FOA 传入十进制
foa = pe.get_offset_from_rva(rva)
print("对应文件FOA: {}".format(foa))

if __name__ == "__main__":
pe = pefile.PE("d://lyshark.exe")
ref = RVAToFOA(pe,4128)
print("RVA转FOA => 输出十进制: {}".format(ref))
ref = FOAToRVA(pe,1056)
print("FOA转RVA => 输出十进制: {}".format(ref))

解析HEX数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import pefile,sys,re
from io import StringIO

dumplinelength = 16

def CIC(expression):
if callable(expression):
return expression()
else:
return expression

def IFF(expression, valueTrue, valueFalse):
if expression:
return CIC(valueTrue)
else:
return CIC(valueFalse)

class cDump():
def __init__(self, data, prefix='', offset=0, dumplinelength=16):
self.data = data
self.prefix = prefix
self.offset = offset
self.dumplinelength = dumplinelength

# 输出指定位置的十六进制格式
def HexDump(self):
oDumpStream = self.cDumpStream(self.prefix)
hexDump = ''
for i, b in enumerate(self.data):
if i % self.dumplinelength == 0 and hexDump != '':
oDumpStream.Addline(hexDump)
hexDump = ''
hexDump += IFF(hexDump == '', '', ' ') + '%02X' % self.C2IIP2(b)
oDumpStream.Addline(hexDump)
return oDumpStream.Content()

def CombineHexAscii(self, hexDump, asciiDump):
if hexDump == '':
return ''
countSpaces = 3 * (self.dumplinelength - len(asciiDump))
if len(asciiDump) <= self.dumplinelength / 2:
countSpaces += 1
return hexDump + ' ' + (' ' * countSpaces) + asciiDump

# 输出指定位置的十六进制格式以及ASCII字符串
def HexAsciiDump(self):
oDumpStream = self.cDumpStream(self.prefix)
hexDump = ''
asciiDump = ''
for i, b in enumerate(self.data):
b = self.C2IIP2(b)
if i % self.dumplinelength == 0:
if hexDump != '':
oDumpStream.Addline(self.CombineHexAscii(hexDump, asciiDump))
hexDump = '%08X:' % (i + self.offset)
asciiDump = ''
if i % self.dumplinelength == self.dumplinelength / 2:
hexDump += ' '
hexDump += ' %02X' % b
asciiDump += IFF(b >= 32 and b <= 128, chr(b), '.')
oDumpStream.Addline(self.CombineHexAscii(hexDump, asciiDump))
return oDumpStream.Content()

class cDumpStream():
def __init__(self, prefix=''):
self.oStringIO = StringIO()
self.prefix = prefix

def Addline(self, line):
if line != '':
self.oStringIO.write(self.prefix + line + '\n')

def Content(self):
return self.oStringIO.getvalue()

@staticmethod
def C2IIP2(data):
if sys.version_info[0] > 2:
return data
else:
return ord(data)

# 只输出十六进制数据
def HexDump(data):
return cDump(data, dumplinelength=dumplinelength).HexDump()

# 输出十六进制与ASCII字符串
def HexAsciiDump(data):
return cDump(data, dumplinelength=dumplinelength).HexAsciiDump()

# 找到指定节并读取hex数据
def GetSectionHex(pe):
ImageBase = pe.OPTIONAL_HEADER.ImageBase
for item in pe.sections:
# 判断是否是.text节
if str(item.Name.decode('UTF-8').strip(b'\x00'.decode())) == ".text":
# print("虚拟地址: 0x%.8X 虚拟大小: 0x%.8X" %(item.VirtualAddress,item.Misc_VirtualSize))
VirtualAddress = item.VirtualAddress
VirtualSize = item.Misc_VirtualSize
ActualOffset = item.PointerToRawData
StartVA = hex(ImageBase + VirtualAddress)
StopVA = hex(ImageBase + VirtualAddress + VirtualSize)
print("[+] 代码段起始地址: {} 结束: {} 实际偏移:{} 长度: {}".format(StartVA, StopVA, ActualOffset, VirtualSize))
# 获取到.text节区间内的数据
hex_code = pe.write()[ActualOffset: VirtualSize]
return hex_code
else:
print("程序中不存在.text节")
return 0
return 0

REGEX_STANDARD = '[\x09\x20-\x7E]'

def ExtractStringsASCII(data):
regex = REGEX_STANDARD + '{%d,}'
return re.findall(regex % 4, data)

def ExtractStringsUNICODE(data):
regex = '((' + REGEX_STANDARD + '\x00){%d,})'
return [foundunicodestring.replace('\x00', '') for foundunicodestring, dummy in re.findall(regex % 4, data)]

# 将传入Hex字符串以每16字符分割在一个列表内
def ExtractStrings(data):
return ExtractStringsASCII(data) + ExtractStringsUNICODE(data)

if __name__ == "__main__":
pe = pefile.PE("d://lyshark.exe")
# 得到.text节内数据
ref = GetSectionHex(pe)
# 转为十六进制格式
dump_hex = HexDump(ref)
print(dump_hex)
# 打包为每16字符一个列表
dump_list = ExtractStrings(dump_hex)
print(dump_list)

解析数据目录表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pefile

# 将RVA转换为FOA的函数
def RVAToFOA(pe,rva):
for item in pe.sections:
Section_Start = item.VirtualAddress
Section_Ends = item.VirtualAddress + item.SizeOfRawData
if rva >= Section_Start and rva < Section_Ends:
return rva - item.VirtualAddress + item.PointerToRawData
return -1

# 扫描数据目录表
def ScanOptional(pe):
optional_size = pe.OPTIONAL_HEADER.NumberOfRvaAndSizes
print("数据目录表个数: {}".format(optional_size))
print("编号 \t\t\t 目录RVA\t\t 目录FOA\t\t\t 长度\t\t 描述信息")
for index in range(0,optional_size):
va = int(pe.OPTIONAL_HEADER.DATA_DIRECTORY[index].VirtualAddress)
print("%03d \t\t 0x%08X\t\t 0x%08X\t\t %08d \t\t"%(index,pe.OPTIONAL_HEADER.DATA_DIRECTORY[index].VirtualAddress,RVAToFOA(pe,va),pe.OPTIONAL_HEADER.DATA_DIRECTORY[index].Size),end="")
if index == 0:
print("Export symbols")
if index == 1:
print("Import symbols")
if index == 2:
print("Resources")
if index == 3:
print("Exception")
if index == 4:
print("Security")
if index == 5:
print("Base relocation")
if index == 6:
print("Debug")
if index == 7:
print("Copyright string")
if index == 8:
print("Globalptr")
if index == 9:
print("Thread local storage (TLS)")
if index == 10:
print("Load configuration")
if index == 11:
print("Bound Import")
if index == 12:
print("Import Address Table")
if index == 13:
print("Delay Import")
if index == 14:
print("COM descriptor")
if index == 15:
print("NoUse")

if __name__ == "__main__":
pe = pefile.PE("d://lyshark.exe")
ScanOptional(pe)

解析导入导出表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import pefile

# 输出所有导入表模块
def ScanImport(pe):
try:
for x in pe.DIRECTORY_ENTRY_IMPORT:
for y in x.imports:
print("[*] 模块名称: %-20s 导入函数: %-14s" %((x.dll).decode("utf-8"),(y.name).decode("utf-8")))
except Exception:
pass

# 输出所有导出表模块
def ScanExport(pe):
try:
for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
print("[*] 导出序号: %-5s 模块地址: %-20s 模块名称: %-15s" %(exp.ordinal,hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),(exp.name).decode("utf-8")))
except:
pass

if __name__ == "__main__":
pe = pefile.PE("d://lyshark.exe")
ScanImport(pe)
ScanExport(pe)