安卓逆向入门-DEX文件格式初探

整体

本篇为Android 10的DEX文件格式,分为:

1
2
3
4
5
6
7
8
9
10
11
12
13
DEX头部:
Header

DEX数据索引:
STRING_IDS
TYPE_IDS
PROTO_IDS
FIELD_IDS
METHOD_IDS

DEX数据:
CLASS_DEF
DATA

定义在dalvik/libdex/DexFile.h:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
* Structure representing a DEX file.
*
* Code should regard DexFile as opaque, using the API calls provided here
* to access specific structures.
*
* The section pointers below are not owned copies: dexFileSetupBasicPointers()
* computes each one as "start of the mapped image + the matching offset stored
* in DexHeader".
*/
struct DexFile {
/* directly-mapped "opt" header */
const DexOptHeader* pOptHeader;
/* pointers to directly-mapped structs and arrays in base DEX */
const DexHeader* pHeader; /* start of the image, viewed as the header */
const DexStringId* pStringIds; /* image + DexHeader.stringIdsOff */
const DexTypeId* pTypeIds; /* image + DexHeader.typeIdsOff */
const DexFieldId* pFieldIds; /* image + DexHeader.fieldIdsOff */
const DexMethodId* pMethodIds; /* image + DexHeader.methodIdsOff */
const DexProtoId* pProtoIds; /* image + DexHeader.protoIdsOff */
const DexClassDef* pClassDefs; /* image + DexHeader.classDefsOff */
const DexLink* pLinkData; /* image + DexHeader.linkOff */
/*
* These are mapped out of the "auxiliary" section, and may not be
* included in the file.
*/
const DexClassLookup* pClassLookup;
const void* pRegisterMapPool; // RegisterMapClassPool
/* points to start of DEX file data */
const u1* baseAddr;
/* track memory overhead for auxiliary structures */
int overhead;
/* additional app-specific data structures associated with the DEX */
//void* auxData;
};

指针对应关系有:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Wire up the raw section pointers of a DexFile from a mapped DEX image.
 *
 * "data" must point at the first byte of the DEX header. Every section
 * pointer is derived as data + <offset field of the header>; nothing is
 * copied or validated here. Internal helper, not meant for general use.
 */
void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
    DexHeader* hdr = (DexHeader*) data;

    /* The image start doubles as both the base address and the header. */
    pDexFile->pHeader = hdr;
    pDexFile->baseAddr = data;

    /* Index sections, in the order their offsets appear in the header. */
    pDexFile->pStringIds = (const DexStringId*) (data + hdr->stringIdsOff);
    pDexFile->pTypeIds = (const DexTypeId*) (data + hdr->typeIdsOff);
    pDexFile->pProtoIds = (const DexProtoId*) (data + hdr->protoIdsOff);
    pDexFile->pFieldIds = (const DexFieldId*) (data + hdr->fieldIdsOff);
    pDexFile->pMethodIds = (const DexMethodId*) (data + hdr->methodIdsOff);
    pDexFile->pClassDefs = (const DexClassDef*) (data + hdr->classDefsOff);

    /* Link section. */
    pDexFile->pLinkData = (const DexLink*) (data + hdr->linkOff);
}

其中DEX文件使用了自定义的定宽整数类型别名:

1
2
3
4
5
6
7
8
9
10
11
/*
* These match the definitions in the VM specification.
*
* Naming: "u"/"s" = unsigned/signed, followed by the width in bytes.
*/
typedef uint8_t u1; /* unsigned, 1 byte */
typedef uint16_t u2; /* unsigned, 2 bytes */
typedef uint32_t u4; /* unsigned, 4 bytes */
typedef uint64_t u8; /* unsigned, 8 bytes */
typedef int8_t s1; /* signed, 1 byte */
typedef int16_t s2; /* signed, 2 bytes */
typedef int32_t s4; /* signed, 4 bytes */
typedef int64_t s8; /* signed, 8 bytes */

DEX文件格式还有一种LEB128(Little-Endian Base 128)变长数据类型,分为有符号的sleb128、无符号的uleb128,以及表示"无符号值加1"的uleb128p1。LEB128用1~5个字节编码一个32位数据:每个字节的低7位存放数据,最高位为1表示还需要继续读取下一个字节,即第一字节最高位为1则需要使用第二字节,同理若第二字节最高位也为1则需要第三字节,最多5个字节。LEB128类型数据可用/dalvik/libdex/Leb128.h中的readSignedLeb128和readUnsignedLeb128读取,下面是带合法性校验的版本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 * Read one unsigned LEB128 value and advance *pStream just past it,
 * clearing *okay when the encoding is not acceptable.
 *
 * *okay is set to false (it is never set to true here) when either:
 *   - limit is non-NULL and the read advanced the stream beyond limit, or
 *   - the value occupied five bytes and the fifth byte has anything other
 *     than its low-order four bits set.
 *
 * The decoded value is returned in all cases.
 */
int readAndVerifyUnsignedLeb128(const u1** pStream, const u1* limit, bool* okay) {
    const u1* const start = *pStream;
    const int value = readUnsignedLeb128(pStream);
    const u1* const end = *pStream;

    if (limit != NULL && end > limit) {
        /* consumed bytes beyond the caller's limit */
        *okay = false;
    } else if ((end - start) == 5 && start[4] > 0x0f) {
        /* five-byte form whose last byte carries too many bits */
        *okay = false;
    }

    return value;
}
/*
 * Read one signed LEB128 value and advance *pStream just past it,
 * clearing *okay when the encoding is not acceptable.
 *
 * *okay is set to false (it is never set to true here) when either:
 *   - limit is non-NULL and the read advanced the stream beyond limit, or
 *   - the value occupied five bytes and the fifth byte has anything other
 *     than its low-order four bits set.
 *
 * The decoded value is returned in all cases.
 */
int readAndVerifySignedLeb128(const u1** pStream, const u1* limit, bool* okay) {
    const u1* const start = *pStream;
    const int value = readSignedLeb128(pStream);
    const u1* const end = *pStream;

    if (limit != NULL && end > limit) {
        /* consumed bytes beyond the caller's limit */
        *okay = false;
    } else if ((end - start) == 5 && start[4] > 0x0f) {
        /* five-byte form whose last byte carries too many bits */
        *okay = false;
    }

    return value;
}

各结构体数据结构

DexHeader:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
* Direct-mapped "header_item" struct.
*
* Sits at the very start of the DEX image. Each xxxSize/xxxOff pair gives
* the entry count and the file offset of one section.
*/
struct DexHeader {
u1 magic[8]; /* includes version number (file identifier + version) */
u4 checksum; /* adler32 checksum of the file */
u1 signature[kSHA1DigestLen]; /* SHA-1 hash (signature) */
u4 fileSize; /* length of entire file */
u4 headerSize; /* offset to start of next section (size of this header) */
u4 endianTag; // byte-order tag
u4 linkSize; // size of the link section
u4 linkOff; // file offset of the link section (DexLink)
u4 mapOff; // file offset of the DexMapList, which indexes every section of the file
u4 stringIdsSize; // number of DexStringId entries
u4 stringIdsOff; // file offset of the DexStringId index: all strings
u4 typeIdsSize; // number of DexTypeId entries
u4 typeIdsOff; // file offset of the DexTypeId array: all types used
u4 protoIdsSize; // number of DexProtoId entries
u4 protoIdsOff; // file offset of the DexProtoId array: method prototypes
u4 fieldIdsSize; // number of DexFieldId entries
u4 fieldIdsOff; // file offset of the DexFieldId array: all fields
u4 methodIdsSize; // number of DexMethodId entries
u4 methodIdsOff; // file offset of the DexMethodId array: all methods
u4 classDefsSize; // number of DexClassDef entries
u4 classDefsOff; // file offset of the DexClassDef array: all class definitions
u4 dataSize; // size of the data section
u4 dataOff; // file offset of the data section
};

对于magic字段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
* DEX file magic number: the first 4 bytes of DexHeader.magic. The next
* 4 bytes hold one of the version strings below (3 ASCII digits + NUL).
*/
#define DEX_MAGIC "dex\n"
/* The version for android N, encoded in 4 bytes of ASCII. This differentiates dex files that may
* use default methods.
*/
#define DEX_MAGIC_VERS_37 "037\0"
/* The version for android O, encoded in 4 bytes of ASCII. This differentiates dex files that may
* contain invoke-custom, invoke-polymorphic, call-sites, and method handles.
*/
#define DEX_MAGIC_VERS_38 "038\0"
/* The version for android P, encoded in 4 bytes of ASCII. This differentiates dex files that may
* contain const-method-handle and const-proto.
*/
#define DEX_MAGIC_VERS_39 "039\0"
/* current version, encoded in 4 bytes of ASCII */
#define DEX_MAGIC_VERS "036\0"
/*
* older but still-recognized version (corresponding to Android API
* levels 13 and earlier)
*/
#define DEX_MAGIC_VERS_API_13 "035\0"

对于mapOff字段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*
* Direct-mapped "map_list".
*
* Found at DexHeader.mapOff from the start of the file. "list" is declared
* with one element but is indexed up to "size" entries by its users.
*
* NOTE(review): DexMapItem is declared after this struct in this listing;
* it has to be visible before this point for the declaration to compile.
*/
struct DexMapList {
u4 size; /* #of entries in list (number of DexMapItem structs) */
DexMapItem list[1]; /* entries */
};
/*
* Direct-mapped "map_item".
*
* One entry of the map list: describes where items of one kind live in
* the file and how many of them there are.
*/
struct DexMapItem {
u2 type; /* type code (see kDexType* above) */
u2 unused;
u4 size; /* count of items of the indicated type */
u4 offset; /* file offset to the start of data for the indicated type */
};
// Values of DexMapItem.type
/* map item type codes */
enum {
kDexTypeHeaderItem = 0x0000,
kDexTypeStringIdItem = 0x0001, // for this type, DexMapItem.offset leads to the DexStringId index array
kDexTypeTypeIdItem = 0x0002,
kDexTypeProtoIdItem = 0x0003,
kDexTypeFieldIdItem = 0x0004,
kDexTypeMethodIdItem = 0x0005,
kDexTypeClassDefItem = 0x0006,
kDexTypeCallSiteIdItem = 0x0007,
kDexTypeMethodHandleItem = 0x0008,
kDexTypeMapList = 0x1000,
kDexTypeTypeList = 0x1001,
kDexTypeAnnotationSetRefList = 0x1002,
kDexTypeAnnotationSetItem = 0x1003,
kDexTypeClassDataItem = 0x2000,
kDexTypeCodeItem = 0x2001,
kDexTypeStringDataItem = 0x2002,
kDexTypeDebugInfoItem = 0x2003,
kDexTypeAnnotationItem = 0x2004,
kDexTypeEncodedArrayItem = 0x2005,
kDexTypeAnnotationsDirectoryItem = 0x2006,
};

DexStringId结构为:

1
2
3
4
5
6
7
8
9
10
/*
* Direct-mapped "string_id_item".
*
* One entry of the string index; it only stores where the string's data
* lives in the file.
*/
struct DexStringId {
u4 stringDataOff; /* file offset to string_data_item (a DexStringItem) */
};
/*
* Layout of the data a DexStringId points at.
*
* NOTE(review): this reads as an illustrative layout rather than a
* compilable struct; "uleb128" and "ubyte" are not among the typedefs
* shown in this file, and the string occupies more than one byte.
*/
struct DexStringItem {
uleb128 size; // string length
ubyte data; // string contents, MUTF-8 encoded
};

对于typeIdsOff字段:

1
2
3
4
5
6
/*
* Direct-mapped "type_id_item".
*
* One entry of the type index; a type is identified by the string that
* holds its descriptor.
*/
struct DexTypeId {
u4 descriptorIdx; /* index into stringIds list for type descriptor */
};

对于protoIdsOff字段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
* Direct-mapped "proto_id_item".
*
* Describes one method prototype: shorty descriptor, return type and
* parameter list.
*/
struct DexProtoId {
u4 shortyIdx; /* index into stringIds for shorty descriptor */
u4 returnTypeIdx; /* index into typeIds list for return type */
u4 parametersOff; /* file offset to type_list (DexTypeList) for parameter types; 0 means no parameters */
};
/*
* Direct-mapped "type_item".
*
* A single element of a DexTypeList.
*/
struct DexTypeItem {
u2 typeIdx; /* index into typeIds */
};
/*
* Direct-mapped "type_list".
*
* Used for method parameter lists. "list" is declared with one element
* but is indexed up to "size" entries by its users.
*/
struct DexTypeList {
u4 size; /* #of entries in list (number of parameters) */
DexTypeItem list[1]; /* entries (the parameter types) */
};

其中具体类型有:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Map a one-character primitive type descriptor to its readable name,
 * e.g. 'I' -> "int". Any other character yields "UNKNOWN".
 *
 * The returned string is a literal and must not be freed or modified.
 */
static const char* primitiveTypeLabel(char typeChar)
{
    /* descriptor character -> readable name */
    static const struct {
        char code;
        const char* label;
    } kPrimitiveLabels[] = {
        { 'B', "byte" },
        { 'C', "char" },
        { 'D', "double" },
        { 'F', "float" },
        { 'I', "int" },
        { 'J', "long" },
        { 'S', "short" },
        { 'V', "void" },
        { 'Z', "boolean" },
    };
    const unsigned int count =
        (unsigned int)(sizeof(kPrimitiveLabels) / sizeof(kPrimitiveLabels[0]));

    for (unsigned int k = 0; k < count; k++) {
        if (kPrimitiveLabels[k].code == typeChar) {
            return kPrimitiveLabels[k].label;
        }
    }
    return "UNKNOWN";
}

对于fieldIdsOff字段有:

1
2
3
4
5
6
7
8
/*
* Direct-mapped "field_id_item".
*
* Identifies one field by owning class, field type and name.
*/
struct DexFieldId {
u2 classIdx; /* index into typeIds list for defining class */
u2 typeIdx; /* index into typeIds for field type */
u4 nameIdx; /* index into stringIds for field name */
};

对于methodIdsOff字段有:

1
2
3
4
5
6
7
8
/*
* Direct-mapped "method_id_item".
*
* Identifies one method by owning class, prototype and name.
*/
struct DexMethodId {
u2 classIdx; /* index into typeIds list for defining class */
u2 protoIdx; /* index into protoIds for method prototype */
u4 nameIdx; /* index into stringIds for method name */
};

对于classDefsOff字段有:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
* Direct-mapped "class_def_item".
*
* One entry per class defined in the DEX file.
*/
struct DexClassDef {
u4 classIdx; /* index into typeIds for this class */
u4 accessFlags; // access flags of the class (ACC_* values)
u4 superclassIdx; /* index into typeIds for superclass */
u4 interfacesOff; /* file offset to DexTypeList listing the interfaces */
u4 sourceFileIdx; /* index into stringIds for source file name */
u4 annotationsOff; /* file offset to annotations_directory_item (DexAnnotationsDirectoryItem) */
u4 classDataOff; /* file offset to class_data_item (expanded form: DexClassData) */
u4 staticValuesOff; /* file offset to DexEncodedArray holding static values */
};
/*
* access flags and masks; the "standard" ones are all <= 0x4000
*
* Note: There are related declarations in vm/oo/Object.h in the ClassFlags
* enum.
*
* Several bit values are shared by two names (0x20, 0x40, 0x80); which
* name applies depends on whether the flag sits on a class, field or
* method, as noted per entry ("ic" is the abbreviation used by the
* per-entry notes). The ACC_*_MASK values at the end are the unions of
* the flags listed for each kind of item.
*/
enum {
ACC_PUBLIC = 0x00000001, // class, field, method, ic
ACC_PRIVATE = 0x00000002, // field, method, ic
ACC_PROTECTED = 0x00000004, // field, method, ic
ACC_STATIC = 0x00000008, // field, method, ic
ACC_FINAL = 0x00000010, // class, field, method, ic
ACC_SYNCHRONIZED = 0x00000020, // method (only allowed on natives)
ACC_SUPER = 0x00000020, // class (not used in Dalvik)
ACC_VOLATILE = 0x00000040, // field
ACC_BRIDGE = 0x00000040, // method (1.5)
ACC_TRANSIENT = 0x00000080, // field
ACC_VARARGS = 0x00000080, // method (1.5)
ACC_NATIVE = 0x00000100, // method
ACC_INTERFACE = 0x00000200, // class, ic
ACC_ABSTRACT = 0x00000400, // class, method, ic
ACC_STRICT = 0x00000800, // method
ACC_SYNTHETIC = 0x00001000, // field, method, ic
ACC_ANNOTATION = 0x00002000, // class, ic (1.5)
ACC_ENUM = 0x00004000, // class, field, ic (1.5)
ACC_CONSTRUCTOR = 0x00010000, // method (Dalvik only)
ACC_DECLARED_SYNCHRONIZED =
0x00020000, // method (Dalvik only)
ACC_CLASS_MASK =
(ACC_PUBLIC | ACC_FINAL | ACC_INTERFACE | ACC_ABSTRACT
| ACC_SYNTHETIC | ACC_ANNOTATION | ACC_ENUM),
ACC_INNER_CLASS_MASK =
(ACC_CLASS_MASK | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC),
ACC_FIELD_MASK =
(ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL
| ACC_VOLATILE | ACC_TRANSIENT | ACC_SYNTHETIC | ACC_ENUM),
ACC_METHOD_MASK =
(ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL
| ACC_SYNCHRONIZED | ACC_BRIDGE | ACC_VARARGS | ACC_NATIVE
| ACC_ABSTRACT | ACC_STRICT | ACC_SYNTHETIC | ACC_CONSTRUCTOR
| ACC_DECLARED_SYNCHRONIZED),
};
/*
* Direct-mapped "annotations_directory_item".
*
* Fixed-size header of the annotations directory; the three variable-length
* arrays named in the trailing comments follow it directly in the file.
*/
struct DexAnnotationsDirectoryItem {
u4 classAnnotationsOff; /* offset to DexAnnotationSetItem */
u4 fieldsSize; /* count of DexFieldAnnotationsItem */
u4 methodsSize; /* count of DexMethodAnnotationsItem */
u4 parametersSize; /* count of DexParameterAnnotationsItem */
/* followed by DexFieldAnnotationsItem[fieldsSize] */
/* followed by DexMethodAnnotationsItem[methodsSize] */
/* followed by DexParameterAnnotationsItem[parametersSize] */
};
/* expanded form of class_data_item. Note: If a particular item is
* absent (e.g., no static fields), then the corresponding pointer
* is set to NULL.
*
* The element count of each array is the matching xxxSize field of
* "header".
*
* NOTE(review): DexClassDataHeader, DexField and DexMethod are declared
* after this struct in this listing; they have to be visible before this
* point for the declaration to compile. */
struct DexClassData {
DexClassDataHeader header; /* element counts for the four arrays below */
DexField* staticFields; /* header.staticFieldsSize entries */
DexField* instanceFields; /* header.instanceFieldsSize entries */
DexMethod* directMethods; /* header.directMethodsSize entries */
DexMethod* virtualMethods; /* header.virtualMethodsSize entries */
};
/* expanded form of a class_data_item header: how many members of each
* kind the class has */
struct DexClassDataHeader {
u4 staticFieldsSize; // number of static fields (DexField entries)
u4 instanceFieldsSize; // number of instance fields (DexField entries)
u4 directMethodsSize; // number of direct methods (DexMethod entries)
u4 virtualMethodsSize; // number of virtual methods (DexMethod entries)
};
/* expanded form of encoded_field: one field of a class */
struct DexField {
u4 fieldIdx; /* index to a field_id_item (DexFieldId) */
u4 accessFlags; // access flags (ACC_* values)
};
/* expanded form of encoded_method: one method of a class */
struct DexMethod {
u4 methodIdx; /* index to a method_id_item (DexMethodId) */
u4 accessFlags; // access flags (ACC_* values)
u4 codeOff; /* file offset to a code_item (DexCode) */
};
/*
* Direct-mapped "code_item".
*
* The "catches" table is used when throwing an exception,
* "debugInfo" is used when displaying an exception stack trace or
* debugging. An offset of zero indicates that there are no entries.
*
* "insns" is declared with one element but holds insnsSize 16-bit units;
* the optional tables listed in the trailing comments follow it.
*/
struct DexCode {
u2 registersSize; // number of registers used
u2 insSize; // number of arguments
u2 outsSize; // number of registers used when calling other methods
u2 triesSize; // number of try/catch exception handlers
u4 debugInfoOff; /* file offset to debug info stream */
u4 insnsSize; /* size of the insns array, in u2 units */
u2 insns[1]; // start of this method's instructions
/* followed by optional u2 padding */
/* followed by try_item[triesSize] */
/* followed by uleb128 handlersSize */
/* followed by catch_handler_item[handlersSize] */
};
/*
* Direct-mapped "encoded_array".
*
* NOTE: this structure is byte-aligned.
*
* "array" marks the first byte of the encoded data; the one-element
* declaration is a placeholder, not the real length.
*/
struct DexEncodedArray {
u1 array[1]; /* data in encoded_array format */
};

DEX解析

文件结构部分

主函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#include <windows.h>
#include "DexParse.h"
/* Path of the DEX file that main() hands to process(). */
static CONST PCHAR gProgName = "HelloWorld.dex";
/*
* The three Win32 resources that make up one read-only file mapping.
* Filled in by dexOpenAndMap() and released by sysReleaseShmem().
*/
struct FileMap {
HANDLE hFile; /* handle returned by CreateFile */
HANDLE hMap; /* handle returned by CreateFileMapping */
LPVOID lpBase; /* address returned by MapViewOfFile */
};
/*
 * Open fileName read-only and map the whole file into memory.
 *
 * On success all three fields of *map are valid and map->lpBase points at
 * the first byte of the file; release them with sysReleaseShmem().
 *
 * On failure nothing is left open: every resource acquired so far is closed
 * and the fields are reset (hFile = INVALID_HANDLE_VALUE, hMap = NULL,
 * lpBase = NULL), so callers only need to test map->lpBase.
 */
VOID dexOpenAndMap(CONST PCHAR fileName, FileMap* map) {
    map->hFile = INVALID_HANDLE_VALUE;
    map->hMap = NULL;
    map->lpBase = NULL;

    /*
     * fileName is a narrow string, so call the ANSI entry points explicitly;
     * the generic CreateFile/CreateFileMapping macros resolve to the wide
     * versions when UNICODE is defined.
     */
    map->hFile = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL,
                             OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (map->hFile == INVALID_HANDLE_VALUE)
        return;

    /* Size 0/0 means "the current size of the file". */
    map->hMap = CreateFileMappingA(map->hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    if (map->hMap == NULL) {
        CloseHandle(map->hFile);
        map->hFile = INVALID_HANDLE_VALUE;
        return;
    }

    /* Last argument is a byte count (SIZE_T); 0 maps the whole object. */
    map->lpBase = MapViewOfFile(map->hMap, FILE_MAP_READ, 0, 0, 0);
    if (map->lpBase == NULL) {
        CloseHandle(map->hMap);
        map->hMap = NULL;
        CloseHandle(map->hFile);
        map->hFile = INVALID_HANDLE_VALUE;
    }
}
/*
 * Release a mapping created by dexOpenAndMap(): unmap the view, then close
 * the mapping object and the file.
 *
 * Each resource is released only if it is actually held, and the field is
 * reset afterwards, so the function is safe on a partially-initialised
 * FileMap and safe to call twice.
 */
VOID sysReleaseShmem(FileMap* map) {
    if (map == NULL)
        return;

    if (map->lpBase != NULL) {
        UnmapViewOfFile(map->lpBase);
        map->lpBase = NULL;
    }
    if (map->hMap != NULL) {
        CloseHandle(map->hMap);
        map->hMap = NULL;
    }
    /* CreateFile reports failure with INVALID_HANDLE_VALUE, not NULL. */
    if (map->hFile != NULL && map->hFile != INVALID_HANDLE_VALUE) {
        CloseHandle(map->hFile);
        map->hFile = INVALID_HANDLE_VALUE;
    }
}
/*
 * Map the DEX file at fileName and release it again.
 *
 * Returns 0 when the file could be mapped and a non-zero value when it
 * could not, so the caller can tell the two cases apart.
 */
INT process(CONST PCHAR fileName) {
    FileMap map;

    dexOpenAndMap(fileName, &map);
    if (map.lpBase == NULL)
        return 1;   /* open or mapping failed */

    /* map.lpBase now points at the file contents; parsing would go here. */

    sysReleaseShmem(&map);
    return 0;
}
/*
 * Entry point: process the built-in file name and use the result of
 * process() as the exit status. Command-line arguments are not used.
 */
INT main(INT argc, CONST PCHAR argv[]) {
    (void)argc;
    (void)argv;
    return process(gProgName);
}

下面程序直接从Android10源码复制出来,自己改。解析头部:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
DexFile* dexFileParse(const u1* data, size_t length, int flags) {
DexFile* pDexFile = NULL;
const DexHeader* pHeader;
const u1* magic;
int result = -1;
if (length < sizeof(DexHeader)) { //检查文件长度
ALOGE("too short to be a valid .dex");
goto bail; /* bad file format */
};
pDexFile = (DexFile*)malloc(sizeof(DexFile)); //用dexFileFree释放
if (pDexFile == NULL)
goto bail; /* alloc failure */
memset(pDexFile, 0, sizeof(DexFile));
/*
* Peel off the optimized header.
*/
if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
magic = data;
if (memcmp(magic + 4, DEX_OPT_MAGIC_VERS, 4) != 0) {
ALOGE("bad opt version (0x%02x %02x %02x %02x)", magic[4], magic[5], magic[6], magic[7]);
goto bail;
};
pDexFile->pOptHeader = (const DexOptHeader*)data;
ALOGV("Good opt header, DEX offset is %d, flags=0x%02x", pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
/* parse the optimized dex file tables */
if (!dexParseOptData(data, length, pDexFile))
goto bail;
/* ignore the opt header and appended data from here on out */
data += pDexFile->pOptHeader->dexOffset;
length -= pDexFile->pOptHeader->dexOffset;
if (pDexFile->pOptHeader->dexLength > length) {
ALOGE("File truncated? stored len=%d, rem len=%d", pDexFile->pOptHeader->dexLength, (int)length);
goto bail;
};
length = pDexFile->pOptHeader->dexLength;
};
dexFileSetupBasicPointers(pDexFile, data); //设置DEX文件格式指针 上文有源码
pHeader = pDexFile->pHeader;
if (!dexHasValidMagic(pHeader)) //检查文件有效性
goto bail;
/*
* Verify the checksum(s). This is reasonably quick, but does require
* touching every byte in the DEX file. The base checksum changes after
* byte-swapping and DEX optimization.
*/
if (flags & kDexParseVerifyChecksum) {
u4 adler = dexComputeChecksum(pHeader);
if (adler != pHeader->checksum) {
ALOGE("ERROR: bad checksum (%08x vs %08x)", adler, pHeader->checksum);
if (!(flags & kDexParseContinueOnError))
goto bail;
}
else
ALOGV("+++ adler32 checksum (%08x) verified", adler);
const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
if (pOptHeader != NULL) {
adler = dexComputeOptChecksum(pOptHeader);
if (adler != pOptHeader->checksum) {
ALOGE("ERROR: bad opt checksum (%08x vs %08x)", adler, pOptHeader->checksum);
if (!(flags & kDexParseContinueOnError))
goto bail;
}
else
ALOGV("+++ adler32 opt checksum (%08x) verified", adler);
};
};
/*
* Verify the SHA-1 digest. (Normally we don't want to do this --
* the digest is used to uniquely identify the original DEX file, and
* can't be computed for verification after the DEX is byte-swapped
* and optimized.)
*/
if (kVerifySignature) {
unsigned char sha1Digest[kSHA1DigestLen];
const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + kSHA1DigestLen;
dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
char tmpBuf1[kSHA1DigestOutputLen];
char tmpBuf2[kSHA1DigestOutputLen];
ALOGE("ERROR: bad SHA1 digest (%s vs %s)", dexSHA1DigestToStr(sha1Digest, tmpBuf1), dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
if (!(flags & kDexParseContinueOnError))
goto bail;
}
else
ALOGV("+++ sha1 digest verified");
};
if (pHeader->fileSize != length) {
ALOGE("ERROR: stored file size (%d) != expected (%d)", (int)pHeader->fileSize, (int)length);
if (!(flags & kDexParseContinueOnError))
goto bail;
};
if (pHeader->classDefsSize == 0) {
ALOGE("ERROR: DEX file has no classes in it, failing");
goto bail;
};
/*
* Success!
*/
result = 0;
bail:
if (result != 0 && pDexFile != NULL) {
dexFileFree(pDexFile);
pDexFile = NULL;
};
return pDexFile;
};
/*
 * Release a DexFile obtained from dexFileParse(). Passing NULL is allowed
 * and does nothing. Only the DexFile structure itself is freed.
 */
void dexFileFree(DexFile* pDexFile) {
    if (pDexFile != NULL) {
        free(pDexFile);
    }
}

检查文件有效性:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Check the 8-byte magic of a DEX header: the first 4 bytes must equal
 * DEX_MAGIC and the following 4 bytes must equal one of the recognized
 * version strings. Logs the offending bytes and returns false otherwise.
 */
bool dexHasValidMagic(const DexHeader* pHeader) {
    /* Version strings this parser accepts; each is compared over 4 bytes. */
    static const char* const kKnownVersions[] = {
        DEX_MAGIC_VERS,
        DEX_MAGIC_VERS_API_13,
        DEX_MAGIC_VERS_37,
        DEX_MAGIC_VERS_38,
        DEX_MAGIC_VERS_39,
    };
    const unsigned int knownCount =
        (unsigned int)(sizeof(kKnownVersions) / sizeof(kKnownVersions[0]));
    const u1* magic = pHeader->magic;
    const u1* version = magic + 4;

    if (memcmp(magic, DEX_MAGIC, 4) != 0) {
        ALOGE("ERROR: unrecognized magic number (%02x %02x %02x %02x)", magic[0], magic[1], magic[2], magic[3]);
        return false;
    }

    for (unsigned int k = 0; k < knownCount; k++) {
        if (memcmp(version, kKnownVersions[k], 4) == 0) {
            return true;
        }
    }

    /*
     * Magic was correct, but this is an unsupported older or
     * newer format variant.
     */
    ALOGE("ERROR: unsupported dex version (%02x %02x %02x %02x)", version[0], version[1], version[2], version[3]);
    return false;
}

接下来输出DEX文件头,直接借用DexDump中的dumpFileHeader:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/*
* Print the header fields of a parsed DEX file to stdout.
*
* If the file carries an optimized-DEX header (pOptHeader != NULL) that
* header is printed first, followed by the regular DEX header. Offsets are
* shown in decimal and hexadecimal.
*/
void dumpFileHeader(const DexFile* pDexFile) {
const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
const DexHeader* pHeader = pDexFile->pHeader;
/* buffer for a printable copy of the magic bytes, filled in by asciify() */
char sanitized[sizeof(pHeader->magic) * 2 + 1];
/* NOTE(review): "fileName" is neither a parameter nor a local of this
* function; it has to come from an enclosing scope -- confirm it exists. */
printf("Opened '%s', DEX version '%.3s'\n", fileName,pDexFile->pHeader->magic + 4);
/* sizeof does not evaluate its operand, so this is safe even when
* pOptHeader is NULL */
assert(sizeof(pHeader->magic) == sizeof(pOptHeader->magic));
if (pOptHeader != NULL) {
printf("Optimized DEX file header:\n");
asciify(sanitized, pOptHeader->magic, sizeof(pOptHeader->magic));
printf("magic : '%s'\n", sanitized);
printf("dex_offset : %d (0x%06x)\n", pOptHeader->dexOffset, pOptHeader->dexOffset);
printf("dex_length : %d\n", pOptHeader->dexLength);
printf("deps_offset : %d (0x%06x)\n", pOptHeader->depsOffset, pOptHeader->depsOffset);
printf("deps_length : %d\n", pOptHeader->depsLength);
printf("opt_offset : %d (0x%06x)\n", pOptHeader->optOffset, pOptHeader->optOffset);
printf("opt_length : %d\n", pOptHeader->optLength);
printf("flags : %08x\n", pOptHeader->flags);
printf("checksum : %08x\n", pOptHeader->checksum);
printf("\n");
};
printf("DEX file header:\n");
asciify(sanitized, pHeader->magic, sizeof(pHeader->magic));
printf("magic : '%s'\n", sanitized);
printf("checksum : %08x\n", pHeader->checksum);
/* only the first two and last two signature bytes are shown */
printf("signature : %02x%02x...%02x%02x\n", pHeader->signature[0], pHeader->signature[1], pHeader->signature[kSHA1DigestLen - 2], pHeader->signature[kSHA1DigestLen - 1]);
printf("file_size : %d\n", pHeader->fileSize);
printf("header_size : %d\n", pHeader->headerSize);
printf("link_size : %d\n", pHeader->linkSize);
printf("link_off : %d (0x%06x)\n", pHeader->linkOff, pHeader->linkOff);
printf("string_ids_size : %d\n", pHeader->stringIdsSize);
printf("string_ids_off : %d (0x%06x)\n", pHeader->stringIdsOff, pHeader->stringIdsOff);
printf("type_ids_size : %d\n", pHeader->typeIdsSize);
printf("type_ids_off : %d (0x%06x)\n", pHeader->typeIdsOff, pHeader->typeIdsOff);
printf("proto_ids_size : %d\n", pHeader->protoIdsSize);
printf("proto_ids_off : %d (0x%06x)\n", pHeader->protoIdsOff, pHeader->protoIdsOff);
printf("field_ids_size : %d\n", pHeader->fieldIdsSize);
printf("field_ids_off : %d (0x%06x)\n", pHeader->fieldIdsOff, pHeader->fieldIdsOff);
printf("method_ids_size : %d\n", pHeader->methodIdsSize);
printf("method_ids_off : %d (0x%06x)\n", pHeader->methodIdsOff, pHeader->methodIdsOff);
printf("class_defs_size : %d\n", pHeader->classDefsSize);
printf("class_defs_off : %d (0x%06x)\n", pHeader->classDefsOff, pHeader->classDefsOff);
printf("data_size : %d\n", pHeader->dataSize);
printf("data_off : %d (0x%06x)\n", pHeader->dataOff, pHeader->dataOff);
printf("\n");
return;
};

解析DexMapList:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Return the map list of a DEX file, located at header.mapOff bytes from
 * the start of the image, or NULL when the header records no map
 * (mapOff == 0).
 */
DEX_INLINE CONST DexMapList* dexGetMap(CONST DexFile* pDexFile) {
    CONST u4 offset = pDexFile->pHeader->mapOff;

    if (offset != 0) {
        return (CONST DexMapList*)(pDexFile->baseAddr + offset);
    }
    return NULL;
}
/*
* Walk the DEX map list and print the name of each entry's type code.
*
* The listing is abridged: the "..." lines stand for the remaining
* switch cases and the rest of the per-entry output.
*
* NOTE(review): dexGetMap() returns NULL when mapOff is 0, and the loop
* below dereferences its result without checking.
*/
VOID PrintDexMapList(DexFile* pDexFile) {
CONST DexMapList* pDexMapList = dexGetMap(pDexFile);
for (u4 i = 0; i < pDexMapList->size; i++) {
switch (pDexMapList->list[i].type) {
case 0x0000:printf("kDexTypeHeaderItem");break;
...
};
...
};
return;
};

StringIds解析:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Print every string of the DEX string table, one per line, as
 * "<index>.<string>".
 */
VOID PrintStringIds(DexFile* pDexFile) {
    CONST u4 total = pDexFile->pHeader->stringIdsSize;
    u4 idx = 0;

    while (idx < total) {
        printf("%d.%s\r\n", idx, dexStringById(pDexFile, idx));
        idx++;
    }
}
/*
 * Return a pointer to the character data of the string that pStringId
 * refers to.
 *
 * The data at stringDataOff starts with a variable-length length prefix;
 * this skips every prefix byte greater than 0x7f plus the first byte that
 * is not, and returns the address that follows.
 */
DEX_INLINE CONST PCHAR dexGetStringData(CONST DexFile* pDexFile, CONST DexStringId* pStringId) {
    CONST u1* cursor = pDexFile->baseAddr + pStringId->stringDataOff;
    u1 prefixByte;

    /* Consume the length prefix one byte at a time. */
    do {
        prefixByte = *cursor;
        cursor++;
    } while (prefixByte > 0x7f);

    return (CONST PCHAR)cursor;
}
/*
 * Return the idx-th entry of the string index. idx must be smaller than
 * header.stringIdsSize (checked with assert only).
 */
DEX_INLINE CONST DexStringId* dexGetStringId(CONST DexFile* pDexFile, u4 idx) {
    CONST DexStringId* table = pDexFile->pStringIds;

    assert(idx < pDexFile->pHeader->stringIdsSize);
    return table + idx;
}
/*
 * Return the character data of the string with index idx: look up its
 * DexStringId entry, then resolve that entry to the string data.
 */
DEX_INLINE CONST PCHAR dexStringById(CONST DexFile* pDexFile, u4 idx) {
    CONST DexStringId* pStringId = dexGetStringId(pDexFile, idx);
    /* was "pDataFile", an identifier that does not exist in this function */
    return dexGetStringData(pDexFile, pStringId);
}

解析TypeIds:

1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Print every entry of the DEX type table, one per line, as
 * "<index> <type descriptor>".
 */
VOID PrintTypeIds(DexFile* pDexFile) {
    CONST u4 total = pDexFile->pHeader->typeIdsSize;
    u4 idx = 0;

    while (idx < total) {
        printf("%d %s\r\n", idx, dexStringByTypeIdx(pDexFile, idx));
        idx++;
    }
}
/*
 * Return the idx-th entry of the type index. idx must be smaller than
 * header.typeIdsSize (checked with assert only).
 */
DEX_INLINE CONST DexTypeId* dexGetTypeId(CONST DexFile* pDexFile, u4 idx) {
    CONST DexTypeId* table = pDexFile->pTypeIds;

    assert(idx < pDexFile->pHeader->typeIdsSize);
    return table + idx;
}
/*
 * Return the descriptor string of the type with index idx: look up its
 * DexTypeId entry, then fetch the string named by descriptorIdx.
 */
DEX_INLINE CONST PCHAR dexStringByTypeIdx(CONST DexFile* pDexFile, u4 idx) {
    /* was declared as "DexType* typeI" while the next line used "typeId" */
    CONST DexTypeId* typeId = dexGetTypeId(pDexFile, idx);
    return dexStringById(pDexFile, typeId->descriptorIdx);
}

解析ProtoIds:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * Print the method prototype table twice:
 *   pass 1 - raw index values, then the shorty string, return type and
 *            parameter descriptors of each prototype;
 *   pass 2 - each prototype reassembled as "<return>(<param>, <param>);".
 */
VOID PrintProtoIds(DexFile* pDexFile) {
    CONST u4 total = pDexFile->pHeader->protoIdsSize;

    /* Pass 1: raw values and the strings they refer to. */
    for (u4 i = 0; i < total; i++) {
        CONST DexProtoId* pDexProtoId = dexGetProtoId(pDexFile, i);
        printf("%08X %08X %08X\r\n", pDexProtoId->shortyIdx, pDexProtoId->returnTypeIdx, pDexProtoId->parametersOff);
        printf("%s %s\r\n", dexStringById(pDexFile, pDexProtoId->shortyIdx), dexStringByTypeIdx(pDexFile, pDexProtoId->returnTypeIdx));

        /* A NULL list means the prototype takes no parameters. */
        CONST DexTypeList* pDexTypeList = dexGetProtoParameters(pDexFile, pDexProtoId);
        u4 num = pDexTypeList != NULL ? pDexTypeList->size : 0;
        for (u4 j = 0; j < num; j++)
            printf("%s ", dexStringByTypeIdx(pDexFile, pDexTypeList->list[j].typeIdx));
        printf("\r\n");
    }

    /*
     * Pass 2: reassembled prototypes.
     * The loop bound used to read "pDexFile - pHeader->protoIdsSize".
     * Parameters are joined with an explicit ", " separator; the previous
     * backspace trick overwrote the last character of every descriptor,
     * which erased single-character primitive types such as "I".
     */
    for (u4 i = 0; i < total; i++) {
        CONST DexProtoId* pDexProtoId = dexGetProtoId(pDexFile, i);
        printf("%s(", dexStringByTypeIdx(pDexFile, pDexProtoId->returnTypeIdx));

        CONST DexTypeList* pDexTypeList = dexGetProtoParameters(pDexFile, pDexProtoId);
        u4 num = pDexTypeList != NULL ? pDexTypeList->size : 0;
        for (u4 j = 0; j < num; j++)
            printf("%s%s", j == 0 ? "" : ", ", dexStringByTypeIdx(pDexFile, pDexTypeList->list[j].typeIdx));
        printf(");\r\n");
    }
}
/*
 * Return the idx-th entry of the prototype index. idx must be smaller than
 * header.protoIdsSize (checked with assert only).
 */
DEX_INLINE CONST DexProtoId* dexGetProtoId(CONST DexFile* pDexFile, u4 idx) {
    CONST DexProtoId* table = pDexFile->pProtoIds;

    assert(idx < pDexFile->pHeader->protoIdsSize);
    return table + idx;
}
/*
 * Return the parameter type list of a method prototype, or NULL when the
 * prototype has no parameters (parametersOff == 0).
 *
 * The function is spelled dexGetProtoParameters, which is the name every
 * caller in this file uses; the field read is DexProtoId.parametersOff.
 */
DEX_INLINE CONST DexTypeList* dexGetProtoParameters(CONST DexFile* pDexFile, CONST DexProtoId* pProtoId) {
    if (pProtoId->parametersOff == 0)
        return NULL;
    return (CONST DexTypeList*)(pDexFile->baseAddr + pProtoId->parametersOff);
}
/* Alias kept so code written against the earlier misspelled name still builds. */
DEX_INLINE CONST DexTypeList* dexGetProtoParamenters(CONST DexFile* pDexFile, CONST DexProtoId* pProtoId) {
    return dexGetProtoParameters(pDexFile, pProtoId);
}
/*
 * Map a one-character primitive type descriptor to its readable name,
 * e.g. 'I' -> "int". Any other character yields "UNKNOWN".
 * The returned string is a literal and must not be freed.
 */
static const char* primitiveTypeLabel(char typeChar) {
    switch (typeChar) {
    case 'B': return "byte";
    case 'C': return "char";
    case 'D': return "double";
    case 'F': return "float";
    case 'I': return "int";
    case 'J': return "long";
    case 'S': return "short";
    case 'V': return "void";
    case 'Z': return "boolean";
    default:
        return "UNKNOWN";
    }
}

/*
 * Convert a type descriptor into a dotted, human-readable type name:
 *   "Ljava/lang/String;" -> "java.lang.String"
 *   "[I"                 -> "int[]"
 *   "[[Ljava/lang/Object;" -> "java.lang.Object[][]"
 *
 * Leading '[' characters become trailing "[]" pairs, a single remaining
 * character is treated as a primitive type, an enclosing 'L' ... ';' is
 * dropped, and every '/' becomes '.'.
 *
 * Returns a newly malloc'ed string that the caller must free(), or NULL if
 * the allocation fails.
 */
static char* descriptorToDot(const char* str) {
    size_t targetLen = strlen(str);
    size_t offset = 0;

    /* strip leading [s; will be added to end */
    while (targetLen > 1 && str[offset] == '[') {
        offset++;
        targetLen--;
    }
    const size_t arrayDepth = offset;

    if (targetLen == 1) {
        /* primitive type */
        str = primitiveTypeLabel(str[offset]);
        offset = 0;
        targetLen = strlen(str);
    } else if (targetLen >= 2 && str[offset] == 'L' && str[offset + targetLen - 1] == ';') {
        /* account for leading 'L' and trailing ';' */
        targetLen -= 2;
        offset++;
    }

    const size_t totalLen = targetLen + arrayDepth * 2;
    char* newStr = (char*)malloc(totalLen + 1);
    if (newStr == NULL)
        return NULL;

    /* copy class name over */
    size_t i;
    for (i = 0; i < targetLen; i++) {
        char ch = str[offset + i];
        newStr[i] = (ch == '/') ? '.' : ch;
    }

    /*
     * add the appropriate number of brackets for arrays. A separate counter
     * is used so that arrayDepth keeps its value for the final check; the
     * earlier countdown left it at -1, which made that check fail on every
     * call when asserts were enabled.
     */
    for (size_t depth = 0; depth < arrayDepth; depth++) {
        newStr[i++] = '[';
        newStr[i++] = ']';
    }
    newStr[i] = '\0';
    assert(i == totalLen);
    return newStr;
}

解析MethodIds:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Return the idx-th entry of the method index. idx must be smaller than
 * header.methodIdsSize; this is checked with assert, matching the other
 * index getters in this file.
 */
CONST DexMethodId* dexGetMethodId(CONST DexFile* pDexFile, u4 idx) {
    assert(idx < pDexFile->pHeader->methodIdsSize);
    return &pDexFile->pMethodIds[idx];
}
/*
 * Print the method table three times:
 *   pass 1 - raw index values, then the class descriptor and method name;
 *   pass 2 - "<return> <class>.<name>(<params>);" using raw descriptors;
 *   pass 3 - the same signature with descriptors converted to dotted,
 *            human-readable type names via descriptorToDot().
 */
VOID PrintMethodIds(DexFile* pDexFile) {
    CONST u4 total = pDexFile->pHeader->methodIdsSize;

    /* Pass 1: raw values. */
    for (u4 i = 0; i < total; i++) {
        CONST DexMethodId* pDexMethodId = dexGetMethodId(pDexFile, i);
        printf("%04X %04X %08X\r\n", pDexMethodId->classIdx, pDexMethodId->protoIdx, pDexMethodId->nameIdx);
        printf("%s %s\r\n", dexStringByTypeIdx(pDexFile, pDexMethodId->classIdx), dexStringById(pDexFile, pDexMethodId->nameIdx));
    }

    /*
     * Pass 2: signatures built from raw descriptors.
     * (The loop bound used to name a non-existent field, "methodIdSize".)
     * The "\b" after the class descriptor backs up over its last character
     * so the '.' is printed in its place on a terminal. Parameters are
     * joined with ", " and the line is always closed with ");".
     */
    for (u4 i = 0; i < total; i++) {
        CONST DexMethodId* pDexMethodId = dexGetMethodId(pDexFile, i);
        CONST DexProtoId* pDexProtoId = dexGetProtoId(pDexFile, pDexMethodId->protoIdx);
        printf("%s %s\b.%s(", dexStringByTypeIdx(pDexFile, pDexProtoId->returnTypeIdx), dexStringByTypeIdx(pDexFile, pDexMethodId->classIdx), dexStringById(pDexFile, pDexMethodId->nameIdx));

        CONST DexTypeList* pDexTypeList = dexGetProtoParameters(pDexFile, pDexProtoId);
        u4 num = pDexTypeList != NULL ? pDexTypeList->size : 0;
        for (u4 j = 0; j < num; j++)
            printf("%s%s", j == 0 ? "" : ", ", dexStringByTypeIdx(pDexFile, pDexTypeList->list[j].typeIdx));
        printf(");\r\n");
    }

    /*
     * Pass 3: signatures with dotted type names.
     * descriptorToDot() returns a malloc'ed string, so each result is freed
     * after printing; if it returns NULL the raw descriptor is printed
     * instead. The method name is printed as-is: it is a plain name, not a
     * type descriptor.
     */
    for (u4 i = 0; i < total; i++) {
        CONST DexMethodId* pDexMethodId = dexGetMethodId(pDexFile, i);
        CONST DexProtoId* pDexProtoId = dexGetProtoId(pDexFile, pDexMethodId->protoIdx);
        const char* rawReturn = dexStringByTypeIdx(pDexFile, pDexProtoId->returnTypeIdx);
        const char* rawClass = dexStringByTypeIdx(pDexFile, pDexMethodId->classIdx);
        char* dotReturn = descriptorToDot(rawReturn);
        char* dotClass = descriptorToDot(rawClass);

        printf("%s %s.%s(", dotReturn != NULL ? dotReturn : rawReturn, dotClass != NULL ? dotClass : rawClass, dexStringById(pDexFile, pDexMethodId->nameIdx));
        free(dotReturn);
        free(dotClass);

        CONST DexTypeList* pDexTypeList = dexGetProtoParameters(pDexFile, pDexProtoId);
        u4 num = pDexTypeList != NULL ? pDexTypeList->size : 0;
        for (u4 j = 0; j < num; j++) {
            const char* rawParam = dexStringByTypeIdx(pDexFile, pDexTypeList->list[j].typeIdx);
            char* dotParam = descriptorToDot(rawParam);
            printf("%s%s", j == 0 ? "" : ", ", dotParam != NULL ? dotParam : rawParam);
            free(dotParam);
        }
        printf(");\r\n");
    }
}

解析DexClassDef:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 * Print a rough, Java-like listing of every class in the DEX file: source
 * file, class and superclass descriptors, instance fields, and for each
 * direct method its signature, register counts and raw 16-bit code units.
 *
 * Static fields and virtual methods are not printed.
 */
VOID PrintClassDef(DexFile* pDexFile) {
    for (u4 i = 0; i < pDexFile->pHeader->classDefsSize; i++) {
        CONST DexClassDef* pDexClassDef = dexGetClassDef(pDexFile, i);

        /*
         * NOTE(review): the three helpers below are defined outside this
         * listing; they are assumed to be able to return NULL (no source
         * file recorded, no superclass), so guard before printing with %s.
         */
        const char* sourceFile = dexGetSourceFile(pDexFile, pDexClassDef);
        const char* className = dexGetClassDescriptor(pDexFile, pDexClassDef);
        const char* superName = dexGetSuperClassDescriptor(pDexFile, pDexClassDef);
        printf("SourceFile: %s\r\n", sourceFile != NULL ? sourceFile : "(unknown)");
        /* "\b" backs up over the descriptor's last character on a terminal */
        printf("class %s\b extends %s\b {\r\n", className != NULL ? className : "(unknown);", superName != NULL ? superName : "(none);");

        /* A class without class data has no members to print. */
        CONST u1* pu1 = dexGetClassData(pDexFile, pDexClassDef);
        DexClassData* pDexClassData = (pu1 != NULL) ? dexReadAndVerifyClassData(&pu1, NULL) : NULL;
        if (pDexClassData == NULL) {
            printf("}\r\n");
            continue;
        }

        /* Instance fields: "<type descriptor> <name>". */
        for (u4 z = 0; z < pDexClassData->header.instanceFieldsSize; z++) {
            CONST DexFieldId* pDexField = dexGetFieldId(pDexFile, pDexClassData->instanceFields[z].fieldIdx);
            printf("%s %s\r\n", dexStringByTypeIdx(pDexFile, pDexField->typeIdx), dexStringById(pDexFile, pDexField->nameIdx));
        }

        /* Direct methods: signature followed by the code dump. */
        for (u4 z = 0; z < pDexClassData->header.directMethodsSize; z++) {
            CONST DexMethodId* pDexMethod = dexGetMethodId(pDexFile, pDexClassData->directMethods[z].methodIdx);
            CONST DexProtoId* pDexProtoId = dexGetProtoId(pDexFile, pDexMethod->protoIdx);
            printf("\t%s %s\b.%s(", dexStringByTypeIdx(pDexFile, pDexProtoId->returnTypeIdx), dexStringByTypeIdx(pDexFile, pDexMethod->classIdx), dexStringById(pDexFile, pDexMethod->nameIdx));

            /* Parameters as "<type> v<k>", joined with ", ". */
            CONST DexTypeList* pDexTypeList = dexGetProtoParameters(pDexFile, pDexProtoId);
            u4 num = pDexTypeList != NULL ? pDexTypeList->size : 0;
            for (u4 k = 0; k < num; k++)
                printf("%s%s v%u", k == 0 ? "" : ", ", dexStringByTypeIdx(pDexFile, pDexTypeList->list[k].typeIdx), (unsigned)k);
            printf(")");
            printf("{\r\n");

            /* NOTE(review): assumed to be NULL for methods without code. */
            CONST DexCode* pDexCode = dexGetCode(pDexFile, (CONST DexMethod*) & pDexClassData->directMethods[z]);
            if (pDexCode != NULL) {
                /* each label now names the field actually printed */
                printf("registers:%d insSize:%d outsSize:%d insnsSize:%u\r\n", pDexCode->registersSize, pDexCode->insSize, pDexCode->outsSize, (unsigned)pDexCode->insnsSize);
                /* insnsSize is a u4; a u2 counter would wrap before reaching it */
                for (u4 x = 0; x < pDexCode->insnsSize; x++)
                    printf("%04X ", pDexCode->insns[x]);
            }
            printf("\t}\r\n\r\n");
        }

        /*
         * NOTE(review): in AOSP libdex the structure returned by
         * dexReadAndVerifyClassData() is a single malloc block owned by the
         * caller -- confirm for the copy used here.
         */
        free(pDexClassData);
        printf("}\r\n");
    }
}

字节码部分

本节参考资料:https://source.android.google.cn/docs/core/runtime/dalvik-bytecode?hl=zh-cn 和 https://source.android.google.cn/docs/core/runtime/instruction-formats?hl=zh-cn

例如其中的<init>方法是默认的构造方法,字节码如下:

1
1070 0003 0000 000E

从第一个16位字开始,按小端序排列。例如先取1070中的70。其中70表示invoke-direct指令,格式为35c,意思是该指令由3个16位字组成,最多用5个寄存器,c类常量池索引,格式如下:

1
2
3
4
5
6
7
8
A|G|op BBBB F|E|D|C
[A=5] op {vC,vD,vE,vF,vG},meth@BBBB
[A=5] op {vC,vD,vE,vF,vG},type@BBBB
[A=4] op {vC,vD,vE,vF},kind@BBBB
[A=3] op {vC,vD,vE},kind@BBBB
[A=2] op {vC,vD},kind@BBBB
[A=1] op {vC},kind@BBBB <-A=1是这个 kind@0003 0000分别为vC/vD/vE/vF 这里vC为v0
[A=0] op {},kind@BBBB

对于最后的000E,0E为return-void指令,格式10x,x为无额外数据,格式00|op。

此外例如62代表sget-object,格式21c;1A代表const-string,格式21c;6E为invoke-virtual,格式35c。

DexDump中反汇编函数dumpBytecodes直接使用即可:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
 * Dump a bytecode disassembly.
 *
 * Prints one header line for the method (code-item offset, dotted class
 * name, method name, signature) and then walks the method's 16-bit code
 * units, decoding and printing one instruction -- or one switch/array
 * payload -- per iteration.
 *
 * pDexFile:   DEX file the method belongs to.
 * pDexMethod: method whose code is disassembled.
 *
 * dexGetCode() returns NULL when the method has no code item
 * (codeOff == 0); such methods are skipped without printing anything
 * instead of dereferencing the NULL pointer.
 */
void dumpBytecodes(DexFile* pDexFile, const DexMethod* pDexMethod) {
    const DexCode* pCode = dexGetCode(pDexFile, pDexMethod);
    const u2* insns;
    int insnIdx;                /* index of the current code unit */
    FieldMethodInfo methInfo;
    int startAddr;
    char* className = NULL;

    if (pCode == NULL)
        return;                 /* no code item: nothing to disassemble */

    assert(pCode->insnsSize > 0);
    insns = pCode->insns;       /* first code unit of this method */

    methInfo.classDescriptor = methInfo.name = methInfo.signature = NULL;
    getMethodInfo(pDexFile, pDexMethod->methodIdx, &methInfo);

    /* Offset of the code item from the start of the DEX data. */
    startAddr = ((u1*)pCode - pDexFile->baseAddr);
    className = descriptorToDot(methInfo.classDescriptor);
    printf("%06x:\t|[%06x] %s.%s:%s\n", startAddr, startAddr, className, methInfo.name, methInfo.signature);
    /* The signature string is released here; className is released at the end. */
    free((void*)methInfo.signature);

    insnIdx = 0;
    while (insnIdx < (int)pCode->insnsSize) {   /* decode loop */
        int insnWidth;
        DecodedInstruction decInsn;
        u2 instr;

        /*
         * Note: This code parallels the function
         * dexGetWidthFromInstruction() in InstrUtils.c, but this version
         * can deal with data in either endianness.
         *
         * TODO: Figure out if this really matters, and possibly change
         * this to just use dexGetWidthFromInstruction().
         */
        instr = get2LE((const u1*)insns);
        if (instr == kPackedSwitchSignature) {
            insnWidth = 4 + get2LE((const u1*)(insns + 1)) * 2;
        } else if (instr == kSparseSwitchSignature) {
            insnWidth = 2 + get2LE((const u1*)(insns + 1)) * 4;
        } else if (instr == kArrayDataSignature) {
            int width = get2LE((const u1*)(insns + 1));
            int size = get2LE((const u1*)(insns + 2)) | (get2LE((const u1*)(insns + 3)) << 16);
            // The plus 1 is to round up for odd size and width.
            insnWidth = 4 + ((size * width) + 1) / 2;
        } else {
            /* Ordinary instruction: look the width up by opcode. */
            Opcode opcode = dexOpcodeFromCodeUnit(instr);
            insnWidth = dexGetWidthFromOpcode(opcode);
            if (insnWidth == 0) {
                /* A zero width would never advance; stop the walk. */
                fprintf(stderr, "GLITCH: zero-width instruction at idx=0x%04x\n", insnIdx);
                break;
            }
        }

        /* Decode the code units at insns into decInsn, then print them. */
        dexDecodeInstruction(insns, &decInsn);
        dumpInstruction(pDexFile, pCode, insnIdx, insnWidth, &decInsn);

        insns += insnWidth;     /* start of the next instruction */
        insnIdx += insnWidth;   /* code units consumed so far */
    }

    free(className);
    return;
}
/*
 * Abridged class dumper showing where dumpBytecodes() is hooked in.
 *
 * This is a shortened listing: pDexClassData, pDexProtoId and pDexCode are
 * set up by steps that are omitted here, so the function is not complete
 * on its own.
 *
 * For every class definition it prints the source file name; for every
 * direct method it prints the raw code units in hex and then the
 * disassembly produced by dumpBytecodes().
 */
VOID PrintClassDefNew(DexFile* pDexFile) {
    for (u4 i = 0; i < pDexFile->pHeader->classDefsSize; i++) {
        CONST DexClassDef* pDexClassDef = dexGetClassDef(pDexFile, i);
        printf("%s\r\n", dexGetSourceFile(pDexFile, pDexClassDef)); // source file the class belongs to
    }
    // class, superclass and field output omitted
    for (u4 z = 0; z < pDexClassData->header.directMethodsSize; z++) { // methods of the class
        CONST DexTypeList* pDexTypeList = dexGetProtoParameters(pDexFile, pDexProtoId); // parameter list
        u4 num = pDexTypeList != NULL ? pDexTypeList->size : 0;
        // parameter and per-method detail output omitted
        // Raw code units. The field is insnsSize (as used everywhere else in
        // this file); a u4 counter cannot wrap before reaching the count.
        for (u4 x = 0; x < pDexCode->insnsSize; x++)
            printf("%04X ", pDexCode->insns[x]);
        dumpBytecodes(pDexFile, (CONST DexMethod*) & pDexClassData->directMethods[z]);
        printf("\r\n\t}\r\n\r\n");
    }
    printf("}\r\n");
    return;
}

其中上述重要流程如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
/*
 * Locate the DexCode that belongs to a DexMethod.
 *
 * A codeOff of zero means the method has no code item (per the original
 * note: native or abstract), in which case NULL is returned. Otherwise the
 * result is the address codeOff bytes past the start of the DEX data.
 */
DEX_INLINE const DexCode* dexGetCode(const DexFile* pDexFile, const DexMethod* pDexMethod) {
    return (pDexMethod->codeOff != 0)
        ? (const DexCode*)(pDexFile->baseAddr + pDexMethod->codeOff)
        : NULL;
}
/*
* Holds the contents of a decoded instruction.
*
* Filled in by dexDecodeInstruction(). Which members are meaningful depends
* on the instruction format; members a format does not use are left
* untouched by the decoder.
*/
struct DecodedInstruction {
u4 vA; /* first operand; for the 35c/45cc formats this holds the argument count */
u4 vB; /* second operand, e.g. the index or literal read from the following code unit(s) */
u8 vB_wide; /* for kFmt51l */
u4 vC; /* third operand; first argument register in the invoke-style formats */
u4 arg[5]; /* vC/D/E/F/G in invoke or filled-new-array */
Opcode opcode; /* opcode taken from the first code unit */
InstructionIndexType indexType; /* value of dexGetIndexTypeFromOpcode() for this opcode */
};
/*
* Decode the instruction pointed to by "insns".
*
* Fills out the pieces of "pDec" that are affected by the current
* instruction. Does not touch anything else.
*
* insns: first 16-bit code unit of the instruction. Later units are read
*        with FETCH()/FETCH_u4(), which index this same pointer.
* pDec:  receives the decoded fields. opcode and indexType are always set;
*        the remaining members are set according to the format.
*
* For an invalid argument count in the 35c/35ms/35mi formats the function
* logs a warning and jumps to "bail", returning with pDec only partially
* filled in.
*/
void dexDecodeInstruction(const u2* insns, DecodedInstruction* pDec) {
u2 inst = *insns;
Opcode opcode = dexOpcodeFromCodeUnit(inst); // opcode taken from the first code unit
InstructionFormat format = dexGetFormatFromOpcode(opcode); // instruction format of that opcode
pDec->opcode = opcode; // record the opcode
pDec->indexType = dexGetIndexTypeFromOpcode(opcode); // index type implied by the opcode
switch (format) {
case kFmt10x: // op
/* nothing to do; copy the AA bits out for the verifier */
pDec->vA = INST_AA(inst);
break;
case kFmt12x: // op vA, vB
pDec->vA = INST_A(inst);
pDec->vB = INST_B(inst);
break;
case kFmt11n: // op vA, #+B
pDec->vA = INST_A(inst);
pDec->vB = (s4)(INST_B(inst) << 28) >> 28; // sign extend 4-bit value
break;
case kFmt11x: // op vAA
pDec->vA = INST_AA(inst);
break;
case kFmt10t: // op +AA
pDec->vA = (s1)INST_AA(inst); // sign-extend 8-bit value
break;
case kFmt20t: // op +AAAA
pDec->vA = (s2)FETCH(1); // sign-extend 16-bit value
break;
case kFmt20bc: // [opt] op AA, thing@BBBB
case kFmt21c: // op vAA, thing@BBBB
case kFmt22x: // op vAA, vBBBB
pDec->vA = INST_AA(inst);
pDec->vB = FETCH(1);
break;
case kFmt21s: // op vAA, #+BBBB
case kFmt21t: // op vAA, +BBBB
pDec->vA = INST_AA(inst);
pDec->vB = (s2)FETCH(1); // sign-extend 16-bit value
break;
case kFmt21h: // op vAA, #+BBBB0000[00000000]
pDec->vA = INST_AA(inst);
/*
* The value should be treated as right-zero-extended, but we don't
* actually do that here. Among other things, we don't know if it's
* the top bits of a 32- or 64-bit value.
*/
pDec->vB = FETCH(1);
break;
case kFmt23x: // op vAA, vBB, vCC
pDec->vA = INST_AA(inst);
pDec->vB = FETCH(1) & 0xff;
pDec->vC = FETCH(1) >> 8;
break;
case kFmt22b: // op vAA, vBB, #+CC
pDec->vA = INST_AA(inst);
pDec->vB = FETCH(1) & 0xff;
pDec->vC = (s1)(FETCH(1) >> 8); // sign-extend 8-bit value
break;
case kFmt22s: // op vA, vB, #+CCCC
case kFmt22t: // op vA, vB, +CCCC
pDec->vA = INST_A(inst);
pDec->vB = INST_B(inst);
pDec->vC = (s2)FETCH(1); // sign-extend 16-bit value
break;
case kFmt22c: // op vA, vB, thing@CCCC
case kFmt22cs: // [opt] op vA, vB, field offset CCCC
pDec->vA = INST_A(inst);
pDec->vB = INST_B(inst);
pDec->vC = FETCH(1);
break;
case kFmt30t: // op +AAAAAAAA
pDec->vA = FETCH_u4(1); // signed 32-bit value
break;
case kFmt31t: // op vAA, +BBBBBBBB
case kFmt31c: // op vAA, string@BBBBBBBB
pDec->vA = INST_AA(inst);
pDec->vB = FETCH_u4(1); // 32-bit value
break;
case kFmt32x: // op vAAAA, vBBBB
pDec->vA = FETCH(1);
pDec->vB = FETCH(2);
break;
case kFmt31i: // op vAA, #+BBBBBBBB
pDec->vA = INST_AA(inst);
pDec->vB = FETCH_u4(1); // signed 32-bit value
break;
case kFmt35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
case kFmt35ms: // [opt] invoke-virtual+super
case kFmt35mi: // [opt] inline invoke
{
/*
* Note that the fields mentioned in the spec don't appear in
* their "usual" positions here compared to most formats. This
* was done so that the field names for the argument count and
* reference index match between this format and the corresponding
* range formats (3rc and friends).
*
* Bottom line: The argument count is always in vA, and the
* method constant (or equivalent) is always in vB.
*/
u2 regList;
int count;
// NOTE: vA is stored before the count is validated below, so on the
// "bail" path vA can still hold a value greater than 5.
pDec->vA = INST_B(inst); // This is labeled A in the spec.
pDec->vB = FETCH(1);
regList = FETCH(2);
count = pDec->vA;
/*
* Copy the argument registers into the arg[] array, and
* also copy the first argument (if any) into vC. (The
* DecodedInstruction structure doesn't have separate
* fields for {vD, vE, vF, vG}, so there's no need to make
* copies of those.) Note that cases 5..2 fall through.
*/
switch (count) {
case 5:
{
if (format == kFmt35mi) {
/* A fifth arg is verboten for inline invokes. */
ALOGW("Invalid arg count in 35mi (5)");
goto bail;
};
/*
* Per note at the top of this format decoder, the
* fifth argument comes from the A field in the
* instruction, but it's labeled G in the spec.
*/
pDec->arg[4] = INST_A(inst);
FALLTHROUGH_INTENDED;
};
case 4: pDec->arg[3] = (regList >> 12) & 0x0f; FALLTHROUGH_INTENDED;
case 3: pDec->arg[2] = (regList >> 8) & 0x0f; FALLTHROUGH_INTENDED;
case 2: pDec->arg[1] = (regList >> 4) & 0x0f; FALLTHROUGH_INTENDED;
case 1: pDec->vC = pDec->arg[0] = regList & 0x0f; break;
case 0: break; // Valid, but no need to do anything.
default:
ALOGW("Invalid arg count in 35c/35ms/35mi (%d)", count);
goto bail;
};
};
break;
case kFmt3rc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
case kFmt3rms: // [opt] invoke-virtual+super/range
case kFmt3rmi: // [opt] execute-inline/range
pDec->vA = INST_AA(inst);
pDec->vB = FETCH(1);
pDec->vC = FETCH(2);
break;
case kFmt51l: // op vAA, #+BBBBBBBBBBBBBBBB
pDec->vA = INST_AA(inst);
pDec->vB_wide = FETCH_u4(1) | ((u8)FETCH_u4(3) << 32);
break;
case kFmt45cc:
{
// AG op BBBB FEDC HHHH
// Unlike 35c, the first register goes only into vC; arg[0..3] hold
// vD..vG and arg[4] holds the proto index.
pDec->vA = INST_B(inst); // This is labelled A in the spec.
pDec->vB = FETCH(1); // vB meth@BBBB
u2 fedc = FETCH(2);
pDec->vC = fedc & 0xf;
pDec->arg[0] = (fedc >> 4) & 0xf; // vD
pDec->arg[1] = (fedc >> 8) & 0xf; // vE
pDec->arg[2] = (fedc >> 12); // vF
pDec->arg[3] = INST_A(inst); // vG
pDec->arg[4] = FETCH(3); // vH proto@HHHH
};
break;
case kFmt4rcc:
{
// AA op BBBB CCCC HHHH
pDec->vA = INST_AA(inst);
pDec->vB = FETCH(1);
pDec->vC = FETCH(2);
pDec->arg[4] = FETCH(3); // vH proto@HHHH
};
break;
default:
ALOGW("Can't decode unexpected format %d (op=%d)", format, opcode);
assert(false);
break;
};
// Common exit; also the target of the "goto bail" error paths above.
bail:
;
};
/*
 * Map a raw 16-bit code unit (which may carry data payload in its other
 * bits) to its Opcode.
 *
 * The opcode is the low byte of the code unit; the result is that byte
 * cast to Opcode, which is also the zero-based packed index used to look
 * into the opcode-related tables.
 *
 * Note: This has to match the corresponding code in opcode-gen, so that
 * data tables get generated in a consistent way. It will want to become
 * table-driven should the opcode layout get more complicated.
 */
DEX_INLINE Opcode dexOpcodeFromCodeUnit(u2 codeUnit) {
    return (Opcode)(int)(codeUnit & 0xff);
}
/*
* Enumeration of all Dalvik opcodes, where the enumeration value
* associated with each is the corresponding packed opcode number.
* This is different than the opcode value from the Dalvik bytecode
* spec for opcode values >= 0xff; see dexOpcodeFromCodeUnit() below.
*
* A note about the "breakpoint" opcode. This instruction is special,
* in that it should never be seen by anything but the debug
* interpreter. During debugging it takes the place of an arbitrary
* opcode, which means operations like "tell me the opcode width so I
* can find the next instruction" aren't possible. (This is
* correctable, but probably not useful.)
*
* Only the first two and the last enumerator are shown in this listing;
* the entries in between are elided.
*/
enum Opcode {
// BEGIN(libdex-opcode-enum); GENERATED AUTOMATICALLY BY opcode-gen
OP_NOP = 0x00,
OP_MOVE = 0x01,
// ... (remaining opcodes elided in this listing)
OP_CONST_METHOD_TYPE = 0xff,
// END(libdex-opcode-enum)
};
/*
 * Look up the instruction format of an opcode.
 *
 * The opcode is used directly as the index into the formats table of
 * gDexOpcodeInfo; it is asserted to be below kNumPackedOpcodes.
 */
DEX_INLINE InstructionFormat dexGetFormatFromOpcode(Opcode opcode) {
    assert((u4)opcode < kNumPackedOpcodes);
    InstructionFormat format = (InstructionFormat)gDexOpcodeInfo.formats[opcode];
    return format;
}
/*
 * Look up the instruction index type of an opcode.
 *
 * The opcode is used directly as the index into the indexTypes table of
 * gDexOpcodeInfo; it is asserted to be below kNumPackedOpcodes.
 */
DEX_INLINE InstructionIndexType dexGetIndexTypeFromOpcode(Opcode opcode) {
    assert((u4)opcode < kNumPackedOpcodes);
    InstructionIndexType indexType = (InstructionIndexType)gDexOpcodeInfo.indexTypes[opcode];
    return indexType;
}
/*
* Struct that includes a pointer to each of the opcode information
* tables.
*
* Note: We use "u1*" here instead of the names of the enumerated
* types to guarantee that elements don't use much space. We hold out
* hope for a standard way to indicate the size of an enumerated type
* that works for both C and C++, but in the mean time, this will
* suffice.
*
* The member order matters: gDexOpcodeInfo initializes this struct
* positionally.
*/
struct InstructionInfoTables {
u1* formats; /* InstructionFormat elements: instruction format per opcode */
u1* indexTypes; /* InstructionIndexType elements: operand index type per opcode */
OpcodeFlags* flags; /* OpcodeFlags elements, one per opcode */
InstructionWidth* widths; /* instruction width per opcode */
};
/*
* Table that maps each opcode to the full width of instructions that
* use that opcode, in (16-bit) code units. Unimplemented opcodes as
* well as the "breakpoint" opcode have a width of zero.
*
* Only the first row of 16 entries is shown in this listing.
*/
static InstructionWidth gInstructionWidthTable[kNumPackedOpcodes] = {
// BEGIN(libdex-widths); GENERATED AUTOMATICALLY BY opcode-gen
1, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 1, 1,
// ... (remaining rows elided in this listing)
// END(libdex-widths)
};
/*
* Table that maps each opcode to the instruction format associated
* that opcode.
*
* Elements are InstructionFormat values stored as u1; the table is read
* through gDexOpcodeInfo.formats. Only the first seven entries are shown
* in this listing.
*/
static u1 gInstructionFormatTable[kNumPackedOpcodes] = {
// BEGIN(libdex-formats); GENERATED AUTOMATICALLY BY opcode-gen
kFmt10x, kFmt12x, kFmt22x, kFmt32x, kFmt12x, kFmt22x, kFmt32x,
// ... (remaining entries elided in this listing)
// END(libdex-formats)
};
/*
* Table that maps each opcode to the index type implied by that
* opcode.
*
* Elements are InstructionIndexType values stored as u1; the table is read
* through gDexOpcodeInfo.indexTypes. Only the first three entries are shown
* in this listing.
*/
static u1 gInstructionIndexTypeTable[kNumPackedOpcodes] = {
// BEGIN(libdex-index-types); GENERATED AUTOMATICALLY BY opcode-gen
kIndexNone, kIndexNone, kIndexNone,
// ... (remaining entries elided in this listing)
// END(libdex-index-types)
};
/*
* Global InstructionInfoTables struct.
*
* The initializer is positional, so the entries must stay in the member
* order of InstructionInfoTables: formats, indexTypes, flags, widths.
*/
InstructionInfoTables gDexOpcodeInfo = {
gInstructionFormatTable, /* formats */
gInstructionIndexTypeTable, /* indexTypes */
gOpcodeFlagsTable, /* flags */
gInstructionWidthTable /* widths */
};
/*
* Handy macros for helping decode instructions.
*/
#define FETCH(_offset) (insns[(_offset)])
#define FETCH_u4(_offset) (fetch_u4_impl((_offset), insns))
#define INST_A(_inst) (((u2)(_inst) >> 8) & 0x0f)
#define INST_B(_inst) ((u2)(_inst) >> 12)
#define INST_AA(_inst) ((_inst) >> 8)

还有在dumpBytecodes中调用的dumpInstruction函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*
 * Dump a single instruction.
 *
 * Prints one line made of: the byte offset of the instruction within the
 * DEX data, up to seven of its raw 16-bit code units, the code-unit index,
 * the opcode name (or a payload description), and the operands formatted
 * according to the instruction format.
 *
 * pDexFile:  DEX file being dumped; baseAddr is used to compute the offset.
 * pCode:     code item the instruction belongs to.
 * insnIdx:   index of the instruction's first code unit in pCode->insns.
 * insnWidth: width of the instruction (or payload) in code units.
 * pDecInsn:  fields previously decoded by dexDecodeInstruction().
 *
 * The decoder stores the argument count in vA before validating it, so a
 * malformed 35c/45cc instruction can arrive here with vA larger than the
 * arg[] array. The register-list loops are therefore clamped to the array
 * size instead of reading past it.
 */
void dumpInstruction(DexFile* pDexFile, const DexCode* pCode, int insnIdx,int insnWidth, const DecodedInstruction* pDecInsn) {
    const u2* insns = pCode->insns;     /* first code unit of the method */
    /* Number of slots in DecodedInstruction.arg[]. */
    const int maxArgs = (int)(sizeof(pDecInsn->arg) / sizeof(pDecInsn->arg[0]));
    int i;

    // Address of instruction (expressed as byte offset).
    // The pointer difference is cast so the argument matches %zx.
    printf("%06zx:", (size_t)(((const u1*)insns - pDexFile->baseAddr) + insnIdx * 2));

    /* Raw code units: at most seven are shown, then an ellipsis. */
    for (i = 0; i < 8; i++) {
        if (i < insnWidth) {
            if (i == 7) {
                printf(" ... ");
            } else {
                /* print 16-bit value in little-endian order */
                const u1* bytePtr = (const u1*)&insns[insnIdx + i];
                printf(" %02x%02x", bytePtr[0], bytePtr[1]);
            }
        } else {
            fputs(" ", stdout);
        }
    }

    /* Mnemonic. Payload pseudo-instructions decode as OP_NOP. */
    if (pDecInsn->opcode == OP_NOP) {
        u2 instr = get2LE((const u1*)&insns[insnIdx]);
        if (instr == kPackedSwitchSignature)
            printf("|%04x: packed-switch-data (%d units)",insnIdx, insnWidth);
        else if (instr == kSparseSwitchSignature)
            printf("|%04x: sparse-switch-data (%d units)",insnIdx, insnWidth);
        else if (instr == kArrayDataSignature)
            printf("|%04x: array-data (%d units)",insnIdx, insnWidth);
        else
            printf("|%04x: nop // spacer", insnIdx);
    } else {
        printf("|%04x: %s", insnIdx, dexGetOpcodeName(pDecInsn->opcode));
    }

    // Provide an initial buffer that usually suffices, although indexString()
    // may reallocate the buffer if more space is needed.
    char* indexBuf = NULL;
    if (pDecInsn->indexType != kIndexNone)
        indexBuf = indexString(pDexFile, pDecInsn, 200);   /* text for the index operand */

    switch (dexGetFormatFromOpcode(pDecInsn->opcode)) {
    case kFmt10x: // op
        break;
    case kFmt12x: // op vA, vB
        printf(" v%d, v%d", pDecInsn->vA, pDecInsn->vB);
        break;
    case kFmt11n: // op vA, #+B
        printf(" v%d, #int %d // #%x",pDecInsn->vA, (s4)pDecInsn->vB, (u1)pDecInsn->vB);
        break;
    case kFmt11x: // op vAA
        printf(" v%d", pDecInsn->vA);
        break;
    case kFmt10t: // op +AA
    case kFmt20t: // op +AAAA
    {
        /* Branch target is relative to this instruction's index. */
        s4 targ = (s4)pDecInsn->vA;
        printf(" %04x // %c%04x", insnIdx + targ, (targ < 0) ? '-' : '+', (targ < 0) ? -targ : targ);
        break;
    }
    case kFmt22x: // op vAA, vBBBB
        printf(" v%d, v%d", pDecInsn->vA, pDecInsn->vB);
        break;
    case kFmt21t: // op vAA, +BBBB
    {
        s4 targ = (s4)pDecInsn->vB;
        printf(" v%d, %04x // %c%04x", pDecInsn->vA, insnIdx + targ, (targ < 0) ? '-' : '+', (targ < 0) ? -targ : targ);
        break;
    }
    case kFmt21s: // op vAA, #+BBBB
        printf(" v%d, #int %d // #%x",pDecInsn->vA, (s4)pDecInsn->vB, (u2)pDecInsn->vB);
        break;
    case kFmt21h: // op vAA, #+BBBB0000[00000000]
        // The printed format varies a bit based on the actual opcode.
        if (pDecInsn->opcode == OP_CONST_HIGH16) {
            s4 value = pDecInsn->vB << 16;
            printf(" v%d, #int %d // #%x", pDecInsn->vA, value, (u2)pDecInsn->vB);
        } else {
            /* Shift as unsigned: shifting into the sign bit of a signed
             * 64-bit value is not well defined. */
            s8 value = (s8)(((u8)pDecInsn->vB) << 48);
            printf(" v%d, #long %" PRId64 " // #%x", pDecInsn->vA, value, (u2)pDecInsn->vB);
        }
        break;
    case kFmt21c: // op vAA, thing@BBBB
    case kFmt31c: // op vAA, thing@BBBBBBBB
        printf(" v%d, %s", pDecInsn->vA, indexBuf);
        break;
    case kFmt23x: // op vAA, vBB, vCC
        printf(" v%d, v%d, v%d", pDecInsn->vA, pDecInsn->vB, pDecInsn->vC);
        break;
    case kFmt22b: // op vAA, vBB, #+CC
        printf(" v%d, v%d, #int %d // #%02x",pDecInsn->vA, pDecInsn->vB, (s4)pDecInsn->vC, (u1)pDecInsn->vC);
        break;
    case kFmt22t: // op vA, vB, +CCCC
    {
        s4 targ = (s4)pDecInsn->vC;
        printf(" v%d, v%d, %04x // %c%04x", pDecInsn->vA, pDecInsn->vB, insnIdx + targ, (targ < 0) ? '-' : '+', (targ < 0) ? -targ : targ);
        break;
    }
    case kFmt22s: // op vA, vB, #+CCCC
        printf(" v%d, v%d, #int %d // #%04x",pDecInsn->vA, pDecInsn->vB, (s4)pDecInsn->vC, (u2)pDecInsn->vC);
        break;
    case kFmt22c: // op vA, vB, thing@CCCC
    case kFmt22cs: // [opt] op vA, vB, field offset CCCC
        printf(" v%d, v%d, %s", pDecInsn->vA, pDecInsn->vB, indexBuf);
        break;
    case kFmt30t:
        printf(" #%08x", pDecInsn->vA);
        break;
    case kFmt31i: // op vAA, #+BBBBBBBB
    {
        /* this is often, but not always, a float */
        union {
            float f;
            u4 i;
        } conv;
        conv.i = pDecInsn->vB;
        printf(" v%d, #float %f // #%08x", pDecInsn->vA, conv.f, pDecInsn->vB);
        break;
    }
    case kFmt31t: // op vAA, offset +BBBBBBBB
        printf(" v%d, %08x // +%08x",pDecInsn->vA, insnIdx + pDecInsn->vB, pDecInsn->vB);
        break;
    case kFmt32x: // op vAAAA, vBBBB
        printf(" v%d, v%d", pDecInsn->vA, pDecInsn->vB);
        break;
    case kFmt35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
    case kFmt35ms: // [opt] invoke-virtual+super
    case kFmt35mi: // [opt] inline invoke
    {
        /* Register list; never read more slots than arg[] has. */
        int argCount = (int)pDecInsn->vA;
        if (argCount > maxArgs)
            argCount = maxArgs;
        fputs(" {", stdout);
        for (i = 0; i < argCount; i++) {
            if (i == 0)
                printf("v%d", pDecInsn->arg[i]);
            else
                printf(", v%d", pDecInsn->arg[i]);
        }
        printf("}, %s", indexBuf);
        break;
    }
    case kFmt3rc: // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
    case kFmt3rms: // [opt] invoke-virtual+super/range
    case kFmt3rmi: // [opt] execute-inline/range
    {
        /*
         * This doesn't match the "dx" output when some of the args are
         * 64-bit values -- dx only shows the first register.
         */
        fputs(" {", stdout);
        for (i = 0; i < (int)pDecInsn->vA; i++) {
            if (i == 0)
                printf("v%d", pDecInsn->vC + i);
            else
                printf(", v%d", pDecInsn->vC + i);
        }
        printf("}, %s", indexBuf);
        break;
    }
    case kFmt51l: // op vAA, #+BBBBBBBBBBBBBBBB
    {
        /* this is often, but not always, a double */
        union {
            double d;
            u8 j;
        } conv;
        conv.j = pDecInsn->vB_wide;
        printf(" v%d, #double %f // #%016" PRIx64, pDecInsn->vA, conv.d, pDecInsn->vB_wide);
        break;
    }
    case kFmt00x: // unknown op or breakpoint
        break;
    case kFmt45cc:
    {
        /*
         * vC is the first register and arg[0..3] hold vD..vG; the last
         * arg[] slot is the proto index written by the decoder, so at
         * most maxArgs - 1 further registers are printed.
         */
        int extraRegs = (int)pDecInsn->vA - 1;
        if (extraRegs > maxArgs - 1)
            extraRegs = maxArgs - 1;
        fputs(" {", stdout);
        printf("v%d", pDecInsn->vC);
        for (i = 0; i < extraRegs; ++i)
            printf(", v%d", pDecInsn->arg[i]);
        printf("}, %s", indexBuf);
        break;
    }
    case kFmt4rcc:
    {
        fputs(" {", stdout);
        printf("v%d", pDecInsn->vC);
        for (i = 1; i < (int)pDecInsn->vA; ++i)
            printf(", v%d", pDecInsn->vC + i);
        printf("}, %s", indexBuf);
        break;
    }
    default:
        printf(" ???");
        break;
    }

    putchar('\n');
    free(indexBuf);     /* free(NULL) is a no-op when no index text was built */
    return;
}