- 论坛徽章:
- 0
|
在x86-64 指令集中,比较难产生代码的指令是那些转移指令,例如:jmp 和 call之类的指令,难在由于x86指令是不定长的,jmp 和 call这些转移指令,最短2个字节,最长可达8-10个字节,而转移指令的长度取决于它到目标的距离,这个距离又取决于两者之间各条指令(包括其他转移指令)的长度。这有点类似:鸡与鸡蛋的关系!所以难度可想而知!
我在a64编译器产品的处理上,用了我自认为是极为巧妙的方法实现,并没有用到什么特别的数据结构。
以下举几个例子,例中分别用了 a64, gnu assembler(gas) 以及 nasm 作为对比。它们的指令是一样的,只是表达的语法不同而已。
1、a64 源文件:
- ! a64 syntax, 32-bit mode. Mixes forward and backward call/jmp
- ! references so the encodings chosen by the assembler can be compared.
- .bits 32
- @label:
- mov [ecx+0x0c], edx
- mov eax, edx
- @label2:
- mov (dword)[ecx], 0x01
- ! forward references: @label1 is only defined further down
- call @label1
- jmp @label1
- ! backward reference: @label is already defined above
- jmp @label
- @label3:
- mov eax,1
- ! backward call to the first label of the listing
- @label1: call @label
复制代码
2、gas 源文件:
- # GNU as, AT&T syntax (source operand first). Same instruction
- # sequence as the a64 listing: forward and backward call/jmp references.
- label:
- mov %edx, 0x0c(%ecx)
- mov %edx, %eax
- label2:
- movl $0x01, (%ecx)
- # forward references: label1 is only defined further down
- call label1
- jmp label1
- # backward reference: label is already defined above
- jmp label
- label3:
- mov $0x01, %eax
- # backward call to the first label of the listing
- label1: call label
复制代码
3、nasm 源文件:
- ; NASM syntax, 32-bit mode. Same instruction sequence as the a64
- ; listing: forward and backward call/jmp references.
- bits 32
- label:
- mov [ecx+0x0c], edx
- mov eax, edx
- label2:
- mov dword[ecx], 0x01
- ; forward references: label1 is only defined further down
- call label1
- jmp label1
- ; backward reference: label is already defined above
- jmp label
- label3:
- mov eax,1
- ; backward call to the first label of the listing
- label1: call label
复制代码
以下是编译生成的代码
1、 a64生成的:
- 89 51 c 89 d0 c7 1 1 0 0 0 e8 9 0 0 0 eb 7 eb ec b8 1 0 0 0 e8 e2 ff ff ff
复制代码
2、 gas 生成的:
- 00000000 <label>:
- 0: 89 51 0c mov %edx,0xc(%ecx)
- 3: 89 d0 mov %edx,%eax
- 00000005 <label2>:
- 5: c7 01 01 00 00 00 movl $0x1,(%ecx)
- b: e8 09 00 00 00 call 19 <label1>
- 10: eb 07 jmp 19 <label1>
- 12: eb ec jmp 0 <label>
- 00000014 <label3>:
- 14: b8 01 00 00 00 mov $0x1,%eax
- 00000019 <label1>:
- 19: e8 e2 ff ff ff call 0 <label>
复制代码
3、 nasm 生成的:
- 00000000 89510C mov [ecx+0xc],edx
- 00000003 89D0 mov eax,edx
- 00000005 C70101000000 mov dword [ecx],0x1
- 0000000B E80C000000 call 0x1c
- 00000010 E907000000 jmp 0x1c
- 00000015 EBE9 jmp short 0x0
- 00000017 B801000000 mov eax,0x1
- 0000001C E8DFFFFFFF call 0x0
复制代码
a64 和 gnu as 生成的代码是一样的,而 与nasm生成代码有些差异,但nasm的也是对的,
nasm在处理 jmp label1这条指令上用的是5个字节的指令。
a64 和 gas 产生的是2个字节的指令。在这方面 nasm 稍差一点(这是 nasm 未开启优化时对向前引用的处理方式;加上 -Ox 选项,或者显式写成 jmp short label1,nasm 同样可以生成2个字节的形式)。
下面,我们再来看几条更为复杂的转移指令形式的结果。
1、 首先是a64源文件:
- ! a64 syntax, 32-bit mode. Seven call/jmp instructions whose targets
- ! interleave: forward, backward, "next instruction" and self references.
- .bits 32
- @label:
- mov [ecx+0x0c], edx
- mov eax, edx
- @label2:
- mov (dword)[ecx], 0x01
- ! forward references to @label1 at the end of the listing
- call @label1
- jmp @label1
- ! forward jump whose target is the very next instruction
- jmp @label3
- @label3:
- ! jump to itself
- jmp @label3
- ! forward call whose target is the very next instruction
- call @label4
- @label4:
- ! call to itself
- call @label4
- ! backward call to the first label of the listing
- @label1: call @label
复制代码
2、 来看看gas 的源文件
- # GNU as, AT&T syntax (source operand first). Seven call/jmp
- # instructions whose targets interleave: forward, backward,
- # "next instruction" and self references.
- label:
- mov %edx, 0x0c(%ecx)
- mov %edx, %eax
- label2:
- movl $0x01, (%ecx)
- # forward references to label1 at the end of the listing
- call label1
- jmp label1
- # forward jump whose target is the very next instruction
- jmp label3
- label3:
- # jump to itself
- jmp label3
- # forward call whose target is the very next instruction
- call label4
- label4:
- # call to itself
- call label4
- # backward call to the first label of the listing
- label1: call label
复制代码
3、 最后是nasm的源文件,基本上与 a64的语法一致,只是有一点点不同
- ; NASM syntax, 32-bit mode. Seven call/jmp instructions whose targets
- ; interleave: forward, backward, "next instruction" and self references.
- bits 32
- label:
- mov [ecx+0x0c], edx
- mov eax, edx
- label2:
- mov dword [ecx], 0x01
- ; forward references to label1 at the end of the listing
- call label1
- jmp label1
- ; forward jump whose target is the very next instruction
- jmp label3
- label3:
- ; jump to itself
- jmp label3
- ; forward call whose target is the very next instruction
- call label4
- label4:
- ; call to itself
- call label4
- ; backward call to the first label of the listing
- label1: call label
复制代码
以上示例代码中,有7条转移指令,相互交错在一起。看一看结果
1、 a64产生的结果
- 89 51 c 89 d0 c7 1 1 0 0 0 e8 10 0 0 0 eb e eb 0 eb fe e8 0 0 0 0 e8 fb ff ff ff e8 db ff ff ff
复制代码
2、 gas 生成的结果(用 objdump 反汇编出来的)
- 00000000 <label>:
- 0: 89 51 0c mov %edx,0xc(%ecx)
- 3: 89 d0 mov %edx,%eax
- 00000005 <label2>:
- 5: c7 01 01 00 00 00 movl $0x1,(%ecx)
- b: e8 10 00 00 00 call 20 <label1>
- 10: eb 0e jmp 20 <label1>
- 12: eb 00 jmp 14 <label3>
- 00000014 <label3>:
- 14: eb fe jmp 14 <label3>
- 16: e8 00 00 00 00 call 1b <label4>
- 0000001b <label4>:
- 1b: e8 fb ff ff ff call 1b <label4>
- 00000020 <label1>:
- 20: e8 db ff ff ff call 0 <label>
复制代码
3、 nasm产生的结果 (用nasm自带的ndisasm )
- 00000000 89510C mov [ecx+0xc],edx
- 00000003 89D0 mov eax,edx
- 00000005 C70101000000 mov dword [ecx],0x1
- 0000000B E816000000 call 0x26
- 00000010 E911000000 jmp 0x26
- 00000015 E900000000 jmp 0x1a
- 0000001A EBFE jmp short 0x1a
- 0000001C E800000000 call 0x21
- 00000021 E8FBFFFFFF call 0x21
- 00000026 E8D5FFFFFF call 0x0
复制代码
总结:a64与gas的结果是一样的,nasm的稍有不同, nasm用了5个字节来产生jmp指令,nasm的代码稍微差一些。
下面用一个更大的例子,对比 a64 与 gas 生成的代码
1、 a64文件
- !------------------------------------------
- ! unsigned int str_len(const char *s)
- !
- ! Counts the bytes of s up to, but not including, the first
- ! zero byte.
- !
- ! in:
- !   [esp+4] = s
- ! return value:
- !   eax = the length of the string
- ! registers written: eax, edx, bl (none are saved or restored here)
- !-----------------------------------------
- .bits 32
- !.global str_len
- @str_len:
- xor eax, eax
- mov edx, [esp+4]
- @loop:
- mov bl, [edx]
- inc eax
- inc edx
- test bl,bl
- jnz @loop
- ! eax was also incremented for the terminating zero byte; undo that
- dec eax
- ret
- !-------------------------------------
- ! char get_char(const char *s)
- !
- ! NOTE(review): the prototype above lists one parameter, but the
- ! code also reads a second stack argument at [esp+8] and uses it
- ! as a count - confirm the intended signature.
- !
- ! in:
- !   [esp+4] = s
- !   [esp+8] = count
- ! return value:
- !   al = the count-th byte of s, counting from 1;
- !        0 if a zero byte is reached first
- ! registers written: al, ecx, edx
- !--------------------------------------
- !.global get_char
- @get_char:
- mov edx, [esp+4]
- mov ecx,[esp+8]
- 
- @loop1:
- mov al,[edx]
- ! stop at the end of the string; al is 0 in that case
- test al, al
- jz @out
- inc edx
- dec ecx
- ! dec has already set the zero flag, so this test does not
- ! change the outcome of the branch below
- test ecx, ecx
- jnz @loop1
- @out:
- ret
- !----------------------------------
- ! int str_ch(const char *d, char c)
- !
- ! Scans d for the first byte equal to c.
- !
- ! in:
- !   [esp+4] = d
- !   [esp+8] = c (a dword is loaded, only cl is compared)
- ! return:
- !   0: not found
- !   otherwise: found, eax = position counted from 1
- ! registers written: eax, ecx, edx, bl (bl is not saved or restored here)
- !---------------------------
- !.global str_ch
- @str_ch:
- xor eax, eax
- mov edx,[esp+4]
- mov ecx,[esp+8]
- @loop2:
- ! eax is incremented before the comparison, so the first byte is position 1
- inc eax
- mov bl, [edx]
- ! end of string reached without a match
- test bl, bl
- jz @out1
- inc edx
- cmp cl, bl
- jnz @loop2
- ! match: keep the position in eax and skip the "not found" reset
- jmp @out2
- @out1:
- xor eax, eax
- @out2:
- ret
- !--------------------------------------------------------------
- ! unsigned int str_str(const char *dest, const char* source)
- !
- ! Searches dest for the first occurrence of source.
- !
- ! in:
- !   [esp+4] = dest   (the string that is searched)
- !   [esp+8] = source (the string that is looked for)
- ! return value:
- !   0: not found
- !   otherwise: found, eax = position in dest counted from 1
- ! stack: 8 bytes of locals
- !   [esp+4] = start of the current attempt in dest
- !   [esp]   = start of source
- ! registers written: eax, ebx, cl, edx, esi, edi
- !   (none are saved or restored here)
- !--------------------------------------------------------------
- !.global str_str
- @str_str:
- mov edi, [esp+4]
- mov esi, [esp+8]
- 
- sub esp, 8
- mov [esp+4], edi
- mov [esp], esi
- ! eax = position of the current attempt, starting at 1
- mov eax, 1
- @loop3:
- ! ebx/edx point at the bytes compared in this step;
- ! esi/edi are advanced for the next step
- mov ebx, esi
- mov edx, edi
- inc esi
- inc edi
- mov cl, [ebx]
- !-----------
- ! found & not found
- ! end of source: every byte matched, eax holds the position
- test cl,cl
- jz @result
- ! end of dest before source was fully matched
- cmp (byte)[edx], 0
- jz @no
- !-----------
- cmp cl, [edx]
- jz @loop3
- ! despite its name, @yes is the mismatch path: restart the
- ! comparison one byte further into dest
- @yes:
- inc (dword)[esp+4]
- mov edi, [esp+4]
- mov esi, [esp]
- inc eax
- jmp @loop3
- @no:
- xor eax, eax
- @result:
- add esp, 8
- ret
- !----------------------------
- ! unsigned get_c_sum(char *s, char c)
- !
- ! Returns the number of bytes in string s that are equal to c.
- ! Returns 0 when s is a null pointer.
- !
- ! in:
- !   [esp+4] = s
- !   [esp+8] = c
- ! return value:
- !   eax = count
- ! registers written: eax, edx, bl, cl (bl is not saved or restored here)
- !------------------------------------
- !.global get_c_sum
- @get_c_sum:
- mov edx, [esp+4]
- mov bl, [esp+8]
- xor eax, eax
- 
- ! null pointer: return with eax = 0
- test edx, edx
- jz @out_get_c
- @loop_get_c:
- ! advance first, then read the byte that was just stepped over
- inc edx
- mov cl, [edx-1]
- test cl, cl
- jz @out_get_c
- cmp cl, bl
- jnz @loop_get_c
- inc eax
- jmp @loop_get_c
- 
- @out_get_c:
- ret
- !
- ! get_pc
- !
- ! Returns in eax the address of the @next label below.
- !--------------------------
- !.global get_pc
- @get_pc:
- ! call pushes its return address, which is the address of @next
- call @next
- ! pop that address into eax; the stack is back to its state on entry
- @next: pop eax
- ret
- !-----------------------------------------
- ! dump_banry
- !
- ! Writes the 8 bits of a byte, most significant bit first, as
- ! the bytes 48 ('0') and 49 ('1') into a buffer. No terminating
- ! zero byte is written.
- ! NOTE(review): "banry" is presumably a misspelling of "binary";
- ! the label is left unchanged.
- !
- ! in:
- !   [ebp+8]  = destination buffer (8 bytes are written)
- !   [ebp+12] = the byte to dump (read into al)
- ! registers written: al, ecx, edx
- !-----------------------------------------
- !.global dump_banry
- @dump_banry:
- push ebp
- mov ebp, esp
- mov edx, [ebp+8]
- mov al, [ebp+12]
- ! 9, because ecx is decremented before the exit test: 8 passes
- mov ecx, 9
- @loop5:
- dec ecx
- jz @out5
- ! shift the top bit of al into the carry flag
- shl al, 1
- jc @one
- @zero:
- mov (byte)[edx], 48
- inc edx
- jmp @loop5
- @one:
- mov (byte)[edx], 49
- inc edx
- jmp @loop5
- @out5:
- pop ebp
- ret
复制代码
结果:用以下命令:a64 -print-encode test.s -o test
- 31 c0 8b 54 24 4 8a 1a 40 42 84 db 75 f8 48 c3 8b 54 24 4 8b 4c 24 8 8a 2 84 c0 74 6 42 49 85 c9 75 f4 c3 31 c0 8b 54 24 4 8b 4c 24 8 40 8a 1a 84 db 74 7 42 38 d9 75 f4 eb 2 31 c0 c3 8b 7c 24 4 8b 74 24 8 83 ec 8 89 7c 24 4 89 34 24 b8 1 0 0 0 89 f3 89 fa 46 47 8a b 84 c9 74 19 80 3a 0 74 12 3a a 74 eb ff 44 24 4 8b 7c 24 4 8b 34 24 40 eb dd 31 c0 83 c4 8 c3 8b 54 24 4 8a 5c 24 8 31 c0 85 d2 74 f 42 8a 4a ff 84 c9 74 7 38 d9 75 f4 40 eb f1 c3 e8 0 0 0 0 58 c3 55 89 e5 8b 55 8 8a 45 c b9 9 0 0 0 49 74 10 d0 e0 72 6 c6 2 30 42 eb f3 c6 2 31 42 eb ed 5d c3
复制代码
2、 gas 的结果:
- string.o: file format elf32-i386
- Disassembly of section .text:
- 00000000 <str_len>:
- 0: 31 c0 xor %eax,%eax
- 2: 8b 54 24 04 mov 0x4(%esp),%edx
- 00000006 <loop>:
- 6: 8a 1a mov (%edx),%bl
- 8: 40 inc %eax
- 9: 42 inc %edx
- a: 84 db test %bl,%bl
- c: 75 f8 jne 6 <loop>
- e: 48 dec %eax
- f: c3 ret
- 00000010 <get_char>:
- 10: 8b 54 24 04 mov 0x4(%esp),%edx
- 14: 8b 4c 24 08 mov 0x8(%esp),%ecx
- 00000018 <loop1>:
- 18: 8a 02 mov (%edx),%al
- 1a: 84 c0 test %al,%al
- 1c: 74 06 je 24 <out>
- 1e: 42 inc %edx
- 1f: 49 dec %ecx
- 20: 85 c9 test %ecx,%ecx
- 22: 75 f4 jne 18 <loop1>
- 00000024 <out>:
- 24: c3 ret
- 00000025 <str_ch>:
- 25: 31 c0 xor %eax,%eax
- 27: 8b 54 24 04 mov 0x4(%esp),%edx
- 2b: 8b 4c 24 08 mov 0x8(%esp),%ecx
- 0000002f <loop2>:
- 2f: 40 inc %eax
- 30: 8a 1a mov (%edx),%bl
- 32: 84 db test %bl,%bl
- 34: 74 07 je 3d <out1>
- 36: 42 inc %edx
- 37: 38 d9 cmp %bl,%cl
- 39: 75 f4 jne 2f <loop2>
- 3b: eb 02 jmp 3f <out2>
- 0000003d <out1>:
- 3d: 31 c0 xor %eax,%eax
- 0000003f <out2>:
- 3f: c3 ret
- 00000040 <str_str>:
- 40: 8b 7c 24 04 mov 0x4(%esp),%edi
- 44: 8b 74 24 08 mov 0x8(%esp),%esi
- 48: 83 ec 08 sub $0x8,%esp
- 4b: 89 7c 24 04 mov %edi,0x4(%esp)
- 4f: 89 34 24 mov %esi,(%esp)
- 52: b8 01 00 00 00 mov $0x1,%eax
- 00000057 <loop3>:
- 57: 89 f3 mov %esi,%ebx
- 59: 89 fa mov %edi,%edx
- 5b: 46 inc %esi
- 5c: 47 inc %edi
- 5d: 8a 0b mov (%ebx),%cl
- 5f: 84 c9 test %cl,%cl
- 61: 74 19 je 7c <result>
- 63: 80 3a 00 cmpb $0x0,(%edx)
- 66: 74 12 je 7a <no>
- 68: 3a 0a cmp (%edx),%cl
- 6a: 74 eb je 57 <loop3>
- 0000006c <yes>:
- 6c: ff 44 24 04 incl 0x4(%esp)
- 70: 8b 7c 24 04 mov 0x4(%esp),%edi
- 74: 8b 34 24 mov (%esp),%esi
- 77: 40 inc %eax
- 78: eb dd jmp 57 <loop3>
- 0000007a <no>:
- 7a: 31 c0 xor %eax,%eax
- 0000007c <result>:
- 7c: 83 c4 08 add $0x8,%esp
- 7f: c3 ret
- 00000080 <get_c_sum>:
- 80: 8b 54 24 04 mov 0x4(%esp),%edx
- 84: 8a 5c 24 08 mov 0x8(%esp),%bl
- 88: 31 c0 xor %eax,%eax
- 8a: 85 d2 test %edx,%edx
- 8c: 74 0f je 9d <out_get_c>
- 0000008e <loop_get_c>:
- 8e: 42 inc %edx
- 8f: 8a 4a ff mov 0xffffffff(%edx),%cl
- 92: 84 c9 test %cl,%cl
- 94: 74 07 je 9d <out_get_c>
- 96: 38 d9 cmp %bl,%cl
- 98: 75 f4 jne 8e <loop_get_c>
- 9a: 40 inc %eax
- 9b: eb f1 jmp 8e <loop_get_c>
- 0000009d <out_get_c>:
- 9d: c3 ret
- 0000009e <get_pc>:
- 9e: e8 00 00 00 00 call a3 <next>
- 000000a3 <next>:
- a3: 58 pop %eax
- a4: c3 ret
- 000000a5 <dump_banry>:
- a5: 55 push %ebp
- a6: 89 e5 mov %esp,%ebp
- a8: 8b 55 08 mov 0x8(%ebp),%edx
- ab: 8a 45 0c mov 0xc(%ebp),%al
- ae: b9 09 00 00 00 mov $0x9,%ecx
- 000000b3 <loop5>:
- b3: 49 dec %ecx
- b4: 74 10 je c6 <out5>
- b6: d0 e0 shl %al
- b8: 72 06 jb c0 <one>
- 000000ba <zero>:
- ba: c6 02 30 movb $0x30,(%edx)
- bd: 42 inc %edx
- be: eb f3 jmp b3 <loop5>
- 000000c0 <one>:
- c0: c6 02 31 movb $0x31,(%edx)
- c3: 42 inc %edx
- c4: eb ed jmp b3 <loop5>
- 000000c6 <out5>:
- c6: 5d pop %ebp
- c7: c3 ret
复制代码 |
|