- 论坛徽章:
- 0
|
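strlen_chk.c 中计算 sum1 与 sum2 的两个循环结果相同,但效率不同:参考汇编代码 strlen_chk.s,计算 sum2 时,每次循环都会重新执行一遍 strlen(buf)(gcc 已将 strlen 内联展开,所以汇编中看不到 call 指令)。
编译器的循环优化中有一项是“循环不变量外提”:对一般的表达式,编译器通常能够识别出循环不变量并把它移到循环之外;但对函数调用往往做不到(本例中 gcc 3.4.6 -O3 就没有外提 strlen),因此写程序时最好手工把这类调用提到循环之外。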
strlen_chk.c:
- #include <stdio.h>
- #include <string.h>
- int main(int argc, char **argv) /* Demo: the same sum is computed twice, once with strlen() hoisted by hand (sum1) and once with strlen() left in the loop condition (sum2). argc and argv are not used. */
- {
- char buf[1024]; /* NOTE(review): buf is never initialized, so both strlen(buf) calls read indeterminate bytes and may scan past the array (undefined behavior). Left unchanged so this source still matches the assembly listing that follows. */
- int len = 0;
- int sum1 = 0, sum2 = 0;
- int i = 0;
- len = strlen( buf ); /* length computed once, before the loop; the size_t result is narrowed to int */
- for( i = 0; i < len; i++ ) /* loop bound is the cached value */
- sum1 += i;
- for( i = 0; i < strlen(buf); i++ ) /* strlen(buf) sits in the loop condition, so as written it is evaluated on every iteration; i is converted to size_t for the comparison */
- sum2 += i;
- return sum1 + sum2; /* both sums feed the return value, presumably so that neither loop can be optimized away */
- }
复制代码
编译命令: cc -S -O3 strlen_chk.c
生成的汇编文件 strlen_chk.s:
- .file "strlen_chk.c"             # compiler output for strlen_chk.c (GNU as, AT&T operand order: op src, dst)
- .text
- .p2align 4,,15                   # align main to 16 bytes, using at most 15 bytes of padding
- .globl main
- .type main, @function
- main:                            # int main(int argc, char **argv); the incoming argument registers are never read
- .LFB24:                          # function-begin label, referenced by the .eh_frame FDE
- subq $912, %rsp                  # reserve 912 bytes of stack
- .LCFI0:                          # position just after the stack adjustment, referenced by the unwind info
- xorl %r11d, %r11d                # r11d = sum1 = 0
- xorl %r10d, %r10d                # r10d = sum2 = 0
- leaq -120(%rsp), %r9             # r9 = &buf[0]; buf starts 120 bytes below rsp (presumably relying on the SysV red zone, since main makes no calls here)
- movq %r9, %rsi                   # rsi = scan pointer for the first strlen, starting at buf
- ---------------------------------------
- *** .L2:内联展开的 strlen,计算 len = strlen( buf ),结果存入 eax(整个程序中只执行这一次);
- *** .L25 / .L23:循环 for( i = 0; i < len; i++ ) sum1 += i;循环体内不再扫描 buf。
- .L2:                             # inlined strlen(buf): examine 4 bytes per iteration
- movl (%rsi), %eax                # eax = next 4 bytes of buf
- addq $4, %rsi                    # advance the scan pointer past them
- leal -16843009(%rax), %edx       # edx = word - 0x01010101
- notl %eax                        # eax = ~word
- andl %eax, %edx                  # edx = (word - 0x01010101) & ~word
- movl %edx, %ecx
- andl $-2139062144, %ecx          # keep bit 7 of every byte (mask 0x80808080); non-zero iff the word contains a NUL byte
- je .L2                           # no NUL in this word: keep scanning
- movl %ecx, %r8d
- leaq 2(%rsi), %rdi               # rdi = scan pointer + 2, used if the NUL lies in the upper two bytes
- shrl $16, %r8d                   # r8d = NUL flags of the upper two bytes
- andl $32896, %edx                # test with mask 0x8080: ZF = 1 when the lower two bytes hold no NUL
- cmove %r8d, %ecx                 # NUL is in the upper half: switch to its flags ...
- cmove %rdi, %rsi                 # ... and move the pointer 2 bytes further
- movl %ecx, %edx
- addb %cl, %dl                    # doubles cl: CF = bit 7 of cl, set when the first byte of the pair is the NUL
- sbbq $3, %rsi                    # rsi -= 3 + CF, which leaves rsi at the address of the NUL byte
- xorl %edx, %edx                  # edx = i = 0
- movl %esi, %eax
- subl %r9d, %eax                  # eax = len = (int)(address of NUL - buf)
- jmp .L23                         # enter the sum1 loop at its condition check
- .L25:                            # sum1 loop body
- addl %edx, %r11d                 # sum1 += i
- incl %edx                        # i++
- .L23:                            # sum1 loop condition
- cmpl %eax, %edx
- jl .L25                          # signed compare: continue while i < len; len in eax is not recomputed
- xorl %edi, %edi                  # edi = i = 0 for the sum2 loop
- .p2align 4,,7                    # align the next loop head to 16 bytes if that needs at most 7 bytes of padding
- -------------------------------------------
- -------------------------------------------
- *** .L8 / .L11:循环 for( i = 0; i < strlen(buf); i++ ) sum2 += i;
- *** 每次循环都从 .L8 把扫描指针重置到 buf 起始处,并在 .L11 中重新执行一遍内联展开的 strlen(buf) 扫描;
- *** 也就是说,这里 gcc 3.4.6 -O3 没有把 strlen(buf) 识别为循环不变量,也就没有把它外提到循环之外。
- .L8:                             # head of the sum2 loop: runs once per iteration of the C loop
- movslq %edi,%r8                  # r8 = i sign-extended to 64 bits for the comparison with the length
- movq %r9, %rsi                   # restart the scan at buf: the length is recomputed every iteration
- .p2align 4,,7
- .L11:                            # inlined strlen(buf) again, 4 bytes per step
- movl (%rsi), %ecx                # ecx = next 4 bytes of buf
- addq $4, %rsi                    # advance the scan pointer past them
- leal -16843009(%rcx), %edx       # edx = word - 0x01010101
- notl %ecx                        # ecx = ~word
- andl %ecx, %edx                  # edx = (word - 0x01010101) & ~word
- movl %edx, %ecx
- andl $-2139062144, %ecx          # keep bit 7 of every byte (mask 0x80808080); non-zero iff the word contains a NUL byte
- je .L11                          # no NUL in this word: keep scanning
- movl %ecx, %eax
- shrl $16, %eax                   # eax = NUL flags of the upper two bytes
- andl $32896, %edx                # test with mask 0x8080: ZF = 1 when the lower two bytes hold no NUL
- leaq 2(%rsi), %rdx               # rdx = scan pointer + 2; lea leaves the flags from the test intact
- cmove %eax, %ecx                 # NUL is in the upper half: switch to its flags ...
- movl %ecx, %eax                  # (mov does not modify the flags either)
- cmove %rdx, %rsi                 # ... and move the pointer 2 bytes further
- addb %cl, %al                    # doubles cl: CF = bit 7 of cl, set when the first byte of the pair is the NUL
- sbbq $3, %rsi                    # rsi -= 3 + CF, which leaves rsi at the address of the NUL byte
- subq %r9, %rsi                   # rsi = strlen(buf) as a 64-bit value
- cmpq %rsi, %r8
- jae .L26                         # unsigned compare: leave the loop once i >= strlen(buf)
- addl %edi, %r10d                 # sum2 += i
- incl %edi                        # i++
- jmp .L8                          # next iteration, which rescans buf from the start
- ---------------------------------------
- .L26:                            # exit path, reached when the sum2 loop condition fails
- leal (%r11,%r10), %eax           # return value = sum1 + sum2
- addq $912, %rsp                  # release the 912 bytes reserved in the prologue
- ret
- .LFE24:                          # function-end label, referenced by the .eh_frame FDE
- .size main, .-main               # symbol size of main = distance from the main label to here
- .section .eh_frame,"a",@progbits        # hand-encoded call-frame (unwind) data for main; byte meanings below are decoded per the DWARF CFI encoding
- .Lframe1:                               # start of the CIE (Common Information Entry)
- .long .LECIE1-.LSCIE1                   # CIE length
- .LSCIE1:
- .long 0x0                               # CIE id; 0 marks a CIE in .eh_frame
- .byte 0x1                               # CIE version
- .string ""                              # empty augmentation string
- .uleb128 0x1                            # code alignment factor
- .sleb128 -8                             # data alignment factor
- .byte 0x10                              # return-address column: register 16
- .byte 0xc                               # DW_CFA_def_cfa ...
- .uleb128 0x7                            # ... register 7 (rsp in the x86-64 DWARF numbering) ...
- .uleb128 0x8                            # ... offset 8: CFA = rsp + 8 at function entry
- .byte 0x90                              # DW_CFA_offset for register 16 (return address) ...
- .uleb128 0x1                            # ... saved at CFA + 1 * (-8)
- .align 8
- .LECIE1:
- .LSFDE1:                                # start of the FDE describing main
- .long .LEFDE1-.LASFDE1                  # FDE length
- .LASFDE1:
- .long .LASFDE1-.Lframe1                 # offset back to the CIE above
- .quad .LFB24                            # start address of the covered code (main)
- .quad .LFE24-.LFB24                     # length of the covered code
- .byte 0x4                               # DW_CFA_advance_loc4 ...
- .long .LCFI0-.LFB24                     # ... to the point just after "subq $912, %rsp"
- .byte 0xe                               # DW_CFA_def_cfa_offset ...
- .uleb128 0x398                          # ... 0x398 = 920 = 912 + 8, the CFA offset from rsp after the stack adjustment
- .align 8
- .LEFDE1:
- .section .note.GNU-stack,"",@progbits   # empty marker section: the object does not need an executable stack
- .ident "GCC: (GNU) 3.4.6 20060404 (Red Hat 3.4.6-3)"   # identifies the compiler that produced this listing
复制代码 |
|