- 论坛徽章:
- 15
|
本帖最后由 yulihua49 于 2016-11-03 14:59 编辑
我查不到。
加上 -O3 之后,是 STL 版本更快了。大概因为它像宏一样(实际上是模板)在编译期展开,更容易优化吧。
优化后的汇编码我看了,完全看不懂:二者的函数调用次序都被打乱了,比较函数也都被内联(嵌入)到代码里了。
又试了下使用库函数,这个完全不能与应用代码揉合,就得老老实实调函数。速度更慢了。但是看汇编码,简单明了,已经无法优化了。
-O3对宏的优化水平的确超过了一般汇编程序员的能力。
附上汇编码,还看得出来就是前边的cpp程序吗?
- [sdbc@erg0devprc01 test]$ cat stl_test.s
- .file "stl_test.cpp"
- .text
- .p2align 4,,15  # cmp(void*, void*, int) per the mangled name: returns ((int*)arg2)[arg3] - *(int*)arg1
- .globl _Z3cmpPvS_i
- .type _Z3cmpPvS_i, @function
- _Z3cmpPvS_i:  # in: rdi = arg1 (pointer), rsi = arg2 (pointer), edx = arg3 (int index); out: eax
- .LFB1218:
- .cfi_startproc
- movslq %edx, %rdx  # sign-extend the 32-bit index so it can be used in 64-bit addressing
- movl (%rsi,%rdx,4), %eax  # eax = 32-bit element at rsi + index*4
- subl (%rdi), %eax  # eax -= 32-bit value at rdi; the difference is the return value
- ret
- .cfi_endproc
- .LFE1218:
- .size _Z3cmpPvS_i, .-_Z3cmpPvS_i
- .p2align 4,,15  # test2(): inlined binary search over the 100000 ints of test_data for the first position whose element is > 8000; stores (that index - 1) to garbage
- .globl _Z5test2v
- .type _Z5test2v, @function
- _Z5test2v:  # no arguments, no return value; clobbers rax, rcx, rdx, rsi, rdi and flags
- .LFB1221:
- .cfi_startproc
- movl $100000, %edi  # rdi = len, number of elements still in the search range
- movl $test_data, %esi  # rsi = first, start of the search range (symbol loaded as a 32-bit absolute address)
- .L3:  # reached on entry and after every upper-part step: rdi = len, rsi = first
- movq %rdi, %rax
- sarq %rax  # rax = half = len >> 1
- leaq (%rsi,%rax,4), %rcx  # rcx = mid = first + half (4-byte elements)
- cmpl $8000, (%rcx)
- jg .L7  # *mid > 8000: keep the lower part, new len = half (already in rax)
- jmp .L11  # *mid <= 8000: go move first past mid
- .p2align 4,,10
- .p2align 3
- .L5:  # lower-part loop: rax = len (> 0 here), rsi = first
- movq %rax, %rdx
- sarq %rdx  # rdx = half = len >> 1
- leaq (%rsi,%rdx,4), %rcx  # rcx = mid = first + half
- cmpl $8000, (%rcx)
- jle .L4  # *mid <= 8000: narrow to the upper part
- movq %rdx, %rax  # *mid > 8000: len = half
- .L7:
- testq %rax, %rax
- jg .L5  # keep searching while len > 0
- .L6:  # search finished: rsi = first = resulting position
- subq $test_data, %rsi  # byte offset of the result from the start of the array
- sarq $2, %rsi  # bytes -> element index
- subq $1, %rsi  # index - 1
- movq %rsi, garbage(%rip)  # garbage = index - 1
- ret
- .L11:  # entered from .L3 with rax = half, rdi = len; rearrange into what .L4 expects
- movq %rax, %rdx  # rdx = half
- movq %rdi, %rax  # rax = len
- .p2align 4,,10
- .p2align 3
- .L4:  # upper-part step: rax = len, rdx = half, rcx = mid
- subq %rdx, %rax  # rax = len - half
- leaq 4(%rcx), %rsi  # first = mid + 1
- leaq -1(%rax), %rdi  # len = len - half - 1
- testq %rdi, %rdi
- jg .L3  # continue while len > 0
- jmp .L6
- .cfi_endproc
- .LFE1221:
- .size _Z5test2v, .-_Z5test2v
- .p2align 4,,15  # test1(): calls the external upperBound(&key, test_data, 100000, cmp) with key = 8000 and stores (result - 1) to garbage
- .globl _Z5test1v
- .type _Z5test1v, @function
- _Z5test1v:  # no arguments, no return value
- .LFB1219:
- .cfi_startproc
- subq $24, %rsp  # 24 bytes of locals; together with the 8-byte return address this keeps rsp 16-byte aligned at the call
- .cfi_def_cfa_offset 32
- movl $_Z3cmpPvS_i, %ecx  # arg4 (rcx) = address of cmp(void*, void*, int)
- movl $100000, %edx  # arg3 (edx) = 100000
- movq %rsp, %rdi  # arg1 (rdi) = address of the key stored at (%rsp)
- movl $test_data, %esi  # arg2 (rsi) = test_data
- movl $8000, (%rsp)  # key = 8000
- call upperBound  # defined outside this listing; the result is read from eax below
- subl $1, %eax  # result - 1 (32-bit arithmetic)
- cltq  # sign-extend eax into rax
- movq %rax, garbage(%rip)  # garbage = sign-extended (result - 1)
- addq $24, %rsp
- .cfi_def_cfa_offset 8
- ret
- .cfi_endproc
- .LFE1219:
- .size _Z5test1v, .-_Z5test1v
- .section .text.unlikely,"ax",@progbits  # local ".part.1" piece of std::operator<<(std::ostream&, const char*) (per the mangled name), placed in the unlikely-code section: ORs bit 0 into a stream field and passes it to basic_ios::clear
- .type _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.part.1, @function
- _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.part.1:  # in: rdi = stream object
- .LFB1242:
- .cfi_startproc
- pushq %rax  # pushed only to move rsp by 8 (16-byte alignment at the call); the stored value is never read
- .cfi_def_cfa_offset 16
- movq (%rdi), %rax  # rax = first quadword of the object (its vtable pointer)
- addq -24(%rax), %rdi  # rdi += offset stored 24 bytes before the vtable address point (presumably selects the basic_ios base subobject - confirm)
- movl 32(%rdi), %esi  # esi = 32-bit field at offset 32 (presumably the current stream state - confirm against libstdc++)
- orl $1, %esi  # set bit 0 (presumably ios_base::badbit - confirm)
- call _ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate  # std::basic_ios<char>::clear(iostate) with the value computed above
- popq %rdx  # undo the alignment push; the popped value is not used
- .cfi_def_cfa_offset 8
- ret
- .cfi_endproc
- .LFE1242:
- .size _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.part.1, .-_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.part.1
- .text  # generate_test_data(): fills test_data[i] = i / 10 for i = 0 .. 99999
- .p2align 4,,15
- .globl _Z18generate_test_datav
- .type _Z18generate_test_datav, @function
- _Z18generate_test_datav:  # no arguments, no return value; clobbers rax, rcx, rdx, rsi and flags
- .LFB1217:
- .cfi_startproc
- xorl %ecx, %ecx  # rcx = i = 0
- movabsq $-3689348814741910323, %rsi  # rsi = 0xCCCCCCCCCCCCCCCD = ceil(2^67 / 10), reciprocal constant for unsigned division by 10
- .p2align 4,,10
- .p2align 3
- .L18:
- movq %rcx, %rax
- mulq %rsi  # rdx:rax = i * constant (unsigned 128-bit product)
- shrq $3, %rdx  # rdx = (high half) >> 3 = i / 10
- movl %edx, test_data(,%rcx,4)  # test_data[i] = i / 10 (4-byte store)
- addq $1, %rcx  # ++i
- cmpq $100000, %rcx
- jne .L18  # loop until i == 100000
- rep ret  # prefixed two-byte return; GCC emits this form when ret directly follows a conditional jump
- .cfi_endproc
- .LFE1217:
- .size _Z18generate_test_datav, .-_Z18generate_test_datav
- .section .rodata.str1.1,"aMS",@progbits,1  # time_it(unsigned long, void (*)()) and its string literals: calls the function n times, then writes "Duration: ", the now_usec() difference, ",N=", the value of garbage and a newline to std::cout, and flushes
- .LC0:
- .string "Duration: "
- .LC1:
- .string ",N="
- .text
- .p2align 4,,15
- .globl _Z7time_itmPFvvE
- .type _Z7time_itmPFvvE, @function
- _Z7time_itmPFvvE:  # in: rdi = n (number of calls), rsi = fn (function pointer)
- .LFB1222:
- .cfi_startproc
- pushq %r13  # r13, r12, rbp and rbx are callee-saved; they are used below to keep values alive across calls
- .cfi_def_cfa_offset 16
- .cfi_offset 13, -16
- pushq %r12
- .cfi_def_cfa_offset 24
- .cfi_offset 12, -24
- movq %rsi, %r12  # r12 = fn
- pushq %rbp
- .cfi_def_cfa_offset 32
- .cfi_offset 6, -32
- movq %rdi, %rbp  # rbp = n
- pushq %rbx
- .cfi_def_cfa_offset 40
- .cfi_offset 3, -40
- xorl %ebx, %ebx  # rbx = i = 0
- subq $8, %rsp  # padding so rsp is 16-byte aligned at the calls below
- .cfi_def_cfa_offset 48
- call now_usec  # defined outside this listing; start timestamp comes back in rax
- testq %rbp, %rbp
- movq %rax, %r13  # r13 = start (mov leaves the flags set by testq untouched)
- je .L23  # n == 0: skip the call loop
- .p2align 4,,10
- .p2align 3
- .L27:
- addq $1, %rbx  # ++i
- call *%r12  # fn()
- cmpq %rbp, %rbx
- jne .L27  # repeat until i == n
- .L23:
- movq garbage(%rip), %rbp  # rbp = current value of garbage (n is no longer needed)
- call now_usec  # end timestamp in rax
- movl $10, %edx  # 10 = length of "Duration: "
- movq %rax, %rbx  # rbx = end
- movl $.LC0, %esi
- movl $_ZSt4cout, %edi
- call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l  # std::__ostream_insert(std::cout, "Duration: ", 10)
- movl %ebx, %esi
- movl $_ZSt4cout, %edi
- subl %r13d, %esi  # esi = low 32 bits of (end - start)
- call _ZNSolsEi  # std::ostream::operator<<(int); the rax it returns is used as the stream from here on
- movl $3, %edx  # 3 = length of ",N="
- movq %rax, %rbx  # rbx = stream
- movl $.LC1, %esi
- movq %rax, %rdi
- call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l  # std::__ostream_insert(stream, ",N=", 3)
- movq %rbp, %rsi  # the garbage value loaded at .L23
- movq %rbx, %rdi
- call _ZNSo9_M_insertImEERSoT_  # std::ostream::_M_insert<unsigned long>(garbage value)
- movq %rax, %rbp  # rbp = stream returned by the insert
- movq (%rax), %rax  # rax = the stream's vtable pointer
- movq -24(%rax), %rax  # rax = offset stored 24 bytes before the vtable address point
- movq 240(%rbp,%rax), %rbx  # rbx = pointer field at stream + offset + 240 (presumably the stream's cached ctype<char> facet - confirm against libstdc++)
- testq %rbx, %rbx
- je .L30  # null pointer: go call std::__throw_bad_cast
- cmpb $0, 56(%rbx)
- je .L25  # flag byte at +56 is zero: take the slow path through the virtual call
- movzbl 67(%rbx), %eax  # fast path: eax = byte at +67 (presumably the cached widened '\n' - confirm)
- .L26:  # al = character to write
- movq %rbp, %rdi
- movsbl %al, %esi
- call _ZNSo3putEc  # std::ostream::put(char)
- addq $8, %rsp
- .cfi_remember_state
- .cfi_def_cfa_offset 40
- movq %rax, %rdi  # the stream returned by put becomes the argument of flush
- popq %rbx
- .cfi_def_cfa_offset 32
- popq %rbp
- .cfi_def_cfa_offset 24
- popq %r12
- .cfi_def_cfa_offset 16
- popq %r13
- .cfi_def_cfa_offset 8
- jmp _ZNSo5flushEv  # tail call: std::ostream::flush() returns straight to time_it's caller
- .p2align 4,,10
- .p2align 3
- .L25:  # slow path for obtaining the newline character
- .cfi_restore_state
- movq %rbx, %rdi
- call _ZNKSt5ctypeIcE13_M_widen_initEv  # std::ctype<char>::_M_widen_init() const
- movq (%rbx), %rax  # rax = vtable pointer of the object in rbx
- movl $10, %esi  # character code 10 ('\n')
- movq %rbx, %rdi
- call *48(%rax)  # virtual call through the vtable slot at +48 (presumably ctype<char>::do_widen - confirm); result in al
- jmp .L26
- .L30:
- call _ZSt16__throw_bad_castv  # std::__throw_bad_cast(); no code follows, so the call is expected not to return
- .cfi_endproc
- .LFE1222:
- .size _Z7time_itmPFvvE, .-_Z7time_itmPFvvE
- .section .rodata.str1.1  # main and its string literals: fills test_data, runs the upperBound call and the inlined binary search 100 times, then times test1 ("Binary ") and test2 ("MAP ") with 100000 calls each
- .LC2:
- .string "Binary "
- .LC3:
- .string "MAP "
- .section .text.startup,"ax",@progbits
- .p2align 4,,15
- .globl main
- .type main, @function
- main:  # returns 0 in eax
- .LFB1223:
- .cfi_startproc
- xorl %ecx, %ecx  # rcx = i = 0; the loop below is the same fill loop as generate_test_data, inlined
- movabsq $-3689348814741910323, %rsi  # rsi = 0xCCCCCCCCCCCCCCCD = ceil(2^67 / 10), reciprocal constant for unsigned division by 10
- .p2align 4,,10
- .p2align 3
- .L33:
- movq %rcx, %rax
- mulq %rsi  # rdx:rax = i * constant
- shrq $3, %rdx  # rdx = i / 10
- movl %edx, test_data(,%rcx,4)  # test_data[i] = i / 10
- addq $1, %rcx
- cmpq $100000, %rcx
- jne .L33  # loop until i == 100000
- pushq %rbx  # rbx is callee-saved; it holds the loop counter across the upperBound calls
- .cfi_def_cfa_offset 16
- .cfi_offset 3, -16
- movl $100, %ebx  # rbx = 100 iterations remaining
- subq $16, %rsp  # room for the key at (%rsp); rsp stays 16-byte aligned for the calls
- .cfi_def_cfa_offset 32
- .p2align 4,,10
- .p2align 3
- .L40:  # one iteration: upperBound call, then the inlined binary search
- movl $test_data, %esi  # arg2 (rsi) = test_data
- movq %rsp, %rdi  # arg1 (rdi) = address of the key
- movl $_Z3cmpPvS_i, %ecx  # arg4 (rcx) = address of cmp(void*, void*, int)
- movl $100000, %edx  # arg3 (edx) = 100000
- movl $8000, (%rsp)  # key = 8000
- call upperBound  # defined outside this listing; its return value is not read in this loop
- movl $test_data, %esi  # rsi = first, start of the search range
- movl $100000, %edi  # rdi = len, number of elements still in the range
- .L34:  # reached at the start of the search and after every upper-part step: rdi = len, rsi = first
- movq %rdi, %rax
- sarq %rax  # rax = half = len >> 1
- leaq (%rsi,%rax,4), %rcx  # rcx = mid = first + half (4-byte elements)
- cmpl $8000, (%rcx)
- jg .L38  # *mid > 8000: keep the lower part, new len = half (already in rax)
- jmp .L44  # *mid <= 8000: go move first past mid
- .p2align 4,,10
- .p2align 3
- .L36:  # lower-part loop: rax = len (> 0 here), rsi = first
- movq %rax, %rdx
- sarq %rdx  # rdx = half = len >> 1
- leaq (%rsi,%rdx,4), %rcx  # rcx = mid = first + half
- cmpl $8000, (%rcx)
- jle .L35  # *mid <= 8000: narrow to the upper part
- movq %rdx, %rax  # *mid > 8000: len = half
- .L38:
- testq %rax, %rax
- jg .L36  # keep searching while len > 0
- .L37:  # search finished: rsi = first = resulting position
- subq $test_data, %rsi  # byte offset of the result from the start of the array
- sarq $2, %rsi  # bytes -> element index
- subq $1, %rsi  # index - 1
- subq $1, %rbx  # one iteration done; sets ZF when the counter reaches 0
- movq %rsi, garbage(%rip)  # garbage = index - 1 (mov does not change the flags from the subq above)
- jne .L40  # run the next iteration while the counter is non-zero
- movl $.LC2, %esi
- movl $_ZSt4cout, %edi
- call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc  # std::operator<<(std::cout, "Binary ")
- movl $_Z5test1v, %esi  # fn = test1
- movl $100000, %edi  # n = 100000
- call _Z7time_itmPFvvE  # time_it(100000, test1)
- movl $.LC3, %esi
- movl $_ZSt4cout, %edi
- call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc  # std::operator<<(std::cout, "MAP ")
- movl $_Z5test2v, %esi  # fn = test2
- movl $100000, %edi  # n = 100000
- call _Z7time_itmPFvvE  # time_it(100000, test2)
- addq $16, %rsp
- .cfi_remember_state
- .cfi_def_cfa_offset 16
- xorl %eax, %eax  # return value 0
- popq %rbx
- .cfi_restore 3
- .cfi_def_cfa_offset 8
- ret
- .L44:  # entered from .L34 with rax = half, rdi = len; rearrange into what .L35 expects
- .cfi_restore_state
- movq %rax, %rdx  # rdx = half
- movq %rdi, %rax  # rax = len
- .p2align 4,,10
- .p2align 3
- .L35:  # upper-part step: rax = len, rdx = half, rcx = mid
- subq %rdx, %rax  # rax = len - half
- leaq 4(%rcx), %rsi  # first = mid + 1
- leaq -1(%rax), %rdi  # len = len - half - 1
- testq %rdi, %rdi
- jg .L34  # continue while len > 0
- jmp .L37
- .cfi_endproc
- .LFE1223:
- .size main, .-main
- .p2align 4,,15  # file-local initializer: constructs the std::ios_base::Init object __ioinit and registers its destructor through __cxa_atexit
- .type _GLOBAL__sub_I_test_data, @function
- _GLOBAL__sub_I_test_data:  # no arguments
- .LFB1240:
- .cfi_startproc
- subq $8, %rsp  # move rsp by 8 so it is 16-byte aligned at the call
- .cfi_def_cfa_offset 16
- movl $_ZStL8__ioinit, %edi  # this = address of std::__ioinit
- call _ZNSt8ios_base4InitC1Ev  # std::ios_base::Init::Init()
- movl $__dso_handle, %edx  # arg3 = address of __dso_handle
- movl $_ZStL8__ioinit, %esi  # arg2 = address of std::__ioinit
- movl $_ZNSt8ios_base4InitD1Ev, %edi  # arg1 = address of std::ios_base::Init::~Init()
- addq $8, %rsp
- .cfi_def_cfa_offset 8
- jmp __cxa_atexit  # tail call: __cxa_atexit(destructor, &__ioinit, &__dso_handle)
- .cfi_endproc
- .LFE1240:
- .size _GLOBAL__sub_I_test_data, .-_GLOBAL__sub_I_test_data
- .section .init_array,"aw"
- .align 8
- .quad _GLOBAL__sub_I_test_data  # address of the initializer above, emitted into .init_array
- .globl garbage  # global data: garbage (8-byte result slot) and test_data (400000-byte array), both zero-filled in .bss
- .bss
- .align 16
- .type garbage, @object
- .size garbage, 8
- garbage:
- .zero 8
- .globl test_data
- .align 32
- .type test_data, @object
- .size test_data, 400000  # 400000 bytes = 100000 four-byte elements
- test_data:
- .zero 400000
- .local _ZStL8__ioinit
- .comm _ZStL8__ioinit,1,1  # file-local 1-byte object std::__ioinit (size 1, alignment 1)
- .hidden __dso_handle
- .ident "GCC: (GNU) 4.8.3 20140911 (Red Hat 4.8.3-9)"
- .section .note.GNU-stack,"",@progbits  # empty .note.GNU-stack section, not flagged executable
- [sdbc@erg0devprc01 test]$
复制代码
|
|