我想试做一个多核编译器,哪位同行愿意贴些循环体代码来评估?
目标是实现多核自动绑定。
make -j就行了
不是GCC的并行编译,是多条线程并行执行循环体。
for ( i=0; i<4000; i+=1)
a = 123;
如果是4核芯片,转为
for ( i=0; i<1000; i+=1) a = 123;
for ( i=1000; i<2000; i+=1) a = 123;
for ( i=2000; i<3000; i+=1) a = 123;
for ( i=3000; i<4000; i+=1) a = 123;
创建4条线程,分别绑定到4个物理核上(每条线程一个核)。
回复 3# blackoil
真实的循环往往有太多数据/控制依赖,根本没办法建立多个线程同时执行。
类似OpenMP的功能?
这不就是OpenMP吗?
相比较之下,我更喜欢自己用thread来实现并行。 就是这种东西?
https://en.wikipedia.org/wiki/Cilk 本帖最后由 blackoil 于 2016-08-05 23:52 编辑
类似CILK,其实原理都一样,大家都知道。
OpenMP需要手写,我希望语义分析做到并行自动化,带上SIMD。
for ( i=3000; i<4000; i+=1) a = 123;
for ( i=3000; i<4000; i+=4) vector.a = {123,123,123,123};
生成的伪汇编大致这样:
for (i=0;i<4000;i+=1)
z+=456;
section .code align=4
bits 32
global _main
;-----------------------------------------------------------------------
; _main -- exported program entry (32-bit, NASM syntax).
; Builds an ebp frame, aligns the stack, then jumps over the worker
; routines that are laid out inline after this prologue and continues
; at the thread-spawning code (label .L0, located after thread_4).
;-----------------------------------------------------------------------
_main:
        push    ebp
        mov     ebp, esp
        and     esp, 0xFFFFFFF0         ; round esp down to a 16-byte boundary
        ; .L0 is defined after the non-local label `thread_4:`, so as a NASM
        ; local label its full name is thread_4.L0. A bare `.L0` here would
        ; resolve to the undefined _main.L0 (and the original trailing ':'
        ; after the jump target was a syntax error).
        jmp     thread_4.L0
;-----------------------------------------------------------------------
; thread_1 -- worker for loop indices 0..999.
; For every i in [0, 1000): adds 456 to the dword at _z + i*4.
; Passed to _beginthreadex by the spawning code, so it follows that
; API's start-routine contract:
;   unsigned __stdcall thread_1(void *arg)      ; arg is ignored
; In:    [esp+4] = arg (unused)
; Out:   eax = 0
; Clobb: eax, flags (esi is saved and restored)
;-----------------------------------------------------------------------
thread_1:
        push    ebp
        mov     ebp, esp
        push    esi                     ; esi is callee-saved; used as i below
        xor     esi, esi                ; i = 0
        jmp     .check
.body:
        add     dword [_z + esi*4], 456 ; z[i] += 456
        add     esi, 1
.check:
        cmp     esi, 1000               ; unsigned compare: i < 1000
        jb      .body
        pop     esi
        xor     eax, eax                ; thread exit code 0
        pop     ebp
        ret     4                       ; __stdcall: pop the single argument;
                                        ; without a ret, control fell through
                                        ; into thread_2
;-----------------------------------------------------------------------
; thread_2 -- worker for loop indices 1000..1999.
; For every i in [1000, 2000): adds 456 to the dword at _z + i*4.
; Passed to _beginthreadex by the spawning code, so it follows that
; API's start-routine contract:
;   unsigned __stdcall thread_2(void *arg)      ; arg is ignored
; In:    [esp+4] = arg (unused)
; Out:   eax = 0
; Clobb: eax, flags (esi is saved and restored)
;-----------------------------------------------------------------------
thread_2:
        push    ebp
        mov     ebp, esp
        push    esi                     ; esi is callee-saved; used as i below
        mov     esi, 1000               ; i = 1000
        jmp     .check
.body:
        add     dword [_z + esi*4], 456 ; z[i] += 456
        add     esi, 1
.check:
        cmp     esi, 2000               ; unsigned compare: i < 2000
        jb      .body
        pop     esi
        xor     eax, eax                ; thread exit code 0
        pop     ebp
        ret     4                       ; __stdcall: pop the single argument;
                                        ; without a ret, control fell through
                                        ; into thread_3
;-----------------------------------------------------------------------
; thread_3 -- worker for loop indices 2000..2999.
; For every i in [2000, 3000): adds 456 to the dword at _z + i*4.
; Passed to _beginthreadex by the spawning code, so it follows that
; API's start-routine contract:
;   unsigned __stdcall thread_3(void *arg)      ; arg is ignored
; In:    [esp+4] = arg (unused)
; Out:   eax = 0
; Clobb: eax, flags (esi is saved and restored)
;-----------------------------------------------------------------------
thread_3:
        push    ebp
        mov     ebp, esp
        push    esi                     ; esi is callee-saved; used as i below
        mov     esi, 2000               ; i = 2000
        jmp     .check
.body:
        add     dword [_z + esi*4], 456 ; z[i] += 456
        add     esi, 1
.check:
        cmp     esi, 3000               ; unsigned compare: i < 3000
        jb      .body
        pop     esi
        xor     eax, eax                ; thread exit code 0
        pop     ebp
        ret     4                       ; __stdcall: pop the single argument;
                                        ; without a ret, control fell through
                                        ; into thread_4
;-----------------------------------------------------------------------
; thread_4 -- worker for loop indices 3000..3999.
; For every i in [3000, 4000): adds 456 to the dword at _z + i*4.
; Passed to _beginthreadex by the spawning code, so it follows that
; API's start-routine contract:
;   unsigned __stdcall thread_4(void *arg)      ; arg is ignored
; In:    [esp+4] = arg (unused)
; Out:   eax = 0
; Clobb: eax, flags (esi is saved and restored)
;-----------------------------------------------------------------------
thread_4:
        push    ebp
        mov     ebp, esp
        push    esi                     ; esi is callee-saved; used as i below
        mov     esi, 3000               ; i = 3000
        jmp     .check
.body:
        add     dword [_z + esi*4], 456 ; z[i] += 456
        add     esi, 1
.check:
        cmp     esi, 4000               ; unsigned compare: i < 4000
        jb      .body
        pop     esi
        xor     eax, eax                ; thread exit code 0
        pop     ebp
        ret     4                       ; __stdcall: pop the single argument;
                                        ; without a ret, this worker fell
                                        ; through into the thread-spawning
                                        ; code that follows
; Continuation of _main: target of the jump in _main's prologue that skips
; over the inlined worker routines.
; NOTE: the call lines below are pseudo-notation. `call f(args)` is not
; assembler syntax, and how the arguments are passed is not specified here.
.L0:
; Start one thread per worker routine.
call __beginthreadex(thread_1)
call __beginthreadex(thread_2)
call __beginthreadex(thread_3)
call __beginthreadex(thread_4)
; Block until the workers are done before leaving _main.
; NOTE(review): the arguments written here are the routine labels;
; WaitForMultipleObjects presumably needs the handles returned by the
; _beginthreadex calls instead -- confirm when lowering to real code.
call _WaitForMultipleObjects(thread_1,thread_2,thread_3,thread_4)
; Tear down the frame built at _main entry; restoring esp from ebp also
; undoes the stack alignment done there.
mov esp, ebp
pop ebp
; NOTE(review): eax is not set explicitly before returning, so the return
; value is whatever the last call left in it -- confirm this is intended.
ret
赞,楼主加油~~