内嵌汇编应用sse计算矩阵的问题 请教!!
我想利用sse指令计算一个4*4的float矩阵,然后写了一段内嵌汇编的c程序,结果编译没问题,运行时出了问题。程序在输出“hello”之后就卡在那里不动了,请教各位是什么问题啊~~#include <stdio.h>
float a __attribute__ ((aligned (16))) = {
{1.0,2.0,3.0,4.0},
{1.0,2.0,3.0,4.0},
{1.0,2.0,3.0,4.0},
{1.0,2.0,3.0,4.0}
};
float b __attribute__ ((aligned (16))) = {
{1.0,2.0,3.0,4.0},
{1.0,2.0,3.0,4.0},
{1.0,2.0,3.0,4.0},
{1.0,2.0,3.0,4.0}
};
float c __attribute__ ((aligned (16)));
void matMul(float result,float left,float right)
{
printf("hello\n");
__asm__ __volatile__
(
"movaps (%2),%%xmm4 \n\t"
"movaps 16(%2),%%xmm5 \n\t"
"movaps 32(%2),%%xmm6 \n\t"
"movaps 48(%2),%%xmm7 \n\t"
"movl $0,%%edi \n\t"
"movl $4,%%ecx \n\t"
"lab:\n\t" "movups (%1,%2,1),%%xmm0 \n\t"
"movaps %%xmm0,%%xmm1 \n\t"
"movaps %%xmm0,%%xmm2 \n\t"
"movaps %%xmm0,%%xmm3 \n\t"
"shufps $0x00,%%xmm0,%%xmm0 \n\t"
"shufps $0x55,%%xmm1,%%xmm1 \n\t"
"shufps $0xAA,%%xmm2,%%xmm2 \n\t"
"shufps $0xFF,%%xmm3,%%xmm3 \n\t"
"mulps %%xmm4,%%xmm0 \n\t"
"mulps %%xmm5,%%xmm1 \n\t"
"mulps %%xmm6,%%xmm2 \n\t"
"mulps %%xmm7,%%xmm3 \n\t"
"addps %%xmm0,%%xmm2 \n\t"
"addps %%xmm1,%%xmm3 \n\t"
"addps %%xmm2,%%xmm3 \n\t"
"movups %%xmm3,(%0,%2,1) \n\t"
"addl $16,%%edi \n\t"
"loop lab \n\t"
:"=d"(result)
:"a"(left),"D"(right)
:"%ecx"
);
}
int main()
{
int i,j;
matMul(c,a,b);
for(i = 0; i<4; i++)
{
for(j = 0; j<4; j++)
printf("%lf ",c);
printf("\n");
}
} 哪位大侠帮帮忙啊!!~~~814014134 可能是出现浮点异常了,SSE指令异常比较复杂要查手册
页:
[1]