- 论坛徽章:
- 0
|
void dgemm(const Matrix &A, const Matrix &B, Matrix &C)
竟然比
Matrix operator % (const Matrix &a, const Matrix &b)
慢了好多。
operator overloading用的是最基本的算法,return 一个matrix,
而dgemm是pure pointer access但是多了一个loop.
怎么回事啊?好多人都说pure access要快一些,高手指点一下啊。
Matrix operator % (const Matrix &a, const Matrix &b)
{
#ifdef DEBUG__
if (a.ncol_ != b.nrow_)
errorReport("(Matrix operator %) : matrices do not conform in multiplication");
#endif
Matrix c(a.nrow_, b.ncol_);
double *ptr_a = a.data_, *ptr_b = b.data_, *ptr_c = c.data_;
double sum;
for (int i = 0; i < a.nrow_; i++)
for (int j = 0; j < b.ncol_; j++)
{
sum = 0.0;
for (int k = 0; k < a.ncol_; k++)
sum += ptr_a[i*a.ncol_+k]*ptr_b[k*b.ncol_+j];
ptr_c[i*c.ncol_+j]=sum;
}
return c;
}
void dgemm(const Matrix &A, const Matrix &B, Matrix &C)
// A[m][n] * B[n][l] = C[m][l]
{
int i, k, j;
const double *ptr_a = A.data();
int m = A.nrows(), n = A.ncols(), l = B.ncols();
for ( i = 0; i < m; i++ )
{
double *ptr_c = C.data() + i*l, Aik = *ptr_a++;
const double *ptr_b = B.data();
for ( j = 0; j < l; j++ )
*ptr_c++ = Aik * (*ptr_b++);
for ( k = 1; k < n; k++ )
{
ptr_b = B.data() + k*l;
ptr_c = C.data() + i*l;
Aik = *ptr_a++;
for ( j = 0; j < l; j++ )
{
*ptr_c++ += Aik * (*ptr_b++);
}
}
}
} |
|