- 论坛徽章:
- 0
|
测试程序和结果
是memcpy速度的1/3多点。
其实你根本不应该写这样的程序。因为你是用 C 写的 *dst = *src 复制循环,怎么能和被优化为最快机器串操作指令的 memcpy 比速度?
./a.out
src[AFFEC008], dst[A7FEB008], start :
4480000
1850000
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
- /*
- void * mmcp(void*, const void*, size_t);
- void * mmcp(void* dst, const void *src, size_t size)
- {
- typedef struct {char bytes[size]; } buffer_t;
- const buffer_t *psrc = (const buffer_t *)src;
- buffer_t *pdst = (buffer_t *)dst;
- *pdst = *psrc;
- return dst;
- }
- */
/* Byte and machine-word units used by the copy routine. */
typedef unsigned char ylib_byte_t;
typedef unsigned int ylib_word_t;

/*
 * OUTSPREAD is the unroll factor of the word-copy loop; RESIDUE is used as a
 * bit mask (OUTSPREAD - 1), so OUTSPREAD must stay a power of two.
 */
enum YOUNG_LIBRARY_MEMORY_FUNCTION_CONSTANT
{
    OUTSPREAD = 16,
    RESIDUE = OUTSPREAD - 1
};

/*
 * Expands `expression` into OUTSPREAD (16) consecutive statements.
 * The expansion already ends with a semicolon and is not wrapped in a block,
 * so it must only be used inside braces.
 */
#define CYCLE_OUTSPREAD( expression ) \
    expression; expression; expression; expression; \
    expression; expression; expression; expression; \
    expression; expression; expression; expression; \
    expression; expression; expression; expression;

/*
 * Copies `count` bytes from `src` to `dst` and returns `dst`.
 *
 * The regions must not overlap (same contract as memcpy).
 *
 * Word-sized copies are only used when both pointers can be brought to
 * ylib_word_t alignment at the same time, i.e. when they share the same
 * offset modulo sizeof(ylib_word_t). Otherwise the whole range is copied
 * byte by byte, which avoids misaligned word accesses (undefined behaviour,
 * and a fault on strict-alignment CPUs).
 *
 * NOTE(review): the word path still reads/writes the buffers through
 * `unsigned int` lvalues, so callers whose objects have a different
 * effective type rely on the compiler tolerating that aliasing.
 */
void* mmcp( void* dst, const void* src, size_t count )
{
    ylib_byte_t* d = (ylib_byte_t*)dst;
    const ylib_byte_t* s = (const ylib_byte_t*)src;
    size_t left = count;

    if( (uintptr_t)d % sizeof(ylib_word_t)
        == (uintptr_t)s % sizeof(ylib_word_t) )
    {
        ylib_word_t* dstword;
        const ylib_word_t* srcword;
        size_t word_count, word_outspread, i;

        /* Copy leading bytes until both pointers are word aligned. */
        while( left > 0 && (uintptr_t)d % sizeof(ylib_word_t) != 0 )
        {
            *d++ = *s++;
            --left;
        }

        dstword = (ylib_word_t*)(void*)d;
        srcword = (const ylib_word_t*)(const void*)s;
        word_count = left / sizeof(ylib_word_t);
        /* Largest multiple of OUTSPREAD that fits in word_count. */
        word_outspread = word_count & ~((size_t)RESIDUE);

        /* Copy the words that fit the unrolled loop. */
        for( i = 0; i < word_outspread; i += OUTSPREAD )
        {
            CYCLE_OUTSPREAD( *dstword++ = *srcword++ )
        }

        /* Copy the remaining whole words. */
        for( ; i < word_count; ++i )
        {
            *dstword++ = *srcword++;
        }

        d = (ylib_byte_t*)dstword;
        s = (const ylib_byte_t*)srcword;
        left -= word_count * sizeof(ylib_word_t);
    }

    /* Copy the remaining bytes (or everything, on the unaligned path). */
    for( ; left > 0; --left )
    {
        *d++ = *s++;
    }

    return dst;
}
/*
 * Benchmark driver: copies a 128 MiB buffer `count` times with mmcp() and
 * then with memcpy(), printing the elapsed clock() ticks for each run.
 *
 * Returns 0 on success, EXIT_FAILURE if either buffer cannot be allocated.
 */
int main(void)
{
    const size_t size = (size_t)1024 * 1024 * 128;
    const size_t count = 30;
    clock_t t;
    size_t i;
    char *src = (char *)malloc(size);
    char *dst = (char *)malloc(size);

    if (src == NULL || dst == NULL) {
        fprintf(stderr, "malloc failed\n");
        free(src);
        free(dst);
        return EXIT_FAILURE;
    }

    /* Give the source defined contents before it is read. */
    memset(src, 0x5A, size);
    /* Warm-up copy performed before any timing starts. */
    memcpy(dst, src, size);

    /* %p with a void* argument is the only portable way to print a pointer. */
    printf("src[%p], dst[%p], start :\n", (void *)src, (void *)dst);

    t = clock();
    for (i = 0; i < count; i++) {
        mmcp(dst, src, size);
    }
    /* clock_t has no dedicated conversion specifier, so cast for %ld. */
    printf("%ld\n", (long)(clock() - t));

    t = clock();
    for (i = 0; i < count; i++) {
        memcpy(dst, src, size);
    }
    printf("%ld\n", (long)(clock() - t));

    free(dst);
    free(src);
    return 0;
}
复制代码 |
|