- 论坛徽章:
- 9
|
本帖最后由 wlmqgzm 于 2015-10-08 12:32 编辑
测试了一下, 对比boost::crc32的性能, 64位系统下, 硬件处理比软件处理, 大约提高了24倍, 还不错. 硬件就是快.
我的双核G3258 大约处理性能每秒超亿字节,跟内存的读写速度基本一致.
#include <cpuid.h>
#include <nmmintrin.h>
#include <cstdint>
#include <cstring>
#include <string>
#include <boost/crc.hpp>
#include <boost/timer/timer.hpp>
#include "error_message.hpp"
#ifndef _crc32c_hpp
#define _crc32c_hpp
/// CRC calculator over the Castagnoli polynomial (0x1EDC6F41).
///
/// run() uses the SSE4.2 CRC32 instruction when the CPU reports support for
/// it and otherwise falls back to the Boost software implementation.
/// test() runs every implementation on the same buffer so that their logged
/// results and timings can be compared.
class Crc32c
{
public:
    /// Records the pointer width of this build and probes the CPU for SSE4.2.
    Crc32c();

    /// Returns the CRC of the int_length bytes starting at chars.
    unsigned int run( const char *chars, unsigned int int_length );

    /// Runs all three implementations on the buffer; results are only logged.
    void test( const char *chars, unsigned int int_length );

private:
    // NOTE: member order is significant. str_msg is used by
    // bool_cpu_is_support_sse4_2(), which the constructor calls from its body.

    // Software engine: 32 bits, initial remainder 0, final XOR 0,
    // reflected input and reflected output.
    boost::crc_optimal<32, 0x1EDC6F41, 0, 0, true, true> boost_crc1;

    bool is_support_sse4_2;   // CPUID leaf 1 reported the SSE4.2 bit
    bool is_run_64;           // pointers are 8 bytes wide in this build
    std::string str_msg;      // scratch buffer for log messages

    // Hardware CRC, one byte per instruction.
    unsigned int intel_crc32c_u8( const char *chars, unsigned int int_length );

    // Hardware CRC, eight bytes per instruction plus a byte-wise tail.
    unsigned int intel_crc32c_u64( const char *chars, unsigned int int_length );

    // Queries CPUID for the SSE4.2 feature bit.
    bool bool_cpu_is_support_sse4_2();

    // Software CRC through boost_crc1.
    unsigned int boost_crc32c_u8( const char *chars, unsigned int int_length );
};
/// Records whether this is a 64-bit build and whether the CPU offers SSE4.2.
///
/// Defined `inline` because this definition lives in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline Crc32c::Crc32c()
{
    // A pointer is 8 bytes wide in a 64-bit build and 4 bytes in a 32-bit one.
    is_run_64 = ( sizeof(char *) == 8 );

    // Deliberately assigned in the body rather than a member-initializer
    // list: the probe writes to str_msg, which is declared after this flag
    // and therefore must already be constructed when the probe runs.
    is_support_sse4_2 = bool_cpu_is_support_sse4_2();
}
/// Computes the CRC of a buffer with the SSE4.2 CRC32 instruction, feeding
/// it one byte at a time.
///
/// The accumulator starts at 0 and no final XOR is applied, which matches
/// the parameters of the Boost engine declared in the class.
///
/// @param chars       first byte of the buffer
/// @param int_length  number of bytes to process
/// @return the accumulated CRC; it is also logged through log_message_str
///
/// Requires a CPU with SSE4.2. Defined `inline` because the definition lives
/// in a header and would otherwise be multiply defined at link time.
inline unsigned int Crc32c::intel_crc32c_u8( const char *chars, unsigned int int_length )
{
    unsigned int int_crc = 0;
    {
        // Scoped so that the timer's lifetime covers only the CRC loop.
        boost::timer::auto_cpu_timer a1;
        const char *const p_end = chars + int_length;
        for( const char *p = chars; p < p_end; ++p ) {
            // The intrinsic takes an unsigned byte; make the conversion explicit.
            int_crc = _mm_crc32_u8( int_crc, static_cast<unsigned char>(*p) );
        }
    }
    str_msg = "intel crc32c_u8=";
    str_msg += std::to_string(int_crc);
    log_message_str(str_msg,5) ;
    return int_crc;
}
/// Computes the CRC of a buffer with the SSE4.2 CRC32 instruction, consuming
/// eight bytes per instruction where possible and finishing the remaining
/// 0..7 bytes one at a time.
///
/// The accumulator starts at 0 and no final XOR is applied.
///
/// @param chars       first byte of the buffer (no alignment requirement)
/// @param int_length  number of bytes to process
/// @return the accumulated CRC; it is also logged through log_message_str
///
/// Requires a CPU with SSE4.2. The 8-byte fast path is compiled only for
/// 64-bit x86 targets, the only ones that provide _mm_crc32_u64; elsewhere
/// the whole buffer goes through the byte-wise loop.
/// Defined `inline` because the definition lives in a header.
inline unsigned int Crc32c::intel_crc32c_u64( const char *chars, unsigned int int_length )
{
    unsigned int int_crc = 0;
    {
        // Scoped so that the timer's lifetime covers only the CRC loops.
        boost::timer::auto_cpu_timer a1;
        unsigned int i = 0;
#if defined(__x86_64__) || defined(_M_X64)
        // std::uint64_t rather than unsigned long: long is only 4 bytes on
        // LLP64 platforms such as 64-bit Windows.
        const unsigned int word_size = sizeof(std::uint64_t);
        const unsigned int word_bytes = int_length / word_size * word_size;
        for( ; i < word_bytes; i += word_size ) {
            // memcpy performs a well-defined load from a possibly unaligned
            // char buffer; compilers turn it into a single move.
            std::uint64_t word;
            std::memcpy( &word, chars + i, word_size );
            // The instruction leaves the 32-bit CRC in the low half.
            int_crc = static_cast<unsigned int>( _mm_crc32_u64( int_crc, word ) );
        }
#endif
        // Tail: feed each remaining byte (chars[i], not the pointer itself).
        for( ; i < int_length; ++i ) {
            int_crc = _mm_crc32_u8( int_crc, static_cast<unsigned char>(chars[i]) );
        }
    }
    str_msg = "intel crc32c_u64=";
    str_msg += std::to_string(int_crc);
    log_message_str(str_msg,5) ;
    return int_crc;
}
/* 判断处理器是否支持SSE4.2,应采取如下方法: true if CPUID.01H:ECX.SSE4_2[bit 20] = 1
eax == 1,则在eax中返回Family/Model/Stepping等信息, 在EBX ECX和EDX返回一些信息
* ECX[0] SSE3
* ECX[3] MONITOR/MWAIT
* ECX[4] CPL Qualified Debug Store
* ECX[5] VMX
* ECX[7] EST, Enhanced SpeedStep Technology
* ECX[8] TM2, Thermal Monitor2
* ECX[9] SSSE3, Supplemental SSE3
* ECX[10] CNXT-ID, L1 Context ID. 如果为1,则说明L1 D-Cache可以设置为 自适应(adaptive)模式
* 或shared模式. 为0则说明不支持。
* ECX[13] CMPXCHG16B. 如果为1则说明支持该指令。
* ECX[14] xTPR Update Control. 为1则说明支持改变MSR寄存器IA32_MISC_ENABLES[23]的值
* ECX[16] PDCM, Perfmon and Debug Capability,1说明支持MSR寄存器IA32_PERF_CAPABILITIES
* ECX[21] x2APIC. 并且,如果该bit为1,则需要使eax==0xb && ecx==0,执行cpuid,如果ebx!=0,则CPU 支持extended topology enumeration leaf
// define ecx
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_LZCNT (1 << 5)
#define bit_SSSE3 (1 << 9)
#define bit_FMA (1 << 12)
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
#define bit_MOVBE (1 << 22)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
#define bit_F16C (1 << 29)
#define bit_RDRND (1 << 30) */
bool Crc32c::bool_cpu_is_support_sse4_2(void)
{
unsigned int eax, ebx, ecx, edx;
// 64位CPU都支持 CPUID指令
asm volatile ("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "0"(1) );
str_msg = std::to_string(ecx) ;
log_message_str(str_msg,5);
if(ecx & bit_SSE4_2) {
log_message("cpu support SSE4.2",5);
return true;
}
return false;
}
/// Computes the CRC of a buffer in software with the Boost engine member.
///
/// @param chars       first byte of the buffer
/// @param int_length  number of bytes to process
/// @return the CRC of exactly this buffer; it is also logged through
///         log_message_str
///
/// Defined `inline` because the definition lives in a header.
inline unsigned int Crc32c::boost_crc32c_u8( const char *chars, unsigned int int_length )
{
    unsigned int int_crc;
    {
        // Scoped so that the timer's lifetime covers only the CRC work.
        boost::timer::auto_cpu_timer a1;
        // The engine is a long-lived member and keeps its remainder between
        // calls. Reset it first, otherwise every call after the first would
        // return the CRC of all data ever fed in rather than of this buffer.
        boost_crc1.reset();
        boost_crc1.process_bytes(chars, int_length);
        int_crc = boost_crc1.checksum();
    }
    // A local message buffer is used here; the str_msg member is left untouched.
    std::string str_msg = "boost crc32c=";
    str_msg += std::to_string(int_crc);
    log_message_str(str_msg,5) ;
    return int_crc;
}
/// Computes the CRC of a buffer with the fastest implementation available.
///
/// @param chars       first byte of the buffer
/// @param int_length  number of bytes to process
/// @return the CRC produced by the selected implementation
///
/// Defined `inline` because the definition lives in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline unsigned int Crc32c::run( const char *chars, unsigned int int_length )
{
    // No SSE4.2 on this CPU: fall back to the software implementation.
    if( !is_support_sse4_2 ) {
        return boost_crc32c_u8( chars, int_length );
    }

    // 64-bit build: use the 8-bytes-per-instruction path. The original
    // author's testing found it works even when the buffer is not 8-byte
    // aligned.
    if( is_run_64 ) {
        return intel_crc32c_u64( chars, int_length );
    }

    // 32-bit build: plain byte-at-a-time hardware CRC. No further tuning is
    // attempted because 32-bit targets are considered obsolete.
    return intel_crc32c_u8( chars, int_length );
}
/// Runs the software, byte-wise hardware and 8-byte hardware implementations
/// on the same buffer, in that order. Return values are discarded; each
/// implementation logs its own result.
///
/// The hardware paths are invoked unconditionally, with no check of the
/// SSE4.2 flag, so this must only be called on a CPU that supports SSE4.2.
///
/// @param chars       first byte of the buffer
/// @param int_length  number of bytes to process
///
/// Defined `inline` because the definition lives in a header.
inline void Crc32c::test( const char *chars, unsigned int int_length )
{
    boost_crc32c_u8( chars, int_length );
    intel_crc32c_u8( chars, int_length );
    intel_crc32c_u64( chars, int_length );
}
#endif // _crc32c_hpp
|
|