- 论坛徽章:
- 0
|
这是最底层函数,基于GB18030,判断首字符的字节数。
/******************************************************************************
 Function: GetCharSize
 Description:
   Returns the size, in bytes, of the leading character of a GB18030-encoded
   data string.
 Input:
   Data - string whose first character is measured (may be NULL)
   Size - number of bytes available in Data
 Return:
   -1  invalid or truncated multi-byte character
    0  null or empty string (Data == NULL or Size <= 0)
   >0  character size in bytes (1, 2 or 4)
 Note:
   Any lead byte outside 0x81-0xFE (this includes 0x80 and 0xFF) is reported
   as a one-byte character without further validation.
******************************************************************************/
int GetCharSize( const char *Data, int Size )
{
    const unsigned char *p = (const unsigned char *) Data;

    /* check arguments */
    if ( p == NULL || Size <= 0 )
        return 0;

    /* multi-byte lead byte is 0x81-0xFE; everything else is one byte */
    if ( p[0] < 0x81 || p[0] > 0xFE )
        return 1;

    /* a multi-byte code is 2 or 4 bytes long, so at least 2 are required */
    if ( Size < 2 )
        return -1;

    /* 2nd byte 0x30-0x39 selects the 4-byte form;
       0x40-0x7E and 0x80-0xFE select the 2-byte form */
    {
        const int isFourByteMark = ( p[1] >= 0x30 && p[1] <= 0x39 );
        const int isTwoByteTrail = ( p[1] >= 0x40 && p[1] <= 0xFE
                                     && p[1] != 0x7F );

        if ( !isFourByteMark && !isTwoByteTrail )
            return -1;

        if ( isTwoByteTrail )
            return 2;
    }

    /* 4-byte code: all four bytes must be present */
    if ( Size < 4 )
        return -1;

    /* 3rd byte 0x81-0xFE */
    if ( p[2] < 0x81 || p[2] > 0xFE )
        return -1;

    /* 4th byte 0x30-0x39 */
    if ( p[3] < 0x30 || p[3] > 0x39 )
        return -1;

    return 4;
}
复制代码 |
|