- 论坛徽章:
- 0
|
为啥不用libiconv呢?
下面是 UCS-2BE(近似 UTF-16BE,仅覆盖基本多文种平面 U+0000–U+FFFF,不处理代理对)与 UTF-8 之间互相转换的代码。
/*
 * Convert UTF-8 text to big-endian UCS-2 (one 16-bit unit per character).
 *
 * The calling convention mirrors iconv(): all four arguments are in/out.
 *   inbuf    - address of the read cursor; advanced past every character
 *              that was fully converted.
 *   inbytes  - bytes available at *inbuf; on return, bytes left unconverted.
 *   outbuf   - address of the write cursor; advanced past the bytes written.
 *   outbytes - free bytes at *outbuf; on return, free bytes remaining.
 *
 * Only 1-, 2- and 3-byte UTF-8 sequences (U+0000..U+FFFF) are handled.
 * Conversion stops, leaving the cursors just after the last good character,
 * when:
 *   - the input is exhausted or fewer than two output bytes remain;
 *   - a byte that cannot start a 1..3-byte sequence is met
 *     (0x80..0xC1 or 0xF0..0xFF);
 *   - a sequence is truncated, has a bad continuation byte, or is an
 *     overlong encoding.
 * Diagnostics for malformed input are printed with printf().
 *
 * If any pointer is NULL or either byte count is zero, nothing is touched.
 *
 * Returns 0 in every case; callers detect an early stop by checking whether
 * *inbytes is still non-zero.
 */
static int utf8toucs2(const char **inbuf, size_t *inbytes,
                      char **outbuf, size_t *outbytes)
{
    const char *src, *src_end;
    char *dst, *dst_end;

    if (!inbuf || !inbytes || !outbuf || !outbytes ||
        !*inbuf || !*inbytes || !*outbuf || !*outbytes) {
        return 0;
    }

    src = *inbuf;
    src_end = src + *inbytes;
    dst = *outbuf;
    dst_end = dst + *outbytes;

    /* Every converted character needs exactly two output bytes. */
    while (src < src_end && dst_end - dst >= 2) {
        const char *p = src;            /* look-ahead; committed only on success */
        unsigned char lead = (unsigned char)*p++;
        unsigned int uc;                /* code point being assembled */
        unsigned int min_uc;            /* smallest value legal for this length */
        int follow;                     /* continuation bytes still expected */
        int bad_seq = 0;

        if (lead < 0x80) {
            /* 0XXXXXXX */
            uc = lead;
            min_uc = 0;
            follow = 0;
        } else if (lead >= 0xC2 && lead <= 0xDF) {
            /* 110XXXXX; 0xC0/0xC1 could only start overlong forms */
            uc = lead & 0x1Fu;
            min_uc = 0x80;
            follow = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            /* 1110XXXX; 0xE0 is a valid lead byte (U+0800..U+0FFF) */
            uc = lead & 0x0Fu;
            min_uc = 0x800;
            follow = 2;
        } else {
            /* stray continuation byte, overlong lead, or 4-byte lead */
            printf("convert fail 0xF0 0xE0\n");
            break;
        }

        if (src_end - p < follow) {
            /* unexpected end of input in the middle of a sequence */
            break;
        }

        for (; follow > 0; follow--) {
            unsigned char c = (unsigned char)*p;

            /* continuation bytes are 10XXXXXX (0x80..0xBF) */
            if ((c & 0xC0) != 0x80) {
                bad_seq = 1;
                break;
            }
            uc = (uc << 6) | (c & 0x3Fu);
            p++;
        }

        /* uc < min_uc catches overlong forms such as E0 80 80 */
        if (bad_seq || uc < min_uc) {
            printf("convert fail SEQ\n");
            break;
        }

        /* big-endian: high byte first */
        *dst++ = (char)(uc >> 8);
        *dst++ = (char)(uc & 0xFFu);
        src = p;
    }

    *inbuf = src;
    *outbuf = dst;
    *inbytes = (size_t)(src_end - src);
    *outbytes = (size_t)(dst_end - dst);
    return 0;
}
/*
 * Convert big-endian UCS-2 (two bytes per character) to UTF-8.
 *
 * The calling convention mirrors iconv(): all four arguments are in/out.
 *   inbuf    - address of the read cursor; advanced by two bytes for every
 *              character converted.
 *   inbytes  - bytes available at *inbuf; on return, bytes left unconverted
 *              (a trailing odd byte is never consumed).
 *   outbuf   - address of the write cursor; advanced past the bytes written.
 *   outbytes - free bytes at *outbuf; on return, free bytes remaining.
 *
 * Each 16-bit value becomes a 1-, 2- or 3-byte UTF-8 sequence. Conversion
 * stops when fewer than two input bytes remain or when the next sequence
 * does not fit completely in the output; in the latter case nothing of that
 * sequence is written, so the buffer never holds a partial character.
 *
 * If any pointer is NULL or either byte count is zero, nothing is touched.
 *
 * Returns 0 in every case; callers detect an early stop by checking whether
 * *inbytes is still non-zero.
 */
static int cs2toutf8(const char **inbuf, size_t *inbytes,
                     char **outbuf, size_t *outbytes)
{
    const char *src, *src_end;
    char *dst, *dst_end;

    if (!inbuf || !inbytes || !outbuf || !outbytes ||
        !*inbuf || !*inbytes || !*outbuf || !*outbytes) {
        return 0;
    }

    src = *inbuf;
    src_end = src + *inbytes;
    dst = *outbuf;
    dst_end = dst + *outbytes;

    while (src_end - src >= 2) {
        /* big-endian: high byte first */
        unsigned int uc = ((unsigned int)(unsigned char)src[0] << 8)
                        | (unsigned int)(unsigned char)src[1];
        size_t need = (uc < 0x80) ? 1 : (uc < 0x800) ? 2 : 3;

        /* Check room for the whole sequence before writing any of it. */
        if ((size_t)(dst_end - dst) < need) {
            break;
        }

        if (need == 1) {
            /* 0XXXXXXX */
            *dst++ = (char)uc;
        } else if (need == 2) {
            /* 110XXXXX 10XXXXXX */
            *dst++ = (char)(0xC0u | (uc >> 6));
            *dst++ = (char)(0x80u | (uc & 0x3Fu));
        } else {
            /* 1110XXXX 10XXXXXX 10XXXXXX */
            *dst++ = (char)(0xE0u | (uc >> 12));
            *dst++ = (char)(0x80u | ((uc >> 6) & 0x3Fu));
            *dst++ = (char)(0x80u | (uc & 0x3Fu));
        }
        src += 2;
    }

    *inbuf = src;
    *outbuf = dst;
    *inbytes = (size_t)(src_end - src);
    *outbytes = (size_t)(dst_end - dst);
    return 0;
}
/*
 * Demo driver: converts a short string literal with utf8toucs2() and writes
 * the resulting bytes to file descriptor 1.
 *
 * NOTE(review): the literal is only valid UTF-8 input if this source file is
 * saved as UTF-8 — confirm the file encoding.
 *
 * Returns 0 on success, 1 if write() reports an error.
 */
int main(int argc, char *argv[])
{
    char buff[] = "中国";
    char obuff[1024];
    /* utf8toucs2() takes `const char **`; a `char **` is not compatible. */
    const char *pin = buff;
    char *pout = obuff;
    size_t inlen = strlen(buff);
    size_t outlen = sizeof(obuff);

    (void)argc;
    (void)argv;

    utf8toucs2(&pin, &inlen, &pout, &outlen);

    /* outlen now holds the unused space, so the difference is bytes produced */
    if (write(1, obuff, sizeof(obuff) - outlen) < 0) {
        return 1;
    }
    return 0;
}
复制代码
[ 本帖最后由 pagx 于 2009-5-13 18:08 编辑 ] |
|