- 论坛徽章:
- 0
|
惟吾无为 发表于 2012-06-18 23:19 ![]()
转为utf-8不就得了,gbk不是国际标准,别指望外国人解决。
是啊,要转码,不过我转了几次都没成功:
- iconv_t iconv_utf8_gbk;
- iconv_t iconv_gbk_utf8;
- int gbk_input (unsigned char *out, int *outlen, const unsigned char *in,
- int *inlen)
- {
- char *outbuf = (char *) out;
- char *inbuf = (char *) in;
- size_t rslt;
- rslt = iconv(iconv_utf8_gbk, ( char **)&inbuf, (size_t *) inlen,( char**)&outbuf, (size_t *) outlen);
- if (rslt < 0)
- return rslt;
- *outlen = ((unsigned char *) outbuf - out);
- *inlen = ((unsigned char *) inbuf - in);
- return *outlen;
- }
- int gbk_output (unsigned char *out,
- int *outlen, const unsigned char *in, int *inlen)
- {
- char *outbuf = (char *) out;
- char *inbuf = (char *) in;
- size_t rslt;
- rslt =
- iconv (iconv_gbk_utf8, (char **) &inbuf, (size_t *) inlen,
- &outbuf, (size_t *) outlen);
- if (rslt < 0)
- return rslt;
- *outlen = ((unsigned char *) outbuf - out);
- *inlen = ((unsigned char *) inbuf - in);
- return *outlen;
- }
/**
 * Replaces every non-overlapping occurrence of `old_value` in `str` with
 * `new_value`, editing `str` in place. The search resumes just past each
 * inserted replacement, so text introduced by `new_value` is never rescanned.
 *
 * @param str        string to modify
 * @param old_value  substring to look for; when empty, `str` is left untouched
 * @param new_value  replacement text (may be empty to delete occurrences)
 * @return reference to `str`
 */
string& replace_all_distinct(string& str, const string& old_value, const string& new_value)
{
    // An empty pattern matches at every position, so replacing it would keep
    // inserting forever (or spin in place when new_value is empty too).
    if (old_value.empty())
        return str;

    string::size_type pos = 0;
    while ((pos = str.find(old_value, pos)) != string::npos) {
        str.replace(pos, old_value.length(), new_value);
        pos += new_value.length();  // skip the text we just inserted
    }
    return str;
}
- int main(int argc, const char** argv)
- {
- xmlDocPtr doc;
- xmlNodePtr curNode;
- xmlChar *pszContent;
- string docName = argv[1];
- iconv_utf8_gbk = iconv_open("utf-8", "gbk");
- iconv_gbk_utf8 = iconv_open("gbk", "utf-8");
- xmlNewCharEncodingHandler("gb2312", gbk_input, gbk_output);//添加gb2312编码支持
- xmlNewCharEncodingHandler("gbk", gbk_input, gbk_output);//添加gbk编码支持
- //doc = xmlReadFile(docName.c_str(), "GBK", XML_PARSE_NOBLANKS);
- //doc = xmlParseFile(docName.c_str());
- doc = xmlReadFile(docName.c_str(), "gbk", 0);
- //doc = xmlReadFile(docName.c_str(), "GBK", XML_PARSE_RECOVER);
- if (NULL == doc)
- {
- cerr << "xmlReadFile failed!" << endl;
- return -1;
- }
- //获取doc根节点
- curNode = xmlDocGetRootElement(doc);
- if (NULL == curNode)
- {
- cerr << "xmlDocGetRootElement failed!" << endl;
- xmlFreeDoc(doc);
- return -2;
- }
- cout << "root node name = " << curNode->name << endl;
- curNode = curNode->xmlChildrenNode;
- while (xmlStrcmp(curNode->name, (const xmlChar*)"docset"))
- {
- curNode = curNode->next;
- if (NULL == curNode)
- {
- cerr << "this xmldoc hasn't docset node!!!" << endl;
- return -4;
- }
- }
- //get docele node
- curNode = curNode->xmlChildrenNode;
- xmlNodePtr doceleNode;
- doceleNode = curNode;
- xmlAttrPtr attrPtr;
- xmlChar* pszAttr;
- while (NULL != doceleNode && !xmlStrcmp(doceleNode->name, (const xmlChar*)"docele"))
- {
- //get docitem node
- curNode = doceleNode->xmlChildrenNode;
- while (NULL != curNode && !xmlStrcmp(curNode->name, (const xmlChar*)"docitem"))
- {
- //get node properties value
- attrPtr = curNode->properties;
- if (!xmlStrcmp(attrPtr->name, (const xmlChar*)"key"))
- {
- pszAttr = xmlGetProp(curNode, BAD_CAST "key");
- }
- //get node content value
- pszContent =xmlNodeGetContent(curNode);
- //ConvertEnc("utf-8", "gb2312", (char *)xmlNodeGetContent(curNode));
- cout << pszAttr << " = " << pszContent << endl;
- //get next docitem node
- curNode = curNode->next;
- }
- //get next docele node
- doceleNode = doceleNode->next;
- }
- #if 0
- //get resultset node child node!!!
- curNode = curNode->xmlChildrenNode;
- //find docset node!!!
- while (xmlStrcmp(curNode->name, (const xmlChar*)"docset"))
- {
- curNode = curNode->next;
- }
- cout << "docset node name = " << curNode->name << endl;
- //get docele node!!!
- curNode = curNode->children;
- cout << "decele node name = " << curNode->name << endl;
- if (xmlStrcmp(curNode->name, BAD_CAST "docele"))
- {
- cerr << "xmlStrcmp failed!" << endl;
- xmlFreeDoc(doc);
- return -3;
- }
- curNode = curNode->xmlChildrenNode;
复制代码 |
|