- 论坛徽章:
- 0
|
xml文件的中文问题
谢谢apile的指点。
出错原因:因为调用expat.xs 中的 parse_stream
而parse_stream又调用XML_ParseBuffer
而XML_ParseBuffer调用processor
而processor被初始化成prologInitProcessor
prologInitProcessor调用initializeEncoding
initializeEncoding正常应该返回XML_ERROR_NONE
而initializeEncoding调用handleUnknownEncoding
而在expat.xs中的XML_ParserCreate有 XML_SetUnknownEncodingHandler(RETVAL, unknownEncoding, 0)
而unknownEncoding正常应该返回1,的确返回了1
prologInitProcessor 调用的下一个函数是handleUnknownEncoding 函数代码如下:
/* Body of handleUnknownEncoding: if an unknown-encoding handler is
   registered, let it describe encodingName and try to build an encoding
   object from that description.  Yields XML_ERROR_NONE on success,
   XML_ERROR_NO_MEMORY if the encoding storage cannot be allocated, and
   XML_ERROR_UNKNOWN_ENCODING in every other case. */
if (unknownEncodingHandler) {
  XML_Encoding info;
  int i;
  /* Pre-fill every byte value with -1 so that entries the handler does
     not set are not left uninitialized. */
  for (i = 0; i < 256; i++)
    info.map[i] = -1;
  info.convert = NULL;
  info.data = NULL;
  info.release = NULL;
  if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
                             &info)) {
    ENCODING *enc;
    unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
    if (!unknownEncodingMem) {
      /* The handler may have handed over data; release it before failing. */
      if (info.release)
        info.release(info.data);
      return XML_ERROR_NO_MEMORY;
    }
    enc = (ns
           ? XmlInitUnknownEncodingNS
           : XmlInitUnknownEncoding)(unknownEncodingMem,
                                     info.map,
                                     info.convert,
                                     info.data);
    if (enc) {
      /* Keep data/release so they can be released later. */
      unknownEncodingData = info.data;
      unknownEncodingRelease = info.release;
      encoding = enc;
      return XML_ERROR_NONE;
    }
  }
  /* Either the handler declined or the map was rejected (enc == 0). */
  if (info.release != NULL)
    info.release(info.data);
}
return XML_ERROR_UNKNOWN_ENCODING;
执行 enc = (ns
? XmlInitUnknownEncodingNS
: XmlInitUnknownEncoding)(unknownEncodingMem,
info.map,
info.convert,
info.data);
后 enc 应该不等于 0(即非 NULL),这样才能 return XML_ERROR_NONE
实际上只有XmlInitUnknownEncoding函数存在
XmlInitUnknownEncoding(void *mem,
int *table,
CONVERTER convert,
void *userData)
{
int i;
struct unknown_encoding *e = (struct unknown_encoding *)mem;
for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
((char *)mem) = ((char *)&latin1_encoding);
for (i = 0; i < 128; i++)
if (latin1_encoding.type != BT_OTHER
&& latin1_encoding.type != BT_NONXML
&& table != i)
return 0;
for (i = 0; i < 256; i++) {
int c = table;
if (c == -1) {
e->;normal.type = BT_MALFORM;
/* This shouldn't really get used. */
e->;utf16 = 0xFFFF;
e->;utf8[0] = 1;
e->;utf8[1] = 0;
}
else if (c < 0) {
if (c < -4)
return 0;
e->;normal.type = (unsigned char)(BT_LEAD2 - (c + 2));
e->;utf8[0] = 0;
e->;utf16 = 0;
}
else if (c < 0x80) {
if (latin1_encoding.type[c] != BT_OTHER
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
return 0;
e->;normal.type = latin1_encoding.type[c];
e->;utf8[0] = 1;
e->;utf8[1] = (char)c;
e->;utf16 = (unsigned short)(c == 0 ? 0xFFFF : c);
}
else if (checkCharRefNumber(c) < 0) {
e->;normal.type = BT_NONXML;
/* This shouldn't really get used. */
e->;utf16 = 0xFFFF;
e->;utf8[0] = 1;
e->;utf8[1] = 0;
}
else {
if (c >; 0xFFFF)
return 0;
if (UCS2_GET_NAMING(nmstrtPages, c >;>; 8, c & 0xff))
e->;normal.type = BT_NMSTRT;
else if (UCS2_GET_NAMING(namePages, c >;>; 8, c & 0xff))
e->;normal.type = BT_NAME;
else
e->;normal.type = BT_OTHER;
e->;utf8[0] = (char)XmlUtf8Encode(c, e->;utf8 + 1);
e->;utf16 = (unsigned short)c;
}
}
e->;userData = userData;
e->;convert = convert;
if (convert) {
e->;normal.isName2 = unknown_isName;
e->;normal.isName3 = unknown_isName;
e->;normal.isName4 = unknown_isName;
e->;normal.isNmstrt2 = unknown_isNmstrt;
e->;normal.isNmstrt3 = unknown_isNmstrt;
e->;normal.isNmstrt4 = unknown_isNmstrt;
e->;normal.isInvalid2 = unknown_isInvalid;
e->;normal.isInvalid3 = unknown_isInvalid;
e->;normal.isInvalid4 = unknown_isInvalid;
}
e->;normal.enc.utf8Convert = unknown_toUtf8;
e->;normal.enc.utf16Convert = unknown_toUtf16;
return &(e->;normal.enc);
}
发现是因为:
for (i = 0; i < 128; i++)
  if (latin1_encoding.type[i] != BT_OTHER
      && latin1_encoding.type[i] != BT_NONXML
      && table[i] != i)
    return 0; |
|