- 论坛徽章:
- 0
|
编码转换的kiconv 好象最早出现在 netbsd 的哦。不过还是使用UTF-8比较方便。假如有测试环境的话,倒是可以写个UTF-8支持的patch。
- diff -rupd src/sys/fs/msdosfs/direntry.h ../src/sys/fs/msdosfs/direntry.h
- --- src/sys/fs/msdosfs/direntry.h Sun Dec 4 01:34:44 2005
- +++ ../src/sys/fs/msdosfs/direntry.h Wed Nov 11 20:17:28 2009
- @@ -133,6 +133,7 @@ int unix2winfn(const u_char *un, int unl
- int winChkName(const u_char *un, int unlen, struct winentry *wep,
- int chksum);
- int win2unixfn(struct winentry *wep, struct dirent *dp, int chksum);
- +int fn_utf8_conv(struct dirent *dp, int chksum);
- u_int8_t winChksum(u_int8_t *name);
- int winSlotCnt(const u_char *un, int unlen);
- #endif /* _KERNEL */
- diff -rupd src/sys/fs/msdosfs/msdosfs_conv.c ../src/sys/fs/msdosfs/msdosfs_conv.c
- --- src/sys/fs/msdosfs/msdosfs_conv.c Mon Mar 16 01:15:58 2009
- +++ ../src/sys/fs/msdosfs/msdosfs_conv.c Wed Nov 11 20:34:24 2009
- @@ -703,7 +703,7 @@ win2unixfn(struct winentry *wep, struct
- u_int8_t *np, *ep = dp->d_name + WIN_MAXLEN;
- int i;
-
- - if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
- + if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, (WIN_CHARS*2))
- || !(wep->weCnt&WIN_CNT))
- return -1;
-
- @@ -725,8 +725,16 @@ win2unixfn(struct winentry *wep, struct
- * Offset of this entry
- */
- i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
- - np = (u_int8_t *)dp->d_name + i;
- + np = (u_int8_t *)dp->d_name + i*2;
-
- + memcpy(np, wep->wePart1, sizeof(wep->wePart1));
- + np += sizeof(wep->wePart1);
- + memcpy(np, wep->wePart2, sizeof(wep->wePart2));
- + np += sizeof(wep->wePart2);
- + memcpy(np, wep->wePart3, sizeof(wep->wePart3));
- + np += sizeof(wep->wePart3);
- +
- +#ifdef UTF8_SUPPORT
- /*
- * Convert the name parts
- */
- @@ -793,6 +801,7 @@ win2unixfn(struct winentry *wep, struct
- if (*cp++)
- return -1;
- }
- +#endif
- return chksum;
- }
-
- @@ -822,4 +831,143 @@ winSlotCnt(const u_char *un, int unlen)
- if (unlen > WIN_MAXLEN)
- return 0;
- return howmany(unlen, WIN_CHARS);
- +}
- +
- +static int iconv_u2w(const char **inbuf, size_t *inbytes,
- + char **outbuf, size_t *outbytes)
- +{
- + u_int8_t mark;
- + u_int16_t uc = 0;
- + char * obuf = NULL;
- + const char *ibuf, *ibuf_end, *obuf_end;
- + if ((inbuf&&inbytes&&outbuf&&outbytes)
- + && (*inbuf&&*inbytes&&*outbuf&&*outbytes)){
- + ibuf = *inbuf;
- + ibuf_end = *inbuf+*inbytes;
- + obuf = *outbuf;
- + obuf_end = *outbuf+*outbytes;
- + int follow = 0;
- + while(ibuf<ibuf_end && &obuf[1]<obuf_end){
- + mark = (u_int8_t)*ibuf++;
- + if (mark<0xF0 && mark>0xE0){
- + /* 1110XXXX */
- + uc = mark&0x0F;
- + follow = 2;
- + }else if (mark<0xE0 && mark>0xC0){
- + /* 110XXXXX */
- + uc = mark&0x1F;
- + follow = 1;
- + }else if (mark<0x80){
- + /* 0XXXXXXX */
- + uc = mark;
- + follow = 0;
- + }else{
- + /* convert fail: 0xF0 0xE0 should NOT in UTF-8 seq */
- + printf("convert fail 0xF0 0xE0\n");
- + break;
- + }
- + if (&ibuf[follow] > ibuf_end){
- + /* unexpect input end */
- + break;
- + }
- + for (; follow>0; follow--){
- + /* 10XX.XXXX 0x80-0xBF*/
- + if ((*ibuf&0xC0) != 0x80){
- + *outbytes = obuf_end - *outbuf;
- + *inbytes = ibuf_end - *inbuf;
- + printf("convert fail SEQ\n");
- + return 0;
- + }
- + uc = (uc<<6)|(*ibuf++&0x3F);
- + }
- + *obuf++ = (uc>>8);
- + *obuf++ = uc;
- + *outbuf = obuf;
- + *inbuf = ibuf;
- + }
- + *outbytes = obuf_end - *outbuf;
- + *inbytes = ibuf_end - *inbuf;
- + }
- + return 0;
- +}
- +
- +static int iconv_w2u(const char **inbuf, size_t *inbytes,
- + char **outbuf, size_t *outbytes)
- +{
- + u_int16_t uc = 0;
- + char *obuf = NULL;
- + const char *ibuf, *ibuf_end, *obuf_end;
- + if ((inbuf&&inbytes&&outbuf&&outbytes)
- + && (*inbuf&&*inbytes&&*outbuf&&*outbytes)){
- + ibuf = *inbuf;
- + ibuf_end = *inbuf+*inbytes;
- + obuf = *outbuf;
- + obuf_end = *outbuf+*outbytes;
- + int follow = 0;
- + while(&ibuf[1]<ibuf_end && obuf<obuf_end){
- + uc = (0xFF&*ibuf++);
- + uc = (0xFF&*ibuf++)|(uc<<8);
- + if (uc < 0x80){
- + *obuf++ = (uc);
- + follow = 0;
- + }else if (uc < 0x800){
- + *obuf++ = (uc>>6)|0xC0;
- + follow = 1;
- + }else {
- + /* assert(uc<=0xFFFF); */
- + *obuf++ = (uc>>12)|0xE0;
- + follow = 2;
- + }
- + if (&obuf[follow] > obuf_end){
- + /*no output buffer */
- + break;
- + }
- + for (follow--;follow>=0;follow--){
- + int shift = follow*6;
- + u_int8_t ch = uc>>shift;
- + *obuf++ = (ch&0x3F)|0x80;
- + }
- + *outbuf = obuf;
- + *inbuf = ibuf;
- + }
- + *outbytes = obuf_end - *outbuf;
- + *inbytes = ibuf_end - *inbuf;
- + }
- + return 0;
- +}
- +
- +int fn_utf8_conv(struct dirent *dp, int chksum)
- +{
- + size_t ucs2len, utf8len;
- + u_int8_t *ucs2buf, *utf8buf, *cp;
- +
- + u_int8_t buff[WIN_MAXLEN];
- +
- + utf8buf = buff;
- + utf8len = sizeof(buff)-1;
- +
- + ucs2buf = dp->d_name;
- + ucs2len = dp->d_namlen;
- +
- + iconv_w2u(&ucs2buf, &ucs2len, &utf8buf, &utf8len);
- +
- + /* utf-8 name too long, just return */
- + if (ucs2len > 0)
- + return -1;
- +
- + cp = dp->d_name;
- + /* notice: WIN_MAXLEN < sizeof(dp->d_name), so sizeof(buff)-utf8len+1 < sizeof(dp->d_name) */
- + for (utf8buf=buff; utf8len<sizeof(buff)-1; utf8len++) {
- + switch (*cp++ = *utf8buf++) {
- + case '0':
- + dp->d_namlen = (cp-dp->d_name);
- + return chksum;
- + case '/':
- + *(cp-1) = 0;
- + return -1;
- + }
- + }
- +
- + dp->d_namlen = (cp-dp->d_name);
- + return chksum;
- }
- diff -rupd src/sys/fs/msdosfs/msdosfs_vnops.c ../src/sys/fs/msdosfs/msdosfs_vnops.c
- --- src/sys/fs/msdosfs/msdosfs_vnops.c Sat Jul 4 05:17:40 2009
- +++ ../src/sys/fs/msdosfs/msdosfs_vnops.c Wed Nov 11 20:16:04 2009
- @@ -1634,6 +1634,7 @@ msdosfs_readdir(void *v)
- offset / sizeof(struct direntry);
- dirbuf->d_type = DT_REG;
- }
- + chksum = fn_utf8_conv(dirbuf, chksum);
- if (chksum != winChksum(dentp->deName))
- dirbuf->d_namlen = dos2unixfn(dentp->deName,
- (u_char *)dirbuf->d_name,
复制代码
打上这个补丁,应该可以看到中文文件名(当然了不能写入)。netbsd_msdosfs_utf8_patch.diff
[ 本帖最后由 pagx 于 2009-11-11 23:26 编辑 ] |
|