免费注册 查看新帖 |

Chinaunix

  平台 论坛 博客 文库
最近访问板块 发新帖
楼主: sisi8408
打印 上一主题 下一主题

Kernel Bug-Vulnerability-Comment library [复制链接]

论坛徽章:
0
101 [报告]
发表于 2008-02-17 11:05 |只看该作者

  1. static void balance_leaf_shift_left(struct tree_balance *tb,
  2.                                         struct item_head *ih,
  3.                                         const char *body,
  4.                                         int flag,
  5.                                         int *p_pos_in_item,
  6.                                         int *p_item_pos,
  7.                                         int *p_zeros_num);

  8. static void balance_leaf_shift_right(struct tree_balance *tb,
  9.                                         struct item_head *ih,
  10.                                         const char *body,
  11.                                         int flag,
  12.                                         int *p_pos_in_item,
  13.                                         int *p_item_pos,
  14.                                         int *p_zeros_num);

  15. static void balance_leaf_do_split(struct tree_balance *tb,
  16.                                 struct item_head *ih,
  17.                                 const char *body,
  18.                                 int flag,
  19.                                 struct item_head *insert_key,
  20.                                 struct buffer_head **insert_ptr,
  21.                                 int *p_pos_in_item,
  22.                                 int *p_item_pos,
  23.                                 int *p_zeros_num);

  24. static void balance_leaf_do_remain(struct tree_balance *tb,
  25.                                 struct item_head *ih,
  26.                                 const char *body,
  27.                                 int flag,
  28.                                 struct item_head *insert_key,
  29.                                 struct buffer_head **insert_ptr,
  30.                                 int *p_pos_in_item,
  31.                                 int *p_item_pos,
  32.                                 int *p_zeros_num);


  33. static int balance_leaf(struct tree_balance *tb,
  34.                         struct item_head *ih,
  35.                         const char *body,
  36.                         int flag,
  37.                         struct item_head *insert_key,
  38.                         struct buffer_head **insert_ptr)
  39. {
  40.         int pos_in_item = tb->tb_path->pos_in_item;
  41.         int item_pos = PATH_LAST_POSITION(tb->tb_path);
  42.        
  43.         PROC_INFO_INC(tb->tb_sb, balance_at[0]);

  44.         if (tb->insert_size[0] < 0)
  45.                 return balance_leaf_when_delete(tb, flag);

  46.         if (flag != M_INSERT &&
  47.             is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
  48.                 pos_in_item *= UNFM_P_SIZE;

  49.         if (tb->lnum[0] > 0) /* shift left */
  50.                 balance_leaf_shift_left(tb, ih, body, flag,
  51.                                         /*insert_key, insert_ptr,*/
  52.                                         &pos_in_item, &item_pos,
  53.                                         &zeros_num);
  54.         /* compute new item position */
  55.         item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));

  56.         if (tb->rnum[0] > 0) /* shift right */
  57.                 balance_leaf_shift_right(tb, ih, body, flag,
  58.                                         /*insert_key, insert_ptr,*/
  59.                                         &pos_in_item, &item_pos,
  60.                                         &zeros_num);
  61.         /* 2008-2-17 10:00
  62.          * after shift, check sInum or do split.
  63.          */
  64.         if (tb->blknum[0] == 0) {        /* node S[0] is empty now */
  65.                 RFALSE(!tb->lnum[0] || !tb->rnum[0],
  66.                        "PAP-12190: lnum and rnum must not be zero");
  67.                 /* if insertion was done before 0-th position in R[0], right
  68.                    delimiting key of the tb->L[0]'s and left delimiting key are
  69.                    not set correctly */
  70.                 if (tb->CFL[0]) {
  71.                         if (!tb->CFR[0])
  72.                                 reiserfs_panic(tb->tb_sb,
  73.                                        "vs-12195: balance_leaf: CFR not initialized");
  74.                         copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
  75.                                  B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));

  76.                         do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
  77.                 }
  78.                 reiserfs_invalidate_buffer(tb, tbS0);
  79.                 return 0;
  80.         }
  81.         /* do SInum, where I = 1, 2 */       
  82.         balance_leaf_do_split(tb, ih, body, flag,
  83.                                 insert_key, insert_ptr,
  84.                                 &pos_in_item, &item_pos, &zeros_num);

  85.         /* if the affected item was not wholly shifted,
  86.          * then we perform all necessary operations on that part
  87.          * or whole of the affected item which remains in S
  88.          */
  89.         if (0 <= item_pos && item_pos < tb->s0num)
  90.                 balance_leaf_do_remain(tb, ih, body, flag,
  91.                                         insert_key, insert_ptr,
  92.                                         &pos_in_item, &item_pos,
  93.                                         &zeros_num);

  94. #ifdef CONFIG_REISERFS_CHECK
  95.         if (flag == M_PASTE && tb->insert_size[0]) {
  96.                 print_cur_tb("12290");
  97.                 reiserfs_panic(tb->tb_sb,
  98.                        "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
  99.                        tb->insert_size[0]);
  100.         }
  101. #endif
  102.         return 0;
  103. } /* Leaf level of the tree is balanced (end of balance_leaf) ,hehe */

复制代码

论坛徽章:
0
102 [报告]
发表于 2008-02-17 13:47 |只看该作者

  1. /*
  2. * Key of an item determines its location in the S+tree, and
  3. * is composed of 4 components
  4. */
  5. struct reiserfs_key
  6. {
  7.         __le32 k_dir_id; /* packing locality: by default parent directory object id */
  8.         __le32 k_objectid; /* object identifier */
  9.         union {
  10.                 struct offset_v1        k_offset_v1;
  11.                 struct offset_v2        k_offset_v2;
  12.         } __attribute__ ((__packed__)) u;
  13. } __attribute__ ((__packed__));

  14. #define KEY_SIZE        (sizeof(struct reiserfs_key))

  15. struct in_core_key
  16. {
  17.         __u32 k_dir_id;
  18.         __u32 k_objectid;
  19.         __u64 k_offset;
  20.         __u8  k_type;
  21. };
复制代码


By definition, the basic element of Stree, the item, may be understood as two parts:
o node{dir-id, obj-id}, corresponding to a physical disk block,
                        which is, as in the case of ext2,
                        managed in groups by bitmap.

o item-entry{type, off}, due to the fact that the inode, one of the hot ideas in FS,
                         is understood by HR, I guess, as a set of SD, DE, IND and DIR,
                         the so-called four types of item.

To locate any item in Stree, I first have to compute the node,
which is in a 64-bit space in total; in other words, RFS is able to address 2^64 inodes,
as shown by search_by_key,

  1.         while (reada_count < SEARCH_BY_KEY_READA) {
  2.                 if (pos == limit)
  3.                         break;
  4.                                
  5.                 reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
  6.                                
  7.                 if (p_s_search_path->reada & PATH_READA_BACK)
  8.                         pos--;
  9.                 else
  10.                         pos++;
  11.                 /*
  12.                  * check to make sure we're in the same object
  13.                  */
  14.                 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
  15.                 if (le32_to_cpu(le_key->k_objectid) != p_s_key->on_disk_key.k_objectid)
  16.                         break;
  17.         }
复制代码

or 2^64 disk blocks, which has nothing to do with block_sz.

The power of Stree to manage 2^64 inodes is guaranteed by the principles of the B-tree,
I guess, and in certain cases a B-tree is as simple a game as,


  1. /* linux-2.6.23.12/drivers/net/ppp_generic.c
  2. *
  3. * A cardmap represents a mapping from unsigned integers to pointers,
  4. * and provides a fast "find lowest unused number" operation.
  5. *
  6. * It uses a broad (32-way) tree with a bitmap at each level.
  7. * It is designed to be space-efficient for small numbers of entries
  8. * and time-efficient for large numbers of entries.
  9. */
  10. #define CARDMAP_ORDER        5
  11. #define CARDMAP_WIDTH        (1U << CARDMAP_ORDER)
  12. #define CARDMAP_MASK        (CARDMAP_WIDTH - 1)

  13. struct cardmap
  14. {
  15.         int shift;
  16.         unsigned long inuse;
  17.         struct cardmap *parent;
  18.         void *ptr[CARDMAP_WIDTH];
  19. };
复制代码


A more amusing case, LC-tries playing the game in a 32-bit space,
can be found in linux-2.6.23.12/net/ipv4/fib_trie.c.

As learned from RFS, an x-tree may, if defined and played in a nice way,
replace a hash table in general cases.

What is more, even in the kingdom of an inode, say a regular file,
the game is again played in a 64-bit space,
and in summary RFS addresses a 128-bit space,
though the Stree height is confined to 5.

Unlike ext2, I guess, the ops in Stree focus not on the inode but on the item,
as shown by,

  1.         if (paste_entry_position == 0) {
  2.                 /* change delimiting keys */
  3.                 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
  4.         }
复制代码


and in general, for a certain inode, or file if you like, if it has N items,
there are, accordingly, N paths in Stree to locate each item;
of course, certain nodes, at least the root, can be shared by multiple paths.

Since binary search is employed in Stree, keys are ordered in a 128-bit space,
and that may be, I guess, the core reason why RFS is faster in reading,
as shown by the get_lkey/get_rkey functions.

In writing (insert/paste/delete/cut), it is hard to understand what HR is doing,
especially on the leaf level, though fix_node.c is mainly there to teach do_balance.c
what to do upon shifting and packing.

[ 本帖最后由 sisi8408 于 2008-2-17 19:29 编辑 ]

论坛徽章:
0
103 [报告]
发表于 2008-02-17 19:22 |只看该作者

  1. int search_for_position_by_key(struct super_block *p_s_sb,
  2.                            const struct cpu_key *p_cpu_key,
  3.                            struct treepath *p_s_search_path)
  4. {
  5. [...]
  6.         /* Item not found.
  7.          * Set path to the previous item.
  8.          */
  9.         p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
  10.                                  --PATH_LAST_POSITION(p_s_search_path));
  11.         /*
  12.          *
  13.         if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key))
  14.                 return FILE_NOT_FOUND;

  15.         //still same inode
  16.         //2.6.23.12
  17.         //
  18.         n_blk_size = p_s_sb->s_blocksize;
  19.          *
  20.          */
  21.         n_blk_size = p_s_sb->s_blocksize;

  22.         if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key))
  23.                 return FILE_NOT_FOUND;
  24. [...]
  25. }
复制代码

论坛徽章:
0
104 [报告]
发表于 2008-02-18 20:25 |只看该作者

  1. int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
  2.                          struct treepath *p_s_path,        /* Path to the deleted item. */
  3.                          const struct cpu_key *p_s_item_key,        /* Key to search for the deleted item.  */
  4.                          struct inode *p_s_inode,        /* inode is here just to update i_blocks and quotas */
  5.                          struct buffer_head *p_s_un_bh) /* NULL or unformatted node pointer.    */
  6. {
  7.         struct super_block *p_s_sb = p_s_inode->i_sb;
  8.         struct tree_balance s_del_balance;
  9.         struct item_head s_ih;
  10.         struct item_head *q_ih;
  11.         int quota_cut_bytes;
  12.         int n_ret_value, n_del_size, n_removed;

  13. #ifdef CONFIG_REISERFS_CHECK
  14.         char c_mode;
  15.         int n_iter = 0;
  16. #endif

  17.         BUG_ON(!th->t_trans_id);

  18.         init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path,
  19.                        0 /*size is unknown */);
  20.         while(1) {
  21.                 n_removed = 0;

  22. #ifdef CONFIG_REISERFS_CHECK
  23.                 n_iter++;
  24.                 c_mode =
  25. #endif
  26.                 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
  27.                                               p_s_item_key, &n_removed,
  28.                                               &n_del_size,
  29.                                               max_reiserfs_offset(p_s_inode));
  30.                 /* 2.6.23.12
  31.                  * 2008-2-18 20:22
  32.                  * RFALSE is potential BUG
  33.                  */
  34.                 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
  35. [...]
  36. }
复制代码

论坛徽章:
0
105 [报告]
发表于 2008-02-24 16:19 |只看该作者

  1. void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
  2.                                __u32 objectid_to_release)
  3. {
  4. [...]
  5.                         if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
  6.                                 /* objectid map must be expanded, but there is no space */
  7.                                 PROC_INFO_INC(s, leaked_oid);
  8.                                 /*
  9. 2008-2-24 16:01 2.6.23.12

  10.    "The exception is immediately after a sequence
  11.    of operations which deletes a large number of objects of
  12.    non-sequential objectids,
  13.    and even then it will become compact
  14.    again as soon as more objects are created."

  15.         map[0] = 1, map[1] = j, map[2] = j +1, ... map[sb_oid_maxsize(rs)] = k
  16.        
  17.         then for objid to be released, it is possible that it is not
  18.         recorded by this small cache, map[], and
  19.         it will not become compact again as soon as more objects are created,
  20.         especially in cases that objids to be released are in the range of (1, j).
  21.                                 */
  22.                                 return;
  23.                         }
  24. [...]
  25. }
复制代码

论坛徽章:
0
106 [报告]
发表于 2008-02-29 20:46 |只看该作者

  1. static int tcp_error(struct sk_buff *skb,
  2.                      unsigned int dataoff,
  3.                      enum ip_conntrack_info *ctinfo,
  4.                      int pf,
  5.                      unsigned int hooknum)
  6. {
  7.         struct tcphdr _tcph, *th;
  8.         unsigned int tcplen = skb->len - dataoff;
  9.         u_int8_t tcpflags;

  10.         /* Smaller that minimal TCP header? */
  11.         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
  12.         if (th == NULL) {
  13.                 if (LOG_INVALID(IPPROTO_TCP))
  14.                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
  15.                                 "nf_ct_tcp: short packet ");
  16.                 return -NF_ACCEPT;
  17.         } /* 2.6.23.12 */
  18.         else if (th == &_tcph)
  19.                 return -NF_ACCEPT;
  20. [...]
  21. }
复制代码

论坛徽章:
0
107 [报告]
发表于 2008-03-02 12:00 |只看该作者

  1. void wait_task_inactive (struct task_struct *p)
  2. {
  3. [..]
  4.         /* 2.6.23.12
  5.          * Ok, time to look more closely!
  6.          *
  7.          * We need the rq lock now, to be *sure*.
  8.          * If we're wrong, we'll just go back and repeat.
  9.          */
  10.         rq = task_rq_lock(p, &flags);
  11.         running = task_running(rq, p);
  12.         if (unlikely(running)) {
  13.                 task_rq_unlock(rq, &flags);
  14.                 cpu_relax();
  15.                 goto repeat;
  16.         }
  17.         on_rq = p->se.on_rq;
  18.         if (unlikely(on_rq)) {
  19.                 task_rq_unlock(rq, &flags);
  20.                 yield();
  21.                 goto repeat;
  22.         }
  23.         task_rq_unlock(rq, &flags);
  24.         /*
  25.          * Ahh, all good. It wasn't running, and it wasn't
  26.          * runnable, which means that it will never become
  27.          * running in the future either. We're all done!
  28.          */
  29. }
复制代码

论坛徽章:
0
108 [报告]
发表于 2008-03-02 13:46 |只看该作者
典型的oops shooting过程

  1. Hello :)

  2. My system just crashed because of a power fluctuation and the root
  3. filesystem was damaged.
  4. The system booted up just fine, but when samba tried to start up
  5. the kernel oops'd.

  6. xfs_repair was apparently able to repair the damage, though I seem
  7. to have lost some files.

  8. I do realize that a lot of awful things can happen if you just cut
  9. the power, but the kernel shouldn't oops on a mounted file
  10. system, right?

  11. Please CC me, as I'm not subscribed to the lists.


  12. Regards
  13. Thomas

  14. $ rpm -q xfsprogs
  15. xfsprogs-2.9.4-4.fc8

  16. $ uname -a
  17. Linux linux.local.loc 2.6.23.15-137.fc8 #1 SMP Sun Feb 10 17:48:34 EST 2008 i686 i686 i386
  18. GNU/Linux

  19. ["xfs_check" (text/plain)]

  20. block 0/19018 expected type unknown got free2
  21. agi unlinked bucket 6 is 103430 in ag 3 (inode=12686342)
  22. agi unlinked bucket 14 is 91278 in ag 3 (inode=12674190)
  23. agi unlinked bucket 23 is 106135 in ag 3 (inode=12689047)
  24. agi unlinked bucket 31 is 53279 in ag 3 (inode=12636191)
  25. agi unlinked bucket 35 is 106147 in ag 3 (inode=12689059)
  26. agi unlinked bucket 36 is 60836 in ag 3 (inode=12643748)
  27. agi unlinked bucket 39 is 60839 in ag 3 (inode=12643751)
  28. agi unlinked bucket 41 is 378537 in ag 3 (inode=12961449)
  29. agi unlinked bucket 50 is 91250 in ag 3 (inode=12674162)
  30. agi unlinked bucket 20 is 38996 in ag 4 (inode=16816212)
  31. agi unlinked bucket 57 is 95353 in ag 4 (inode=16872569)
  32. agi unlinked bucket 4 is 199940 in ag 8 (inode=33754372)
  33. agi unlinked bucket 8 is 56392 in ag 8 (inode=33610824)
  34. agi unlinked bucket 21 is 177621 in ag 8 (inode=33732053)
  35. agi unlinked bucket 22 is 56406 in ag 8 (inode=33610838)
  36. agi unlinked bucket 23 is 56407 in ag 8 (inode=33610839)
  37. agi unlinked bucket 27 is 54747 in ag 8 (inode=33609179)
  38. agi unlinked bucket 32 is 67232 in ag 8 (inode=33621664)
  39. agi unlinked bucket 37 is 54757 in ag 8 (inode=33609189)
  40. agi unlinked bucket 39 is 67239 in ag 8 (inode=33621671)
  41. agi unlinked bucket 40 is 67240 in ag 8 (inode=33621672)
  42. agi unlinked bucket 47 is 56367 in ag 8 (inode=33610799)
  43. agi unlinked bucket 0 is 34944 in ag 10 (inode=41977984)
  44. agi unlinked bucket 20 is 42516 in ag 11 (inode=46179860)
  45. agi unlinked bucket 15 is 463 in ag 13 (inode=54526415)
  46. agi unlinked bucket 62 is 154430 in ag 13 (inode=54680382)
  47. block 0/21136 type unknown not expected
  48. allocated inode 12689047 has 0 link count
  49. allocated inode 12689059 has 0 link count
  50. allocated inode 12674162 has 0 link count
  51. allocated inode 12674190 has 0 link count
  52. allocated inode 12636191 has 0 link count
  53. allocated inode 12961449 has 0 link count
  54. allocated inode 12643748 has 0 link count
  55. allocated inode 12643751 has 0 link count
  56. allocated inode 12686342 has 0 link count
  57. allocated inode 16816212 has 0 link count
  58. allocated inode 16872569 has 0 link count
  59. allocated inode 33754372 has 0 link count
  60. allocated inode 33732053 has 0 link count
  61. allocated inode 33621664 has 0 link count
  62. allocated inode 33621671 has 0 link count
  63. allocated inode 33621672 has 0 link count
  64. allocated inode 33609179 has 0 link count
  65. allocated inode 33609189 has 0 link count
  66. allocated inode 33610799 has 0 link count
  67. allocated inode 33610824 has 0 link count
  68. allocated inode 33610838 has 0 link count
  69. allocated inode 33610839 has 0 link count
  70. allocated inode 41977984 has 0 link count
  71. allocated inode 46179860 has 0 link count
  72. allocated inode 54680382 has 0 link count
  73. allocated inode 54526415 has 0 link count
  74. sb_ifree 3257, counted 3259
  75. sb_fdblocks 7248513, counted 7248904

  76. ["xfs_oops" (text/plain)]

  77. Mar  1 10:32:03 linux kernel: BUG: unable to handle kernel NULL pointer dereference \
  78.                 at virtual address 00000002
  79. Mar  1 10:32:03 linux kernel: printing eip: f8a96141 *pde = 38ccb067
  80. Mar  1 10:32:03 linux kernel: Oops: 0000 [#1] SMP
  81. Mar  1 10:32:03 linux kernel: Modules linked in: asb100 hwmon_vid hwmon tun sch_sfq \
  82. sch_htb pppoe pppox ppp_synctty ppp_async crc_ccitt ppp_generic slhc bridge \
  83. xt_NOTRACK iptable_raw ipt_MASQUERADE iptable_nat nf_nat ipt_REJECT xt_mac ipt_LOG \
  84. nf_conntrack_ipv4 xt_state nf_conntrack nfnetlink iptable_filter xt_CLASSIFY \
  85. xt_length ipt_owner xt_TCPMSS xt_comment xt_tcpudp iptable_mangle ip_tables x_tables \
  86. ext2 mbcache dm_mirror dm_mod 8139too r8169 mii i2c_i801 iTCO_wdt iTCO_vendor_support \
  87.                 i2c_core sg sr_mod cdrom ata_generic ata_piix libata sd_mod scsi_mod \
  88.                 xfs ehci_hcd
  89. Mar  1 10:32:03 linux kernel: CPU:    0
  90. Mar  1 10:32:03 linux kernel: EIP:    0060:[<f8a96141>]    Not tainted VLI
  91. Mar  1 10:32:03 linux kernel: EFLAGS: 00010292   (2.6.23.15-137.fc8 #1)
  92. Mar  1 10:32:03 linux kernel: EIP is at xfs_attr_shortform_getvalue+0x15/0xdb [xfs]
  93. Mar  1 10:32:03 linux kernel: eax: 00000000   ebx: f268cddc   ecx: f8ae4d9d   edx: \
  94.                 08d26645
  95. Mar  1 10:32:03 linux kernel: esi: f04d1600   edi: 00000004   ebp: f8ae4d91   esp: \
  96.                 f268cdbc
  97. Mar  1 10:32:03 linux kernel: ds: 007b   es: 007b   fs: 00d8  gs: 0033  ss: 0068
  98. Mar  1 10:32:03 linux kernel: Process smbd (pid: 2036, ti=f268c000 task=f7207840 \
  99.                 task.ti=f268c000)
  100. Mar  1 10:32:03 linux kernel: Stack: 00000003 f37888d4 00000003 f04d1600 f04d1600 \
  101.                 f268ce38 f8ae4d91 f8a93a97
  102. Mar  1 10:32:03 linux kernel: f8ae4d91 0000000c c1ba6000 00000130 00000402 275b19c4 \
  103.                 00000000 00000000
  104. Mar  1 10:32:03 linux kernel: f04d1600 00000000 00000000 00000000 00000000 00000001 \
  105.                 00000000 00000000
  106. Mar  1 10:32:03 linux kernel: Call Trace:
  107. Mar  1 10:32:03 linux kernel: [<f8a93a97>] xfs_attr_fetch+0x9e/0xee [xfs]
  108. Mar  1 10:32:03 linux kernel: [<f8a8d843>] xfs_acl_iaccess+0x59/0xc2 [xfs]
  109. Mar  1 10:32:03 linux kernel: [<f8abe3c2>] xfs_iaccess+0x87/0x15c [xfs]
  110. Mar  1 10:32:03 linux kernel: [<f8ad53ec>] xfs_access+0x26/0x3a [xfs]
  111. Mar  1 10:32:03 linux kernel: [<f8ae08ae>] xfs_vn_permission+0x0/0x13 [xfs]
  112. Mar  1 10:32:03 linux kernel: [<f8ae08bd>] xfs_vn_permission+0xf/0x13 [xfs]
  113. Mar  1 10:32:03 linux kernel: [<c0487419>] permission+0x9e/0xdb
  114. Mar  1 10:32:03 linux kernel: [<c04887d0>] may_open+0x5c/0x205
  115. Mar  1 10:32:03 linux kernel: [<c048a8b4>] open_namei+0x27d/0x576
  116. Mar  1 10:32:03 linux kernel: [<c047fdb7>] do_filp_open+0x2a/0x3e
  117. Mar  1 10:32:03 linux kernel: [<c047fafe>] get_unused_fd_flags+0x52/0xc5
  118. Mar  1 10:32:03 linux kernel: [<c047fe13>] do_sys_open+0x48/0xca
  119. Mar  1 10:32:03 linux kernel: [<c047fece>] sys_open+0x1c/0x1e
  120. Mar  1 10:32:03 linux kernel: [<c040518a>] syscall_call+0x7/0xb
  121. Mar  1 10:32:03 linux kernel: =======================
  122. Mar  1 10:32:03 linux kernel: Code: 00 00 c6 40 02 00 66 c7 00 00 04 8b 47 2c 5b 5e \
  123. 5f e9 08 bc 03 00 55 57 56 53 89 c3 83 ec 0c 8b 40 20 8b 40 4c 8b 40 14 8d 78 04 <0f> \
  124.                 b6 40 02 c7 44 24 08 00 00 00 00 89 44 24 04 e9 96 00 00 00
  125. Mar  1 10:32:03 linux kernel: EIP: [<f8a96141>] xfs_attr_shortform_getvalue+0x15/0xdb \
  126. [xfs] SS:ESP 0068:f268cdbc

复制代码

  1. > Hello :)
  2. >
  3. > My system just crashed because of a power fluctuation and the root
  4. > filesystem was damaged.
  5. > The system booted up just fine, but when samba tried to start up
  6. > the kernel oops'd.
  7. >
  8. > xfs_repair was apparently able to repair the damage, though I seem
  9. > to have lost some files.
  10. >
  11. > I do realize that a lot of awful things can happen if you just cut
  12. > the power, but the kernel shouldn't oops on a mounted file
  13. > system, right?

  14. right.

  15. here's the disassembly of that function in your kernel FWIW:

  16. 0001012c <xfs_attr_shortform_getvalue>:
  17.    1012c:       55                      push   %ebp
  18.    1012d:       57                      push   %edi
  19.    1012e:       56                      push   %esi
  20.    1012f:       53                      push   %ebx
  21.    10130:       89 c3                   mov    %eax,%ebx
  22.    10132:       83 ec 0c                sub    $0xc,%esp
  23.    10135:       8b 40 20                mov    0x20(%eax),%eax
  24.    10138:       8b 40 4c                mov    0x4c(%eax),%eax
  25.    1013b:       8b 40 14                mov    0x14(%eax),%eax
  26.    1013e:       8d 78 04                lea    0x4(%eax),%edi
  27.    10141:       0f b6 40 02             movzbl 0x2(%eax),%eax <--- boom.
  28.    10145:       c7 44 24 08 00 00 00    movl   $0x0,0x8(%esp)
  29.    1014c:       00
  30.    1014d:       89 44 24 04             mov    %eax,0x4(%esp)
  31.    10151:       e9 96 00 00 00          jmp    101ec
  32. <xfs_attr_shortform_getvalue+0xc0>
  33. ...

  34. at this point eax is "sf" (0x0) and edi is "sfe" (0x04)

  35. Mar  1 10:32:03 linux kernel: eax: 00000000   ebx: f268cddc   ecx:
  36. f8ae4d9d   edx: 08d26645
  37. Mar  1 10:32:03 linux kernel: esi: f04d1600   edi: 00000004   ebp:
  38. f8ae4d91   esp: f268cdbc

  39. first part of the function:

  40. int
  41. xfs_attr_shortform_getvalue(xfs_da_args_t *args)
  42. {
  43.         xfs_attr_shortform_t *sf;
  44.         xfs_attr_sf_entry_t *sfe;
  45.         int i;

  46.         ASSERT(args->dp->i_d.di_aformat == XFS_IFINLINE);
  47.         sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
  48.         sfe = &sf->list[0];
  49.         for (i = 0;
  50.              i < sf->hdr.count; <--- died here, sf is 0
  51.              sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {

  52. we blew up on sf->hdr.count because sf is NULL (hdr.count is 0x2 into sf)

  53. maybe the sgi guys can take it from there ;)  Did you also happen to
  54. save the xfs_repair output?

  55. -Eric
复制代码

问题的定位,in this case nothing to do with xdb,
简洁明快,且该收手时就收手。

  1. Eric Sandeen wrote:
  2. > Did you also happen to save the xfs_repair output?

  3. No, but I made a complete copy of the file system before
  4. repairing it, so I can easily recreate it... :)

  5. Thomas


  6. ["xfs_repair" (text/plain)]

  7. Phase 1 - find and verify superblock...

  8. Phase 2 - using internal log
  9.         - zero log...
  10.         - scan filesystem freespace and inode maps...
  11.         - found root inode chunk

  12. Phase 3 - for each AG...
  13.         - scan and clear agi unlinked lists...
  14.         - process known inodes and perform inode discovery...
  15.         - agno = 0

  16. data fork in ino 128638 claims free block 19018
  17.         - agno = 1
  18.         - agno = 2
  19. b5ac7b90: Badness in key lookup (length)
  20. bp=(bno 11701280, len 32768 bytes) key=(bno 11701280, len 8192 bytes)
  21. b5ac7b90: Badness in key lookup (length)
  22. bp=(bno 11708896, len 32768 bytes) key=(bno 11708896, len 8192 bytes)
  23. b5ac7b90: Badness in key lookup (length)
  24. bp=(bno 11739296, len 32768 bytes) key=(bno 11739296, len 8192 bytes)
  25. b5ac7b90: Badness in key lookup (length)
  26. bp=(bno 11751440, len 32768 bytes) key=(bno 11751440, len 8192 bytes)
  27. b5ac7b90: Badness in key lookup (length)
  28. bp=(bno 11754176, len 32768 bytes) key=(bno 11754176, len 8192 bytes)
  29. b5ac7b90: Badness in key lookup (length)
  30. bp=(bno 12026592, len 32768 bytes) key=(bno 12026592, len 8192 bytes)
  31.         - agno = 3
  32. b50c6b90: Badness in key lookup (length)
  33. bp=(bno 15569728, len 32768 bytes) key=(bno 15569728, len 8192 bytes)
  34. b50c6b90: Badness in key lookup (length)
  35. bp=(bno 15626080, len 32768 bytes) key=(bno 15626080, len 8192 bytes)
  36.         - agno = 4
  37.         - agno = 5
  38.         - agno = 6
  39.         - agno = 7
  40. b41ffb90: Badness in key lookup (length)
  41. bp=(bno 31116224, len 32768 bytes) key=(bno 31116224, len 8192 bytes)
  42. b41ffb90: Badness in key lookup (length)
  43. bp=(bno 31117856, len 32768 bytes) key=(bno 31117856, len 8192 bytes)
  44. b41ffb90: Badness in key lookup (length)
  45. bp=(bno 31128704, len 32768 bytes) key=(bno 31128704, len 8192 bytes)
  46. b41ffb90: Badness in key lookup (length)
  47. bp=(bno 31239104, len 32768 bytes) key=(bno 31239104, len 8192 bytes)
  48. b41ffb90: Badness in key lookup (length)
  49. bp=(bno 31261408, len 32768 bytes) key=(bno 31261408, len 8192 bytes)
  50.         - agno = 8
  51. local inode 33609156 attr too small (size = 0, min size = 4)
  52. bad attribute fork in inode 33609156, clearing attr fork
  53. clearing inode 33609156 attributes
  54. cleared inode 33609156
  55.         - agno = 9
  56. b50c6b90: Badness in key lookup (length)
  57. bp=(bno 38861808, len 32768 bytes) key=(bno 38861808, len 8192 bytes)
  58.         - agno = 10
  59. b41ffb90: Badness in key lookup (length)
  60. bp=(bno 42752032, len 32768 bytes) key=(bno 42752032, len 8192 bytes)
  61.         - agno = 11
  62.         - agno = 12
  63. b50c6b90: Badness in key lookup (length)
  64. bp=(bno 50475360, len 32768 bytes) key=(bno 50475360, len 8192 bytes)
  65. b50c6b90: Badness in key lookup (length)
  66. bp=(bno 50629312, len 32768 bytes) key=(bno 50629312, len 8192 bytes)
  67.         - agno = 13
  68.         - agno = 14
  69.         - agno = 15
  70.         - process newly discovered inodes...

  71. Phase 4 - check for duplicate blocks...
  72.         - setting up duplicate extent list...
  73.         - check for inodes claiming duplicate blocks...
  74.         - agno = 0
  75.         - agno = 1
  76.         - agno = 2
  77.         - agno = 3
  78.         - agno = 4
  79.         - agno = 5
  80.         - agno = 6
  81.         - agno = 7
  82.         - agno = 8
  83. bad bmap btree ptr 0xc3a0000100000000 in ino 33609156
  84. bad data fork in inode 33609156
  85. cleared inode 33609156
  86.         - agno = 9
  87.         - agno = 10
  88.         - agno = 11
  89.         - agno = 12
  90.         - agno = 13
  91.         - agno = 14
  92.         - agno = 15

  93. Phase 5 - rebuild AG headers and trees...
  94.         - reset superblock...

  95. Phase 6 - check inode connectivity...
  96.         - resetting contents of realtime bitmap and summary inodes
  97.         - traversing filesystem ...
  98. entry "locking.tdb" in directory inode 33585205 points to free inode 33609156
  99. bad hash table for directory inode 33585205 (no data entry): rebuilding
  100. rebuilding directory inode 33585205
  101.         - traversal finished ...
  102.         - moving disconnected inodes to lost+found ...
  103. disconnected inode 12636191, moving to lost+found
  104. disconnected inode 12643748, moving to lost+found
  105. disconnected inode 12643751, moving to lost+found
  106. disconnected inode 12674162, moving to lost+found
  107. disconnected inode 12674190, moving to lost+found
  108. disconnected inode 12686342, moving to lost+found
  109. disconnected inode 12689047, moving to lost+found
  110. disconnected inode 12689059, moving to lost+found
  111. disconnected inode 12961449, moving to lost+found
  112. disconnected inode 16816212, moving to lost+found
  113. disconnected inode 16872569, moving to lost+found
  114. disconnected inode 33609179, moving to lost+found
  115. disconnected inode 33609189, moving to lost+found
  116. disconnected inode 33610799, moving to lost+found
  117. disconnected inode 33610824, moving to lost+found
  118. disconnected inode 33610838, moving to lost+found
  119. disconnected inode 33610839, moving to lost+found
  120. disconnected inode 33621664, moving to lost+found
  121. disconnected inode 33621671, moving to lost+found
  122. disconnected inode 33621672, moving to lost+found
  123. disconnected inode 33732053, moving to lost+found
  124. disconnected inode 33754372, moving to lost+found
  125. disconnected inode 41977984, moving to lost+found
  126. disconnected inode 46179860, moving to lost+found
  127. disconnected inode 54526415, moving to lost+found
  128. disconnected inode 54680382, moving to lost+found

  129. Phase 7 - verify and correct link counts...
  130. done
复制代码

认真劲,令人佩服。

  1. oh, like a dd image?  great.  You can use xfs_metadump to make a more
  2. transportable image... xfs folks might even be able to use that to recreate the oops.

  3. -Eric
复制代码

虽然收了手,留句话,交个朋友。

[ 本帖最后由 sisi8408 于 2008-3-2 13:48 编辑 ]

论坛徽章:
0
109 [报告]
发表于 2008-05-09 23:08 |只看该作者

  1. static int wake_idle (int cpu, struct task_struct *p)
  2. {
  3.         cpumask_t tmp;
  4.         struct sched_domain *sd;
  5.         int i;
  6.         /*
  7.          * Choose the cpu task p should run on, starting from candidate 'cpu'.
  8.          * Returns 'cpu' itself, or an idle cpu found in its SD_WAKE_IDLE domains.
  9.          * If 'cpu' is idle, it is the best cpu to run this task. This cpu is
  10.          * also the best if it has more than one task already.
  11.          * Siblings must also be busy (in most cases), as they didn't already
  12.          * pick up the extra load from this cpu, and hence we need not check
  13.          * sibling runqueue info. This will avoid the checks and cache miss
  14.          * penalties associated with that.
  15.          */
  16.         if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
  17.                 return cpu;

  18.         for_each_domain(cpu, sd) {
  19.                 if (sd->flags & SD_WAKE_IDLE) {
  20.                         cpus_and(tmp, sd->span, p->cpus_allowed); /* candidates: sd->span masked with p->cpus_allowed */
  21.                         for_each_cpu_mask(i, tmp) {
  22.                                 if (idle_cpu(i)) {
  23.                                         if (i != task_cpu(p)) {
  24.                                                 schedstat_inc(p, se.nr_wakeups_idle);
  25.                                         }
  26.                                         return i;
  27.                                 }
  28.                         }
  29.                 } else {
  30.                         /* 2.6.24.4 (review question):
  31.                          * the walk stops at the first sd without SD_WAKE_IDLE; what about a later sd that does have the flag set?
  32.                          */
  33.                         break;
  34.                 }
  35.         }
  36.         return cpu; /* no idle cpu found in the domains searched: keep the original cpu */
  37. }
复制代码

论坛徽章:
0
110 [报告]
发表于 2008-05-10 14:39 |只看该作者

  1. Index: linux-2.6.24.4-rt4/kernel/sched_cpupri.h
  2. ===================================================================
  3. --- /dev/null        1970-01-01 00:00:00.000000000 +0000
  4. +++ linux-2.6.24.4-rt4/kernel/sched_cpupri.h        2008-03-24 19:06:32.000000000 -0400
  5. @@ -0,0 +1,36 @@
  6. +#ifndef _LINUX_CPUPRI_H
  7. +#define _LINUX_CPUPRI_H
  8. +
  9. +#include <linux/sched.h>
  10. +
  11. +#define CPUPRI_NR_PRIORITIES 2+MAX_RT_PRIO /* should be parenthesized: (2+MAX_RT_PRIO); as written, the next line expands to 2+MAX_RT_PRIO/BITS_PER_LONG */
  12. +#define CPUPRI_NR_PRI_WORDS CPUPRI_NR_PRIORITIES/BITS_PER_LONG
  13. +

复制代码
您需要登录后才可以回帖 登录 | 注册

本版积分规则 发表回复

  

北京盛拓优讯信息技术有限公司. 版权所有 京ICP备16024965号-6 北京市公安局海淀分局网监中心备案编号:11010802020122 niuxiaotong@pcpop.com 17352615567
未成年举报专区
中国互联网协会会员  联系我们:huangweiwei@itpub.net
感谢所有关心和支持过ChinaUnix的朋友们 转载本站内容请注明原作者名及出处

清除 Cookies - ChinaUnix - Archiver - WAP - TOP