Added patch by Tigerite for ext4 which fixes data corruption
author Pali Rohár <pali.rohar@gmail.com>
Fri, 20 May 2011 14:08:48 +0000 (16:08 +0200)
committer Pali Rohár <pali.rohar@gmail.com>
Thu, 11 Aug 2011 08:25:04 +0000 (10:25 +0200)
kernel-power-2.6.28/debian/patches/ext4-data-corruption.diff [new file with mode: 0644]
kernel-power-2.6.28/debian/patches/series

diff --git a/kernel-power-2.6.28/debian/patches/ext4-data-corruption.diff b/kernel-power-2.6.28/debian/patches/ext4-data-corruption.diff
new file mode 100644 (file)
index 0000000..8cd7c3d
--- /dev/null
@@ -0,0 +1,608 @@
+diff -uprN linux-2.6.28.orig/Documentation/filesystems/ext4.txt linux-2.6.28/Documentation/filesystems/ext4.txt\r
+--- linux-2.6.28.orig/Documentation/filesystems/ext4.txt       2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/Documentation/filesystems/ext4.txt    2009-05-23 16:05:41.000000000 +0200\r
+@@ -76,7 +76,7 @@ Note: More extensive information for get\r
+ * extent format more robust in face of on-disk corruption due to magics,\r
+ * internal redunancy in tree\r
+ * improved file allocation (multi-block alloc)\r
+-* fix 32000 subdirectory limit\r
++* lift 32000 subdirectory limit imposed by i_links_count[1]\r
+ * nsec timestamps for mtime, atime, ctime, create time\r
+ * inode version field on disk (NFSv4, Lustre)\r
+ * reduced e2fsck time via uninit_bg feature\r
+@@ -91,6 +91,9 @@ Note: More extensive information for get\r
+ * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force\r
+   the ordering)\r
\r
++[1] Filesystems with a block size of 1k may see a limit imposed by the\r
++directory hash tree having a maximum depth of two.\r
++\r
+ 2.2 Candidate features for future inclusion\r
\r
+ * Online defrag (patches available but not well tested)\r
+diff -uprN linux-2.6.28.orig/fs/ext4/balloc.c linux-2.6.28/fs/ext4/balloc.c\r
+--- linux-2.6.28.orig/fs/ext4/balloc.c 2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/balloc.c      2009-05-23 16:05:41.000000000 +0200\r
+@@ -608,7 +608,9 @@ int ext4_claim_free_blocks(struct ext4_s\r
+  */\r
+ int ext4_should_retry_alloc(struct super_block *sb, int *retries)\r
+ {\r
+-      if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)\r
++      if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||\r
++          (*retries)++ > 3 ||\r
++          !EXT4_SB(sb)->s_journal)\r
+               return 0;\r
\r
+       jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);\r
+diff -uprN linux-2.6.28.orig/fs/ext4/ext4.h linux-2.6.28/fs/ext4/ext4.h\r
+--- linux-2.6.28.orig/fs/ext4/ext4.h   2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/ext4.h        2009-05-23 16:05:41.000000000 +0200\r
+@@ -248,6 +248,30 @@ struct flex_groups {\r
+ #define EXT4_FL_USER_VISIBLE          0x000BDFFF /* User visible flags */\r
+ #define EXT4_FL_USER_MODIFIABLE               0x000B80FF /* User modifiable flags */\r
\r
++/* Flags that should be inherited by new inodes from their parent. */\r
++#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\\r
++                         EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\\r
++                         EXT4_NODUMP_FL | EXT4_NOATIME_FL |\\r
++                         EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\\r
++                         EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)\r
++\r
++/* Flags that are appropriate for regular files (all but dir-specific ones). */\r
++#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))\r
++\r
++/* Flags that are appropriate for non-directories/regular files. */\r
++#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)\r
++\r
++/* Mask out flags that are inappropriate for the given type of inode. */\r
++static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)\r
++{\r
++      if (S_ISDIR(mode))\r
++              return flags;\r
++      else if (S_ISREG(mode))\r
++              return flags & EXT4_REG_FLMASK;\r
++      else\r
++              return flags & EXT4_OTHER_FLMASK;\r
++}\r
++\r
+ /*\r
+  * Inode dynamic state flags\r
+  */\r
+@@ -529,7 +556,7 @@ do {                                                                              \\r
+ #define EXT4_MOUNT_NO_UID32           0x02000  /* Disable 32-bit UIDs */\r
+ #define EXT4_MOUNT_XATTR_USER         0x04000 /* Extended user attributes */\r
+ #define EXT4_MOUNT_POSIX_ACL          0x08000 /* POSIX Access Control Lists */\r
+-#define EXT4_MOUNT_RESERVATION                0x10000 /* Preallocation */\r
++#define EXT4_MOUNT_NO_AUTO_DA_ALLOC   0x10000 /* No auto delalloc mapping */\r
+ #define EXT4_MOUNT_BARRIER            0x20000 /* Use block barriers */\r
+ #define EXT4_MOUNT_NOBH                       0x40000 /* No bufferheads */\r
+ #define EXT4_MOUNT_QUOTA              0x80000 /* Some quota option set */\r
+diff -uprN linux-2.6.28.orig/fs/ext4/extents.c linux-2.6.28/fs/ext4/extents.c\r
+--- linux-2.6.28.orig/fs/ext4/extents.c        2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/extents.c     2009-05-23 16:05:41.000000000 +0200\r
+@@ -1120,7 +1120,8 @@ ext4_ext_search_right(struct inode *inod\r
+       struct ext4_extent_idx *ix;\r
+       struct ext4_extent *ex;\r
+       ext4_fsblk_t block;\r
+-      int depth, ee_len;\r
++      int depth;      /* Note, NOT eh_depth; depth from top of tree */\r
++      int ee_len;\r
\r
+       BUG_ON(path == NULL);\r
+       depth = path->p_depth;\r
+@@ -1179,7 +1180,8 @@ ext4_ext_search_right(struct inode *inod\r
+               if (bh == NULL)\r
+                       return -EIO;\r
+               eh = ext_block_hdr(bh);\r
+-              if (ext4_ext_check_header(inode, eh, depth)) {\r
++              /* subtract from p_depth to get proper eh_depth */\r
++              if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {\r
+                       put_bh(bh);\r
+                       return -EIO;\r
+               }\r
+@@ -1740,11 +1742,13 @@ ext4_ext_put_in_cache(struct inode *inod\r
+ {\r
+       struct ext4_ext_cache *cex;\r
+       BUG_ON(len == 0);\r
++      spin_lock(&EXT4_I(inode)->i_block_reservation_lock);\r
+       cex = &EXT4_I(inode)->i_cached_extent;\r
+       cex->ec_type = type;\r
+       cex->ec_block = block;\r
+       cex->ec_len = len;\r
+       cex->ec_start = start;\r
++      spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);\r
+ }\r
\r
+ /*\r
+@@ -1801,12 +1805,17 @@ ext4_ext_in_cache(struct inode *inode, e\r
+                       struct ext4_extent *ex)\r
+ {\r
+       struct ext4_ext_cache *cex;\r
++      int ret = EXT4_EXT_CACHE_NO;\r
\r
++      /* \r
++       * We borrow i_block_reservation_lock to protect i_cached_extent\r
++       */\r
++      spin_lock(&EXT4_I(inode)->i_block_reservation_lock);\r
+       cex = &EXT4_I(inode)->i_cached_extent;\r
\r
+       /* has cache valid data? */\r
+       if (cex->ec_type == EXT4_EXT_CACHE_NO)\r
+-              return EXT4_EXT_CACHE_NO;\r
++              goto errout;\r
\r
+       BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&\r
+                       cex->ec_type != EXT4_EXT_CACHE_EXTENT);\r
+@@ -1817,11 +1826,11 @@ ext4_ext_in_cache(struct inode *inode, e\r
+               ext_debug("%u cached by %u:%u:%llu\n",\r
+                               block,\r
+                               cex->ec_block, cex->ec_len, cex->ec_start);\r
+-              return cex->ec_type;\r
++              ret = cex->ec_type;\r
+       }\r
+-\r
+-      /* not in cache */\r
+-      return EXT4_EXT_CACHE_NO;\r
++errout:\r
++      spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);\r
++      return ret;\r
+ }\r
\r
+ /*\r
+@@ -2777,6 +2786,8 @@ int ext4_ext_get_blocks(handle_t *handle\r
+                               if (allocated > max_blocks)\r
+                                       allocated = max_blocks;\r
+                               set_buffer_unwritten(bh_result);\r
++                              bh_result->b_bdev = inode->i_sb->s_bdev;\r
++                              bh_result->b_blocknr = newblock;\r
+                               goto out2;\r
+                       }\r
\r
+diff -uprN linux-2.6.28.orig/fs/ext4/ialloc.c linux-2.6.28/fs/ext4/ialloc.c\r
+--- linux-2.6.28.orig/fs/ext4/ialloc.c 2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/ialloc.c      2009-05-23 16:05:41.000000000 +0200\r
+@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, s\r
+       struct ext4_group_desc *gdp;\r
+       struct ext4_super_block *es;\r
+       struct ext4_sb_info *sbi;\r
+-      int fatal = 0, err;\r
++      int fatal = 0, err, cleared;\r
+       ext4_group_t flex_group;\r
\r
+       if (atomic_read(&inode->i_count) > 1) {\r
+@@ -243,8 +243,10 @@ void ext4_free_inode(handle_t *handle, s\r
+               goto error_return;\r
\r
+       /* Ok, now we can actually update the inode bitmaps.. */\r
+-      if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),\r
+-                                      bit, bitmap_bh->b_data))\r
++      spin_lock(sb_bgl_lock(sbi, block_group));\r
++      cleared = ext4_clear_bit(bit, bitmap_bh->b_data);\r
++      spin_unlock(sb_bgl_lock(sbi, block_group));\r
++      if (!cleared)\r
+               ext4_error(sb, "ext4_free_inode",\r
+                          "bit already cleared for inode %lu", ino);\r
+       else {\r
+@@ -686,6 +688,7 @@ struct inode *ext4_new_inode(handle_t *h\r
+       struct inode *ret;\r
+       ext4_group_t i;\r
+       int free = 0;\r
++      static int once = 1;\r
+       ext4_group_t flex_group;\r
\r
+       /* Cannot create files in a deleted directory */\r
+@@ -705,10 +708,12 @@ struct inode *ext4_new_inode(handle_t *h\r
+               ret2 = find_group_flex(sb, dir, &group);\r
+               if (ret2 == -1) {\r
+                       ret2 = find_group_other(sb, dir, &group);\r
+-                      if (ret2 == 0 && printk_ratelimit())\r
++                      if (ret2 == 0 && once) {\r
++                              once = 0;\r
+                               printk(KERN_NOTICE "ext4: find_group_flex "\r
+                                      "failed, fallback succeeded dir %lu\n",\r
+                                      dir->i_ino);\r
++                      }\r
+               }\r
+               goto got_group;\r
+       }\r
+@@ -862,16 +867,12 @@ got:\r
+       ei->i_disksize = 0;\r
\r
+       /*\r
+-       * Don't inherit extent flag from directory. We set extent flag on\r
+-       * newly created directory and file only if -o extent mount option is\r
+-       * specified\r
++       * Don't inherit extent flag from directory, amongst others. We set\r
++       * extent flag on newly created directory and file only if -o extent\r
++       * mount option is specified\r
+        */\r
+-      ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);\r
+-      if (S_ISLNK(mode))\r
+-              ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);\r
+-      /* dirsync only applies to directories */\r
+-      if (!S_ISDIR(mode))\r
+-              ei->i_flags &= ~EXT4_DIRSYNC_FL;\r
++      ei->i_flags =\r
++              ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);\r
+       ei->i_file_acl = 0;\r
+       ei->i_dtime = 0;\r
+       ei->i_block_group = group;\r
+diff -uprN linux-2.6.28.orig/fs/ext4/inode.c linux-2.6.28/fs/ext4/inode.c\r
+--- linux-2.6.28.orig/fs/ext4/inode.c  2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/inode.c       2009-05-23 16:05:41.000000000 +0200\r
+@@ -1052,6 +1059,7 @@ int ext4_get_blocks_wrap(handle_t *handl\r
+       int retval;\r
\r
+       clear_buffer_mapped(bh);\r
++      clear_buffer_unwritten(bh);\r
\r
+       /*\r
+        * Try to see if we can get  the block without requesting\r
+@@ -1082,6 +1090,18 @@ int ext4_get_blocks_wrap(handle_t *handl\r
+               return retval;\r
\r
+       /*\r
++       * When we call get_blocks without the create flag, the\r
++       * BH_Unwritten flag could have gotten set if the blocks\r
++       * requested were part of a uninitialized extent.  We need to\r
++       * clear this flag now that we are committed to convert all or\r
++       * part of the uninitialized extent to be an initialized\r
++       * extent.  This is because we need to avoid the combination\r
++       * of BH_Unwritten and BH_Mapped flags being simultaneously\r
++       * set on the buffer_head.\r
++       */\r
++      clear_buffer_unwritten(bh);\r
++\r
++      /*\r
+        * New blocks allocate and/or writing to uninitialized extent\r
+        * will possibly result in updating i_data, so we take\r
+        * the write lock of i_data_sem, and call get_blocks()\r
+@@ -2180,6 +2200,10 @@ static int ext4_da_get_block_prep(struct\r
+                                 struct buffer_head *bh_result, int create)\r
+ {\r
+       int ret = 0;\r
++      sector_t invalid_block = ~((sector_t) 0xffff);\r
++\r
++      if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))\r
++              invalid_block = ~0;\r
\r
+       BUG_ON(create == 0);\r
+       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);\r
+@@ -2201,11 +2225,18 @@ static int ext4_da_get_block_prep(struct\r
+                       /* not enough space to reserve */\r
+                       return ret;\r
\r
+-              map_bh(bh_result, inode->i_sb, 0);\r
++              map_bh(bh_result, inode->i_sb, invalid_block);\r
+               set_buffer_new(bh_result);\r
+               set_buffer_delay(bh_result);\r
+       } else if (ret > 0) {\r
+               bh_result->b_size = (ret << inode->i_blkbits);\r
++              /*\r
++               * With sub-block writes into unwritten extents\r
++               * we also need to mark the buffer as new so that\r
++               * the unwritten parts of the buffer gets correctly zeroed.\r
++               */\r
++              if (buffer_unwritten(bh_result))\r
++                      set_buffer_new(bh_result);\r
+               ret = 0;\r
+       }\r
\r
+@@ -2493,7 +2524,7 @@ retry:\r
\r
+               ext4_journal_stop(handle);\r
\r
+-              if (mpd.retval == -ENOSPC) {\r
++              if ((mpd.retval == -ENOSPC) && sbi->s_journal) {\r
+                       /* commit the transaction which would\r
+                        * free blocks released in the transaction\r
+                        * and try again\r
+@@ -4167,11 +4243,9 @@ struct inode *ext4_iget(struct super_blo\r
+       ei->i_flags = le32_to_cpu(raw_inode->i_flags);\r
+       inode->i_blocks = ext4_inode_blocks(raw_inode, ei);\r
+       ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);\r
+-      if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=\r
+-          cpu_to_le32(EXT4_OS_HURD)) {\r
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))\r
+               ei->i_file_acl |=\r
+                       ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;\r
+-      }\r
+       inode->i_size = ext4_isize(raw_inode);\r
+       ei->i_disksize = inode->i_size;\r
+       inode->i_generation = le32_to_cpu(raw_inode->i_generation);\r
+@@ -4218,6 +4292,18 @@ struct inode *ext4_iget(struct super_blo\r
+                       (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;\r
+       }\r
\r
++      if (ei->i_file_acl &&\r
++          ((ei->i_file_acl < \r
++            (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +\r
++             EXT4_SB(sb)->s_gdb_count)) ||\r
++           (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {\r
++              ext4_error(sb, __func__,\r
++                         "bad extended attribute block %llu in inode #%lu",\r
++                         ei->i_file_acl, inode->i_ino);\r
++              ret = -EIO;\r
++              goto bad_inode;\r
++      }\r
++\r
+       if (S_ISREG(inode->i_mode)) {\r
+               inode->i_op = &ext4_file_inode_operations;\r
+               inode->i_fop = &ext4_file_operations;\r
+@@ -4232,7 +4318,8 @@ struct inode *ext4_iget(struct super_blo\r
+                       inode->i_op = &ext4_symlink_inode_operations;\r
+                       ext4_set_aops(inode);\r
+               }\r
+-      } else {\r
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||\r
++            S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {\r
+               inode->i_op = &ext4_special_inode_operations;\r
+               if (raw_inode->i_block[0])\r
+                       init_special_inode(inode, inode->i_mode,\r
+@@ -4240,6 +4327,13 @@ struct inode *ext4_iget(struct super_blo\r
+               else\r
+                       init_special_inode(inode, inode->i_mode,\r
+                          new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));\r
++      } else {\r
++              brelse(bh);\r
++              ret = -EIO;\r
++              ext4_error(inode->i_sb, __func__, \r
++                         "bogus i_mode (%o) for inode=%lu",\r
++                         inode->i_mode, inode->i_ino);\r
++              goto bad_inode;\r
+       }\r
+       brelse(iloc.bh);\r
+       ext4_set_inode_flags(inode);\r
+diff -uprN linux-2.6.28.orig/fs/ext4/ioctl.c linux-2.6.28/fs/ext4/ioctl.c\r
+--- linux-2.6.28.orig/fs/ext4/ioctl.c  2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/ioctl.c       2009-05-23 16:05:41.000000000 +0200\r
+@@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsig\r
+               if (err)\r
+                       return err;\r
\r
+-              if (!S_ISDIR(inode->i_mode))\r
+-                      flags &= ~EXT4_DIRSYNC_FL;\r
++              flags = ext4_mask_flags(inode->i_mode, flags);\r
\r
+               err = -EPERM;\r
+               mutex_lock(&inode->i_mutex);\r
+diff -uprN linux-2.6.28.orig/fs/ext4/mballoc.c linux-2.6.28/fs/ext4/mballoc.c\r
+--- linux-2.6.28.orig/fs/ext4/mballoc.c        2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/mballoc.c     2009-05-23 16:05:41.000000000 +0200\r
+@@ -1448,7 +1448,7 @@ static void ext4_mb_measure_extent(struc\r
+       struct ext4_free_extent *gex = &ac->ac_g_ex;\r
\r
+       BUG_ON(ex->fe_len <= 0);\r
+-      BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
++      BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
+       BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
+       BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);\r
\r
+@@ -2692,7 +2692,7 @@ int ext4_mb_init(struct super_block *sb,\r
+       i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);\r
+       sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);\r
+       if (sbi->s_mb_maxs == NULL) {\r
+-              kfree(sbi->s_mb_maxs);\r
++              kfree(sbi->s_mb_offsets);\r
+               return -ENOMEM;\r
+       }\r
\r
+@@ -3289,7 +3289,7 @@ ext4_mb_normalize_request(struct ext4_al\r
+       }\r
+       BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&\r
+                       start > ac->ac_o_ex.fe_logical);\r
+-      BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
++      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
\r
+       /* now prepare goal request */\r
\r
+@@ -3586,6 +3586,7 @@ static void ext4_mb_put_pa(struct ext4_a\r
+                       struct super_block *sb, struct ext4_prealloc_space *pa)\r
+ {\r
+       unsigned long grp;\r
++      ext4_fsblk_t grp_blk;\r
\r
+       if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)\r
+               return;\r
+@@ -3600,8 +3601,12 @@ static void ext4_mb_put_pa(struct ext4_a\r
+       pa->pa_deleted = 1;\r
+       spin_unlock(&pa->pa_lock);\r
\r
+-      /* -1 is to protect from crossing allocation group */\r
+-      ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);\r
++      grp_blk = pa->pa_pstart;\r
++      /* If linear, pa_pstart may be in the next group when pa is used up */\r
++      if (pa->pa_linear)\r
++              grp_blk--;\r
++\r
++      ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);\r
\r
+       /*\r
+        * possible race:\r
+@@ -4414,7 +4419,7 @@ static void ext4_mb_add_n_trim(struct ex\r
+                                               pa_inode_list) {\r
+               spin_lock(&tmp_pa->pa_lock);\r
+               if (tmp_pa->pa_deleted) {\r
+-                      spin_unlock(&pa->pa_lock);\r
++                      spin_unlock(&tmp_pa->pa_lock);\r
+                       continue;\r
+               }\r
+               if (!added && pa->pa_free < tmp_pa->pa_free) {\r
+diff -uprN linux-2.6.28.orig/fs/ext4/namei.c linux-2.6.28/fs/ext4/namei.c\r
+--- linux-2.6.28.orig/fs/ext4/namei.c  2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/namei.c       2009-05-23 16:05:41.000000000 +0200\r
+@@ -1056,8 +1056,16 @@ static struct dentry *ext4_lookup(struct\r
+                       return ERR_PTR(-EIO);\r
+               }\r
+               inode = ext4_iget(dir->i_sb, ino);\r
+-              if (IS_ERR(inode))\r
+-                      return ERR_CAST(inode);\r
++              if (unlikely(IS_ERR(inode))) {\r
++                      if (PTR_ERR(inode) == -ESTALE) {\r
++                              ext4_error(dir->i_sb, __func__,\r
++                                              "deleted inode referenced: %u",\r
++                                              ino);\r
++                              return ERR_PTR(-EIO);\r
++                      } else {\r
++                              return ERR_CAST(inode);\r
++                      }\r
++              }\r
+       }\r
+       return d_splice_alias(inode, dentry);\r
+ }\r
+@@ -2436,7 +2444,8 @@ static int ext4_rename(struct inode *old\r
+               ext4_mark_inode_dirty(handle, new_inode);\r
+               if (!new_inode->i_nlink)\r
+                       ext4_orphan_add(handle, new_inode);\r
+-              force_da_alloc = 1;\r
++              if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))\r
++                      force_da_alloc = 1;\r
+       }\r
+       retval = 0;\r
\r
+diff -uprN linux-2.6.28.orig/fs/ext4/super.c linux-2.6.28/fs/ext4/super.c\r
+--- linux-2.6.28.orig/fs/ext4/super.c  2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ext4/super.c       2009-05-23 16:05:41.000000000 +0200\r
+@@ -679,8 +679,6 @@ static int ext4_show_options(struct seq_\r
+       if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))\r
+               seq_puts(seq, ",noacl");\r
+ #endif\r
+-      if (!test_opt(sb, RESERVATION))\r
+-              seq_puts(seq, ",noreservation");\r
+       if (sbi->s_commit_interval) {\r
+               seq_printf(seq, ",commit=%u",\r
+                          (unsigned) (sbi->s_commit_interval / HZ));\r
+@@ -724,6 +722,9 @@ static int ext4_show_options(struct seq_\r
+       if (test_opt(sb, DATA_ERR_ABORT))\r
+               seq_puts(seq, ",data_err=abort");\r
\r
++      if (test_opt(sb, NO_AUTO_DA_ALLOC))\r
++              seq_puts(seq, ",auto_da_alloc=0");\r
++\r
+       ext4_show_quota_options(seq, sb);\r
+       return 0;\r
+ }\r
+@@ -849,7 +850,7 @@ enum {\r
+       Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,\r
+       Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,\r
+       Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,\r
+-      Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,\r
++      Opt_auto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,\r
+       Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,\r
+       Opt_journal_checksum, Opt_journal_async_commit,\r
+       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,\r
+@@ -883,8 +884,6 @@ static const match_table_t tokens = {\r
+       {Opt_nouser_xattr, "nouser_xattr"},\r
+       {Opt_acl, "acl"},\r
+       {Opt_noacl, "noacl"},\r
+-      {Opt_reservation, "reservation"},\r
+-      {Opt_noreservation, "noreservation"},\r
+       {Opt_noload, "noload"},\r
+       {Opt_nobh, "nobh"},\r
+       {Opt_bh, "bh"},\r
+@@ -919,6 +918,7 @@ static const match_table_t tokens = {\r
+       {Opt_delalloc, "delalloc"},\r
+       {Opt_nodelalloc, "nodelalloc"},\r
+       {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},\r
++      {Opt_auto_da_alloc, "auto_da_alloc=%u"},\r
+       {Opt_err, NULL},\r
+ };\r
\r
+@@ -1049,12 +1049,6 @@ static int parse_options(char *options, \r
+                              "not supported\n");\r
+                       break;\r
+ #endif\r
+-              case Opt_reservation:\r
+-                      set_opt(sbi->s_mount_opt, RESERVATION);\r
+-                      break;\r
+-              case Opt_noreservation:\r
+-                      clear_opt(sbi->s_mount_opt, RESERVATION);\r
+-                      break;\r
+               case Opt_journal_update:\r
+                       /* @@@ FIXME */\r
+                       /* Eventually we will want to be able to create\r
+@@ -1331,6 +1325,14 @@ set_qf_format:\r
+                               return 0;\r
+                       sbi->s_inode_readahead_blks = option;\r
+                       break;\r
++              case Opt_auto_da_alloc:\r
++                      if (match_int(&args[0], &option))\r
++                              return 0;\r
++                      if (option)\r
++                              clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);\r
++                      else\r
++                              set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);\r
++                      break;\r
+               default:\r
+                       printk(KERN_ERR\r
+                              "EXT4-fs: Unrecognized mount option \"%s\" "\r
+@@ -1956,7 +1958,6 @@ static int ext4_fill_super(struct super_\r
+       sbi->s_resuid = le16_to_cpu(es->s_def_resuid);\r
+       sbi->s_resgid = le16_to_cpu(es->s_def_resgid);\r
\r
+-      set_opt(sbi->s_mount_opt, RESERVATION);\r
+       set_opt(sbi->s_mount_opt, BARRIER);\r
\r
+       /*\r
+diff -uprN linux-2.6.28.orig/fs/jbd2/revoke.c linux-2.6.28/fs/jbd2/revoke.c\r
+--- linux-2.6.28.orig/fs/jbd2/revoke.c 2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/jbd2/revoke.c      2009-05-23 16:05:41.000000000 +0200\r
+@@ -55,6 +55,25 @@\r
+  *                    need do nothing.\r
+  * RevokeValid set, Revoked set:\r
+  *                    buffer has been revoked.\r
++ *\r
++ * Locking rules:\r
++ * We keep two hash tables of revoke records. One hashtable belongs to the\r
++ * running transaction (is pointed to by journal->j_revoke), the other one\r
++ * belongs to the committing transaction. Accesses to the second hash table\r
++ * happen only from the kjournald and no other thread touches this table.  Also\r
++ * journal_switch_revoke_table() which switches which hashtable belongs to the\r
++ * running and which to the committing transaction is called only from\r
++ * kjournald. Therefore we need no locks when accessing the hashtable belonging\r
++ * to the committing transaction.\r
++ *\r
++ * All users operating on the hash table belonging to the running transaction\r
++ * have a handle to the transaction. Therefore they are safe from kjournald\r
++ * switching hash tables under them. For operations on the lists of entries in\r
++ * the hash table j_revoke_lock is used.\r
++ *\r
++ * Finally, also replay code uses the hash tables but at this moment noone else\r
++ * can touch them (filesystem isn't mounted yet) and hence no locking is\r
++ * needed.\r
+  */\r
\r
+ #ifndef __KERNEL__\r
+@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle\r
+  * the second time we would still have a pending revoke to cancel.  So,\r
+  * do not trust the Revoked bit on buffers unless RevokeValid is also\r
+  * set.\r
+- *\r
+- * The caller must have the journal locked.\r
+  */\r
+ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)\r
+ {\r
+@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo\r
+ /*\r
+  * Write revoke records to the journal for all entries in the current\r
+  * revoke hash, deleting the entries as we go.\r
+- *\r
+- * Called with the journal lock held.\r
+  */\r
+-\r
+ void jbd2_journal_write_revoke_records(journal_t *journal,\r
+                                 transaction_t *transaction)\r
+ {\r
+diff -uprN linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h\r
+--- linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h      2009-05-02 20:54:43.000000000 +0200\r
++++ linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h   2009-05-23 16:05:41.000000000 +0200\r
+@@ -60,7 +60,8 @@ static inline int jbd2_journal_file_inod\r
+       return 0;\r
+ }\r
\r
+-static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,\r
++static inline int jbd2_journal_begin_ordered_truncate(journal_t *journal,\r
++                                                    struct jbd2_inode *inode,\r
+                                                     loff_t new_size)\r
+ {\r
+       return 0;
\ No newline at end of file
index 7c895e0..764cc59 100644 (file)
@@ -38,3 +38,4 @@ vanilla-2.6.28-anti-io-stalling.diff
 joikuspot.diff
 dspbridge.diff
 phys_to_page.diff
+ext4-data-corruption.diff