-diff -uprN linux-2.6.28.orig/Documentation/filesystems/ext4.txt linux-2.6.28/Documentation/filesystems/ext4.txt\r
---- linux-2.6.28.orig/Documentation/filesystems/ext4.txt 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/Documentation/filesystems/ext4.txt 2009-05-23 16:05:41.000000000 +0200\r
-@@ -76,7 +76,7 @@ Note: More extensive information for get\r
- * extent format more robust in face of on-disk corruption due to magics,\r
- * internal redunancy in tree\r
- * improved file allocation (multi-block alloc)\r
--* fix 32000 subdirectory limit\r
-+* lift 32000 subdirectory limit imposed by i_links_count[1]\r
- * nsec timestamps for mtime, atime, ctime, create time\r
- * inode version field on disk (NFSv4, Lustre)\r
- * reduced e2fsck time via uninit_bg feature\r
-@@ -91,6 +91,9 @@ Note: More extensive information for get\r
- * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force\r
- the ordering)\r
- \r
-+[1] Filesystems with a block size of 1k may see a limit imposed by the\r
-+directory hash tree having a maximum depth of two.\r
-+\r
- 2.2 Candidate features for future inclusion\r
- \r
- * Online defrag (patches available but not well tested)\r
-diff -uprN linux-2.6.28.orig/fs/ext4/balloc.c linux-2.6.28/fs/ext4/balloc.c\r
---- linux-2.6.28.orig/fs/ext4/balloc.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/balloc.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -608,7 +608,9 @@ int ext4_claim_free_blocks(struct ext4_s\r
- */\r
- int ext4_should_retry_alloc(struct super_block *sb, int *retries)\r
- {\r
-- if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)\r
-+ if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||\r
-+ (*retries)++ > 3 ||\r
-+ !EXT4_SB(sb)->s_journal)\r
- return 0;\r
- \r
- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);\r
-diff -uprN linux-2.6.28.orig/fs/ext4/ext4.h linux-2.6.28/fs/ext4/ext4.h\r
---- linux-2.6.28.orig/fs/ext4/ext4.h 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/ext4.h 2009-05-23 16:05:41.000000000 +0200\r
-@@ -248,6 +248,30 @@ struct flex_groups {\r
- #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */\r
- #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */\r
- \r
-+/* Flags that should be inherited by new inodes from their parent. */\r
-+#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\\r
-+ EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\\r
-+ EXT4_NODUMP_FL | EXT4_NOATIME_FL |\\r
-+ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\\r
-+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)\r
-+\r
-+/* Flags that are appropriate for regular files (all but dir-specific ones). */\r
-+#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))\r
-+\r
-+/* Flags that are appropriate for non-directories/regular files. */\r
-+#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)\r
-+\r
-+/* Mask out flags that are inappropriate for the given type of inode. */\r
-+static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)\r
-+{\r
-+ if (S_ISDIR(mode))\r
-+ return flags;\r
-+ else if (S_ISREG(mode))\r
-+ return flags & EXT4_REG_FLMASK;\r
-+ else\r
-+ return flags & EXT4_OTHER_FLMASK;\r
-+}\r
-+\r
- /*\r
- * Inode dynamic state flags\r
- */\r
-@@ -529,7 +556,7 @@ do { \\r
- #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */\r
- #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */\r
- #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */\r
--#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */\r
-+#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */\r
- #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */\r
- #define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */\r
- #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */\r
-diff -uprN linux-2.6.28.orig/fs/ext4/extents.c linux-2.6.28/fs/ext4/extents.c\r
---- linux-2.6.28.orig/fs/ext4/extents.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/extents.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -1120,7 +1120,8 @@ ext4_ext_search_right(struct inode *inod\r
- struct ext4_extent_idx *ix;\r
- struct ext4_extent *ex;\r
- ext4_fsblk_t block;\r
-- int depth, ee_len;\r
-+ int depth; /* Note, NOT eh_depth; depth from top of tree */\r
-+ int ee_len;\r
- \r
- BUG_ON(path == NULL);\r
- depth = path->p_depth;\r
-@@ -1179,7 +1180,8 @@ ext4_ext_search_right(struct inode *inod\r
- if (bh == NULL)\r
- return -EIO;\r
- eh = ext_block_hdr(bh);\r
-- if (ext4_ext_check_header(inode, eh, depth)) {\r
-+ /* subtract from p_depth to get proper eh_depth */\r
-+ if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {\r
- put_bh(bh);\r
- return -EIO;\r
- }\r
-@@ -1740,11 +1742,13 @@ ext4_ext_put_in_cache(struct inode *inod\r
- {\r
- struct ext4_ext_cache *cex;\r
- BUG_ON(len == 0);\r
-+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);\r
- cex = &EXT4_I(inode)->i_cached_extent;\r
- cex->ec_type = type;\r
- cex->ec_block = block;\r
- cex->ec_len = len;\r
- cex->ec_start = start;\r
-+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);\r
- }\r
- \r
- /*\r
-@@ -1801,12 +1805,17 @@ ext4_ext_in_cache(struct inode *inode, e\r
- struct ext4_extent *ex)\r
- {\r
- struct ext4_ext_cache *cex;\r
-+ int ret = EXT4_EXT_CACHE_NO;\r
- \r
-+ /* \r
-+ * We borrow i_block_reservation_lock to protect i_cached_extent\r
-+ */\r
-+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);\r
- cex = &EXT4_I(inode)->i_cached_extent;\r
- \r
- /* has cache valid data? */\r
- if (cex->ec_type == EXT4_EXT_CACHE_NO)\r
-- return EXT4_EXT_CACHE_NO;\r
-+ goto errout;\r
- \r
- BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&\r
- cex->ec_type != EXT4_EXT_CACHE_EXTENT);\r
-@@ -1817,11 +1826,11 @@ ext4_ext_in_cache(struct inode *inode, e\r
- ext_debug("%u cached by %u:%u:%llu\n",\r
- block,\r
- cex->ec_block, cex->ec_len, cex->ec_start);\r
-- return cex->ec_type;\r
-+ ret = cex->ec_type;\r
- }\r
--\r
-- /* not in cache */\r
-- return EXT4_EXT_CACHE_NO;\r
-+errout:\r
-+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);\r
-+ return ret;\r
- }\r
- \r
- /*\r
-@@ -2777,6 +2786,8 @@ int ext4_ext_get_blocks(handle_t *handle\r
- if (allocated > max_blocks)\r
- allocated = max_blocks;\r
- set_buffer_unwritten(bh_result);\r
-+ bh_result->b_bdev = inode->i_sb->s_bdev;\r
-+ bh_result->b_blocknr = newblock;\r
- goto out2;\r
- }\r
- \r
-diff -uprN linux-2.6.28.orig/fs/ext4/ialloc.c linux-2.6.28/fs/ext4/ialloc.c\r
---- linux-2.6.28.orig/fs/ext4/ialloc.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/ialloc.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, s\r
- struct ext4_group_desc *gdp;\r
- struct ext4_super_block *es;\r
- struct ext4_sb_info *sbi;\r
-- int fatal = 0, err;\r
-+ int fatal = 0, err, cleared;\r
- ext4_group_t flex_group;\r
- \r
- if (atomic_read(&inode->i_count) > 1) {\r
-@@ -243,8 +243,10 @@ void ext4_free_inode(handle_t *handle, s\r
- goto error_return;\r
- \r
- /* Ok, now we can actually update the inode bitmaps.. */\r
-- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),\r
-- bit, bitmap_bh->b_data))\r
-+ spin_lock(sb_bgl_lock(sbi, block_group));\r
-+ cleared = ext4_clear_bit(bit, bitmap_bh->b_data);\r
-+ spin_unlock(sb_bgl_lock(sbi, block_group));\r
-+ if (!cleared)\r
- ext4_error(sb, "ext4_free_inode",\r
- "bit already cleared for inode %lu", ino);\r
- else {\r
-@@ -686,6 +688,7 @@ struct inode *ext4_new_inode(handle_t *h\r
- struct inode *ret;\r
- ext4_group_t i;\r
- int free = 0;\r
-+ static int once = 1;\r
- ext4_group_t flex_group;\r
- \r
- /* Cannot create files in a deleted directory */\r
-@@ -705,10 +708,12 @@ struct inode *ext4_new_inode(handle_t *h\r
- ret2 = find_group_flex(sb, dir, &group);\r
- if (ret2 == -1) {\r
- ret2 = find_group_other(sb, dir, &group);\r
-- if (ret2 == 0 && printk_ratelimit())\r
-+ if (ret2 == 0 && once) {\r
-+ once = 0;\r
- printk(KERN_NOTICE "ext4: find_group_flex "\r
- "failed, fallback succeeded dir %lu\n",\r
- dir->i_ino);\r
-+ }\r
- }\r
- goto got_group;\r
- }\r
-@@ -862,16 +867,12 @@ got:\r
- ei->i_disksize = 0;\r
- \r
- /*\r
-- * Don't inherit extent flag from directory. We set extent flag on\r
-- * newly created directory and file only if -o extent mount option is\r
-- * specified\r
-+ * Don't inherit extent flag from directory, amongst others. We set\r
-+ * extent flag on newly created directory and file only if -o extent\r
-+ * mount option is specified\r
- */\r
-- ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);\r
-- if (S_ISLNK(mode))\r
-- ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);\r
-- /* dirsync only applies to directories */\r
-- if (!S_ISDIR(mode))\r
-- ei->i_flags &= ~EXT4_DIRSYNC_FL;\r
-+ ei->i_flags =\r
-+ ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);\r
- ei->i_file_acl = 0;\r
- ei->i_dtime = 0;\r
- ei->i_block_group = group;\r
-diff -uprN linux-2.6.28.orig/fs/ext4/inode.c linux-2.6.28/fs/ext4/inode.c\r
---- linux-2.6.28.orig/fs/ext4/inode.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/inode.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -1052,6 +1059,7 @@ int ext4_get_blocks_wrap(handle_t *handl\r
- int retval;\r
- \r
- clear_buffer_mapped(bh);\r
-+ clear_buffer_unwritten(bh);\r
- \r
- /*\r
- * Try to see if we can get the block without requesting\r
-@@ -1082,6 +1090,18 @@ int ext4_get_blocks_wrap(handle_t *handl\r
- return retval;\r
- \r
- /*\r
-+ * When we call get_blocks without the create flag, the\r
-+ * BH_Unwritten flag could have gotten set if the blocks\r
-+ * requested were part of a uninitialized extent. We need to\r
-+ * clear this flag now that we are committed to convert all or\r
-+ * part of the uninitialized extent to be an initialized\r
-+ * extent. This is because we need to avoid the combination\r
-+ * of BH_Unwritten and BH_Mapped flags being simultaneously\r
-+ * set on the buffer_head.\r
-+ */\r
-+ clear_buffer_unwritten(bh);\r
-+\r
-+ /*\r
- * New blocks allocate and/or writing to uninitialized extent\r
- * will possibly result in updating i_data, so we take\r
- * the write lock of i_data_sem, and call get_blocks()\r
-@@ -2180,6 +2200,10 @@ static int ext4_da_get_block_prep(struct\r
- struct buffer_head *bh_result, int create)\r
- {\r
- int ret = 0;\r
-+ sector_t invalid_block = ~((sector_t) 0xffff);\r
-+\r
-+ if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))\r
-+ invalid_block = ~0;\r
- \r
- BUG_ON(create == 0);\r
- BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);\r
-@@ -2201,11 +2225,18 @@ static int ext4_da_get_block_prep(struct\r
- /* not enough space to reserve */\r
- return ret;\r
- \r
-- map_bh(bh_result, inode->i_sb, 0);\r
-+ map_bh(bh_result, inode->i_sb, invalid_block);\r
- set_buffer_new(bh_result);\r
- set_buffer_delay(bh_result);\r
- } else if (ret > 0) {\r
- bh_result->b_size = (ret << inode->i_blkbits);\r
-+ /*\r
-+ * With sub-block writes into unwritten extents\r
-+ * we also need to mark the buffer as new so that\r
-+ * the unwritten parts of the buffer gets correctly zeroed.\r
-+ */\r
-+ if (buffer_unwritten(bh_result))\r
-+ set_buffer_new(bh_result);\r
- ret = 0;\r
- }\r
- \r
-@@ -2493,7 +2524,7 @@ retry:\r
- \r
- ext4_journal_stop(handle);\r
- \r
-- if (mpd.retval == -ENOSPC) {\r
-+ if ((mpd.retval == -ENOSPC) && sbi->s_journal) {\r
- /* commit the transaction which would\r
- * free blocks released in the transaction\r
- * and try again\r
-@@ -4167,11 +4243,9 @@ struct inode *ext4_iget(struct super_blo\r
- ei->i_flags = le32_to_cpu(raw_inode->i_flags);\r
- inode->i_blocks = ext4_inode_blocks(raw_inode, ei);\r
- ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);\r
-- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=\r
-- cpu_to_le32(EXT4_OS_HURD)) {\r
-+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))\r
- ei->i_file_acl |=\r
- ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;\r
-- }\r
- inode->i_size = ext4_isize(raw_inode);\r
- ei->i_disksize = inode->i_size;\r
- inode->i_generation = le32_to_cpu(raw_inode->i_generation);\r
-@@ -4218,6 +4292,18 @@ struct inode *ext4_iget(struct super_blo\r
- (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;\r
- }\r
- \r
-+ if (ei->i_file_acl &&\r
-+ ((ei->i_file_acl < \r
-+ (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +\r
-+ EXT4_SB(sb)->s_gdb_count)) ||\r
-+ (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {\r
-+ ext4_error(sb, __func__,\r
-+ "bad extended attribute block %llu in inode #%lu",\r
-+ ei->i_file_acl, inode->i_ino);\r
-+ ret = -EIO;\r
-+ goto bad_inode;\r
-+ }\r
-+\r
- if (S_ISREG(inode->i_mode)) {\r
- inode->i_op = &ext4_file_inode_operations;\r
- inode->i_fop = &ext4_file_operations;\r
-@@ -4232,7 +4318,8 @@ struct inode *ext4_iget(struct super_blo\r
- inode->i_op = &ext4_symlink_inode_operations;\r
- ext4_set_aops(inode);\r
- }\r
-- } else {\r
-+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||\r
-+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {\r
- inode->i_op = &ext4_special_inode_operations;\r
- if (raw_inode->i_block[0])\r
- init_special_inode(inode, inode->i_mode,\r
-@@ -4240,6 +4327,13 @@ struct inode *ext4_iget(struct super_blo\r
- else\r
- init_special_inode(inode, inode->i_mode,\r
- new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));\r
-+ } else {\r
-+ brelse(bh);\r
-+ ret = -EIO;\r
-+ ext4_error(inode->i_sb, __func__, \r
-+ "bogus i_mode (%o) for inode=%lu",\r
-+ inode->i_mode, inode->i_ino);\r
-+ goto bad_inode;\r
- }\r
- brelse(iloc.bh);\r
- ext4_set_inode_flags(inode);\r
-diff -uprN linux-2.6.28.orig/fs/ext4/ioctl.c linux-2.6.28/fs/ext4/ioctl.c\r
---- linux-2.6.28.orig/fs/ext4/ioctl.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/ioctl.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsig\r
- if (err)\r
- return err;\r
- \r
-- if (!S_ISDIR(inode->i_mode))\r
-- flags &= ~EXT4_DIRSYNC_FL;\r
-+ flags = ext4_mask_flags(inode->i_mode, flags);\r
- \r
- err = -EPERM;\r
- mutex_lock(&inode->i_mutex);\r
-diff -uprN linux-2.6.28.orig/fs/ext4/mballoc.c linux-2.6.28/fs/ext4/mballoc.c\r
---- linux-2.6.28.orig/fs/ext4/mballoc.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/mballoc.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -1448,7 +1448,7 @@ static void ext4_mb_measure_extent(struc\r
- struct ext4_free_extent *gex = &ac->ac_g_ex;\r
- \r
- BUG_ON(ex->fe_len <= 0);\r
-- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
-+ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
- BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
- BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);\r
- \r
-@@ -2692,7 +2692,7 @@ int ext4_mb_init(struct super_block *sb,\r
- i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);\r
- sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);\r
- if (sbi->s_mb_maxs == NULL) {\r
-- kfree(sbi->s_mb_maxs);\r
-+ kfree(sbi->s_mb_offsets);\r
- return -ENOMEM;\r
- }\r
- \r
-@@ -3289,7 +3289,7 @@ ext4_mb_normalize_request(struct ext4_al\r
- }\r
- BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&\r
- start > ac->ac_o_ex.fe_logical);\r
-- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
-+ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));\r
- \r
- /* now prepare goal request */\r
- \r
-@@ -3586,6 +3586,7 @@ static void ext4_mb_put_pa(struct ext4_a\r
- struct super_block *sb, struct ext4_prealloc_space *pa)\r
- {\r
- unsigned long grp;\r
-+ ext4_fsblk_t grp_blk;\r
- \r
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)\r
- return;\r
-@@ -3600,8 +3601,12 @@ static void ext4_mb_put_pa(struct ext4_a\r
- pa->pa_deleted = 1;\r
- spin_unlock(&pa->pa_lock);\r
- \r
-- /* -1 is to protect from crossing allocation group */\r
-- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);\r
-+ grp_blk = pa->pa_pstart;\r
-+ /* If linear, pa_pstart may be in the next group when pa is used up */\r
-+ if (pa->pa_linear)\r
-+ grp_blk--;\r
-+\r
-+ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);\r
- \r
- /*\r
- * possible race:\r
-@@ -4414,7 +4419,7 @@ static void ext4_mb_add_n_trim(struct ex\r
- pa_inode_list) {\r
- spin_lock(&tmp_pa->pa_lock);\r
- if (tmp_pa->pa_deleted) {\r
-- spin_unlock(&pa->pa_lock);\r
-+ spin_unlock(&tmp_pa->pa_lock);\r
- continue;\r
- }\r
- if (!added && pa->pa_free < tmp_pa->pa_free) {\r
-diff -uprN linux-2.6.28.orig/fs/ext4/namei.c linux-2.6.28/fs/ext4/namei.c\r
---- linux-2.6.28.orig/fs/ext4/namei.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/namei.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -1056,8 +1056,16 @@ static struct dentry *ext4_lookup(struct\r
- return ERR_PTR(-EIO);\r
- }\r
- inode = ext4_iget(dir->i_sb, ino);\r
-- if (IS_ERR(inode))\r
-- return ERR_CAST(inode);\r
-+ if (unlikely(IS_ERR(inode))) {\r
-+ if (PTR_ERR(inode) == -ESTALE) {\r
-+ ext4_error(dir->i_sb, __func__,\r
-+ "deleted inode referenced: %u",\r
-+ ino);\r
-+ return ERR_PTR(-EIO);\r
-+ } else {\r
-+ return ERR_CAST(inode);\r
-+ }\r
-+ }\r
- }\r
- return d_splice_alias(inode, dentry);\r
- }\r
-@@ -2436,7 +2444,8 @@ static int ext4_rename(struct inode *old\r
- ext4_mark_inode_dirty(handle, new_inode);\r
- if (!new_inode->i_nlink)\r
- ext4_orphan_add(handle, new_inode);\r
-- force_da_alloc = 1;\r
-+ if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))\r
-+ force_da_alloc = 1;\r
- }\r
- retval = 0;\r
- \r
-diff -uprN linux-2.6.28.orig/fs/ext4/super.c linux-2.6.28/fs/ext4/super.c\r
---- linux-2.6.28.orig/fs/ext4/super.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ext4/super.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -679,8 +679,6 @@ static int ext4_show_options(struct seq_\r
- if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))\r
- seq_puts(seq, ",noacl");\r
- #endif\r
-- if (!test_opt(sb, RESERVATION))\r
-- seq_puts(seq, ",noreservation");\r
- if (sbi->s_commit_interval) {\r
- seq_printf(seq, ",commit=%u",\r
- (unsigned) (sbi->s_commit_interval / HZ));\r
-@@ -724,6 +722,9 @@ static int ext4_show_options(struct seq_\r
- if (test_opt(sb, DATA_ERR_ABORT))\r
- seq_puts(seq, ",data_err=abort");\r
- \r
-+ if (test_opt(sb, NO_AUTO_DA_ALLOC))\r
-+ seq_puts(seq, ",auto_da_alloc=0");\r
-+\r
- ext4_show_quota_options(seq, sb);\r
- return 0;\r
- }\r
-@@ -849,7 +850,7 @@ enum {\r
- Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,\r
- Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,\r
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,\r
-- Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,\r
-+ Opt_auto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,\r
- Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,\r
- Opt_journal_checksum, Opt_journal_async_commit,\r
- Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,\r
-@@ -883,8 +884,6 @@ static const match_table_t tokens = {\r
- {Opt_nouser_xattr, "nouser_xattr"},\r
- {Opt_acl, "acl"},\r
- {Opt_noacl, "noacl"},\r
-- {Opt_reservation, "reservation"},\r
-- {Opt_noreservation, "noreservation"},\r
- {Opt_noload, "noload"},\r
- {Opt_nobh, "nobh"},\r
- {Opt_bh, "bh"},\r
-@@ -919,6 +918,7 @@ static const match_table_t tokens = {\r
- {Opt_delalloc, "delalloc"},\r
- {Opt_nodelalloc, "nodelalloc"},\r
- {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},\r
-+ {Opt_auto_da_alloc, "auto_da_alloc=%u"},\r
- {Opt_err, NULL},\r
- };\r
- \r
-@@ -1049,12 +1049,6 @@ static int parse_options(char *options, \r
- "not supported\n");\r
- break;\r
- #endif\r
-- case Opt_reservation:\r
-- set_opt(sbi->s_mount_opt, RESERVATION);\r
-- break;\r
-- case Opt_noreservation:\r
-- clear_opt(sbi->s_mount_opt, RESERVATION);\r
-- break;\r
- case Opt_journal_update:\r
- /* @@@ FIXME */\r
- /* Eventually we will want to be able to create\r
-@@ -1331,6 +1325,14 @@ set_qf_format:\r
- return 0;\r
- sbi->s_inode_readahead_blks = option;\r
- break;\r
-+ case Opt_auto_da_alloc:\r
-+ if (match_int(&args[0], &option))\r
-+ return 0;\r
-+ if (option)\r
-+ clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);\r
-+ else\r
-+ set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);\r
-+ break;\r
- default:\r
- printk(KERN_ERR\r
- "EXT4-fs: Unrecognized mount option \"%s\" "\r
-@@ -1956,7 +1958,6 @@ static int ext4_fill_super(struct super_\r
- sbi->s_resuid = le16_to_cpu(es->s_def_resuid);\r
- sbi->s_resgid = le16_to_cpu(es->s_def_resgid);\r
- \r
-- set_opt(sbi->s_mount_opt, RESERVATION);\r
- set_opt(sbi->s_mount_opt, BARRIER);\r
- \r
- /*\r
-diff -uprN linux-2.6.28.orig/fs/jbd2/revoke.c linux-2.6.28/fs/jbd2/revoke.c\r
---- linux-2.6.28.orig/fs/jbd2/revoke.c 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/jbd2/revoke.c 2009-05-23 16:05:41.000000000 +0200\r
-@@ -55,6 +55,25 @@\r
- * need do nothing.\r
- * RevokeValid set, Revoked set:\r
- * buffer has been revoked.\r
-+ *\r
-+ * Locking rules:\r
-+ * We keep two hash tables of revoke records. One hashtable belongs to the\r
-+ * running transaction (is pointed to by journal->j_revoke), the other one\r
-+ * belongs to the committing transaction. Accesses to the second hash table\r
-+ * happen only from the kjournald and no other thread touches this table. Also\r
-+ * journal_switch_revoke_table() which switches which hashtable belongs to the\r
-+ * running and which to the committing transaction is called only from\r
-+ * kjournald. Therefore we need no locks when accessing the hashtable belonging\r
-+ * to the committing transaction.\r
-+ *\r
-+ * All users operating on the hash table belonging to the running transaction\r
-+ * have a handle to the transaction. Therefore they are safe from kjournald\r
-+ * switching hash tables under them. For operations on the lists of entries in\r
-+ * the hash table j_revoke_lock is used.\r
-+ *\r
-+ * Finally, also replay code uses the hash tables but at this moment noone else\r
-+ * can touch them (filesystem isn't mounted yet) and hence no locking is\r
-+ * needed.\r
- */\r
- \r
- #ifndef __KERNEL__\r
-@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle\r
- * the second time we would still have a pending revoke to cancel. So,\r
- * do not trust the Revoked bit on buffers unless RevokeValid is also\r
- * set.\r
-- *\r
-- * The caller must have the journal locked.\r
- */\r
- int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)\r
- {\r
-@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo\r
- /*\r
- * Write revoke records to the journal for all entries in the current\r
- * revoke hash, deleting the entries as we go.\r
-- *\r
-- * Called with the journal lock held.\r
- */\r
--\r
- void jbd2_journal_write_revoke_records(journal_t *journal,\r
- transaction_t *transaction)\r
- {\r
-diff -uprN linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h\r
---- linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h 2009-05-02 20:54:43.000000000 +0200\r
-+++ linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h 2009-05-23 16:05:41.000000000 +0200\r
-@@ -60,7 +60,8 @@ static inline int jbd2_journal_file_inod\r
- return 0;\r
- }\r
- \r
--static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,\r
-+static inline int jbd2_journal_begin_ordered_truncate(journal_t *journal,\r
-+ struct jbd2_inode *inode,\r
- loff_t new_size)\r
- {\r
- return 0;\r
+diff -uprN linux-2.6.28.orig/Documentation/filesystems/ext4.txt linux-2.6.28/Documentation/filesystems/ext4.txt
+--- linux-2.6.28.orig/Documentation/filesystems/ext4.txt 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/Documentation/filesystems/ext4.txt 2009-05-23 16:05:41.000000000 +0200
+@@ -76,7 +76,7 @@ Note: More extensive information for get
+ * extent format more robust in face of on-disk corruption due to magics,
+ * internal redunancy in tree
+ * improved file allocation (multi-block alloc)
+-* fix 32000 subdirectory limit
++* lift 32000 subdirectory limit imposed by i_links_count[1]
+ * nsec timestamps for mtime, atime, ctime, create time
+ * inode version field on disk (NFSv4, Lustre)
+ * reduced e2fsck time via uninit_bg feature
+@@ -91,6 +91,9 @@ Note: More extensive information for get
+ * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
+ the ordering)
+
++[1] Filesystems with a block size of 1k may see a limit imposed by the
++directory hash tree having a maximum depth of two.
++
+ 2.2 Candidate features for future inclusion
+
+ * Online defrag (patches available but not well tested)
+diff -uprN linux-2.6.28.orig/fs/ext4/balloc.c linux-2.6.28/fs/ext4/balloc.c
+--- linux-2.6.28.orig/fs/ext4/balloc.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/balloc.c 2009-05-23 16:05:41.000000000 +0200
+@@ -608,7 +608,9 @@ int ext4_claim_free_blocks(struct ext4_s
+ */
+ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
+ {
+- if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
++ if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
++ (*retries)++ > 3 ||
++ !EXT4_SB(sb)->s_journal)
+ return 0;
+
+ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+diff -uprN linux-2.6.28.orig/fs/ext4/ext4.h linux-2.6.28/fs/ext4/ext4.h
+--- linux-2.6.28.orig/fs/ext4/ext4.h 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/ext4.h 2009-05-23 16:05:41.000000000 +0200
+@@ -248,6 +248,30 @@ struct flex_groups {
+ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
+ #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
+
++/* Flags that should be inherited by new inodes from their parent. */
++#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
++ EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
++ EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
++ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
++ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
++
++/* Flags that are appropriate for regular files (all but dir-specific ones). */
++#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
++
++/* Flags appropriate for inodes that are not directories or regular files. */
++#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
++
++/* Mask out flags that are inappropriate for the given type of inode. */
++static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
++{
++ if (S_ISDIR(mode))
++ return flags;
++ else if (S_ISREG(mode))
++ return flags & EXT4_REG_FLMASK;
++ else
++ return flags & EXT4_OTHER_FLMASK;
++}
++
+ /*
+ * Inode dynamic state flags
+ */
+@@ -529,7 +556,7 @@ do { \
+ #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
+ #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
+ #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
+-#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
++#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
+ #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
+ #define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
+ #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
+diff -uprN linux-2.6.28.orig/fs/ext4/extents.c linux-2.6.28/fs/ext4/extents.c
+--- linux-2.6.28.orig/fs/ext4/extents.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/extents.c 2009-05-23 16:05:41.000000000 +0200
+@@ -1120,7 +1120,8 @@ ext4_ext_search_right(struct inode *inod
+ struct ext4_extent_idx *ix;
+ struct ext4_extent *ex;
+ ext4_fsblk_t block;
+- int depth, ee_len;
++ int depth; /* Note, NOT eh_depth; depth from top of tree */
++ int ee_len;
+
+ BUG_ON(path == NULL);
+ depth = path->p_depth;
+@@ -1179,7 +1180,8 @@ ext4_ext_search_right(struct inode *inod
+ if (bh == NULL)
+ return -EIO;
+ eh = ext_block_hdr(bh);
+- if (ext4_ext_check_header(inode, eh, depth)) {
++ /* subtract from p_depth to get proper eh_depth */
++ if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
+ put_bh(bh);
+ return -EIO;
+ }
+@@ -1740,11 +1742,13 @@ ext4_ext_put_in_cache(struct inode *inod
+ {
+ struct ext4_ext_cache *cex;
+ BUG_ON(len == 0);
++ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ cex = &EXT4_I(inode)->i_cached_extent;
+ cex->ec_type = type;
+ cex->ec_block = block;
+ cex->ec_len = len;
+ cex->ec_start = start;
++ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ }
+
+ /*
+@@ -1801,12 +1805,17 @@ ext4_ext_in_cache(struct inode *inode, e
+ struct ext4_extent *ex)
+ {
+ struct ext4_ext_cache *cex;
++ int ret = EXT4_EXT_CACHE_NO;
+
++ /*
++ * We borrow i_block_reservation_lock to protect i_cached_extent
++ */
++ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ cex = &EXT4_I(inode)->i_cached_extent;
+
+ /* has cache valid data? */
+ if (cex->ec_type == EXT4_EXT_CACHE_NO)
+- return EXT4_EXT_CACHE_NO;
++ goto errout;
+
+ BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
+ cex->ec_type != EXT4_EXT_CACHE_EXTENT);
+@@ -1817,11 +1826,11 @@ ext4_ext_in_cache(struct inode *inode, e
+ ext_debug("%u cached by %u:%u:%llu\n",
+ block,
+ cex->ec_block, cex->ec_len, cex->ec_start);
+- return cex->ec_type;
++ ret = cex->ec_type;
+ }
+-
+- /* not in cache */
+- return EXT4_EXT_CACHE_NO;
++errout:
++ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
++ return ret;
+ }
+
+ /*
+@@ -2777,6 +2786,8 @@ int ext4_ext_get_blocks(handle_t *handle
+ if (allocated > max_blocks)
+ allocated = max_blocks;
+ set_buffer_unwritten(bh_result);
++ bh_result->b_bdev = inode->i_sb->s_bdev;
++ bh_result->b_blocknr = newblock;
+ goto out2;
+ }
+
+diff -uprN linux-2.6.28.orig/fs/ext4/ialloc.c linux-2.6.28/fs/ext4/ialloc.c
+--- linux-2.6.28.orig/fs/ext4/ialloc.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/ialloc.c 2009-05-23 16:05:41.000000000 +0200
+@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, s
+ struct ext4_group_desc *gdp;
+ struct ext4_super_block *es;
+ struct ext4_sb_info *sbi;
+- int fatal = 0, err;
++ int fatal = 0, err, cleared;
+ ext4_group_t flex_group;
+
+ if (atomic_read(&inode->i_count) > 1) {
+@@ -243,8 +243,10 @@ void ext4_free_inode(handle_t *handle, s
+ goto error_return;
+
+ /* Ok, now we can actually update the inode bitmaps.. */
+- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+- bit, bitmap_bh->b_data))
++ spin_lock(sb_bgl_lock(sbi, block_group));
++ cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
++ spin_unlock(sb_bgl_lock(sbi, block_group));
++ if (!cleared)
+ ext4_error(sb, "ext4_free_inode",
+ "bit already cleared for inode %lu", ino);
+ else {
+@@ -686,6 +688,7 @@ struct inode *ext4_new_inode(handle_t *h
+ struct inode *ret;
+ ext4_group_t i;
+ int free = 0;
++ static int once = 1;
+ ext4_group_t flex_group;
+
+ /* Cannot create files in a deleted directory */
+@@ -705,10 +708,12 @@ struct inode *ext4_new_inode(handle_t *h
+ ret2 = find_group_flex(sb, dir, &group);
+ if (ret2 == -1) {
+ ret2 = find_group_other(sb, dir, &group);
+- if (ret2 == 0 && printk_ratelimit())
++ if (ret2 == 0 && once) {
++ once = 0;
+ printk(KERN_NOTICE "ext4: find_group_flex "
+ "failed, fallback succeeded dir %lu\n",
+ dir->i_ino);
++ }
+ }
+ goto got_group;
+ }
+@@ -862,16 +867,12 @@ got:
+ ei->i_disksize = 0;
+
+ /*
+- * Don't inherit extent flag from directory. We set extent flag on
+- * newly created directory and file only if -o extent mount option is
+- * specified
++ * Don't inherit extent flag from directory, amongst others. We set
++ * extent flag on newly created directory and file only if -o extent
++ * mount option is specified
+ */
+- ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
+- if (S_ISLNK(mode))
+- ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+- /* dirsync only applies to directories */
+- if (!S_ISDIR(mode))
+- ei->i_flags &= ~EXT4_DIRSYNC_FL;
++ ei->i_flags =
++ ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+ ei->i_file_acl = 0;
+ ei->i_dtime = 0;
+ ei->i_block_group = group;
+diff -uprN linux-2.6.28.orig/fs/ext4/inode.c linux-2.6.28/fs/ext4/inode.c
+--- linux-2.6.28.orig/fs/ext4/inode.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/inode.c 2009-05-23 16:05:41.000000000 +0200
+@@ -1052,6 +1059,7 @@ int ext4_get_blocks_wrap(handle_t *handl
+ int retval;
+
+ clear_buffer_mapped(bh);
++ clear_buffer_unwritten(bh);
+
+ /*
+ * Try to see if we can get the block without requesting
+@@ -1082,6 +1090,18 @@ int ext4_get_blocks_wrap(handle_t *handl
+ return retval;
+
+ /*
++ * When we call get_blocks without the create flag, the
++ * BH_Unwritten flag could have gotten set if the blocks
++ * requested were part of an uninitialized extent. We need to
++ * clear this flag now that we are committed to converting all or
++ * part of the uninitialized extent to be an initialized
++ * extent. This is because we need to avoid the combination
++ * of BH_Unwritten and BH_Mapped flags being simultaneously
++ * set on the buffer_head.
++ */
++ clear_buffer_unwritten(bh);
++
++ /*
+ * New blocks allocate and/or writing to uninitialized extent
+ * will possibly result in updating i_data, so we take
+ * the write lock of i_data_sem, and call get_blocks()
+@@ -2180,6 +2200,10 @@ static int ext4_da_get_block_prep(struct
+ struct buffer_head *bh_result, int create)
+ {
+ int ret = 0;
++ sector_t invalid_block = ~((sector_t) 0xffff);
++
++ if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
++ invalid_block = ~0;
+
+ BUG_ON(create == 0);
+ BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+@@ -2201,11 +2225,18 @@ static int ext4_da_get_block_prep(struct
+ /* not enough space to reserve */
+ return ret;
+
+- map_bh(bh_result, inode->i_sb, 0);
++ map_bh(bh_result, inode->i_sb, invalid_block);
+ set_buffer_new(bh_result);
+ set_buffer_delay(bh_result);
+ } else if (ret > 0) {
+ bh_result->b_size = (ret << inode->i_blkbits);
++ /*
++ * With sub-block writes into unwritten extents
++ * we also need to mark the buffer as new so that
++ * the unwritten parts of the buffer get correctly zeroed.
++ */
++ if (buffer_unwritten(bh_result))
++ set_buffer_new(bh_result);
+ ret = 0;
+ }
+
+@@ -2493,7 +2524,7 @@ retry:
+
+ ext4_journal_stop(handle);
+
+- if (mpd.retval == -ENOSPC) {
++ if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
+ /* commit the transaction which would
+ * free blocks released in the transaction
+ * and try again
+@@ -4167,11 +4243,9 @@ struct inode *ext4_iget(struct super_blo
+ ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+ inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
+- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+- cpu_to_le32(EXT4_OS_HURD)) {
++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
+ ei->i_file_acl |=
+ ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+- }
+ inode->i_size = ext4_isize(raw_inode);
+ ei->i_disksize = inode->i_size;
+ inode->i_generation = le32_to_cpu(raw_inode->i_generation);
+@@ -4218,6 +4292,18 @@ struct inode *ext4_iget(struct super_blo
+ (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
+ }
+
++ if (ei->i_file_acl &&
++ ((ei->i_file_acl <
++ (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
++ EXT4_SB(sb)->s_gdb_count)) ||
++ (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++ ext4_error(sb, __func__,
++ "bad extended attribute block %llu in inode #%lu",
++ ei->i_file_acl, inode->i_ino);
++ ret = -EIO;
++ goto bad_inode;
++ }
++
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &ext4_file_inode_operations;
+ inode->i_fop = &ext4_file_operations;
+@@ -4232,7 +4318,8 @@ struct inode *ext4_iget(struct super_blo
+ inode->i_op = &ext4_symlink_inode_operations;
+ ext4_set_aops(inode);
+ }
+- } else {
++ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ inode->i_op = &ext4_special_inode_operations;
+ if (raw_inode->i_block[0])
+ init_special_inode(inode, inode->i_mode,
+@@ -4240,6 +4327,13 @@ struct inode *ext4_iget(struct super_blo
+ else
+ init_special_inode(inode, inode->i_mode,
+ new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
++ } else {
++ brelse(bh);
++ ret = -EIO;
++ ext4_error(inode->i_sb, __func__,
++ "bogus i_mode (%o) for inode=%lu",
++ inode->i_mode, inode->i_ino);
++ goto bad_inode;
+ }
+ brelse(iloc.bh);
+ ext4_set_inode_flags(inode);
+diff -uprN linux-2.6.28.orig/fs/ext4/ioctl.c linux-2.6.28/fs/ext4/ioctl.c
+--- linux-2.6.28.orig/fs/ext4/ioctl.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/ioctl.c 2009-05-23 16:05:41.000000000 +0200
+@@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsig
+ if (err)
+ return err;
+
+- if (!S_ISDIR(inode->i_mode))
+- flags &= ~EXT4_DIRSYNC_FL;
++ flags = ext4_mask_flags(inode->i_mode, flags);
+
+ err = -EPERM;
+ mutex_lock(&inode->i_mutex);
+diff -uprN linux-2.6.28.orig/fs/ext4/mballoc.c linux-2.6.28/fs/ext4/mballoc.c
+--- linux-2.6.28.orig/fs/ext4/mballoc.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/mballoc.c 2009-05-23 16:05:41.000000000 +0200
+@@ -1448,7 +1448,7 @@ static void ext4_mb_measure_extent(struc
+ struct ext4_free_extent *gex = &ac->ac_g_ex;
+
+ BUG_ON(ex->fe_len <= 0);
+- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
++ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
+
+@@ -2692,7 +2692,7 @@ int ext4_mb_init(struct super_block *sb,
+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
+ if (sbi->s_mb_maxs == NULL) {
+- kfree(sbi->s_mb_maxs);
++ kfree(sbi->s_mb_offsets);
+ return -ENOMEM;
+ }
+
+@@ -3289,7 +3289,7 @@ ext4_mb_normalize_request(struct ext4_al
+ }
+ BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
+ start > ac->ac_o_ex.fe_logical);
+- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
++ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+
+ /* now prepare goal request */
+
+@@ -3586,6 +3586,7 @@ static void ext4_mb_put_pa(struct ext4_a
+ struct super_block *sb, struct ext4_prealloc_space *pa)
+ {
+ unsigned long grp;
++ ext4_fsblk_t grp_blk;
+
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
+ return;
+@@ -3600,8 +3601,12 @@ static void ext4_mb_put_pa(struct ext4_a
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+- /* -1 is to protect from crossing allocation group */
+- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
++ grp_blk = pa->pa_pstart;
++ /* If linear, pa_pstart may be in the next group when pa is used up */
++ if (pa->pa_linear)
++ grp_blk--;
++
++ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
+
+ /*
+ * possible race:
+@@ -4414,7 +4419,7 @@ static void ext4_mb_add_n_trim(struct ex
+ pa_inode_list) {
+ spin_lock(&tmp_pa->pa_lock);
+ if (tmp_pa->pa_deleted) {
+- spin_unlock(&pa->pa_lock);
++ spin_unlock(&tmp_pa->pa_lock);
+ continue;
+ }
+ if (!added && pa->pa_free < tmp_pa->pa_free) {
+diff -uprN linux-2.6.28.orig/fs/ext4/namei.c linux-2.6.28/fs/ext4/namei.c
+--- linux-2.6.28.orig/fs/ext4/namei.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/namei.c 2009-05-23 16:05:41.000000000 +0200
+@@ -1056,8 +1056,16 @@ static struct dentry *ext4_lookup(struct
+ return ERR_PTR(-EIO);
+ }
+ inode = ext4_iget(dir->i_sb, ino);
+- if (IS_ERR(inode))
+- return ERR_CAST(inode);
++ if (unlikely(IS_ERR(inode))) {
++ if (PTR_ERR(inode) == -ESTALE) {
++ ext4_error(dir->i_sb, __func__,
++ "deleted inode referenced: %u",
++ ino);
++ return ERR_PTR(-EIO);
++ } else {
++ return ERR_CAST(inode);
++ }
++ }
+ }
+ return d_splice_alias(inode, dentry);
+ }
+@@ -2436,7 +2444,8 @@ static int ext4_rename(struct inode *old
+ ext4_mark_inode_dirty(handle, new_inode);
+ if (!new_inode->i_nlink)
+ ext4_orphan_add(handle, new_inode);
+- force_da_alloc = 1;
++ if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
++ force_da_alloc = 1;
+ }
+ retval = 0;
+
+diff -uprN linux-2.6.28.orig/fs/ext4/super.c linux-2.6.28/fs/ext4/super.c
+--- linux-2.6.28.orig/fs/ext4/super.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ext4/super.c 2009-05-23 16:05:41.000000000 +0200
+@@ -679,8 +679,6 @@ static int ext4_show_options(struct seq_
+ if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
+ seq_puts(seq, ",noacl");
+ #endif
+- if (!test_opt(sb, RESERVATION))
+- seq_puts(seq, ",noreservation");
+ if (sbi->s_commit_interval) {
+ seq_printf(seq, ",commit=%u",
+ (unsigned) (sbi->s_commit_interval / HZ));
+@@ -724,6 +722,9 @@ static int ext4_show_options(struct seq_
+ if (test_opt(sb, DATA_ERR_ABORT))
+ seq_puts(seq, ",data_err=abort");
+
++ if (test_opt(sb, NO_AUTO_DA_ALLOC))
++ seq_puts(seq, ",auto_da_alloc=0");
++
+ ext4_show_quota_options(seq, sb);
+ return 0;
+ }
+@@ -849,7 +850,7 @@ enum {
+ Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
+ Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
+ Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+- Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
++ Opt_auto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
+ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+ Opt_journal_checksum, Opt_journal_async_commit,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+@@ -883,8 +884,6 @@ static const match_table_t tokens = {
+ {Opt_nouser_xattr, "nouser_xattr"},
+ {Opt_acl, "acl"},
+ {Opt_noacl, "noacl"},
+- {Opt_reservation, "reservation"},
+- {Opt_noreservation, "noreservation"},
+ {Opt_noload, "noload"},
+ {Opt_nobh, "nobh"},
+ {Opt_bh, "bh"},
+@@ -919,6 +918,7 @@ static const match_table_t tokens = {
+ {Opt_delalloc, "delalloc"},
+ {Opt_nodelalloc, "nodelalloc"},
+ {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
++ {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+ {Opt_err, NULL},
+ };
+
+@@ -1049,12 +1049,6 @@ static int parse_options(char *options,
+ "not supported\n");
+ break;
+ #endif
+- case Opt_reservation:
+- set_opt(sbi->s_mount_opt, RESERVATION);
+- break;
+- case Opt_noreservation:
+- clear_opt(sbi->s_mount_opt, RESERVATION);
+- break;
+ case Opt_journal_update:
+ /* @@@ FIXME */
+ /* Eventually we will want to be able to create
+@@ -1331,6 +1325,14 @@ set_qf_format:
+ return 0;
+ sbi->s_inode_readahead_blks = option;
+ break;
++ case Opt_auto_da_alloc:
++ if (match_int(&args[0], &option))
++ return 0;
++ if (option)
++ clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
++ else
++ set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
++ break;
+ default:
+ printk(KERN_ERR
+ "EXT4-fs: Unrecognized mount option \"%s\" "
+@@ -1956,7 +1958,6 @@ static int ext4_fill_super(struct super_
+ sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
+ sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+
+- set_opt(sbi->s_mount_opt, RESERVATION);
+ set_opt(sbi->s_mount_opt, BARRIER);
+
+ /*
+diff -uprN linux-2.6.28.orig/fs/jbd2/revoke.c linux-2.6.28/fs/jbd2/revoke.c
+--- linux-2.6.28.orig/fs/jbd2/revoke.c 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/jbd2/revoke.c 2009-05-23 16:05:41.000000000 +0200
+@@ -55,6 +55,25 @@
+ * need do nothing.
+ * RevokeValid set, Revoked set:
+ * buffer has been revoked.
++ *
++ * Locking rules:
++ * We keep two hash tables of revoke records. One hashtable belongs to the
++ * running transaction (is pointed to by journal->j_revoke), the other one
++ * belongs to the committing transaction. Accesses to the second hash table
++ * happen only from kjournald and no other thread touches this table. Also
++ * jbd2_journal_switch_revoke_table(), which switches which hashtable belongs
++ * to the running and which to the committing transaction, is called only from
++ * kjournald. Therefore we need no locks when accessing the hashtable belonging
++ * to the committing transaction.
++ *
++ * All users operating on the hash table belonging to the running transaction
++ * have a handle to the transaction. Therefore they are safe from kjournald
++ * switching hash tables under them. For operations on the lists of entries in
++ * the hash table j_revoke_lock is used.
++ *
++ * Finally, the replay code also uses the hash tables, but at that point no one
++ * else can touch them (the filesystem isn't mounted yet) and hence no locking
++ * is needed.
+ */
+
+ #ifndef __KERNEL__
+@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle
+ * the second time we would still have a pending revoke to cancel. So,
+ * do not trust the Revoked bit on buffers unless RevokeValid is also
+ * set.
+- *
+- * The caller must have the journal locked.
+ */
+ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+ {
+@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo
+ /*
+ * Write revoke records to the journal for all entries in the current
+ * revoke hash, deleting the entries as we go.
+- *
+- * Called with the journal lock held.
+ */
+-
+ void jbd2_journal_write_revoke_records(journal_t *journal,
+ transaction_t *transaction)
+ {
+diff -uprN linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h
+--- linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h 2009-05-02 20:54:43.000000000 +0200
++++ linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h 2009-05-23 16:05:41.000000000 +0200
+@@ -60,7 +60,8 @@ static inline int jbd2_journal_file_inod
+ return 0;
+ }
+
+-static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
++static inline int jbd2_journal_begin_ordered_truncate(journal_t *journal,
++ struct jbd2_inode *inode,
+ loff_t new_size)
+ {
+ return 0;