--- /dev/null
+diff -uprN linux-2.6.28/arch/x86/include/asm/proto.h ubifs-v2.6.28/arch/x86/include/asm/proto.h
+--- linux-2.6.28/arch/x86/include/asm/proto.h 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/arch/x86/include/asm/proto.h 2011-06-15 14:22:06.000000000 -0400
+@@ -26,7 +26,4 @@ static const int reboot_force = 0;
+
+ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+
+-#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
+-#define round_down(x, y) ((x) & ~((y) - 1))
+-
+ #endif /* _ASM_X86_PROTO_H */
+diff -uprN linux-2.6.28/Documentation/filesystems/ubifs.txt ubifs-v2.6.28/Documentation/filesystems/ubifs.txt
+--- linux-2.6.28/Documentation/filesystems/ubifs.txt 2011-06-15 15:12:26.000000000 -0400
++++ ubifs-v2.6.28/Documentation/filesystems/ubifs.txt 2011-06-15 14:22:04.000000000 -0400
+@@ -82,12 +82,12 @@ Mount options
+ bulk_read read more in one go to take advantage of flash
+ media that read faster sequentially
+ no_bulk_read (*) do not bulk-read
+-no_chk_data_crc skip checking of CRCs on data nodes in order to
++no_chk_data_crc (*) skip checking of CRCs on data nodes in order to
+ improve read performance. Use this option only
+ if the flash media is highly reliable. The effect
+ of this option is that corruption of the contents
+ of a file can go unnoticed.
+-chk_data_crc (*) do not skip checking CRCs on data nodes
++chk_data_crc do not skip checking CRCs on data nodes
+ compr=none override default compressor and set it to "none"
+ compr=lzo override default compressor and set it to "lzo"
+ compr=zlib override default compressor and set it to "zlib"
+@@ -115,28 +115,8 @@ ubi.mtd=0 root=ubi0:rootfs rootfstype=ub
+ Module Parameters for Debugging
+ ===============================
+
+-When UBIFS has been compiled with debugging enabled, there are 3 module
++When UBIFS has been compiled with debugging enabled, there are 2 module
+ parameters that are available to control aspects of testing and debugging.
+-The parameters are unsigned integers where each bit controls an option.
+-The parameters are:
+-
+-debug_msgs Selects which debug messages to display, as follows:
+-
+- Message Type Flag value
+-
+- General messages 1
+- Journal messages 2
+- Mount messages 4
+- Commit messages 8
+- LEB search messages 16
+- Budgeting messages 32
+- Garbage collection messages 64
+- Tree Node Cache (TNC) messages 128
+- LEB properties (lprops) messages 256
+- Input/output messages 512
+- Log messages 1024
+- Scan messages 2048
+- Recovery messages 4096
+
+ debug_chks Selects extra checks that UBIFS can do while running:
+
+@@ -154,11 +134,9 @@ debug_tsts Selects a mode of testing, as
+
+ Test mode Flag value
+
+- Force in-the-gaps method 2
+ Failure mode for recovery testing 4
+
+-For example, set debug_msgs to 5 to display General messages and Mount
+-messages.
++For example, set debug_chks to 3 to enable general and TNC checks.
+
+
+ References
+diff -uprN linux-2.6.28/drivers/char/random.c ubifs-v2.6.28/drivers/char/random.c
+--- linux-2.6.28/drivers/char/random.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/char/random.c 2011-06-15 15:16:03.000000000 -0400
+@@ -1018,12 +1018,6 @@ random_read(struct file *file, char __us
+ /* like a named pipe */
+ }
+
+- /*
+- * If we gave the user some bytes, update the access time.
+- */
+- if (count)
+- file_accessed(file);
+-
+ return (count ? count : retval);
+ }
+
+@@ -1074,7 +1068,6 @@ static ssize_t random_write(struct file
+ size_t count, loff_t *ppos)
+ {
+ size_t ret;
+- struct inode *inode = file->f_path.dentry->d_inode;
+
+ ret = write_pool(&blocking_pool, buffer, count);
+ if (ret)
+@@ -1083,8 +1076,6 @@ static ssize_t random_write(struct file
+ if (ret)
+ return ret;
+
+- inode->i_mtime = current_fs_time(inode->i_sb);
+- mark_inode_dirty(inode);
+ return (ssize_t)count;
+ }
+
+diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c
+--- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c 2011-06-15 14:22:07.000000000 -0400
+@@ -421,6 +421,7 @@ struct mtd_info *cfi_cmdset_0001(struct
+ mtd->flags = MTD_CAP_NORFLASH;
+ mtd->name = map->name;
+ mtd->writesize = 1;
++ mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
+
+ mtd->reboot_notifier.notifier_call = cfi_intelext_reboot;
+
+diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c
+--- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c 2011-06-15 14:22:07.000000000 -0400
+@@ -346,6 +346,10 @@ struct mtd_info *cfi_cmdset_0002(struct
+ mtd->flags = MTD_CAP_NORFLASH;
+ mtd->name = map->name;
+ mtd->writesize = 1;
++ mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
++
++ DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n",
++ __func__, mtd->writebufsize);
+
+ if (cfi->cfi_mode==CFI_MODE_CFI){
+ unsigned char bootloc;
+diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c
+--- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c 2011-06-15 14:22:07.000000000 -0400
+@@ -239,6 +239,7 @@ static struct mtd_info *cfi_staa_setup(s
+ mtd->resume = cfi_staa_resume;
+ mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE;
+ mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */
++ mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
+ map->fldrv = &cfi_staa_chipdrv;
+ __module_get(THIS_MODULE);
+ mtd->name = map->name;
+diff -uprN linux-2.6.28/drivers/mtd/devices/mtdram.c ubifs-v2.6.28/drivers/mtd/devices/mtdram.c
+--- linux-2.6.28/drivers/mtd/devices/mtdram.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/devices/mtdram.c 2011-06-15 14:22:07.000000000 -0400
+@@ -109,6 +109,7 @@ int mtdram_init_device(struct mtd_info *
+ mtd->flags = MTD_CAP_RAM;
+ mtd->size = size;
+ mtd->writesize = 1;
++ mtd->writebufsize = 64; /* Mimic CFI NOR flashes */
+ mtd->erasesize = MTDRAM_ERASE_SIZE;
+ mtd->priv = mapped_address;
+
+diff -uprN linux-2.6.28/drivers/mtd/mtd_blkdevs.c ubifs-v2.6.28/drivers/mtd/mtd_blkdevs.c
+--- linux-2.6.28/drivers/mtd/mtd_blkdevs.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/mtd_blkdevs.c 2011-06-15 14:22:07.000000000 -0400
+@@ -139,7 +139,7 @@ static int blktrans_open(struct block_de
+ struct mtd_blktrans_ops *tr = dev->tr;
+ int ret = -ENODEV;
+
+- if (!try_module_get(dev->mtd->owner))
++ if (!get_mtd_device(NULL, dev->mtd->index))
+ goto out;
+
+ if (!try_module_get(tr->owner))
+@@ -153,7 +153,7 @@ static int blktrans_open(struct block_de
+ ret = 0;
+ if (tr->open && (ret = tr->open(dev))) {
+ dev->mtd->usecount--;
+- module_put(dev->mtd->owner);
++ put_mtd_device(dev->mtd);
+ out_tr:
+ module_put(tr->owner);
+ }
+@@ -172,7 +172,7 @@ static int blktrans_release(struct gendi
+
+ if (!ret) {
+ dev->mtd->usecount--;
+- module_put(dev->mtd->owner);
++ put_mtd_device(dev->mtd);
+ module_put(tr->owner);
+ }
+
+diff -uprN linux-2.6.28/drivers/mtd/mtdconcat.c ubifs-v2.6.28/drivers/mtd/mtdconcat.c
+--- linux-2.6.28/drivers/mtd/mtdconcat.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/mtdconcat.c 2011-06-15 14:22:07.000000000 -0400
+@@ -698,6 +698,7 @@ struct mtd_info *mtd_concat_create(struc
+ struct mtd_concat *concat;
+ u_int32_t max_erasesize, curr_erasesize;
+ int num_erase_region;
++ int max_writebufsize = 0;
+
+ printk(KERN_NOTICE "Concatenating MTD devices:\n");
+ for (i = 0; i < num_devs; i++)
+@@ -724,6 +725,12 @@ struct mtd_info *mtd_concat_create(struc
+ concat->mtd.size = subdev[0]->size;
+ concat->mtd.erasesize = subdev[0]->erasesize;
+ concat->mtd.writesize = subdev[0]->writesize;
++
++ for (i = 0; i < num_devs; i++)
++ if (max_writebufsize < subdev[i]->writebufsize)
++ max_writebufsize = subdev[i]->writebufsize;
++ concat->mtd.writebufsize = max_writebufsize;
++
+ concat->mtd.subpage_sft = subdev[0]->subpage_sft;
+ concat->mtd.oobsize = subdev[0]->oobsize;
+ concat->mtd.oobavail = subdev[0]->oobavail;
+diff -uprN linux-2.6.28/drivers/mtd/mtdpart.c ubifs-v2.6.28/drivers/mtd/mtdpart.c
+--- linux-2.6.28/drivers/mtd/mtdpart.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/mtdpart.c 2011-06-15 15:16:03.000000000 -0400
+@@ -363,6 +363,7 @@ static struct mtd_part *add_one_partitio
+ slave->mtd.flags = master->flags & ~part->mask_flags;
+ slave->mtd.size = part->size;
+ slave->mtd.writesize = master->writesize;
++ slave->mtd.writebufsize = master->writebufsize;
+ slave->mtd.oobsize = master->oobsize;
+ slave->mtd.oobavail = master->oobavail;
+ slave->mtd.subpage_sft = master->subpage_sft;
+diff -uprN linux-2.6.28/drivers/mtd/nand/nand_base.c ubifs-v2.6.28/drivers/mtd/nand/nand_base.c
+--- linux-2.6.28/drivers/mtd/nand/nand_base.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/nand/nand_base.c 2011-06-15 14:22:07.000000000 -0400
+@@ -1084,7 +1084,8 @@ static int nand_do_read_ops(struct mtd_i
+
+ /* Transfer not aligned data */
+ if (!aligned) {
+- if (!NAND_SUBPAGE_READ(chip) && !oob)
++ if (!NAND_SUBPAGE_READ(chip) && !oob &&
++ !(mtd->ecc_stats.failed - stats.failed))
+ chip->pagebuf = realpage;
+ memcpy(buf, chip->buffers->databuf + col, bytes);
+ }
+@@ -2703,6 +2704,7 @@ int nand_scan_tail(struct mtd_info *mtd)
+ mtd->resume = nand_resume;
+ mtd->block_isbad = nand_block_isbad;
+ mtd->block_markbad = nand_block_markbad;
++ mtd->writebufsize = mtd->writesize;
+
+ /* propagate ecc.layout to mtd_info */
+ mtd->ecclayout = chip->ecc.layout;
+diff -uprN linux-2.6.28/drivers/mtd/nand/nandsim.c ubifs-v2.6.28/drivers/mtd/nand/nandsim.c
+--- linux-2.6.28/drivers/mtd/nand/nandsim.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/nand/nandsim.c 2011-06-15 14:22:07.000000000 -0400
+@@ -1736,13 +1736,17 @@ static void ns_nand_write_byte(struct mt
+
+ /* Check if chip is expecting command */
+ if (NS_STATE(ns->nxstate) != STATE_UNKNOWN && !(ns->nxstate & STATE_CMD_MASK)) {
+- /*
+- * We are in situation when something else (not command)
+- * was expected but command was input. In this case ignore
+- * previous command(s)/state(s) and accept the last one.
+- */
+- NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, "
+- "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate));
++ /* Do not warn if only 2 id bytes are read */
++ if (!(ns->regs.command == NAND_CMD_READID &&
++ NS_STATE(ns->state) == STATE_DATAOUT_ID && ns->regs.count == 2)) {
++ /*
++ * We are in situation when something else (not command)
++ * was expected but command was input. In this case ignore
++ * previous command(s)/state(s) and accept the last one.
++ */
++ NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, "
++ "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate));
++ }
+ switch_to_ready_state(ns, NS_STATUS_FAILED(ns));
+ }
+
+diff -uprN linux-2.6.28/drivers/mtd/onenand/onenand_base.c ubifs-v2.6.28/drivers/mtd/onenand/onenand_base.c
+--- linux-2.6.28/drivers/mtd/onenand/onenand_base.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/onenand/onenand_base.c 2011-06-15 15:16:03.000000000 -0400
+@@ -2858,6 +2858,7 @@ int onenand_scan(struct mtd_info *mtd, i
+ mtd->block_isbad = onenand_block_isbad;
+ mtd->block_markbad = onenand_block_markbad;
+ mtd->owner = THIS_MODULE;
++ mtd->writebufsize = mtd->writesize;
+
+ /* Unlock whole block */
+ onenand_unlock_all(mtd);
+diff -uprN linux-2.6.28/drivers/mtd/ubi/build.c ubifs-v2.6.28/drivers/mtd/ubi/build.c
+--- linux-2.6.28/drivers/mtd/ubi/build.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/build.c 2011-06-15 14:22:07.000000000 -0400
+@@ -37,6 +37,7 @@
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/stringify.h>
++#include <linux/namei.h>
+ #include <linux/stat.h>
+ #include <linux/miscdevice.h>
+ #include <linux/log2.h>
+@@ -46,9 +47,16 @@
+ /* Maximum length of the 'mtd=' parameter */
+ #define MTD_PARAM_LEN_MAX 64
+
++#ifdef CONFIG_MTD_UBI_MODULE
++#define ubi_is_module() 1
++#else
++#define ubi_is_module() 0
++#endif
++
+ /**
+ * struct mtd_dev_param - MTD device parameter description data structure.
+- * @name: MTD device name or number string
++ * @name: MTD character device node path, MTD device name, or MTD device number
++ * string
+ * @vid_hdr_offs: VID header offset
+ */
+ struct mtd_dev_param {
+@@ -57,10 +65,10 @@ struct mtd_dev_param {
+ };
+
+ /* Numbers of elements set in the @mtd_dev_param array */
+-static int mtd_devs;
++static int __initdata mtd_devs;
+
+ /* MTD devices specification parameters */
+-static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
++static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES];
+
+ /* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */
+ struct class *ubi_class;
+@@ -122,6 +130,94 @@ static struct device_attribute dev_mtd_n
+ __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL);
+
+ /**
++ * ubi_volume_notify - send a volume change notification.
++ * @ubi: UBI device description object
++ * @vol: volume description object of the changed volume
++ * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc)
++ *
++ * This is a helper function which notifies all subscribers about a volume
++ * change event (creation, removal, re-sizing, re-naming, updating). Returns
++ * zero in case of success and a negative error code in case of failure.
++ */
++int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype)
++{
++ struct ubi_notification nt;
++
++ ubi_do_get_device_info(ubi, &nt.di);
++ ubi_do_get_volume_info(ubi, vol, &nt.vi);
++ return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt);
++}
++
++/**
++ * ubi_notify_all - send a notification to all volumes.
++ * @ubi: UBI device description object
++ * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc)
++ * @nb: the notifier to call
++ *
++ * This function walks all volumes of UBI device @ubi and sends the @ntype
++ * notification for each volume. If @nb is %NULL, then all registered notifiers
++ * are called, otherwise only the @nb notifier is called. Returns the number of
++ * sent notifications.
++ */
++int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb)
++{
++ struct ubi_notification nt;
++ int i, count = 0;
++
++ ubi_do_get_device_info(ubi, &nt.di);
++
++ mutex_lock(&ubi->device_mutex);
++ for (i = 0; i < ubi->vtbl_slots; i++) {
++ /*
++ * Since the @ubi->device is locked, and we are not going to
++ * change @ubi->volumes, we do not have to lock
++ * @ubi->volumes_lock.
++ */
++ if (!ubi->volumes[i])
++ continue;
++
++ ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi);
++ if (nb)
++ nb->notifier_call(nb, ntype, &nt);
++ else
++ blocking_notifier_call_chain(&ubi_notifiers, ntype,
++ &nt);
++ count += 1;
++ }
++ mutex_unlock(&ubi->device_mutex);
++
++ return count;
++}
++
++/**
++ * ubi_enumerate_volumes - send "add" notification for all existing volumes.
++ * @nb: the notifier to call
++ *
++ * This function walks all UBI devices and volumes and sends the
++ * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all
++ * registered notifiers are called, otherwise only the @nb notifier is called.
++ * Returns the number of sent notifications.
++ */
++int ubi_enumerate_volumes(struct notifier_block *nb)
++{
++ int i, count = 0;
++
++ /*
++ * Since the @ubi_devices_mutex is locked, and we are not going to
++ * change @ubi_devices, we do not have to lock @ubi_devices_lock.
++ */
++ for (i = 0; i < UBI_MAX_DEVICES; i++) {
++ struct ubi_device *ubi = ubi_devices[i];
++
++ if (!ubi)
++ continue;
++ count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb);
++ }
++
++ return count;
++}
++
++/**
+ * ubi_get_device - get UBI device.
+ * @ubi_num: UBI device number
+ *
+@@ -263,17 +359,23 @@ static ssize_t dev_attribute_show(struct
+ return ret;
+ }
+
+-/* Fake "release" method for UBI devices */
+-static void dev_release(struct device *dev) { }
++static void dev_release(struct device *dev)
++{
++ struct ubi_device *ubi = container_of(dev, struct ubi_device, dev);
++
++ kfree(ubi);
++}
+
+ /**
+ * ubi_sysfs_init - initialize sysfs for an UBI device.
+ * @ubi: UBI device description object
++ * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was
++ * taken
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+-static int ubi_sysfs_init(struct ubi_device *ubi)
++static int ubi_sysfs_init(struct ubi_device *ubi, int *ref)
+ {
+ int err;
+
+@@ -285,6 +387,7 @@ static int ubi_sysfs_init(struct ubi_dev
+ if (err)
+ return err;
+
++ *ref = 1;
+ err = device_create_file(&ubi->dev, &dev_eraseblock_size);
+ if (err)
+ return err;
+@@ -340,7 +443,7 @@ static void ubi_sysfs_close(struct ubi_d
+ }
+
+ /**
+- * kill_volumes - destroy all volumes.
++ * kill_volumes - destroy all user volumes.
+ * @ubi: UBI device description object
+ */
+ static void kill_volumes(struct ubi_device *ubi)
+@@ -353,36 +456,29 @@ static void kill_volumes(struct ubi_devi
+ }
+
+ /**
+- * free_user_volumes - free all user volumes.
+- * @ubi: UBI device description object
+- *
+- * Normally the volumes are freed at the release function of the volume device
+- * objects. However, on error paths the volumes have to be freed before the
+- * device objects have been initialized.
+- */
+-static void free_user_volumes(struct ubi_device *ubi)
+-{
+- int i;
+-
+- for (i = 0; i < ubi->vtbl_slots; i++)
+- if (ubi->volumes[i]) {
+- kfree(ubi->volumes[i]->eba_tbl);
+- kfree(ubi->volumes[i]);
+- }
+-}
+-
+-/**
+ * uif_init - initialize user interfaces for an UBI device.
+ * @ubi: UBI device description object
++ * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was
++ * taken, otherwise set to %0
++ *
++ * This function initializes various user interfaces for an UBI device. If the
++ * initialization fails at an early stage, this function frees all the
++ * resources it allocated, returns an error, and @ref is set to %0. However,
++ * if the initialization fails after the UBI device was registered in the
++ * driver core subsystem, this function takes a reference to @ubi->dev, because
++ * otherwise the release function ('dev_release()') would free the whole @ubi
++ * object. The @ref argument is set to %1 in this case. The caller has to put
++ * this reference.
+ *
+ * This function returns zero in case of success and a negative error code in
+- * case of failure. Note, this function destroys all volumes if it failes.
++ * case of failure.
+ */
+-static int uif_init(struct ubi_device *ubi)
++static int uif_init(struct ubi_device *ubi, int *ref)
+ {
+- int i, err, do_free = 0;
++ int i, err;
+ dev_t dev;
+
++ *ref = 0;
+ sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
+
+ /*
+@@ -410,7 +506,7 @@ static int uif_init(struct ubi_device *u
+ goto out_unreg;
+ }
+
+- err = ubi_sysfs_init(ubi);
++ err = ubi_sysfs_init(ubi, ref);
+ if (err)
+ goto out_sysfs;
+
+@@ -427,13 +523,12 @@ static int uif_init(struct ubi_device *u
+
+ out_volumes:
+ kill_volumes(ubi);
+- do_free = 0;
+ out_sysfs:
++ if (*ref)
++ get_device(&ubi->dev);
+ ubi_sysfs_close(ubi);
+ cdev_del(&ubi->cdev);
+ out_unreg:
+- if (do_free)
+- free_user_volumes(ubi);
+ unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
+ ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
+ return err;
+@@ -493,8 +588,10 @@ static int attach_by_scanning(struct ubi
+
+ ubi->bad_peb_count = si->bad_peb_count;
+ ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
++ ubi->corr_peb_count = si->corr_peb_count;
+ ubi->max_ec = si->max_ec;
+ ubi->mean_ec = si->mean_ec;
++ ubi_msg("max. sequence number: %llu", si->max_sqnum);
+
+ err = ubi_read_volume_table(ubi, si);
+ if (err)
+@@ -567,6 +664,11 @@ static int io_init(struct ubi_device *ub
+ if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)
+ ubi->bad_allowed = 1;
+
++ if (ubi->mtd->type == MTD_NORFLASH) {
++ ubi_assert(ubi->mtd->writesize == 1);
++ ubi->nor_flash = 1;
++ }
++
+ ubi->min_io_size = ubi->mtd->writesize;
+ ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
+
+@@ -585,11 +687,25 @@ static int io_init(struct ubi_device *ub
+ ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size);
+ ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0);
+
++ ubi->max_write_size = ubi->mtd->writebufsize;
++ /*
++	 * Maximum write size has to be greater than or equal to the min. I/O
++	 * size, and must be a multiple of the min. I/O size.
++ */
++ if (ubi->max_write_size < ubi->min_io_size ||
++ ubi->max_write_size % ubi->min_io_size ||
++ !is_power_of_2(ubi->max_write_size)) {
++ ubi_err("bad write buffer size %d for %d min. I/O unit",
++ ubi->max_write_size, ubi->min_io_size);
++ return -EINVAL;
++ }
++
+ /* Calculate default aligned sizes of EC and VID headers */
+ ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
+ ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
+
+ dbg_msg("min_io_size %d", ubi->min_io_size);
++ dbg_msg("max_write_size %d", ubi->max_write_size);
+ dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
+ dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize);
+ dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize);
+@@ -606,7 +722,7 @@ static int io_init(struct ubi_device *ub
+ }
+
+ /* Similar for the data offset */
+- ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE;
++ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE;
+ ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
+
+ dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset);
+@@ -751,7 +867,7 @@ static int autoresize(struct ubi_device
+ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
+ {
+ struct ubi_device *ubi;
+- int i, err, do_free = 1;
++ int i, err, ref = 0;
+
+ /*
+ * Check if we already have the same MTD device attached.
+@@ -814,11 +930,12 @@ int ubi_attach_mtd_dev(struct mtd_info *
+
+ mutex_init(&ubi->buf_mutex);
+ mutex_init(&ubi->ckvol_mutex);
+- mutex_init(&ubi->mult_mutex);
+- mutex_init(&ubi->volumes_mutex);
++ mutex_init(&ubi->device_mutex);
+ spin_lock_init(&ubi->volumes_lock);
+
+ ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num);
++ dbg_msg("sizeof(struct ubi_scan_leb) %zu", sizeof(struct ubi_scan_leb));
++ dbg_msg("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry));
+
+ err = io_init(ubi);
+ if (err)
+@@ -833,13 +950,6 @@ int ubi_attach_mtd_dev(struct mtd_info *
+ if (!ubi->peb_buf2)
+ goto out_free;
+
+-#ifdef CONFIG_MTD_UBI_DEBUG
+- mutex_init(&ubi->dbg_buf_mutex);
+- ubi->dbg_peb_buf = vmalloc(ubi->peb_size);
+- if (!ubi->dbg_peb_buf)
+- goto out_free;
+-#endif
+-
+ err = attach_by_scanning(ubi);
+ if (err) {
+ dbg_err("failed to attach by scanning, error %d", err);
+@@ -852,9 +962,9 @@ int ubi_attach_mtd_dev(struct mtd_info *
+ goto out_detach;
+ }
+
+- err = uif_init(ubi);
++ err = uif_init(ubi, &ref);
+ if (err)
+- goto out_nofree;
++ goto out_detach;
+
+ ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
+ if (IS_ERR(ubi->bgt_thread)) {
+@@ -869,6 +979,7 @@ int ubi_attach_mtd_dev(struct mtd_info *
+ ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20);
+ ubi_msg("number of good PEBs: %d", ubi->good_peb_count);
+ ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count);
++ ubi_msg("number of corrupted PEBs: %d", ubi->corr_peb_count);
+ ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots);
+ ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD);
+ ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);
+@@ -879,32 +990,34 @@ int ubi_attach_mtd_dev(struct mtd_info *
+ ubi_msg("number of PEBs reserved for bad PEB handling: %d",
+ ubi->beb_rsvd_pebs);
+ ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
+- ubi_msg("image sequence number: %d", ubi->image_seq);
++ ubi_msg("image sequence number: %d", ubi->image_seq);
+
+- if (!DBG_DISABLE_BGT)
+- ubi->thread_enabled = 1;
++ /*
++ * The below lock makes sure we do not race with 'ubi_thread()' which
++ * checks @ubi->thread_enabled. Otherwise we may fail to wake it up.
++ */
++ spin_lock(&ubi->wl_lock);
++ ubi->thread_enabled = 1;
+ wake_up_process(ubi->bgt_thread);
++ spin_unlock(&ubi->wl_lock);
+
+ ubi_devices[ubi_num] = ubi;
++ ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
+ return ubi_num;
+
+ out_uif:
+ uif_close(ubi);
+-out_nofree:
+- do_free = 0;
+ out_detach:
+ ubi_wl_close(ubi);
+- if (do_free)
+- free_user_volumes(ubi);
+ free_internal_volumes(ubi);
+ vfree(ubi->vtbl);
+ out_free:
+ vfree(ubi->peb_buf1);
+ vfree(ubi->peb_buf2);
+-#ifdef CONFIG_MTD_UBI_DEBUG
+- vfree(ubi->dbg_peb_buf);
+-#endif
+- kfree(ubi);
++ if (ref)
++ put_device(&ubi->dev);
++ else
++ kfree(ubi);
+ return err;
+ }
+
+@@ -928,13 +1041,13 @@ int ubi_detach_mtd_dev(int ubi_num, int
+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
+ return -EINVAL;
+
+- spin_lock(&ubi_devices_lock);
+- ubi = ubi_devices[ubi_num];
+- if (!ubi) {
+- spin_unlock(&ubi_devices_lock);
++ ubi = ubi_get_device(ubi_num);
++ if (!ubi)
+ return -EINVAL;
+- }
+
++ spin_lock(&ubi_devices_lock);
++ put_device(&ubi->dev);
++ ubi->ref_count -= 1;
+ if (ubi->ref_count) {
+ if (!anyway) {
+ spin_unlock(&ubi_devices_lock);
+@@ -948,6 +1061,7 @@ int ubi_detach_mtd_dev(int ubi_num, int
+ spin_unlock(&ubi_devices_lock);
+
+ ubi_assert(ubi_num == ubi->ubi_num);
++ ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL);
+ dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num);
+
+ /*
+@@ -957,6 +1071,12 @@ int ubi_detach_mtd_dev(int ubi_num, int
+ if (ubi->bgt_thread)
+ kthread_stop(ubi->bgt_thread);
+
++ /*
++ * Get a reference to the device in order to prevent 'dev_release()'
++ * from freeing the @ubi object.
++ */
++ get_device(&ubi->dev);
++
+ uif_close(ubi);
+ ubi_wl_close(ubi);
+ free_internal_volumes(ubi);
+@@ -964,22 +1084,56 @@ int ubi_detach_mtd_dev(int ubi_num, int
+ put_mtd_device(ubi->mtd);
+ vfree(ubi->peb_buf1);
+ vfree(ubi->peb_buf2);
+-#ifdef CONFIG_MTD_UBI_DEBUG
+- vfree(ubi->dbg_peb_buf);
+-#endif
+ ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num);
+- kfree(ubi);
++ put_device(&ubi->dev);
+ return 0;
+ }
+
+ /**
+- * find_mtd_device - open an MTD device by its name or number.
+- * @mtd_dev: name or number of the device
++ * open_mtd_by_chdev - open an MTD device by its character device node path.
++ * @mtd_dev: MTD character device node path
++ *
++ * This helper function opens an MTD device by its character node device path.
++ * Returns MTD device description object in case of success and a negative
++ * error code in case of failure.
++ */
++static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
++{
++ int err, major, minor, mode;
++ struct path path;
++
++ /* Probably this is an MTD character device node path */
++ err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path);
++ if (err)
++ return ERR_PTR(err);
++
++ /* MTD device number is defined by the major / minor numbers */
++ major = imajor(path.dentry->d_inode);
++ minor = iminor(path.dentry->d_inode);
++ mode = path.dentry->d_inode->i_mode;
++ path_put(&path);
++ if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode))
++ return ERR_PTR(-EINVAL);
++
++ if (minor & 1)
++ /*
++		 * We do not think support for the "/dev/mtdrX" devices is
++		 * needed, so do not support them to avoid doing extra work.
++ */
++ return ERR_PTR(-EINVAL);
++
++ return get_mtd_device(NULL, minor / 2);
++}
++
++/**
++ * open_mtd_device - open MTD device by name, character device path, or number.
++ * @mtd_dev: name, character device node path, or MTD device device number
+ *
+ * This function tries to open and MTD device described by @mtd_dev string,
+- * which is first treated as an ASCII number, and if it is not true, it is
+- * treated as MTD device name. Returns MTD device description object in case of
+- * success and a negative error code in case of failure.
++ * which is first treated as an ASCII MTD device number; if that fails, it
++ * is treated as an MTD device name; and if that also fails, it is treated
++ * as an MTD character device node path. Returns the MTD device description
++ * object in case of success and a negative error code in case of failure.
+ */
+ static struct mtd_info * __init open_mtd_device(const char *mtd_dev)
+ {
+@@ -994,6 +1148,9 @@ static struct mtd_info * __init open_mtd
+ * MTD device name.
+ */
+ mtd = get_mtd_device_nm(mtd_dev);
++ if (IS_ERR(mtd) && PTR_ERR(mtd) == -ENODEV)
++ /* Probably this is an MTD character device node path */
++ mtd = open_mtd_by_chdev(mtd_dev);
+ } else
+ mtd = get_mtd_device(NULL, mtd_num);
+
+@@ -1057,9 +1214,24 @@ static int __init ubi_init(void)
+ p->vid_hdr_offs);
+ mutex_unlock(&ubi_devices_mutex);
+ if (err < 0) {
+- put_mtd_device(mtd);
+ ubi_err("cannot attach mtd%d", mtd->index);
+- goto out_detach;
++ put_mtd_device(mtd);
++
++ /*
++ * Originally UBI stopped initializing on any error.
++ * However, later on it was found out that this
++ * behavior is not very good when UBI is compiled into
++ * the kernel and the MTD devices to attach are passed
++ * through the command line. Indeed, UBI failure
++ * stopped whole boot sequence.
++ *
++ * To fix this, we changed the behavior for the
++ * non-module case, but preserved the old behavior for
++ * the module case, just for compatibility. This is a
++ * little inconsistent, though.
++ */
++ if (ubi_is_module())
++ goto out_detach;
+ }
+ }
+
+@@ -1209,13 +1381,15 @@ static int __init ubi_mtd_param_parse(co
+
+ module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000);
+ MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: "
+- "mtd=<name|num>[,<vid_hdr_offs>].\n"
++ "mtd=<name|num|path>[,<vid_hdr_offs>].\n"
+ "Multiple \"mtd\" parameters may be specified.\n"
+- "MTD devices may be specified by their number or name.\n"
++ "MTD devices may be specified by their number, name, or "
++ "path to the MTD character device node.\n"
+ "Optional \"vid_hdr_offs\" parameter specifies UBI VID "
+- "header position and data starting position to be used "
+- "by UBI.\n"
+- "Example: mtd=content,1984 mtd=4 - attach MTD device"
++ "header position to be used by UBI.\n"
++ "Example 1: mtd=/dev/mtd0 - attach MTD device "
++ "/dev/mtd0.\n"
++ "Example 2: mtd=content,1984 mtd=4 - attach MTD device "
+ "with name \"content\" using VID header offset 1984, and "
+ "MTD device number 4 with default VID header offset.");
+
+diff -uprN linux-2.6.28/drivers/mtd/ubi/cdev.c ubifs-v2.6.28/drivers/mtd/ubi/cdev.c
+--- linux-2.6.28/drivers/mtd/ubi/cdev.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/cdev.c 2011-06-15 14:22:07.000000000 -0400
+@@ -40,9 +40,9 @@
+ #include <linux/ioctl.h>
+ #include <linux/capability.h>
+ #include <linux/uaccess.h>
+-#include <linux/smp_lock.h>
++#include <linux/compat.h>
++#include <linux/math64.h>
+ #include <mtd/ubi-user.h>
+-#include <asm/div64.h>
+ #include "ubi.h"
+
+ /**
+@@ -113,7 +113,8 @@ static int vol_cdev_open(struct inode *i
+ else
+ mode = UBI_READONLY;
+
+- dbg_gen("open volume %d, mode %d", vol_id, mode);
++ dbg_gen("open device %d, volume %d, mode %d",
++ ubi_num, vol_id, mode);
+
+ desc = ubi_open_volume(ubi_num, vol_id, mode);
+ if (IS_ERR(desc))
+@@ -128,7 +129,8 @@ static int vol_cdev_release(struct inode
+ struct ubi_volume_desc *desc = file->private_data;
+ struct ubi_volume *vol = desc->vol;
+
+- dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
++ dbg_gen("release device %d, volume %d, mode %d",
++ vol->ubi->ubi_num, vol->vol_id, desc->mode);
+
+ if (vol->updating) {
+ ubi_warn("update of volume %d not finished, volume is damaged",
+@@ -155,7 +157,7 @@ static loff_t vol_cdev_llseek(struct fil
+ loff_t new_offset;
+
+ if (vol->updating) {
+- /* Update is in progress, seeking is prohibited */
++ /* Update is in progress, seeking is prohibited */
+ dbg_err("updating");
+ return -EBUSY;
+ }
+@@ -186,6 +188,16 @@ static loff_t vol_cdev_llseek(struct fil
+ return new_offset;
+ }
+
++static int vol_cdev_fsync(struct file *file, struct dentry *dentry,
++ int datasync)
++{
++ struct ubi_volume_desc *desc = file->private_data;
++ struct ubi_device *ubi = desc->vol->ubi;
++
++ return ubi_sync(ubi->ubi_num);
++}
++
++
+ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
+ loff_t *offp)
+ {
+@@ -195,7 +207,6 @@ static ssize_t vol_cdev_read(struct file
+ int err, lnum, off, len, tbuf_size;
+ size_t count_save = count;
+ void *tbuf;
+- uint64_t tmp;
+
+ dbg_gen("read %zd bytes from offset %lld of volume %d",
+ count, *offp, vol->vol_id);
+@@ -225,10 +236,7 @@ static ssize_t vol_cdev_read(struct file
+ return -ENOMEM;
+
+ len = count > tbuf_size ? tbuf_size : count;
+-
+- tmp = *offp;
+- off = do_div(tmp, vol->usable_leb_size);
+- lnum = tmp;
++ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off);
+
+ do {
+ cond_resched();
+@@ -263,12 +271,9 @@ static ssize_t vol_cdev_read(struct file
+ return err ? err : count_save - count;
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
+-
+ /*
+ * This function allows to directly write to dynamic UBI volumes, without
+- * issuing the volume update operation. Available only as a debugging feature.
+- * Very useful for testing UBI.
++ * issuing the volume update operation.
+ */
+ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offp)
+@@ -279,7 +284,9 @@ static ssize_t vol_cdev_direct_write(str
+ int lnum, off, len, tbuf_size, err = 0;
+ size_t count_save = count;
+ char *tbuf;
+- uint64_t tmp;
++
++ if (!vol->direct_writes)
++ return -EPERM;
+
+ dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
+ count, *offp, vol->vol_id);
+@@ -287,10 +294,7 @@ static ssize_t vol_cdev_direct_write(str
+ if (vol->vol_type == UBI_STATIC_VOLUME)
+ return -EROFS;
+
+- tmp = *offp;
+- off = do_div(tmp, vol->usable_leb_size);
+- lnum = tmp;
+-
++ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off);
+ if (off & (ubi->min_io_size - 1)) {
+ dbg_err("unaligned position");
+ return -EINVAL;
+@@ -347,10 +351,6 @@ static ssize_t vol_cdev_direct_write(str
+ return err ? err : count_save - count;
+ }
+
+-#else
+-#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
+-#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
+-
+ static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offp)
+ {
+@@ -395,15 +395,15 @@ static ssize_t vol_cdev_write(struct fil
+ vol->corrupted = 1;
+ }
+ vol->checked = 1;
+- ubi_gluebi_updated(vol);
++ ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED);
+ revoke_exclusive(desc, UBI_READWRITE);
+ }
+
+ return count;
+ }
+
+-static int vol_cdev_ioctl(struct inode *inode, struct file *file,
+- unsigned int cmd, unsigned long arg)
++static long vol_cdev_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
+ {
+ int err = 0;
+ struct ubi_volume_desc *desc = file->private_data;
+@@ -487,7 +487,6 @@ static int vol_cdev_ioctl(struct inode *
+ break;
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
+ /* Logical eraseblock erasure command */
+ case UBI_IOCEBER:
+ {
+@@ -518,13 +517,77 @@ static int vol_cdev_ioctl(struct inode *
+ err = ubi_wl_flush(ubi);
+ break;
+ }
+-#endif
++
++ /* Logical eraseblock map command */
++ case UBI_IOCEBMAP:
++ {
++ struct ubi_map_req req;
++
++ err = copy_from_user(&req, argp, sizeof(struct ubi_map_req));
++ if (err) {
++ err = -EFAULT;
++ break;
++ }
++ err = ubi_leb_map(desc, req.lnum, req.dtype);
++ break;
++ }
++
++ /* Logical eraseblock un-map command */
++ case UBI_IOCEBUNMAP:
++ {
++ int32_t lnum;
++
++ err = get_user(lnum, (__user int32_t *)argp);
++ if (err) {
++ err = -EFAULT;
++ break;
++ }
++ err = ubi_leb_unmap(desc, lnum);
++ break;
++ }
++
++ /* Check if logical eraseblock is mapped command */
++ case UBI_IOCEBISMAP:
++ {
++ int32_t lnum;
++
++ err = get_user(lnum, (__user int32_t *)argp);
++ if (err) {
++ err = -EFAULT;
++ break;
++ }
++ err = ubi_is_mapped(desc, lnum);
++ break;
++ }
++
++ /* Set volume property command */
++ case UBI_IOCSETVOLPROP:
++ {
++ struct ubi_set_vol_prop_req req;
++
++ err = copy_from_user(&req, argp,
++ sizeof(struct ubi_set_vol_prop_req));
++ if (err) {
++ err = -EFAULT;
++ break;
++ }
++ switch (req.property) {
++ case UBI_VOL_PROP_DIRECT_WRITE:
++ mutex_lock(&ubi->device_mutex);
++ desc->vol->direct_writes = !!req.value;
++ mutex_unlock(&ubi->device_mutex);
++ break;
++ default:
++ err = -EINVAL;
++ break;
++ }
++ break;
++ }
+
+ default:
+ err = -ENOTTY;
+ break;
+ }
+-
+ return err;
+ }
+
+@@ -735,23 +798,23 @@ static int rename_volumes(struct ubi_dev
+ goto out_free;
+ }
+
+- re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+- if (!re) {
++ re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
++ if (!re1) {
+ err = -ENOMEM;
+ ubi_close_volume(desc);
+ goto out_free;
+ }
+
+- re->remove = 1;
+- re->desc = desc;
+- list_add(&re->list, &rename_list);
++ re1->remove = 1;
++ re1->desc = desc;
++ list_add(&re1->list, &rename_list);
+ dbg_msg("will remove volume %d, name \"%s\"",
+- re->desc->vol->vol_id, re->desc->vol->name);
++ re1->desc->vol->vol_id, re1->desc->vol->name);
+ }
+
+- mutex_lock(&ubi->volumes_mutex);
++ mutex_lock(&ubi->device_mutex);
+ err = ubi_rename_volumes(ubi, &rename_list);
+- mutex_unlock(&ubi->volumes_mutex);
++ mutex_unlock(&ubi->device_mutex);
+
+ out_free:
+ list_for_each_entry_safe(re, re1, &rename_list, list) {
+@@ -762,8 +825,8 @@ out_free:
+ return err;
+ }
+
+-static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
+- unsigned int cmd, unsigned long arg)
++static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
+ {
+ int err = 0;
+ struct ubi_device *ubi;
+@@ -773,7 +836,7 @@ static int ubi_cdev_ioctl(struct inode *
+ if (!capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+- ubi = ubi_get_by_major(imajor(inode));
++ ubi = ubi_get_by_major(imajor(file->f_mapping->host));
+ if (!ubi)
+ return -ENODEV;
+
+@@ -794,9 +857,9 @@ static int ubi_cdev_ioctl(struct inode *
+ if (err)
+ break;
+
+- mutex_lock(&ubi->volumes_mutex);
++ mutex_lock(&ubi->device_mutex);
+ err = ubi_create_volume(ubi, &req);
+- mutex_unlock(&ubi->volumes_mutex);
++ mutex_unlock(&ubi->device_mutex);
+ if (err)
+ break;
+
+@@ -825,9 +887,9 @@ static int ubi_cdev_ioctl(struct inode *
+ break;
+ }
+
+- mutex_lock(&ubi->volumes_mutex);
++ mutex_lock(&ubi->device_mutex);
+ err = ubi_remove_volume(desc, 0);
+- mutex_unlock(&ubi->volumes_mutex);
++ mutex_unlock(&ubi->device_mutex);
+
+ /*
+ * The volume is deleted (unless an error occurred), and the
+@@ -842,7 +904,6 @@ static int ubi_cdev_ioctl(struct inode *
+ case UBI_IOCRSVOL:
+ {
+ int pebs;
+- uint64_t tmp;
+ struct ubi_rsvol_req req;
+
+ dbg_gen("re-size volume");
+@@ -862,13 +923,12 @@ static int ubi_cdev_ioctl(struct inode *
+ break;
+ }
+
+- tmp = req.bytes;
+- pebs = !!do_div(tmp, desc->vol->usable_leb_size);
+- pebs += tmp;
++ pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1,
++ desc->vol->usable_leb_size);
+
+- mutex_lock(&ubi->volumes_mutex);
++ mutex_lock(&ubi->device_mutex);
+ err = ubi_resize_volume(desc, pebs);
+- mutex_unlock(&ubi->volumes_mutex);
++ mutex_unlock(&ubi->device_mutex);
+ ubi_close_volume(desc);
+ break;
+ }
+@@ -892,9 +952,7 @@ static int ubi_cdev_ioctl(struct inode *
+ break;
+ }
+
+- mutex_lock(&ubi->mult_mutex);
+ err = rename_volumes(ubi, req);
+- mutex_unlock(&ubi->mult_mutex);
+ kfree(req);
+ break;
+ }
+@@ -908,8 +966,8 @@ static int ubi_cdev_ioctl(struct inode *
+ return err;
+ }
+
+-static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
+- unsigned int cmd, unsigned long arg)
++static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
+ {
+ int err = 0;
+ void __user *argp = (void __user *)arg;
+@@ -985,26 +1043,61 @@ static int ctrl_cdev_ioctl(struct inode
+ return err;
+ }
+
+-/* UBI control character device operations */
+-struct file_operations ubi_ctrl_cdev_operations = {
+- .ioctl = ctrl_cdev_ioctl,
+- .owner = THIS_MODULE,
++#ifdef CONFIG_COMPAT
++static long vol_cdev_compat_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ unsigned long translated_arg = (unsigned long)compat_ptr(arg);
++
++ return vol_cdev_ioctl(file, cmd, translated_arg);
++}
++
++static long ubi_cdev_compat_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ unsigned long translated_arg = (unsigned long)compat_ptr(arg);
++
++ return ubi_cdev_ioctl(file, cmd, translated_arg);
++}
++
++static long ctrl_cdev_compat_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ unsigned long translated_arg = (unsigned long)compat_ptr(arg);
++
++ return ctrl_cdev_ioctl(file, cmd, translated_arg);
++}
++#else
++#define vol_cdev_compat_ioctl NULL
++#define ubi_cdev_compat_ioctl NULL
++#define ctrl_cdev_compat_ioctl NULL
++#endif
++
++/* UBI volume character device operations */
++const struct file_operations ubi_vol_cdev_operations = {
++ .owner = THIS_MODULE,
++ .open = vol_cdev_open,
++ .release = vol_cdev_release,
++ .llseek = vol_cdev_llseek,
++ .read = vol_cdev_read,
++ .write = vol_cdev_write,
++ .fsync = vol_cdev_fsync,
++ .unlocked_ioctl = vol_cdev_ioctl,
++ .compat_ioctl = vol_cdev_compat_ioctl,
+ };
+
+ /* UBI character device operations */
+-struct file_operations ubi_cdev_operations = {
+- .owner = THIS_MODULE,
+- .ioctl = ubi_cdev_ioctl,
+- .llseek = no_llseek,
++const struct file_operations ubi_cdev_operations = {
++ .owner = THIS_MODULE,
++ .llseek = no_llseek,
++ .unlocked_ioctl = ubi_cdev_ioctl,
++ .compat_ioctl = ubi_cdev_compat_ioctl,
+ };
+
+-/* UBI volume character device operations */
+-struct file_operations ubi_vol_cdev_operations = {
+- .owner = THIS_MODULE,
+- .open = vol_cdev_open,
+- .release = vol_cdev_release,
+- .llseek = vol_cdev_llseek,
+- .read = vol_cdev_read,
+- .write = vol_cdev_write,
+- .ioctl = vol_cdev_ioctl,
++/* UBI control character device operations */
++const struct file_operations ubi_ctrl_cdev_operations = {
++ .owner = THIS_MODULE,
++ .unlocked_ioctl = ctrl_cdev_ioctl,
++ .compat_ioctl = ctrl_cdev_compat_ioctl,
++ .llseek = no_llseek,
+ };
+diff -uprN linux-2.6.28/drivers/mtd/ubi/debug.c ubifs-v2.6.28/drivers/mtd/ubi/debug.c
+--- linux-2.6.28/drivers/mtd/ubi/debug.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/debug.c 2011-06-15 14:22:07.000000000 -0400
+@@ -27,6 +27,17 @@
+ #ifdef CONFIG_MTD_UBI_DEBUG
+
+ #include "ubi.h"
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++
++unsigned int ubi_chk_flags;
++unsigned int ubi_tst_flags;
++
++module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR);
++module_param_named(debug_tsts, ubi_tst_flags, uint, S_IRUGO | S_IWUSR);
++
++MODULE_PARM_DESC(debug_chks, "Debug check flags");
++MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
+
+ /**
+ * ubi_dbg_dump_ec_hdr - dump an erase counter header.
+@@ -61,15 +72,15 @@ void ubi_dbg_dump_vid_hdr(const struct u
+ {
+ printk(KERN_DEBUG "Volume identifier header dump:\n");
+ printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic));
+- printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
+- printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
+- printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
+- printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
+- printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
+- printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
+- printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
+- printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
+- printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
++ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
++ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
++ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
++ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
++ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
++ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
++ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
++ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
++ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
+ printk(KERN_DEBUG "\tsqnum %llu\n",
+ (unsigned long long)be64_to_cpu(vid_hdr->sqnum));
+ printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
+@@ -196,4 +207,36 @@ void ubi_dbg_dump_mkvol_req(const struct
+ printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
+ }
+
++/**
++ * ubi_dbg_dump_flash - dump a region of flash.
++ * @ubi: UBI device description object
++ * @pnum: the physical eraseblock number to dump
++ * @offset: the starting offset within the physical eraseblock to dump
++ * @len: the length of the region to dump
++ */
++void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len)
++{
++ int err;
++ size_t read;
++ void *buf;
++ loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
++
++ buf = vmalloc(len);
++ if (!buf)
++ return;
++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
++ if (err && err != -EUCLEAN) {
++ ubi_err("error %d while reading %d bytes from PEB %d:%d, "
++ "read %zd bytes", err, len, pnum, offset, read);
++ goto out;
++ }
++
++ dbg_msg("dumping %d bytes of data from PEB %d, offset %d",
++ len, pnum, offset);
++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1);
++out:
++ vfree(buf);
++ return;
++}
++
+ #endif /* CONFIG_MTD_UBI_DEBUG */
+diff -uprN linux-2.6.28/drivers/mtd/ubi/debug.h ubifs-v2.6.28/drivers/mtd/ubi/debug.h
+--- linux-2.6.28/drivers/mtd/ubi/debug.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/debug.h 2011-06-15 14:22:07.000000000 -0400
+@@ -21,11 +21,17 @@
+ #ifndef __UBI_DEBUG_H__
+ #define __UBI_DEBUG_H__
+
++struct ubi_ec_hdr;
++struct ubi_vid_hdr;
++struct ubi_volume;
++struct ubi_vtbl_record;
++struct ubi_scan_volume;
++struct ubi_scan_leb;
++struct ubi_mkvol_req;
++
+ #ifdef CONFIG_MTD_UBI_DEBUG
+ #include <linux/random.h>
+
+-#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
+-
+ #define ubi_assert(expr) do { \
+ if (unlikely(!(expr))) { \
+ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
+@@ -34,19 +40,28 @@
+ } \
+ } while (0)
+
+-#define dbg_msg(fmt, ...) \
+- printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
+- current->pid, __func__, ##__VA_ARGS__)
++#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
+
+ #define ubi_dbg_dump_stack() dump_stack()
+
+-struct ubi_ec_hdr;
+-struct ubi_vid_hdr;
+-struct ubi_volume;
+-struct ubi_vtbl_record;
+-struct ubi_scan_volume;
+-struct ubi_scan_leb;
+-struct ubi_mkvol_req;
++#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \
++ print_hex_dump(l, ps, pt, r, g, b, len, a)
++
++#define ubi_dbg_msg(type, fmt, ...) \
++ pr_debug("UBI DBG " type ": " fmt "\n", ##__VA_ARGS__)
++
++/* Just a debugging messages not related to any specific UBI subsystem */
++#define dbg_msg(fmt, ...) ubi_dbg_msg("msg", fmt, ##__VA_ARGS__)
++/* General debugging messages */
++#define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__)
++/* Messages from the eraseblock association sub-system */
++#define dbg_eba(fmt, ...) ubi_dbg_msg("eba", fmt, ##__VA_ARGS__)
++/* Messages from the wear-leveling sub-system */
++#define dbg_wl(fmt, ...) ubi_dbg_msg("wl", fmt, ##__VA_ARGS__)
++/* Messages from the input/output sub-system */
++#define dbg_io(fmt, ...) ubi_dbg_msg("io", fmt, ##__VA_ARGS__)
++/* Initialization and build messages */
++#define dbg_bld(fmt, ...) ubi_dbg_msg("bld", fmt, ##__VA_ARGS__)
+
+ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr);
+ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr);
+@@ -55,51 +70,53 @@ void ubi_dbg_dump_vtbl_record(const stru
+ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
+ void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
+ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
++void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len);
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+-/* General debugging messages */
+-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#else
+-#define dbg_gen(fmt, ...) ({})
+-#endif
+-
+-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
+-/* Messages from the eraseblock association sub-system */
+-#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#else
+-#define dbg_eba(fmt, ...) ({})
+-#endif
++extern unsigned int ubi_chk_flags;
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
+-/* Messages from the wear-leveling sub-system */
+-#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#else
+-#define dbg_wl(fmt, ...) ({})
+-#endif
++/*
++ * Debugging check flags.
++ *
++ * UBI_CHK_GEN: general checks
++ * UBI_CHK_IO: check writes and erases
++ */
++enum {
++ UBI_CHK_GEN = 0x1,
++ UBI_CHK_IO = 0x2,
++};
++
++int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len);
++int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
++ int offset, int len);
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
+-/* Messages from the input/output sub-system */
+-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#else
+-#define dbg_io(fmt, ...) ({})
+-#endif
++extern unsigned int ubi_tst_flags;
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD
+-/* Initialization and build messages */
+-#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define UBI_IO_DEBUG 1
+-#else
+-#define dbg_bld(fmt, ...) ({})
+-#define UBI_IO_DEBUG 0
+-#endif
++/*
++ * Special testing flags.
++ *
++ * UBI_TST_DISABLE_BGT: disable the background thread
++ * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips
++ * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures
++ * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures
++ */
++enum {
++ UBI_TST_DISABLE_BGT = 0x1,
++ UBI_TST_EMULATE_BITFLIPS = 0x2,
++ UBI_TST_EMULATE_WRITE_FAILURES = 0x4,
++ UBI_TST_EMULATE_ERASE_FAILURES = 0x8,
++};
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
+-#define DBG_DISABLE_BGT 1
+-#else
+-#define DBG_DISABLE_BGT 0
+-#endif
++/**
++ * ubi_dbg_is_bgt_disabled - if the background thread is disabled.
++ *
++ * Returns non-zero if the UBI background thread is disabled for testing
++ * purposes.
++ */
++static inline int ubi_dbg_is_bgt_disabled(void)
++{
++ return ubi_tst_flags & UBI_TST_DISABLE_BGT;
++}
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
+ /**
+ * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
+ *
+@@ -107,13 +124,11 @@ void ubi_dbg_dump_mkvol_req(const struct
+ */
+ static inline int ubi_dbg_is_bitflip(void)
+ {
+- return !(random32() % 200);
++ if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS)
++ return !(random32() % 200);
++ return 0;
+ }
+-#else
+-#define ubi_dbg_is_bitflip() 0
+-#endif
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES
+ /**
+ * ubi_dbg_is_write_failure - if it is time to emulate a write failure.
+ *
+@@ -122,13 +137,11 @@ static inline int ubi_dbg_is_bitflip(voi
+ */
+ static inline int ubi_dbg_is_write_failure(void)
+ {
+- return !(random32() % 500);
++ if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES)
++ return !(random32() % 500);
++ return 0;
+ }
+-#else
+-#define ubi_dbg_is_write_failure() 0
+-#endif
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES
+ /**
+ * ubi_dbg_is_erase_failure - if its time to emulate an erase failure.
+ *
+@@ -137,36 +150,68 @@ static inline int ubi_dbg_is_write_failu
+ */
+ static inline int ubi_dbg_is_erase_failure(void)
+ {
++ if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES)
+ return !(random32() % 400);
++ return 0;
+ }
+-#else
+-#define ubi_dbg_is_erase_failure() 0
+-#endif
+
+ #else
+
+-#define ubi_assert(expr) ({})
+-#define dbg_err(fmt, ...) ({})
+-#define dbg_msg(fmt, ...) ({})
+-#define dbg_gen(fmt, ...) ({})
+-#define dbg_eba(fmt, ...) ({})
+-#define dbg_wl(fmt, ...) ({})
+-#define dbg_io(fmt, ...) ({})
+-#define dbg_bld(fmt, ...) ({})
+-#define ubi_dbg_dump_stack() ({})
+-#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
+-#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
+-#define ubi_dbg_dump_vol_info(vol) ({})
+-#define ubi_dbg_dump_vtbl_record(r, idx) ({})
+-#define ubi_dbg_dump_sv(sv) ({})
+-#define ubi_dbg_dump_seb(seb, type) ({})
+-#define ubi_dbg_dump_mkvol_req(req) ({})
+-
+-#define UBI_IO_DEBUG 0
+-#define DBG_DISABLE_BGT 0
+-#define ubi_dbg_is_bitflip() 0
+-#define ubi_dbg_is_write_failure() 0
+-#define ubi_dbg_is_erase_failure() 0
++/* Use "if (0)" to make compiler check arguments even if debugging is off */
++#define ubi_assert(expr) do { \
++ if (0) { \
++ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
++ __func__, __LINE__, current->pid); \
++ } \
++} while (0)
++
++#define dbg_err(fmt, ...) do { \
++ if (0) \
++ ubi_err(fmt, ##__VA_ARGS__); \
++} while (0)
++
++#define ubi_dbg_msg(fmt, ...) do { \
++ if (0) \
++ pr_debug(fmt "\n", ##__VA_ARGS__); \
++} while (0)
++
++#define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_gen(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_eba(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_wl(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_io(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_bld(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
++
++static inline void ubi_dbg_dump_stack(void) { return; }
++static inline void
++ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { return; }
++static inline void
++ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { return; }
++static inline void
++ubi_dbg_dump_vol_info(const struct ubi_volume *vol) { return; }
++static inline void
++ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { return; }
++static inline void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) { return; }
++static inline void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb,
++ int type) { return; }
++static inline void
++ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) { return; }
++static inline void ubi_dbg_dump_flash(struct ubi_device *ubi,
++ int pnum, int offset, int len) { return; }
++static inline void
++ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r,
++ int g, const void *b, size_t len, bool a) { return; }
++
++static inline int ubi_dbg_is_bgt_disabled(void) { return 0; }
++static inline int ubi_dbg_is_bitflip(void) { return 0; }
++static inline int ubi_dbg_is_write_failure(void) { return 0; }
++static inline int ubi_dbg_is_erase_failure(void) { return 0; }
++static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi,
++ int pnum, int offset,
++ int len) { return 0; }
++static inline int ubi_dbg_check_write(struct ubi_device *ubi,
++ const void *buf, int pnum,
++ int offset, int len) { return 0; }
+
+ #endif /* !CONFIG_MTD_UBI_DEBUG */
+ #endif /* !__UBI_DEBUG_H__ */
+diff -uprN linux-2.6.28/drivers/mtd/ubi/eba.c ubifs-v2.6.28/drivers/mtd/ubi/eba.c
+--- linux-2.6.28/drivers/mtd/ubi/eba.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/eba.c 2011-06-15 14:22:07.000000000 -0400
+@@ -418,7 +418,8 @@ retry:
+ * may try to recover data. FIXME: but this is
+ * not implemented.
+ */
+- if (err == UBI_IO_BAD_VID_HDR) {
++ if (err == UBI_IO_BAD_HDR_EBADMSG ||
++ err == UBI_IO_BAD_HDR) {
+ ubi_warn("corrupted VID header at PEB "
+ "%d, LEB %d:%d", pnum, vol_id,
+ lnum);
+@@ -718,7 +719,7 @@ write_error:
+ * to the real data size, although the @buf buffer has to contain the
+ * alignment. In all other cases, @len has to be aligned.
+ *
+- * It is prohibited to write more then once to logical eraseblocks of static
++ * It is prohibited to write more than once to logical eraseblocks of static
+ * volumes. This function returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+@@ -961,8 +962,8 @@ write_error:
+ */
+ static int is_error_sane(int err)
+ {
+- if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_VID_HDR ||
+- err == -ETIMEDOUT)
++ if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR ||
++ err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT)
+ return 0;
+ return 1;
+ }
+@@ -1165,6 +1166,47 @@ out_unlock_leb:
+ }
+
+ /**
++ * print_rsvd_warning - warn about not having enough reserved PEBs.
++ * @ubi: UBI device description object
++ *
++ * This is a helper function for 'ubi_eba_init_scan()' which is called when UBI
++ * cannot reserve enough PEBs for bad block handling. This function makes a
++ * decision whether we have to print a warning or not. The algorithm is as
++ * follows:
++ * o if this is a new UBI image, then just print the warning
++ * o if this is an UBI image which has already been used for some time, print
++ * a warning only if we can reserve less than 10% of the expected amount of
++ * the reserved PEB.
++ *
++ * The idea is that when UBI is used, PEBs become bad, and the reserved pool
++ * of PEBs becomes smaller, which is normal and we do not want to scare users
++ * with a warning every time they attach the MTD device. This was an issue
++ * reported by real users.
++ */
++static void print_rsvd_warning(struct ubi_device *ubi,
++ struct ubi_scan_info *si)
++{
++ /*
++ * The 1 << 18 (256KiB) number is picked randomly, just a reasonably
++ * large number to distinguish between newly flashed and used images.
++ */
++ if (si->max_sqnum > (1 << 18)) {
++ int min = ubi->beb_rsvd_level / 10;
++
++ if (!min)
++ min = 1;
++ if (ubi->beb_rsvd_pebs > min)
++ return;
++ }
++
++ ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d,"
++ " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
++ if (ubi->corr_peb_count)
++ ubi_warn("%d PEBs are corrupted and not used",
++ ubi->corr_peb_count);
++}
++
++/**
+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+@@ -1224,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device
+ if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
+ ubi_err("no enough physical eraseblocks (%d, need %d)",
+ ubi->avail_pebs, EBA_RESERVED_PEBS);
++ if (ubi->corr_peb_count)
++ ubi_err("%d PEBs are corrupted and not used",
++ ubi->corr_peb_count);
+ err = -ENOSPC;
+ goto out_free;
+ }
+@@ -1236,9 +1281,7 @@ int ubi_eba_init_scan(struct ubi_device
+ if (ubi->avail_pebs < ubi->beb_rsvd_level) {
+ /* No enough free physical eraseblocks */
+ ubi->beb_rsvd_pebs = ubi->avail_pebs;
+- ubi_warn("cannot reserve enough PEBs for bad PEB "
+- "handling, reserved %d, need %d",
+- ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
++ print_rsvd_warning(ubi, si);
+ } else
+ ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;
+
+@@ -1254,6 +1297,7 @@ out_free:
+ if (!ubi->volumes[i])
+ continue;
+ kfree(ubi->volumes[i]->eba_tbl);
++ ubi->volumes[i]->eba_tbl = NULL;
+ }
+ return err;
+ }
+diff -uprN linux-2.6.28/drivers/mtd/ubi/gluebi.c ubifs-v2.6.28/drivers/mtd/ubi/gluebi.c
+--- linux-2.6.28/drivers/mtd/ubi/gluebi.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/gluebi.c 2011-06-15 14:22:07.000000000 -0400
+@@ -19,17 +19,78 @@
+ */
+
+ /*
+- * This file includes implementation of fake MTD devices for each UBI volume.
+- * This sounds strange, but it is in fact quite useful to make MTD-oriented
+- * software (including all the legacy software) to work on top of UBI.
++ * This is a small driver which implements fake MTD devices on top of UBI
++ * volumes. This sounds strange, but it is in fact quite useful to make
++ * MTD-oriented software (including all the legacy software) work on top of
++ * UBI.
+ *
+ * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit
+- * size (mtd->writesize) is equivalent to the UBI minimal I/O unit. The
++ * size (@mtd->writesize) is equivalent to the UBI minimal I/O unit. The
+ * eraseblock size is equivalent to the logical eraseblock size of the volume.
+ */
+
++#include <linux/err.h>
++#include <linux/list.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/math64.h>
++#include <linux/mtd/ubi.h>
++#include <linux/mtd/mtd.h>
+ #include <asm/div64.h>
+-#include "ubi.h"
++#include "ubi-media.h"
++
++#define err_msg(fmt, ...) \
++ printk(KERN_DEBUG "gluebi (pid %d): %s: " fmt "\n", \
++ current->pid, __func__, ##__VA_ARGS__)
++
++static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
++{
++ do_div(sz, mtd->erasesize);
++ return sz;
++}
++
++/**
++ * struct gluebi_device - a gluebi device description data structure.
++ * @mtd: emulated MTD device description object
++ * @refcnt: gluebi device reference count
++ * @desc: UBI volume descriptor
++ * @ubi_num: UBI device number this gluebi device works on
++ * @vol_id: ID of UBI volume this gluebi device works on
++ * @list: link in a list of gluebi devices
++ */
++struct gluebi_device {
++ struct mtd_info mtd;
++ int refcnt;
++ struct ubi_volume_desc *desc;
++ int ubi_num;
++ int vol_id;
++ struct list_head list;
++};
++
++/* List of all gluebi devices */
++static LIST_HEAD(gluebi_devices);
++static DEFINE_MUTEX(devices_mutex);
++
++/**
++ * find_gluebi_nolock - find a gluebi device.
++ * @ubi_num: UBI device number
++ * @vol_id: volume ID
++ *
++ * This function searches for gluebi device corresponding to UBI device
++ * @ubi_num and UBI volume @vol_id. Returns the gluebi device description
++ * object in case of success and %NULL in case of failure. The caller has to
++ * have the &devices_mutex locked.
++ */
++static struct gluebi_device *find_gluebi_nolock(int ubi_num, int vol_id)
++{
++ struct gluebi_device *gluebi;
++
++ list_for_each_entry(gluebi, &gluebi_devices, list)
++ if (gluebi->ubi_num == ubi_num && gluebi->vol_id == vol_id)
++ return gluebi;
++ return NULL;
++}
+
+ /**
+ * gluebi_get_device - get MTD device reference.
+@@ -41,15 +102,18 @@
+ */
+ static int gluebi_get_device(struct mtd_info *mtd)
+ {
+- struct ubi_volume *vol;
++ struct gluebi_device *gluebi;
++ int ubi_mode = UBI_READONLY;
+
+- vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
++ if (!try_module_get(THIS_MODULE))
++ return -ENODEV;
+
+- /*
+- * We do not introduce locks for gluebi reference count because the
+- * get_device()/put_device() calls are already serialized at MTD.
+- */
+- if (vol->gluebi_refcount > 0) {
++ if (mtd->flags & MTD_WRITEABLE)
++ ubi_mode = UBI_READWRITE;
++
++ gluebi = container_of(mtd, struct gluebi_device, mtd);
++ mutex_lock(&devices_mutex);
++ if (gluebi->refcnt > 0) {
+ /*
+ * The MTD device is already referenced and this is just one
+ * more reference. MTD allows many users to open the same
+@@ -58,7 +122,8 @@ static int gluebi_get_device(struct mtd_
+ * open the UBI volume again - just increase the reference
+ * counter and return.
+ */
+- vol->gluebi_refcount += 1;
++ gluebi->refcnt += 1;
++ mutex_unlock(&devices_mutex);
+ return 0;
+ }
+
+@@ -66,11 +131,15 @@ static int gluebi_get_device(struct mtd_
+ * This is the first reference to this UBI volume via the MTD device
+ * interface. Open the corresponding volume in read-write mode.
+ */
+- vol->gluebi_desc = ubi_open_volume(vol->ubi->ubi_num, vol->vol_id,
+- UBI_READWRITE);
+- if (IS_ERR(vol->gluebi_desc))
+- return PTR_ERR(vol->gluebi_desc);
+- vol->gluebi_refcount += 1;
++ gluebi->desc = ubi_open_volume(gluebi->ubi_num, gluebi->vol_id,
++ ubi_mode);
++ if (IS_ERR(gluebi->desc)) {
++ mutex_unlock(&devices_mutex);
++ module_put(THIS_MODULE);
++ return PTR_ERR(gluebi->desc);
++ }
++ gluebi->refcnt += 1;
++ mutex_unlock(&devices_mutex);
+ return 0;
+ }
+
+@@ -83,13 +152,15 @@ static int gluebi_get_device(struct mtd_
+ */
+ static void gluebi_put_device(struct mtd_info *mtd)
+ {
+- struct ubi_volume *vol;
++ struct gluebi_device *gluebi;
+
+- vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+- vol->gluebi_refcount -= 1;
+- ubi_assert(vol->gluebi_refcount >= 0);
+- if (vol->gluebi_refcount == 0)
+- ubi_close_volume(vol->gluebi_desc);
++ gluebi = container_of(mtd, struct gluebi_device, mtd);
++ mutex_lock(&devices_mutex);
++ gluebi->refcnt -= 1;
++ if (gluebi->refcnt == 0)
++ ubi_close_volume(gluebi->desc);
++ module_put(THIS_MODULE);
++ mutex_unlock(&devices_mutex);
+ }
+
+ /**
+@@ -107,21 +178,14 @@ static int gluebi_read(struct mtd_info *
+ size_t *retlen, unsigned char *buf)
+ {
+ int err = 0, lnum, offs, total_read;
+- struct ubi_volume *vol;
+- struct ubi_device *ubi;
+- uint64_t tmp = from;
+-
+- dbg_gen("read %zd bytes from offset %lld", len, from);
++ struct gluebi_device *gluebi;
+
+ if (len < 0 || from < 0 || from + len > mtd->size)
+ return -EINVAL;
+
+- vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+- ubi = vol->ubi;
+-
+- offs = do_div(tmp, mtd->erasesize);
+- lnum = tmp;
++ gluebi = container_of(mtd, struct gluebi_device, mtd);
+
++ lnum = div_u64_rem(from, mtd->erasesize, &offs);
+ total_read = len;
+ while (total_read) {
+ size_t to_read = mtd->erasesize - offs;
+@@ -129,7 +193,7 @@ static int gluebi_read(struct mtd_info *
+ if (to_read > total_read)
+ to_read = total_read;
+
+- err = ubi_eba_read_leb(ubi, vol, lnum, buf, offs, to_read, 0);
++ err = ubi_read(gluebi->desc, lnum, buf, offs, to_read);
+ if (err)
+ break;
+
+@@ -155,26 +219,20 @@ static int gluebi_read(struct mtd_info *
+ * case of failure.
+ */
+ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
+- size_t *retlen, const u_char *buf)
++ size_t *retlen, const u_char *buf)
+ {
+ int err = 0, lnum, offs, total_written;
+- struct ubi_volume *vol;
+- struct ubi_device *ubi;
+- uint64_t tmp = to;
+-
+- dbg_gen("write %zd bytes to offset %lld", len, to);
++ struct gluebi_device *gluebi;
+
+ if (len < 0 || to < 0 || len + to > mtd->size)
+ return -EINVAL;
+
+- vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+- ubi = vol->ubi;
++ gluebi = container_of(mtd, struct gluebi_device, mtd);
+
+- if (ubi->ro_mode)
++ if (!(mtd->flags & MTD_WRITEABLE))
+ return -EROFS;
+
+- offs = do_div(tmp, mtd->erasesize);
+- lnum = tmp;
++ lnum = div_u64_rem(to, mtd->erasesize, &offs);
+
+ if (len % mtd->writesize || offs % mtd->writesize)
+ return -EINVAL;
+@@ -186,8 +244,7 @@ static int gluebi_write(struct mtd_info
+ if (to_write > total_written)
+ to_write = total_written;
+
+- err = ubi_eba_write_leb(ubi, vol, lnum, buf, offs, to_write,
+- UBI_UNKNOWN);
++ err = ubi_write(gluebi->desc, lnum, buf, offs, to_write);
+ if (err)
+ break;
+
+@@ -212,40 +269,36 @@ static int gluebi_write(struct mtd_info
+ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
+ {
+ int err, i, lnum, count;
+- struct ubi_volume *vol;
+- struct ubi_device *ubi;
+-
+- dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
++ struct gluebi_device *gluebi;
+
+ if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
+ return -EINVAL;
+-
+ if (instr->len < 0 || instr->addr + instr->len > mtd->size)
+ return -EINVAL;
+-
+ if (instr->addr % mtd->writesize || instr->len % mtd->writesize)
+ return -EINVAL;
+
+- lnum = instr->addr / mtd->erasesize;
+- count = instr->len / mtd->erasesize;
++ lnum = mtd_div_by_eb(instr->addr, mtd);
++ count = mtd_div_by_eb(instr->len, mtd);
+
+- vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+- ubi = vol->ubi;
++ gluebi = container_of(mtd, struct gluebi_device, mtd);
+
+- if (ubi->ro_mode)
++ if (!(mtd->flags & MTD_WRITEABLE))
+ return -EROFS;
+
+- for (i = 0; i < count; i++) {
+- err = ubi_eba_unmap_leb(ubi, vol, lnum + i);
++ for (i = 0; i < count - 1; i++) {
++ err = ubi_leb_unmap(gluebi->desc, lnum + i);
+ if (err)
+ goto out_err;
+ }
+-
+ /*
+ * MTD erase operations are synchronous, so we have to make sure the
+ * physical eraseblock is wiped out.
++ *
++ * Thus, perform leb_erase instead of leb_unmap operation - leb_erase
++ * will wait for the end of operations
+ */
+- err = ubi_wl_flush(ubi);
++ err = ubi_leb_erase(gluebi->desc, lnum + i);
+ if (err)
+ goto out_err;
+
+@@ -255,33 +308,44 @@ static int gluebi_erase(struct mtd_info
+
+ out_err:
+ instr->state = MTD_ERASE_FAILED;
+- instr->fail_addr = lnum * mtd->erasesize;
++ instr->fail_addr = (long long)lnum * mtd->erasesize;
+ return err;
+ }
+
+ /**
+- * ubi_create_gluebi - initialize gluebi for an UBI volume.
+- * @ubi: UBI device description object
+- * @vol: volume description object
++ * gluebi_create - create a gluebi device for an UBI volume.
++ * @di: UBI device description object
++ * @vi: UBI volume description object
+ *
+- * This function is called when an UBI volume is created in order to create
++ * This function is called when a new UBI volume is created in order to create
+ * corresponding fake MTD device. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+-int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
++static int gluebi_create(struct ubi_device_info *di,
++ struct ubi_volume_info *vi)
+ {
+- struct mtd_info *mtd = &vol->gluebi_mtd;
++ struct gluebi_device *gluebi, *g;
++ struct mtd_info *mtd;
+
+- mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL);
+- if (!mtd->name)
++ gluebi = kzalloc(sizeof(struct gluebi_device), GFP_KERNEL);
++ if (!gluebi)
+ return -ENOMEM;
+
++ mtd = &gluebi->mtd;
++ mtd->name = kmemdup(vi->name, vi->name_len + 1, GFP_KERNEL);
++ if (!mtd->name) {
++ kfree(gluebi);
++ return -ENOMEM;
++ }
++
++ gluebi->vol_id = vi->vol_id;
++ gluebi->ubi_num = vi->ubi_num;
+ mtd->type = MTD_UBIVOLUME;
+- if (!ubi->ro_mode)
++ if (!di->ro_mode)
+ mtd->flags = MTD_WRITEABLE;
+- mtd->writesize = ubi->min_io_size;
+ mtd->owner = THIS_MODULE;
+- mtd->erasesize = vol->usable_leb_size;
++ mtd->writesize = di->min_io_size;
++ mtd->erasesize = vi->usable_leb_size;
+ mtd->read = gluebi_read;
+ mtd->write = gluebi_write;
+ mtd->erase = gluebi_erase;
+@@ -289,60 +353,196 @@ int ubi_create_gluebi(struct ubi_device
+ mtd->put_device = gluebi_put_device;
+
+ /*
+- * In case of dynamic volume, MTD device size is just volume size. In
++ * In case of a dynamic volume, MTD device size is just volume size. In
+ * case of a static volume the size is equivalent to the amount of data
+ * bytes.
+ */
+- if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+- mtd->size = vol->usable_leb_size * vol->reserved_pebs;
++ if (vi->vol_type == UBI_DYNAMIC_VOLUME)
++ mtd->size = (unsigned long long)vi->usable_leb_size * vi->size;
+ else
+- mtd->size = vol->used_bytes;
++ mtd->size = vi->used_bytes;
++
++ /* Just a sanity check - make sure this gluebi device does not exist */
++ mutex_lock(&devices_mutex);
++ g = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
++ if (g)
++ err_msg("gluebi MTD device %d from UBI device %d volume %d "
++ "already exists", g->mtd.index, vi->ubi_num,
++ vi->vol_id);
++ mutex_unlock(&devices_mutex);
+
+ if (add_mtd_device(mtd)) {
+- ubi_err("cannot not add MTD device");
++ err_msg("cannot add MTD device");
+ kfree(mtd->name);
++ kfree(gluebi);
+ return -ENFILE;
+ }
+
+- dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
+- mtd->index, mtd->name, mtd->size, mtd->erasesize);
++ mutex_lock(&devices_mutex);
++ list_add_tail(&gluebi->list, &gluebi_devices);
++ mutex_unlock(&devices_mutex);
+ return 0;
+ }
+
+ /**
+- * ubi_destroy_gluebi - close gluebi for an UBI volume.
+- * @vol: volume description object
++ * gluebi_remove - remove a gluebi device.
++ * @vi: UBI volume description object
+ *
+- * This function is called when an UBI volume is removed in order to remove
++ * This function is called when an UBI volume is removed and it removes
+ * corresponding fake MTD device. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+-int ubi_destroy_gluebi(struct ubi_volume *vol)
++static int gluebi_remove(struct ubi_volume_info *vi)
+ {
+- int err;
+- struct mtd_info *mtd = &vol->gluebi_mtd;
++ int err = 0;
++ struct mtd_info *mtd;
++ struct gluebi_device *gluebi;
++
++ mutex_lock(&devices_mutex);
++ gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
++ if (!gluebi) {
++ err_msg("got remove notification for unknown UBI device %d "
++ "volume %d", vi->ubi_num, vi->vol_id);
++ err = -ENOENT;
++ } else if (gluebi->refcnt)
++ err = -EBUSY;
++ else
++ list_del(&gluebi->list);
++ mutex_unlock(&devices_mutex);
++ if (err)
++ return err;
+
+- dbg_gen("remove mtd%d", mtd->index);
++ mtd = &gluebi->mtd;
+ err = del_mtd_device(mtd);
+- if (err)
++ if (err) {
++ err_msg("cannot remove fake MTD device %d, UBI device %d, "
++ "volume %d, error %d", mtd->index, gluebi->ubi_num,
++ gluebi->vol_id, err);
++ mutex_lock(&devices_mutex);
++ list_add_tail(&gluebi->list, &gluebi_devices);
++ mutex_unlock(&devices_mutex);
+ return err;
++ }
++
+ kfree(mtd->name);
++ kfree(gluebi);
+ return 0;
+ }
+
+ /**
+- * ubi_gluebi_updated - UBI volume was updated notifier.
+- * @vol: volume description object
++ * gluebi_updated - UBI volume was updated notifier.
++ * @vi: volume info structure
+ *
+- * This function is called every time an UBI volume is updated. This function
+- * does nothing if volume @vol is dynamic, and changes MTD device size if the
++ * This function is called every time an UBI volume is updated. It does nothing
++ * if the volume @vi is dynamic, and changes MTD device size if the
+ * volume is static. This is needed because static volumes cannot be read past
+- * data they contain.
++ * data they contain. This function returns zero in case of success and a
++ * negative error code in case of error.
+ */
+-void ubi_gluebi_updated(struct ubi_volume *vol)
++static int gluebi_updated(struct ubi_volume_info *vi)
+ {
+- struct mtd_info *mtd = &vol->gluebi_mtd;
++ struct gluebi_device *gluebi;
++
++ mutex_lock(&devices_mutex);
++ gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
++ if (!gluebi) {
++ mutex_unlock(&devices_mutex);
++ err_msg("got update notification for unknown UBI device %d "
++ "volume %d", vi->ubi_num, vi->vol_id);
++ return -ENOENT;
++ }
+
+- if (vol->vol_type == UBI_STATIC_VOLUME)
+- mtd->size = vol->used_bytes;
++ if (vi->vol_type == UBI_STATIC_VOLUME)
++ gluebi->mtd.size = vi->used_bytes;
++ mutex_unlock(&devices_mutex);
++ return 0;
+ }
++
++/**
++ * gluebi_resized - UBI volume was re-sized notifier.
++ * @vi: volume info structure
++ *
++ * This function is called every time an UBI volume is re-sized. It changes the
++ * corresponding fake MTD device size. This function returns zero in case of
++ * success and a negative error code in case of error.
++ */
++static int gluebi_resized(struct ubi_volume_info *vi)
++{
++ struct gluebi_device *gluebi;
++
++ mutex_lock(&devices_mutex);
++ gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
++ if (!gluebi) {
++ mutex_unlock(&devices_mutex);
++ err_msg("got update notification for unknown UBI device %d "
++ "volume %d", vi->ubi_num, vi->vol_id);
++ return -ENOENT;
++ }
++ gluebi->mtd.size = vi->used_bytes;
++ mutex_unlock(&devices_mutex);
++ return 0;
++}
++
++/**
++ * gluebi_notify - UBI notification handler.
++ * @nb: registered notifier block
++ * @l: notification type
++ * @ptr: pointer to the &struct ubi_notification object
++ */
++static int gluebi_notify(struct notifier_block *nb, unsigned long l,
++ void *ns_ptr)
++{
++ struct ubi_notification *nt = ns_ptr;
++
++ switch (l) {
++ case UBI_VOLUME_ADDED:
++ gluebi_create(&nt->di, &nt->vi);
++ break;
++ case UBI_VOLUME_REMOVED:
++ gluebi_remove(&nt->vi);
++ break;
++ case UBI_VOLUME_RESIZED:
++ gluebi_resized(&nt->vi);
++ break;
++ case UBI_VOLUME_UPDATED:
++ gluebi_updated(&nt->vi);
++ break;
++ default:
++ break;
++ }
++ return NOTIFY_OK;
++}
++
++static struct notifier_block gluebi_notifier = {
++ .notifier_call = gluebi_notify,
++};
++
++static int __init ubi_gluebi_init(void)
++{
++ return ubi_register_volume_notifier(&gluebi_notifier, 0);
++}
++
++static void __exit ubi_gluebi_exit(void)
++{
++ struct gluebi_device *gluebi, *g;
++
++ list_for_each_entry_safe(gluebi, g, &gluebi_devices, list) {
++ int err;
++ struct mtd_info *mtd = &gluebi->mtd;
++
++ err = del_mtd_device(mtd);
++ if (err)
++ err_msg("error %d while removing gluebi MTD device %d, "
++ "UBI device %d, volume %d - ignoring", err,
++ mtd->index, gluebi->ubi_num, gluebi->vol_id);
++ kfree(mtd->name);
++ kfree(gluebi);
++ }
++ ubi_unregister_volume_notifier(&gluebi_notifier);
++}
++
++module_init(ubi_gluebi_init);
++module_exit(ubi_gluebi_exit);
++MODULE_DESCRIPTION("MTD emulation layer over UBI volumes");
++MODULE_AUTHOR("Artem Bityutskiy, Joern Engel");
++MODULE_LICENSE("GPL");
+diff -uprN linux-2.6.28/drivers/mtd/ubi/io.c ubifs-v2.6.28/drivers/mtd/ubi/io.c
+--- linux-2.6.28/drivers/mtd/ubi/io.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/io.c 2011-06-15 14:22:07.000000000 -0400
+@@ -64,9 +64,9 @@
+ * device, e.g., make @ubi->min_io_size = 512 in the example above?
+ *
+ * A: because when writing a sub-page, MTD still writes a full 2K page but the
+- * bytes which are no relevant to the sub-page are 0xFF. So, basically, writing
+- * 4x512 sub-pages is 4 times slower then writing one 2KiB NAND page. Thus, we
+- * prefer to use sub-pages only for EV and VID headers.
++ * bytes which are not relevant to the sub-page are 0xFF. So, basically,
++ * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page.
++ * Thus, we prefer to use sub-pages only for EC and VID headers.
+ *
+ * As it was noted above, the VID header may start at a non-aligned offset.
+ * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page,
+@@ -90,7 +90,7 @@
+ #include <linux/err.h>
+ #include "ubi.h"
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum);
+ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum);
+ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
+@@ -98,15 +98,12 @@ static int paranoid_check_ec_hdr(const s
+ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum);
+ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+ const struct ubi_vid_hdr *vid_hdr);
+-static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
+- int len);
+ #else
+ #define paranoid_check_not_bad(ubi, pnum) 0
+ #define paranoid_check_peb_ec_hdr(ubi, pnum) 0
+ #define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0
+ #define paranoid_check_peb_vid_hdr(ubi, pnum) 0
+ #define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0
+-#define paranoid_check_all_ff(ubi, pnum, offset, len) 0
+ #endif
+
+ /**
+@@ -146,12 +143,36 @@ int ubi_io_read(const struct ubi_device
+
+ err = paranoid_check_not_bad(ubi, pnum);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
++
++ /*
++ * Deliberately corrupt the buffer to improve robustness. Indeed, if we
++ * do not do this, the following may happen:
++ * 1. The buffer contains data from previous operation, e.g., read from
++ * another PEB previously. The data looks like expected, e.g., if we
++ * just do not read anything and return - the caller would not
++ * notice this. E.g., if we are reading a VID header, the buffer may
++ * contain a valid VID header from another PEB.
++ * 2. The driver is buggy and returns us success or -EBADMSG or
++ * -EUCLEAN, but it does not actually put any data to the buffer.
++ *
++ * This may confuse UBI or upper layers - they may think the buffer
++ * contains valid data while in fact it is just old data. This is
++ * especially possible because UBI (and UBIFS) relies on CRC, and
++ * treats data as correct even in case of ECC errors if the CRC is
++ * correct.
++ *
++ * Try to prevent this situation by changing the first byte of the
++ * buffer.
++ */
++ *((uint8_t *)buf) ^= 0xFF;
+
+ addr = (loff_t)pnum * ubi->peb_size + offset;
+ retry:
+ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+ if (err) {
++ const char *errstr = (err == -EBADMSG) ? " (ECC error)" : "";
++
+ if (err == -EUCLEAN) {
+ /*
+ * -EUCLEAN is reported if there was a bit-flip which
+@@ -166,16 +187,16 @@ retry:
+ return UBI_IO_BITFLIPS;
+ }
+
+- if (read != len && retries++ < UBI_IO_RETRIES) {
+- dbg_io("error %d while reading %d bytes from PEB %d:%d,"
+- " read only %zd bytes, retry",
+- err, len, pnum, offset, read);
++ if (retries++ < UBI_IO_RETRIES) {
++ dbg_io("error %d%s while reading %d bytes from PEB "
++ "%d:%d, read only %zd bytes, retry",
++ err, errstr, len, pnum, offset, read);
+ yield();
+ goto retry;
+ }
+
+- ubi_err("error %d while reading %d bytes from PEB %d:%d, "
+- "read %zd bytes", err, len, pnum, offset, read);
++ ubi_err("error %d%s while reading %d bytes from PEB %d:%d, "
++ "read %zd bytes", err, errstr, len, pnum, offset, read);
+ ubi_dbg_dump_stack();
+
+ /*
+@@ -239,12 +260,12 @@ int ubi_io_write(struct ubi_device *ubi,
+
+ err = paranoid_check_not_bad(ubi, pnum);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
+
+ /* The area we are writing to has to contain all 0xFF bytes */
+- err = paranoid_check_all_ff(ubi, pnum, offset, len);
++ err = ubi_dbg_check_all_ff(ubi, pnum, offset, len);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
+
+ if (offset >= ubi->leb_start) {
+ /*
+@@ -253,10 +274,10 @@ int ubi_io_write(struct ubi_device *ubi,
+ */
+ err = paranoid_check_peb_ec_hdr(ubi, pnum);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
+ err = paranoid_check_peb_vid_hdr(ubi, pnum);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
+ }
+
+ if (ubi_dbg_is_write_failure()) {
+@@ -269,12 +290,28 @@ int ubi_io_write(struct ubi_device *ubi,
+ addr = (loff_t)pnum * ubi->peb_size + offset;
+ err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf);
+ if (err) {
+- ubi_err("error %d while writing %d bytes to PEB %d:%d, written"
+- " %zd bytes", err, len, pnum, offset, written);
++ ubi_err("error %d while writing %d bytes to PEB %d:%d, written "
++ "%zd bytes", err, len, pnum, offset, written);
+ ubi_dbg_dump_stack();
++ ubi_dbg_dump_flash(ubi, pnum, offset, len);
+ } else
+ ubi_assert(written == len);
+
++ if (!err) {
++ err = ubi_dbg_check_write(ubi, buf, pnum, offset, len);
++ if (err)
++ return err;
++
++ /*
++ * Since we always write sequentially, the rest of the PEB has
++ * to contain only 0xFF bytes.
++ */
++ offset += len;
++ len = ubi->peb_size - offset;
++ if (len)
++ err = ubi_dbg_check_all_ff(ubi, pnum, offset, len);
++ }
++
+ return err;
+ }
+
+@@ -306,6 +343,12 @@ static int do_sync_erase(struct ubi_devi
+ wait_queue_head_t wq;
+
+ dbg_io("erase PEB %d", pnum);
++ ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
++
++ if (ubi->ro_mode) {
++ ubi_err("read-only mode");
++ return -EROFS;
++ }
+
+ retry:
+ init_waitqueue_head(&wq);
+@@ -348,11 +391,11 @@ retry:
+ return -EIO;
+ }
+
+- err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size);
++ err = ubi_dbg_check_all_ff(ubi, pnum, 0, ubi->peb_size);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
+
+- if (ubi_dbg_is_erase_failure() && !err) {
++ if (ubi_dbg_is_erase_failure()) {
+ dbg_err("cannot erase PEB %d (emulated)", pnum);
+ return -EIO;
+ }
+@@ -360,25 +403,6 @@ retry:
+ return 0;
+ }
+
+-/**
+- * check_pattern - check if buffer contains only a certain byte pattern.
+- * @buf: buffer to check
+- * @patt: the pattern to check
+- * @size: buffer size in bytes
+- *
+- * This function returns %1 in there are only @patt bytes in @buf, and %0 if
+- * something else was also found.
+- */
+-static int check_pattern(const void *buf, uint8_t patt, int size)
+-{
+- int i;
+-
+- for (i = 0; i < size; i++)
+- if (((const uint8_t *)buf)[i] != patt)
+- return 0;
+- return 1;
+-}
+-
+ /* Patterns to write to a physical eraseblock when torturing it */
+ static uint8_t patterns[] = {0xa5, 0x5a, 0x0};
+
+@@ -410,7 +434,7 @@ static int torture_peb(struct ubi_device
+ if (err)
+ goto out;
+
+- err = check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size);
++ err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size);
+ if (err == 0) {
+ ubi_err("erased PEB %d, but a non-0xFF byte found",
+ pnum);
+@@ -429,7 +453,8 @@ static int torture_peb(struct ubi_device
+ if (err)
+ goto out;
+
+- err = check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size);
++ err = ubi_check_pattern(ubi->peb_buf1, patterns[i],
++ ubi->peb_size);
+ if (err == 0) {
+ ubi_err("pattern %x checking failed for PEB %d",
+ patterns[i], pnum);
+@@ -439,7 +464,7 @@ static int torture_peb(struct ubi_device
+ }
+
+ err = patt_count;
+- ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
++ ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum);
+
+ out:
+ mutex_unlock(&ubi->buf_mutex);
+@@ -457,6 +482,92 @@ out:
+ }
+
+ /**
++ * nor_erase_prepare - prepare a NOR flash PEB for erasure.
++ * @ubi: UBI device description object
++ * @pnum: physical eraseblock number to prepare
++ *
++ * NOR flash, or at least some of them, have peculiar embedded PEB erasure
++ * algorithm: the PEB is first filled with zeroes, then it is erased. And
++ * filling with zeroes starts from the end of the PEB. This was observed with
++ * Spansion S29GL512N NOR flash.
++ *
++ * This means that in case of a power cut we may end up with intact data at the
++ * beginning of the PEB, and all zeroes at the end of PEB. In other words, the
++ * EC and VID headers are OK, but a large chunk of data at the end of PEB is
++ * zeroed. This makes UBI mistakenly treat this PEB as used and associate it
++ * with an LEB, which leads to subsequent failures (e.g., UBIFS fails).
++ *
++ * This function is called before erasing NOR PEBs and it zeroes out EC and VID
++ * magic numbers in order to invalidate them and prevent the failures. Returns
++ * zero in case of success and a negative error code in case of failure.
++ */
++static int nor_erase_prepare(struct ubi_device *ubi, int pnum)
++{
++ int err, err1;
++ size_t written;
++ loff_t addr;
++ uint32_t data = 0;
++ /*
++ * Note, we cannot generally define VID header buffers on stack,
++ * because of the way we deal with these buffers (see the header
++ * comment in this file). But we know this is a NOR-specific piece of
++ * code, so we can do this. But yes, this is error-prone and we should
++ * (pre-)allocate VID header buffer instead.
++ */
++ struct ubi_vid_hdr vid_hdr;
++
++ /*
++ * It is important to first invalidate the EC header, and then the VID
++ * header. Otherwise a power cut may lead to valid EC header and
++ * invalid VID header, in which case UBI will treat this PEB as
++ * corrupted and will try to preserve it, and print scary warnings (see
++ * the header comment in scan.c for more information).
++ */
++ addr = (loff_t)pnum * ubi->peb_size;
++ err = ubi->mtd->write(ubi->mtd, addr, 4, &written, (void *)&data);
++ if (!err) {
++ addr += ubi->vid_hdr_aloffset;
++ err = ubi->mtd->write(ubi->mtd, addr, 4, &written,
++ (void *)&data);
++ if (!err)
++ return 0;
++ }
++
++ /*
++ * We failed to write to the media. This was observed with Spansion
++ * S29GL512N NOR flash. Most probably the previously eraseblock erasure
++ * was interrupted at a very inappropriate moment, so it became
++ * unwritable. In this case we probably anyway have garbage in this
++ * PEB.
++ */
++ err1 = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0);
++ if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR ||
++ err1 == UBI_IO_FF) {
++ struct ubi_ec_hdr ec_hdr;
++
++ err1 = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0);
++ if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR ||
++ err1 == UBI_IO_FF)
++ /*
++ * Both VID and EC headers are corrupted, so we can
++ * safely erase this PEB and not afraid that it will be
++ * treated as a valid PEB in case of an unclean reboot.
++ */
++ return 0;
++ }
++
++ /*
++ * The PEB contains a valid VID header, but we cannot invalidate it.
++ * Supposedly the flash media or the driver is screwed up, so return an
++ * error.
++ */
++ ubi_err("cannot invalidate PEB %d, write returned %d read returned %d",
++ pnum, err, err1);
++ ubi_dbg_dump_flash(ubi, pnum, 0, ubi->peb_size);
++ return -EIO;
++}
++
++/**
+ * ubi_io_sync_erase - synchronously erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to erase
+@@ -465,7 +576,7 @@ out:
+ * This function synchronously erases physical eraseblock @pnum. If @torture
+ * flag is not zero, the physical eraseblock is checked by means of writing
+ * different patterns to it and reading them back. If the torturing is enabled,
+- * the physical eraseblock is erased more then once.
++ * the physical eraseblock is erased more than once.
+ *
+ * This function returns the number of erasures made in case of success, %-EIO
+ * if the erasure failed or the torturing test failed, and other negative error
+@@ -480,13 +591,19 @@ int ubi_io_sync_erase(struct ubi_device
+
+ err = paranoid_check_not_bad(ubi, pnum);
+ if (err != 0)
+- return err > 0 ? -EINVAL : err;
++ return err;
+
+ if (ubi->ro_mode) {
+ ubi_err("read-only mode");
+ return -EROFS;
+ }
+
++ if (ubi->nor_flash) {
++ err = nor_erase_prepare(ubi, pnum);
++ if (err)
++ return err;
++ }
++
+ if (torture) {
+ ret = torture_peb(ubi, pnum);
+ if (ret < 0)
+@@ -566,16 +683,15 @@ int ubi_io_mark_bad(const struct ubi_dev
+ * This function returns zero if the erase counter header is OK, and %1 if
+ * not.
+ */
+-static int validate_ec_hdr(struct ubi_device *ubi,
++static int validate_ec_hdr(const struct ubi_device *ubi,
+ const struct ubi_ec_hdr *ec_hdr)
+ {
+ long long ec;
+- int vid_hdr_offset, leb_start, image_seq;
++ int vid_hdr_offset, leb_start;
+
+ ec = be64_to_cpu(ec_hdr->ec);
+ vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset);
+ leb_start = be32_to_cpu(ec_hdr->data_offset);
+- image_seq = be32_to_cpu(ec_hdr->image_seq);
+
+ if (ec_hdr->version != UBI_VERSION) {
+ ubi_err("node with incompatible UBI version found: "
+@@ -601,15 +717,6 @@ static int validate_ec_hdr(struct ubi_de
+ goto bad;
+ }
+
+- if (!ubi->image_seq_set) {
+- ubi->image_seq = image_seq;
+- ubi->image_seq_set = 1;
+- } else if (ubi->image_seq && image_seq && ubi->image_seq != image_seq) {
+- ubi_err("bad image sequence number %d, expected %d",
+- image_seq, ubi->image_seq);
+- goto bad;
+- }
+-
+ return 0;
+
+ bad:
+@@ -635,68 +742,58 @@ bad:
+ * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+ * and corrected by the flash driver; this is harmless but may indicate that
+ * this eraseblock may become bad soon (but may be not);
+- * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error);
+- * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty;
++ * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error);
++ * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was
++ * a data integrity error (uncorrectable ECC error in case of NAND);
++ * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty)
+ * o a negative error code in case of failure.
+ */
+ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
+ struct ubi_ec_hdr *ec_hdr, int verbose)
+ {
+- int err, read_err = 0;
++ int err, read_err;
+ uint32_t crc, magic, hdr_crc;
+
+ dbg_io("read EC header from PEB %d", pnum);
+ ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+- err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
+- if (err) {
+- if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
+- return err;
++ read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
++ if (read_err) {
++ if (read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG)
++ return read_err;
+
+ /*
+ * We read all the data, but either a correctable bit-flip
+- * occurred, or MTD reported about some data integrity error,
+- * like an ECC error in case of NAND. The former is harmless,
+- * the later may mean that the read data is corrupted. But we
+- * have a CRC check-sum and we will detect this. If the EC
+- * header is still OK, we just report this as there was a
+- * bit-flip.
++ * occurred, or MTD reported a data integrity error
++ * (uncorrectable ECC error in case of NAND). The former is
++ * harmless, the later may mean that the read data is
++ * corrupted. But we have a CRC check-sum and we will detect
++ * this. If the EC header is still OK, we just report this as
++ * there was a bit-flip, to force scrubbing.
+ */
+- read_err = err;
+ }
+
+ magic = be32_to_cpu(ec_hdr->magic);
+ if (magic != UBI_EC_HDR_MAGIC) {
++ if (read_err == -EBADMSG)
++ return UBI_IO_BAD_HDR_EBADMSG;
++
+ /*
+ * The magic field is wrong. Let's check if we have read all
+ * 0xFF. If yes, this physical eraseblock is assumed to be
+ * empty.
+- *
+- * But if there was a read error, we do not test it for all
+- * 0xFFs. Even if it does contain all 0xFFs, this error
+- * indicates that something is still wrong with this physical
+- * eraseblock and we anyway cannot treat it as empty.
+ */
+- if (read_err != -EBADMSG &&
+- check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) {
++ if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) {
+ /* The physical eraseblock is supposedly empty */
+-
+- /*
+- * The below is just a paranoid check, it has to be
+- * compiled out if paranoid checks are disabled.
+- */
+- err = paranoid_check_all_ff(ubi, pnum, 0,
+- ubi->peb_size);
+- if (err)
+- return err > 0 ? UBI_IO_BAD_EC_HDR : err;
+-
+ if (verbose)
+ ubi_warn("no EC header found at PEB %d, "
+ "only 0xFF bytes", pnum);
+- else if (UBI_IO_DEBUG)
+- dbg_msg("no EC header found at PEB %d, "
+- "only 0xFF bytes", pnum);
+- return UBI_IO_PEB_EMPTY;
++ dbg_bld("no EC header found at PEB %d, "
++ "only 0xFF bytes", pnum);
++ if (!read_err)
++ return UBI_IO_FF;
++ else
++ return UBI_IO_FF_BITFLIPS;
+ }
+
+ /*
+@@ -707,10 +804,10 @@ int ubi_io_read_ec_hdr(struct ubi_device
+ ubi_warn("bad magic number at PEB %d: %08x instead of "
+ "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
+ ubi_dbg_dump_ec_hdr(ec_hdr);
+- } else if (UBI_IO_DEBUG)
+- dbg_msg("bad magic number at PEB %d: %08x instead of "
+- "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
+- return UBI_IO_BAD_EC_HDR;
++ }
++ dbg_bld("bad magic number at PEB %d: %08x instead of "
++ "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
++ return UBI_IO_BAD_HDR;
+ }
+
+ crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+@@ -721,10 +818,14 @@ int ubi_io_read_ec_hdr(struct ubi_device
+ ubi_warn("bad EC header CRC at PEB %d, calculated "
+ "%#08x, read %#08x", pnum, crc, hdr_crc);
+ ubi_dbg_dump_ec_hdr(ec_hdr);
+- } else if (UBI_IO_DEBUG)
+- dbg_msg("bad EC header CRC at PEB %d, calculated "
+- "%#08x, read %#08x", pnum, crc, hdr_crc);
+- return UBI_IO_BAD_EC_HDR;
++ }
++ dbg_bld("bad EC header CRC at PEB %d, calculated "
++ "%#08x, read %#08x", pnum, crc, hdr_crc);
++
++ if (!read_err)
++ return UBI_IO_BAD_HDR;
++ else
++ return UBI_IO_BAD_HDR_EBADMSG;
+ }
+
+ /* And of course validate what has just been read from the media */
+@@ -734,6 +835,10 @@ int ubi_io_read_ec_hdr(struct ubi_device
+ return -EINVAL;
+ }
+
++ /*
++ * If there was %-EBADMSG, but the header CRC is still OK, report about
++ * a bit-flip to force scrubbing on this PEB.
++ */
+ return read_err ? UBI_IO_BITFLIPS : 0;
+ }
+
+@@ -771,7 +876,7 @@ int ubi_io_write_ec_hdr(struct ubi_devic
+
+ err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr);
+ if (err)
+- return -EINVAL;
++ return err;
+
+ err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize);
+ return err;
+@@ -907,22 +1012,16 @@ bad:
+ *
+ * This function reads the volume identifier header from physical eraseblock
+ * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read
+- * volume identifier header. The following codes may be returned:
++ * volume identifier header. The error codes are the same as in
++ * 'ubi_io_read_ec_hdr()'.
+ *
+- * o %0 if the CRC checksum is correct and the header was successfully read;
+- * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+- * and corrected by the flash driver; this is harmless but may indicate that
+- * this eraseblock may become bad soon;
+- * o %UBI_IO_BAD_VID_HDR if the volume identifier header is corrupted (a CRC
+- * error detected);
+- * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID
+- * header there);
+- * o a negative error code in case of failure.
++ * Note, the implementation of this function is also very similar to
++ * 'ubi_io_read_ec_hdr()', so refer to the commentaries in 'ubi_io_read_ec_hdr()'.
+ */
+ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
+ struct ubi_vid_hdr *vid_hdr, int verbose)
+ {
+- int err, read_err = 0;
++ int err, read_err;
+ uint32_t crc, magic, hdr_crc;
+ void *p;
+
+@@ -930,68 +1029,36 @@ int ubi_io_read_vid_hdr(struct ubi_devic
+ ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+ p = (char *)vid_hdr - ubi->vid_hdr_shift;
+- err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
++ read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
+ ubi->vid_hdr_alsize);
+- if (err) {
+- if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
+- return err;
+-
+- /*
+- * We read all the data, but either a correctable bit-flip
+- * occurred, or MTD reported about some data integrity error,
+- * like an ECC error in case of NAND. The former is harmless,
+- * the later may mean the read data is corrupted. But we have a
+- * CRC check-sum and we will identify this. If the VID header is
+- * still OK, we just report this as there was a bit-flip.
+- */
+- read_err = err;
+- }
++ if (read_err && read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG)
++ return read_err;
+
+ magic = be32_to_cpu(vid_hdr->magic);
+ if (magic != UBI_VID_HDR_MAGIC) {
+- /*
+- * If we have read all 0xFF bytes, the VID header probably does
+- * not exist and the physical eraseblock is assumed to be free.
+- *
+- * But if there was a read error, we do not test the data for
+- * 0xFFs. Even if it does contain all 0xFFs, this error
+- * indicates that something is still wrong with this physical
+- * eraseblock and it cannot be regarded as free.
+- */
+- if (read_err != -EBADMSG &&
+- check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) {
+- /* The physical eraseblock is supposedly free */
+-
+- /*
+- * The below is just a paranoid check, it has to be
+- * compiled out if paranoid checks are disabled.
+- */
+- err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start,
+- ubi->leb_size);
+- if (err)
+- return err > 0 ? UBI_IO_BAD_VID_HDR : err;
++ if (read_err == -EBADMSG)
++ return UBI_IO_BAD_HDR_EBADMSG;
+
++ if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) {
+ if (verbose)
+ ubi_warn("no VID header found at PEB %d, "
+ "only 0xFF bytes", pnum);
+- else if (UBI_IO_DEBUG)
+- dbg_msg("no VID header found at PEB %d, "
+- "only 0xFF bytes", pnum);
+- return UBI_IO_PEB_FREE;
++ dbg_bld("no VID header found at PEB %d, "
++ "only 0xFF bytes", pnum);
++ if (!read_err)
++ return UBI_IO_FF;
++ else
++ return UBI_IO_FF_BITFLIPS;
+ }
+
+- /*
+- * This is not a valid VID header, and these are not 0xFF
+- * bytes. Report that the header is corrupted.
+- */
+ if (verbose) {
+ ubi_warn("bad magic number at PEB %d: %08x instead of "
+ "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
+ ubi_dbg_dump_vid_hdr(vid_hdr);
+- } else if (UBI_IO_DEBUG)
+- dbg_msg("bad magic number at PEB %d: %08x instead of "
+- "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
+- return UBI_IO_BAD_VID_HDR;
++ }
++ dbg_bld("bad magic number at PEB %d: %08x instead of "
++ "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
++ return UBI_IO_BAD_HDR;
+ }
+
+ crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+@@ -1002,13 +1069,15 @@ int ubi_io_read_vid_hdr(struct ubi_devic
+ ubi_warn("bad CRC at PEB %d, calculated %#08x, "
+ "read %#08x", pnum, crc, hdr_crc);
+ ubi_dbg_dump_vid_hdr(vid_hdr);
+- } else if (UBI_IO_DEBUG)
+- dbg_msg("bad CRC at PEB %d, calculated %#08x, "
+- "read %#08x", pnum, crc, hdr_crc);
+- return UBI_IO_BAD_VID_HDR;
++ }
++ dbg_bld("bad CRC at PEB %d, calculated %#08x, "
++ "read %#08x", pnum, crc, hdr_crc);
++ if (!read_err)
++ return UBI_IO_BAD_HDR;
++ else
++ return UBI_IO_BAD_HDR_EBADMSG;
+ }
+
+- /* Validate the VID header that we have just read */
+ err = validate_vid_hdr(ubi, vid_hdr);
+ if (err) {
+ ubi_err("validation failed for PEB %d", pnum);
+@@ -1045,7 +1114,7 @@ int ubi_io_write_vid_hdr(struct ubi_devi
+
+ err = paranoid_check_peb_ec_hdr(ubi, pnum);
+ if (err)
+- return err > 0 ? -EINVAL : err;
++ return err;
+
+ vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC);
+ vid_hdr->version = UBI_VERSION;
+@@ -1054,7 +1123,7 @@ int ubi_io_write_vid_hdr(struct ubi_devi
+
+ err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
+ if (err)
+- return -EINVAL;
++ return err;
+
+ p = (char *)vid_hdr - ubi->vid_hdr_shift;
+ err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset,
+@@ -1062,27 +1131,30 @@ int ubi_io_write_vid_hdr(struct ubi_devi
+ return err;
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+
+ /**
+ * paranoid_check_not_bad - ensure that a physical eraseblock is not bad.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to check
+ *
+- * This function returns zero if the physical eraseblock is good, a positive
+- * number if it is bad and a negative error code if an error occurred.
++ * This function returns zero if the physical eraseblock is good, %-EINVAL if
++ * it is bad and a negative error code if an error occurred.
+ */
+ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum)
+ {
+ int err;
+
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
+ err = ubi_io_is_bad(ubi, pnum);
+ if (!err)
+ return err;
+
+ ubi_err("paranoid check failed for PEB %d", pnum);
+ ubi_dbg_dump_stack();
+- return err;
++ return err > 0 ? -EINVAL : err;
+ }
+
+ /**
+@@ -1092,7 +1164,7 @@ static int paranoid_check_not_bad(const
+ * @ec_hdr: the erase counter header to check
+ *
+ * This function returns zero if the erase counter header contains valid
+- * values, and %1 if not.
++ * values, and %-EINVAL if not.
+ */
+ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
+ const struct ubi_ec_hdr *ec_hdr)
+@@ -1100,6 +1172,9 @@ static int paranoid_check_ec_hdr(const s
+ int err;
+ uint32_t magic;
+
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
+ magic = be32_to_cpu(ec_hdr->magic);
+ if (magic != UBI_EC_HDR_MAGIC) {
+ ubi_err("bad magic %#08x, must be %#08x",
+@@ -1118,7 +1193,7 @@ static int paranoid_check_ec_hdr(const s
+ fail:
+ ubi_dbg_dump_ec_hdr(ec_hdr);
+ ubi_dbg_dump_stack();
+- return 1;
++ return -EINVAL;
+ }
+
+ /**
+@@ -1126,8 +1201,8 @@ fail:
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+- * This function returns zero if the erase counter header is all right, %1 if
+- * not, and a negative error code if an error occurred.
++ * This function returns zero if the erase counter header is all right and
++ * a negative error code if not or if an error occurred.
+ */
+ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
+ {
+@@ -1135,6 +1210,9 @@ static int paranoid_check_peb_ec_hdr(con
+ uint32_t crc, hdr_crc;
+ struct ubi_ec_hdr *ec_hdr;
+
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
+ ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
+ if (!ec_hdr)
+ return -ENOMEM;
+@@ -1150,7 +1228,7 @@ static int paranoid_check_peb_ec_hdr(con
+ ubi_err("paranoid check failed for PEB %d", pnum);
+ ubi_dbg_dump_ec_hdr(ec_hdr);
+ ubi_dbg_dump_stack();
+- err = 1;
++ err = -EINVAL;
+ goto exit;
+ }
+
+@@ -1168,7 +1246,7 @@ exit:
+ * @vid_hdr: the volume identifier header to check
+ *
+ * This function returns zero if the volume identifier header is all right, and
+- * %1 if not.
++ * %-EINVAL if not.
+ */
+ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+ const struct ubi_vid_hdr *vid_hdr)
+@@ -1176,6 +1254,9 @@ static int paranoid_check_vid_hdr(const
+ int err;
+ uint32_t magic;
+
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
+ magic = be32_to_cpu(vid_hdr->magic);
+ if (magic != UBI_VID_HDR_MAGIC) {
+ ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x",
+@@ -1195,7 +1276,7 @@ fail:
+ ubi_err("paranoid check failed for PEB %d", pnum);
+ ubi_dbg_dump_vid_hdr(vid_hdr);
+ ubi_dbg_dump_stack();
+- return 1;
++ return -EINVAL;
+
+ }
+
+@@ -1205,7 +1286,7 @@ fail:
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns zero if the volume identifier header is all right,
+- * %1 if not, and a negative error code if an error occurred.
++ * and a negative error code if not or if an error occurred.
+ */
+ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
+ {
+@@ -1214,6 +1295,9 @@ static int paranoid_check_peb_vid_hdr(co
+ struct ubi_vid_hdr *vid_hdr;
+ void *p;
+
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
+ vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
+ if (!vid_hdr)
+ return -ENOMEM;
+@@ -1232,7 +1316,7 @@ static int paranoid_check_peb_vid_hdr(co
+ ubi_err("paranoid check failed for PEB %d", pnum);
+ ubi_dbg_dump_vid_hdr(vid_hdr);
+ ubi_dbg_dump_stack();
+- err = 1;
++ err = -EINVAL;
+ goto exit;
+ }
+
+@@ -1244,51 +1328,124 @@ exit:
+ }
+
+ /**
+- * paranoid_check_all_ff - check that a region of flash is empty.
++ * ubi_dbg_check_write - make sure write succeeded.
++ * @ubi: UBI device description object
++ * @buf: buffer with data which were written
++ * @pnum: physical eraseblock number the data were written to
++ * @offset: offset within the physical eraseblock the data were written to
++ * @len: how many bytes were written
++ *
++ * This functions reads data which were recently written and compares it with
++ * the original data buffer - the data have to match. Returns zero if the data
++ * match and a negative error code if not or in case of failure.
++ */
++int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
++ int offset, int len)
++{
++ int err, i;
++ size_t read;
++ void *buf1;
++ loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
++
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
++ buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
++ if (!buf1) {
++ ubi_err("cannot allocate memory to check writes");
++ return 0;
++ }
++
++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1);
++ if (err && err != -EUCLEAN)
++ goto out_free;
++
++ for (i = 0; i < len; i++) {
++ uint8_t c = ((uint8_t *)buf)[i];
++ uint8_t c1 = ((uint8_t *)buf1)[i];
++ int dump_len;
++
++ if (c == c1)
++ continue;
++
++ ubi_err("paranoid check failed for PEB %d:%d, len %d",
++ pnum, offset, len);
++ ubi_msg("data differ at position %d", i);
++ dump_len = max_t(int, 128, len - i);
++ ubi_msg("hex dump of the original buffer from %d to %d",
++ i, i + dump_len);
++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
++ buf + i, dump_len, 1);
++ ubi_msg("hex dump of the read buffer from %d to %d",
++ i, i + dump_len);
++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
++ buf1 + i, dump_len, 1);
++ ubi_dbg_dump_stack();
++ err = -EINVAL;
++ goto out_free;
++ }
++
++ vfree(buf1);
++ return 0;
++
++out_free:
++ vfree(buf1);
++ return err;
++}
++
++/**
++ * ubi_dbg_check_all_ff - check that a region of flash is empty.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @offset: the starting offset within the physical eraseblock to check
+ * @len: the length of the region to check
+ *
+ * This function returns zero if only 0xFF bytes are present at offset
+- * @offset of the physical eraseblock @pnum, %1 if not, and a negative error
+- * code if an error occurred.
++ * @offset of the physical eraseblock @pnum, and a negative error code if not
++ * or if an error occurred.
+ */
+-static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
+- int len)
++int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len)
+ {
+ size_t read;
+ int err;
++ void *buf;
+ loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
+
+- mutex_lock(&ubi->dbg_buf_mutex);
+- err = ubi->mtd->read(ubi->mtd, addr, len, &read, ubi->dbg_peb_buf);
++ if (!(ubi_chk_flags & UBI_CHK_IO))
++ return 0;
++
++ buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
++ if (!buf) {
++ ubi_err("cannot allocate memory to check for 0xFFs");
++ return 0;
++ }
++
++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+ if (err && err != -EUCLEAN) {
+ ubi_err("error %d while reading %d bytes from PEB %d:%d, "
+ "read %zd bytes", err, len, pnum, offset, read);
+ goto error;
+ }
+
+- err = check_pattern(ubi->dbg_peb_buf, 0xFF, len);
++ err = ubi_check_pattern(buf, 0xFF, len);
+ if (err == 0) {
+ ubi_err("flash region at PEB %d:%d, length %d does not "
+ "contain all 0xFF bytes", pnum, offset, len);
+ goto fail;
+ }
+- mutex_unlock(&ubi->dbg_buf_mutex);
+
++ vfree(buf);
+ return 0;
+
+ fail:
+ ubi_err("paranoid check failed for PEB %d", pnum);
+ ubi_msg("hex dump of the %d-%d region", offset, offset + len);
+- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+- ubi->dbg_peb_buf, len, 1);
+- err = 1;
++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1);
++ err = -EINVAL;
+ error:
+ ubi_dbg_dump_stack();
+- mutex_unlock(&ubi->dbg_buf_mutex);
++ vfree(buf);
+ return err;
+ }
+
+-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
++#endif /* CONFIG_MTD_UBI_DEBUG */
+diff -uprN linux-2.6.28/drivers/mtd/ubi/kapi.c ubifs-v2.6.28/drivers/mtd/ubi/kapi.c
+--- linux-2.6.28/drivers/mtd/ubi/kapi.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/kapi.c 2011-06-15 14:22:07.000000000 -0400
+@@ -22,10 +22,32 @@
+
+ #include <linux/module.h>
+ #include <linux/err.h>
++#include <linux/namei.h>
++#include <linux/fs.h>
+ #include <asm/div64.h>
+ #include "ubi.h"
+
+ /**
++ * ubi_do_get_device_info - get information about UBI device.
++ * @ubi: UBI device description object
++ * @di: the information is stored here
++ *
++ * This function is the same as 'ubi_get_device_info()', but it assumes the UBI
++ * device is locked and cannot disappear.
++ */
++void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di)
++{
++ di->ubi_num = ubi->ubi_num;
++ di->leb_size = ubi->leb_size;
++ di->leb_start = ubi->leb_start;
++ di->min_io_size = ubi->min_io_size;
++ di->max_write_size = ubi->max_write_size;
++ di->ro_mode = ubi->ro_mode;
++ di->cdev = ubi->cdev.dev;
++}
++EXPORT_SYMBOL_GPL(ubi_do_get_device_info);
++
++/**
+ * ubi_get_device_info - get information about UBI device.
+ * @ubi_num: UBI device number
+ * @di: the information is stored here
+@@ -39,33 +61,24 @@ int ubi_get_device_info(int ubi_num, str
+
+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
+ return -EINVAL;
+-
+ ubi = ubi_get_device(ubi_num);
+ if (!ubi)
+ return -ENODEV;
+-
+- di->ubi_num = ubi->ubi_num;
+- di->leb_size = ubi->leb_size;
+- di->min_io_size = ubi->min_io_size;
+- di->ro_mode = ubi->ro_mode;
+- di->cdev = ubi->cdev.dev;
+-
++ ubi_do_get_device_info(ubi, di);
+ ubi_put_device(ubi);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(ubi_get_device_info);
+
+ /**
+- * ubi_get_volume_info - get information about UBI volume.
+- * @desc: volume descriptor
++ * ubi_do_get_volume_info - get information about UBI volume.
++ * @ubi: UBI device description object
++ * @vol: volume description object
+ * @vi: the information is stored here
+ */
+-void ubi_get_volume_info(struct ubi_volume_desc *desc,
+- struct ubi_volume_info *vi)
++void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
++ struct ubi_volume_info *vi)
+ {
+- const struct ubi_volume *vol = desc->vol;
+- const struct ubi_device *ubi = vol->ubi;
+-
+ vi->vol_id = vol->vol_id;
+ vi->ubi_num = ubi->ubi_num;
+ vi->size = vol->reserved_pebs;
+@@ -79,6 +92,17 @@ void ubi_get_volume_info(struct ubi_volu
+ vi->name = vol->name;
+ vi->cdev = vol->cdev.dev;
+ }
++
++/**
++ * ubi_get_volume_info - get information about UBI volume.
++ * @desc: volume descriptor
++ * @vi: the information is stored here
++ */
++void ubi_get_volume_info(struct ubi_volume_desc *desc,
++ struct ubi_volume_info *vi)
++{
++ ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi);
++}
+ EXPORT_SYMBOL_GPL(ubi_get_volume_info);
+
+ /**
+@@ -106,7 +130,7 @@ struct ubi_volume_desc *ubi_open_volume(
+ struct ubi_device *ubi;
+ struct ubi_volume *vol;
+
+- dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
++ dbg_gen("open device %d, volume %d, mode %d", ubi_num, vol_id, mode);
+
+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
+ return ERR_PTR(-EINVAL);
+@@ -196,6 +220,8 @@ out_free:
+ kfree(desc);
+ out_put_ubi:
+ ubi_put_device(ubi);
++ dbg_err("cannot open device %d, volume %d, error %d",
++ ubi_num, vol_id, err);
+ return ERR_PTR(err);
+ }
+ EXPORT_SYMBOL_GPL(ubi_open_volume);
+@@ -215,7 +241,7 @@ struct ubi_volume_desc *ubi_open_volume_
+ struct ubi_device *ubi;
+ struct ubi_volume_desc *ret;
+
+- dbg_gen("open volume %s, mode %d", name, mode);
++ dbg_gen("open device %d, volume %s, mode %d", ubi_num, name, mode);
+
+ if (!name)
+ return ERR_PTR(-EINVAL);
+@@ -258,6 +284,43 @@ struct ubi_volume_desc *ubi_open_volume_
+ EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
+
+ /**
++ * ubi_open_volume_path - open UBI volume by its character device node path.
++ * @pathname: volume character device node path
++ * @mode: open mode
++ *
++ * This function is similar to 'ubi_open_volume()', but opens a volume by the path
++ * to its character device node.
++ */
++struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
++{
++ int error, ubi_num, vol_id, mod;
++ struct inode *inode;
++ struct path path;
++
++ dbg_gen("open volume %s, mode %d", pathname, mode);
++
++ if (!pathname || !*pathname)
++ return ERR_PTR(-EINVAL);
++
++ error = kern_path(pathname, LOOKUP_FOLLOW, &path);
++ if (error)
++ return ERR_PTR(error);
++
++ inode = path.dentry->d_inode;
++ mod = inode->i_mode;
++ ubi_num = ubi_major2num(imajor(inode));
++ vol_id = iminor(inode) - 1;
++ path_put(&path);
++
++ if (!S_ISCHR(mod))
++ return ERR_PTR(-EINVAL);
++ if (vol_id >= 0 && ubi_num >= 0)
++ return ubi_open_volume(ubi_num, vol_id, mode);
++ return ERR_PTR(-ENODEV);
++}
++EXPORT_SYMBOL_GPL(ubi_open_volume_path);
++
++/**
+ * ubi_close_volume - close UBI volume.
+ * @desc: volume descriptor
+ */
+@@ -266,7 +329,8 @@ void ubi_close_volume(struct ubi_volume_
+ struct ubi_volume *vol = desc->vol;
+ struct ubi_device *ubi = vol->ubi;
+
+- dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
++ dbg_gen("close device %d, volume %d, mode %d",
++ ubi->ubi_num, vol->vol_id, desc->mode);
+
+ spin_lock(&ubi->volumes_lock);
+ switch (desc->mode) {
+@@ -425,7 +489,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_write);
+ *
+ * This function changes the contents of a logical eraseblock atomically. @buf
+ * has to contain new logical eraseblock data, and @len - the length of the
+- * data, which has to be aligned. The length may be shorter then the logical
++ * data, which has to be aligned. The length may be shorter than the logical
+ * eraseblock size, ant the logical eraseblock may be appended to more times
+ * later on. This function guarantees that in case of an unclean reboot the old
+ * contents is preserved. Returns zero in case of success and a negative error
+@@ -508,7 +572,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase);
+ *
+ * This function un-maps logical eraseblock @lnum and schedules the
+ * corresponding physical eraseblock for erasure, so that it will eventually be
+- * physically erased in background. This operation is much faster then the
++ * physically erased in background. This operation is much faster than the
+ * erase operation.
+ *
+ * Unlike erase, the un-map operation does not guarantee that the logical
+@@ -527,7 +591,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase);
+ *
+ * The main and obvious use-case of this function is when the contents of a
+ * logical eraseblock has to be re-written. Then it is much more efficient to
+- * first un-map it, then write new data, rather then first erase it, then write
++ * first un-map it, then write new data, rather than first erase it, then write
+ * new data. Note, once new data has been written to the logical eraseblock,
+ * UBI guarantees that the old contents has gone forever. In other words, if an
+ * unclean reboot happens after the logical eraseblock has been un-mapped and
+@@ -558,13 +622,13 @@ int ubi_leb_unmap(struct ubi_volume_desc
+ EXPORT_SYMBOL_GPL(ubi_leb_unmap);
+
+ /**
+- * ubi_leb_map - map logical erasblock to a physical eraseblock.
++ * ubi_leb_map - map logical eraseblock to a physical eraseblock.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ * @dtype: expected data type
+ *
+ * This function maps an un-mapped logical eraseblock @lnum to a physical
+- * eraseblock. This means, that after a successfull invocation of this
++ * eraseblock. This means, that after a successful invocation of this
+ * function the logical eraseblock @lnum will be empty (contain only %0xFF
+ * bytes) and be mapped to a physical eraseblock, even if an unclean reboot
+ * happens.
+@@ -656,3 +720,59 @@ int ubi_sync(int ubi_num)
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(ubi_sync);
++
++BLOCKING_NOTIFIER_HEAD(ubi_notifiers);
++
++/**
++ * ubi_register_volume_notifier - register a volume notifier.
++ * @nb: the notifier description object
++ * @ignore_existing: if non-zero, do not send "added" notification for all
++ * already existing volumes
++ *
++ * This function registers a volume notifier, which means that
++ * 'nb->notifier_call()' will be invoked when an UBI volume is created,
++ * removed, re-sized, re-named, or updated. The first argument of the function
++ * is the notification type. The second argument is pointer to a
++ * &struct ubi_notification object which describes the notification event.
++ * Using UBI API from the volume notifier is prohibited.
++ *
++ * This function returns zero in case of success and a negative error code
++ * in case of failure.
++ */
++int ubi_register_volume_notifier(struct notifier_block *nb,
++ int ignore_existing)
++{
++ int err;
++
++ err = blocking_notifier_chain_register(&ubi_notifiers, nb);
++ if (err != 0)
++ return err;
++ if (ignore_existing)
++ return 0;
++
++ /*
++ * We are going to walk all UBI devices and all volumes, and
++ * notify the user about existing volumes by the %UBI_VOLUME_ADDED
++ * event. We have to lock the @ubi_devices_mutex to make sure UBI
++ * devices do not disappear.
++ */
++ mutex_lock(&ubi_devices_mutex);
++ ubi_enumerate_volumes(nb);
++ mutex_unlock(&ubi_devices_mutex);
++
++ return err;
++}
++EXPORT_SYMBOL_GPL(ubi_register_volume_notifier);
++
++/**
++ * ubi_unregister_volume_notifier - unregister the volume notifier.
++ * @nb: the notifier description object
++ *
++ * This function unregisters volume notifier @nb and returns zero in case of
++ * success and a negative error code in case of failure.
++ */
++int ubi_unregister_volume_notifier(struct notifier_block *nb)
++{
++ return blocking_notifier_chain_unregister(&ubi_notifiers, nb);
++}
++EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier);
+diff -uprN linux-2.6.28/drivers/mtd/ubi/Kconfig ubifs-v2.6.28/drivers/mtd/ubi/Kconfig
+--- linux-2.6.28/drivers/mtd/ubi/Kconfig 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/Kconfig 2011-06-15 14:22:07.000000000 -0400
+@@ -1,11 +1,7 @@
+ # drivers/mtd/ubi/Kconfig
+
+-menu "UBI - Unsorted block images"
+- depends on MTD
+-
+-config MTD_UBI
+- tristate "Enable UBI"
+- depends on MTD
++menuconfig MTD_UBI
++ tristate "Enable UBI - Unsorted block images"
+ select CRC32
+ help
+ UBI is a software layer above MTD layer which admits of LVM-like
+@@ -14,11 +10,12 @@ config MTD_UBI
+ capabilities. Please, consult the MTD web site for more details
+ (www.linux-mtd.infradead.org).
+
++if MTD_UBI
++
+ config MTD_UBI_WL_THRESHOLD
+ int "UBI wear-leveling threshold"
+ default 4096
+ range 2 65536
+- depends on MTD_UBI
+ help
+ This parameter defines the maximum difference between the highest
+ erase counter value and the lowest erase counter value of eraseblocks
+@@ -29,14 +26,13 @@ config MTD_UBI_WL_THRESHOLD
+ The default value should be OK for SLC NAND flashes, NOR flashes and
+ other flashes which have eraseblock life-cycle 100000 or more.
+ However, in case of MLC NAND flashes which typically have eraseblock
+- life-cycle less then 10000, the threshold should be lessened (e.g.,
++ life-cycle less than 10000, the threshold should be lessened (e.g.,
+ to 128 or 256, although it does not have to be power of 2).
+
+ config MTD_UBI_BEB_RESERVE
+ int "Percentage of reserved eraseblocks for bad eraseblocks handling"
+ default 1
+ range 0 25
+- depends on MTD_UBI
+ help
+ If the MTD device admits of bad eraseblocks (e.g. NAND flash), UBI
+ reserves some amount of physical eraseblocks to handle new bad
+@@ -49,15 +45,21 @@ config MTD_UBI_BEB_RESERVE
+ reserved. Leave the default value if unsure.
+
+ config MTD_UBI_GLUEBI
+- bool "Emulate MTD devices"
+- default n
+- depends on MTD_UBI
++ tristate "MTD devices emulation driver (gluebi)"
++ help
++ This option enables gluebi - an additional driver which emulates MTD
++ devices on top of UBI volumes: for each UBI volumes an MTD device is
++ created, and all I/O to this MTD device is redirected to the UBI
++ volume. This is handy to make MTD-oriented software (like JFFS2)
++ work on top of UBI. Do not enable this unless you use legacy
++ software.
++
++config MTD_UBI_DEBUG
++ bool "UBI debugging"
++ depends on SYSFS
++ select DEBUG_FS
++ select KALLSYMS
+ help
+- This option enables MTD devices emulation on top of UBI volumes: for
+- each UBI volumes an MTD device is created, and all I/O to this MTD
+- device is redirected to the UBI volume. This is handy to make
+- MTD-oriented software (like JFFS2) work on top of UBI. Do not enable
+- this if no legacy software will be used.
++ This option enables UBI debugging.
+
+-source "drivers/mtd/ubi/Kconfig.debug"
+-endmenu
++endif # MTD_UBI
+diff -uprN linux-2.6.28/drivers/mtd/ubi/Kconfig.debug ubifs-v2.6.28/drivers/mtd/ubi/Kconfig.debug
+--- linux-2.6.28/drivers/mtd/ubi/Kconfig.debug 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/Kconfig.debug 1969-12-31 19:00:00.000000000 -0500
+@@ -1,104 +0,0 @@
+-comment "UBI debugging options"
+- depends on MTD_UBI
+-
+-config MTD_UBI_DEBUG
+- bool "UBI debugging"
+- depends on SYSFS
+- depends on MTD_UBI
+- select DEBUG_FS
+- select KALLSYMS_ALL
+- help
+- This option enables UBI debugging.
+-
+-config MTD_UBI_DEBUG_MSG
+- bool "UBI debugging messages"
+- depends on MTD_UBI_DEBUG
+- default n
+- help
+- This option enables UBI debugging messages.
+-
+-config MTD_UBI_DEBUG_PARANOID
+- bool "Extra self-checks"
+- default n
+- depends on MTD_UBI_DEBUG
+- help
+- This option enables extra checks in UBI code. Note this slows UBI down
+- significantly.
+-
+-config MTD_UBI_DEBUG_DISABLE_BGT
+- bool "Do not enable the UBI background thread"
+- depends on MTD_UBI_DEBUG
+- default n
+- help
+- This option switches the background thread off by default. The thread
+- may be also be enabled/disabled via UBI sysfs.
+-
+-config MTD_UBI_DEBUG_USERSPACE_IO
+- bool "Direct user-space write/erase support"
+- default n
+- depends on MTD_UBI_DEBUG
+- help
+- By default, users cannot directly write and erase individual
+- eraseblocks of dynamic volumes, and have to use update operation
+- instead. This option enables this capability - it is very useful for
+- debugging and testing.
+-
+-config MTD_UBI_DEBUG_EMULATE_BITFLIPS
+- bool "Emulate flash bit-flips"
+- depends on MTD_UBI_DEBUG
+- default n
+- help
+- This option emulates bit-flips with probability 1/50, which in turn
+- causes scrubbing. Useful for debugging and stressing UBI.
+-
+-config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES
+- bool "Emulate flash write failures"
+- depends on MTD_UBI_DEBUG
+- default n
+- help
+- This option emulates write failures with probability 1/100. Useful for
+- debugging and testing how UBI handlines errors.
+-
+-config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES
+- bool "Emulate flash erase failures"
+- depends on MTD_UBI_DEBUG
+- default n
+- help
+- This option emulates erase failures with probability 1/100. Useful for
+- debugging and testing how UBI handlines errors.
+-
+-menu "Additional UBI debugging messages"
+- depends on MTD_UBI_DEBUG
+-
+-config MTD_UBI_DEBUG_MSG_BLD
+- bool "Additional UBI initialization and build messages"
+- default n
+- depends on MTD_UBI_DEBUG
+- help
+- This option enables detailed UBI initialization and device build
+- debugging messages.
+-
+-config MTD_UBI_DEBUG_MSG_EBA
+- bool "Eraseblock association unit messages"
+- default n
+- depends on MTD_UBI_DEBUG
+- help
+- This option enables debugging messages from the UBI eraseblock
+- association unit.
+-
+-config MTD_UBI_DEBUG_MSG_WL
+- bool "Wear-leveling unit messages"
+- default n
+- depends on MTD_UBI_DEBUG
+- help
+- This option enables debugging messages from the UBI wear-leveling
+- unit.
+-
+-config MTD_UBI_DEBUG_MSG_IO
+- bool "Input/output unit messages"
+- default n
+- depends on MTD_UBI_DEBUG
+- help
+- This option enables debugging messages from the UBI input/output unit.
+-
+-endmenu # UBI debugging messages
+diff -uprN linux-2.6.28/drivers/mtd/ubi/Makefile ubifs-v2.6.28/drivers/mtd/ubi/Makefile
+--- linux-2.6.28/drivers/mtd/ubi/Makefile 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/Makefile 2011-06-15 14:22:07.000000000 -0400
+@@ -4,4 +4,4 @@ ubi-y += vtbl.o vmt.o upd.o build.o cdev
+ ubi-y += misc.o
+
+ ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o
+-ubi-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
++obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
+diff -uprN linux-2.6.28/drivers/mtd/ubi/misc.c ubifs-v2.6.28/drivers/mtd/ubi/misc.c
+--- linux-2.6.28/drivers/mtd/ubi/misc.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/misc.c 2011-06-15 14:22:07.000000000 -0400
+@@ -103,3 +103,22 @@ void ubi_calculate_reserved(struct ubi_d
+ if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS)
+ ubi->beb_rsvd_level = MIN_RESEVED_PEBS;
+ }
++
++/**
++ * ubi_check_pattern - check if buffer contains only a certain byte pattern.
++ * @buf: buffer to check
++ * @patt: the pattern to check
++ * @size: buffer size in bytes
++ *
++ * This function returns %1 if there are only @patt bytes in @buf, and %0 if
++ * something else was also found.
++ */
++int ubi_check_pattern(const void *buf, uint8_t patt, int size)
++{
++ int i;
++
++ for (i = 0; i < size; i++)
++ if (((const uint8_t *)buf)[i] != patt)
++ return 0;
++ return 1;
++}
+diff -uprN linux-2.6.28/drivers/mtd/ubi/scan.c ubifs-v2.6.28/drivers/mtd/ubi/scan.c
+--- linux-2.6.28/drivers/mtd/ubi/scan.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/scan.c 2011-06-15 14:22:07.000000000 -0400
+@@ -29,7 +29,7 @@
+ * objects which are kept in volume RB-tree with root at the @volumes field.
+ * The RB-tree is indexed by the volume ID.
+ *
+- * Found logical eraseblocks are represented by &struct ubi_scan_leb objects.
++ * Scanned logical eraseblocks are represented by &struct ubi_scan_leb objects.
+ * These objects are kept in per-volume RB-trees with the root at the
+ * corresponding &struct ubi_scan_volume object. To put it differently, we keep
+ * an RB-tree of per-volume objects and each of these objects is the root of
+@@ -38,14 +38,56 @@
+ * Corrupted physical eraseblocks are put to the @corr list, free physical
+ * eraseblocks are put to the @free list and the physical eraseblock to be
+ * erased are put to the @erase list.
++ *
++ * About corruptions
++ * ~~~~~~~~~~~~~~~~~
++ *
++ * UBI protects EC and VID headers with CRC-32 checksums, so it can detect
++ * whether the headers are corrupted or not. Sometimes UBI also protects the
++ * data with CRC-32, e.g., when it executes the atomic LEB change operation, or
++ * when it moves the contents of a PEB for wear-leveling purposes.
++ *
++ * UBI tries to distinguish between 2 types of corruptions.
++ *
++ * 1. Corruptions caused by power cuts. These are expected corruptions and UBI
++ * tries to handle them gracefully, without printing too many warnings and
++ * error messages. The idea is that we do not lose important data in these cases
++ * - we may lose only the data which was being written to the media just before
++ * the power cut happened, and the upper layers (e.g., UBIFS) are supposed to
++ * handle such data losses (e.g., by using the FS journal).
++ *
++ * When UBI detects a corruption (CRC-32 mismatch) in a PEB, and it looks like
++ * the reason is a power cut, UBI puts this PEB to the @erase list, and all
++ * PEBs in the @erase list are scheduled for erasure later.
++ *
++ * 2. Unexpected corruptions which are not caused by power cuts. During
++ * scanning, such PEBs are put to the @corr list and UBI preserves them.
++ * Obviously, this lessens the amount of available PEBs, and if at some point
++ * UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly informs
++ * about such PEBs every time the MTD device is attached.
++ *
++ * However, it is difficult to reliably distinguish between these types of
++ * corruptions and UBI's strategy is as follows. UBI assumes corruption type 2
++ * if the VID header is corrupted and the data area does not contain all 0xFFs,
++ * and there were no bit-flips or integrity errors while reading the data area.
++ * Otherwise UBI assumes corruption type 1. So the decision criteria are as
++ * follows.
++ * o If the data area contains only 0xFFs, there is no data, and it is safe
++ * to just erase this PEB - this is corruption type 1.
++ * o If the data area has bit-flips or data integrity errors (ECC errors on
++ * NAND), it is probably a PEB which was being erased when power cut
++ * happened, so this is corruption type 1. However, this is just a guess,
++ * which might be wrong.
++ * o Otherwise this is corruption type 2.
+ */
+
+ #include <linux/err.h>
+ #include <linux/crc32.h>
+-#include <asm/div64.h>
++#include <linux/math64.h>
++#include <linux/random.h>
+ #include "ubi.h"
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si);
+ #else
+ #define paranoid_check_si(ubi, si) 0
+@@ -60,35 +102,69 @@ static struct ubi_vid_hdr *vidh;
+ * @si: scanning information
+ * @pnum: physical eraseblock number to add
+ * @ec: erase counter of the physical eraseblock
++ * @to_head: if not zero, add to the head of the list
+ * @list: the list to add to
+ *
+- * This function adds physical eraseblock @pnum to free, erase, corrupted or
+- * alien lists. Returns zero in case of success and a negative error code in
+- * case of failure.
++ * This function adds physical eraseblock @pnum to free, erase, or alien lists.
++ * If @to_head is not zero, PEB will be added to the head of the list, which
++ * basically means it will be processed first later. E.g., we add corrupted
++ * PEBs (corrupted due to power cuts) to the head of the erase list to make
++ * sure we erase them first and get rid of corruptions ASAP. This function
++ * returns zero in case of success and a negative error code in case of
++ * failure.
+ */
+-static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
++static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head,
+ struct list_head *list)
+ {
+ struct ubi_scan_leb *seb;
+
+- if (list == &si->free)
++ if (list == &si->free) {
+ dbg_bld("add to free: PEB %d, EC %d", pnum, ec);
+- else if (list == &si->erase)
++ } else if (list == &si->erase) {
+ dbg_bld("add to erase: PEB %d, EC %d", pnum, ec);
+- else if (list == &si->corr)
+- dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec);
+- else if (list == &si->alien)
++ } else if (list == &si->alien) {
+ dbg_bld("add to alien: PEB %d, EC %d", pnum, ec);
+- else
++ si->alien_peb_count += 1;
++ } else
+ BUG();
+
+- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
++ if (!seb)
++ return -ENOMEM;
++
++ seb->pnum = pnum;
++ seb->ec = ec;
++ if (to_head)
++ list_add(&seb->u.list, list);
++ else
++ list_add_tail(&seb->u.list, list);
++ return 0;
++}
++
++/**
++ * add_corrupted - add a corrupted physical eraseblock.
++ * @si: scanning information
++ * @pnum: physical eraseblock number to add
++ * @ec: erase counter of the physical eraseblock
++ *
++ * This function adds corrupted physical eraseblock @pnum to the 'corr' list.
++ * The corruption was presumably not caused by a power cut. Returns zero in
++ * case of success and a negative error code in case of failure.
++ */
++static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec)
++{
++ struct ubi_scan_leb *seb;
++
++ dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec);
++
++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
+ if (!seb)
+ return -ENOMEM;
+
++ si->corr_peb_count += 1;
+ seb->pnum = pnum;
+ seb->ec = ec;
+- list_add_tail(&seb->u.list, list);
++ list_add(&seb->u.list, &si->corr);
+ return 0;
+ }
+
+@@ -229,7 +305,7 @@ static struct ubi_scan_volume *add_volum
+ * case of success this function returns a positive value, in case of failure, a
+ * negative error code is returned. The success return codes use the following
+ * bits:
+- * o bit 0 is cleared: the first PEB (described by @seb) is newer then the
++ * o bit 0 is cleared: the first PEB (described by @seb) is newer than the
+ * second PEB (described by @pnum and @vid_hdr);
+ * o bit 0 is set: the second PEB is newer;
+ * o bit 1 is cleared: no bit-flips were detected in the newer LEB;
+@@ -252,8 +328,8 @@ static int compare_lebs(struct ubi_devic
+ * created before sequence numbers support has been added. At
+ * that times we used 32-bit LEB versions stored in logical
+ * eraseblocks. That was before UBI got into mainline. We do not
+- * support these images anymore. Well, those images will work
+- * still work, but only if no unclean reboots happened.
++ * support these images anymore. Well, those images still work,
++ * but only if no unclean reboots happened.
+ */
+ ubi_err("unsupported on-flash UBI format\n");
+ return -EINVAL;
+@@ -279,19 +355,25 @@ static int compare_lebs(struct ubi_devic
+ return 1;
+ }
+ } else {
+- pnum = seb->pnum;
++ if (!seb->copy_flag) {
++ /* It is not a copy, so it is newer */
++ dbg_bld("first PEB %d is newer, copy_flag is unset",
++ pnum);
++ return bitflips << 1;
++ }
+
+ vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+ if (!vh)
+ return -ENOMEM;
+
++ pnum = seb->pnum;
+ err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);
+ if (err) {
+ if (err == UBI_IO_BITFLIPS)
+ bitflips = 1;
+ else {
+ dbg_err("VID of PEB %d header is bad, but it "
+- "was OK earlier", pnum);
++ "was OK earlier, err %d", pnum, err);
+ if (err > 0)
+ err = -EIO;
+
+@@ -299,14 +381,6 @@ static int compare_lebs(struct ubi_devic
+ }
+ }
+
+- if (!vh->copy_flag) {
+- /* It is not a copy, so it is newer */
+- dbg_bld("first PEB %d is newer, copy_flag is unset",
+- pnum);
+- err = bitflips << 1;
+- goto out_free_vidh;
+- }
+-
+ vid_hdr = vh;
+ }
+
+@@ -450,25 +524,22 @@ int ubi_scan_add_used(struct ubi_device
+
+ if (cmp_res & 1) {
+ /*
+- * This logical eraseblock is newer then the one
++ * This logical eraseblock is newer than the one
+ * found earlier.
+ */
+ err = validate_vid_hdr(vid_hdr, sv, pnum);
+ if (err)
+ return err;
+
+- if (cmp_res & 4)
+- err = add_to_list(si, seb->pnum, seb->ec,
+- &si->corr);
+- else
+- err = add_to_list(si, seb->pnum, seb->ec,
+- &si->erase);
++ err = add_to_list(si, seb->pnum, seb->ec, cmp_res & 4,
++ &si->erase);
+ if (err)
+ return err;
+
+ seb->ec = ec;
+ seb->pnum = pnum;
+ seb->scrub = ((cmp_res & 2) || bitflips);
++ seb->copy_flag = vid_hdr->copy_flag;
+ seb->sqnum = sqnum;
+
+ if (sv->highest_lnum == lnum)
+@@ -478,13 +549,11 @@ int ubi_scan_add_used(struct ubi_device
+ return 0;
+ } else {
+ /*
+- * This logical eraseblock is older then the one found
++ * This logical eraseblock is older than the one found
+ * previously.
+ */
+- if (cmp_res & 4)
+- return add_to_list(si, pnum, ec, &si->corr);
+- else
+- return add_to_list(si, pnum, ec, &si->erase);
++ return add_to_list(si, pnum, ec, cmp_res & 4,
++ &si->erase);
+ }
+ }
+
+@@ -497,15 +566,16 @@ int ubi_scan_add_used(struct ubi_device
+ if (err)
+ return err;
+
+- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
+ if (!seb)
+ return -ENOMEM;
+
+ seb->ec = ec;
+ seb->pnum = pnum;
+ seb->lnum = lnum;
+- seb->sqnum = sqnum;
+ seb->scrub = bitflips;
++ seb->copy_flag = vid_hdr->copy_flag;
++ seb->sqnum = sqnum;
+
+ if (sv->highest_lnum <= lnum) {
+ sv->highest_lnum = lnum;
+@@ -661,8 +731,8 @@ out_free:
+ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
+ struct ubi_scan_info *si)
+ {
+- int err = 0, i;
+- struct ubi_scan_leb *seb;
++ int err = 0;
++ struct ubi_scan_leb *seb, *tmp_seb;
+
+ if (!list_empty(&si->free)) {
+ seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
+@@ -671,38 +741,88 @@ struct ubi_scan_leb *ubi_scan_get_free_p
+ return seb;
+ }
+
+- for (i = 0; i < 2; i++) {
+- struct list_head *head;
+- struct ubi_scan_leb *tmp_seb;
++ /*
++ * We try to erase the first physical eraseblock from the erase list
++ * and pick it if we succeed, or try to erase the next one if not. And
++ * so forth. We don't want to take care about bad eraseblocks here -
++ * they'll be handled later.
++ */
++ list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) {
++ if (seb->ec == UBI_SCAN_UNKNOWN_EC)
++ seb->ec = si->mean_ec;
+
+- if (i == 0)
+- head = &si->erase;
+- else
+- head = &si->corr;
++ err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
++ if (err)
++ continue;
+
++ seb->ec += 1;
++ list_del(&seb->u.list);
++ dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
++ return seb;
++ }
++
++ ubi_err("no free eraseblocks");
++ return ERR_PTR(-ENOSPC);
++}
++
++/**
++ * check_corruption - check the data area of PEB.
++ * @ubi: UBI device description object
++ * @vid_hdr: the (corrupted) VID header of this PEB
++ * @pnum: the physical eraseblock number to check
++ *
++ * This is a helper function which is used to distinguish between VID header
++ * corruptions caused by power cuts and other reasons. If the PEB contains only
++ * 0xFF bytes in the data area, the VID header is most probably corrupted
++ * because of a power cut (%0 is returned in this case). Otherwise, it was
++ * probably corrupted for some other reasons (%1 is returned in this case). A
++ * negative error code is returned if a read error occurred.
++ *
++ * If the corruption reason was a power cut, UBI can safely erase this PEB.
++ * Otherwise, it should preserve it to avoid possibly destroying important
++ * information.
++ */
++static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
++ int pnum)
++{
++ int err;
++
++ mutex_lock(&ubi->buf_mutex);
++ memset(ubi->peb_buf1, 0x00, ubi->leb_size);
++
++ err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start,
++ ubi->leb_size);
++ if (err == UBI_IO_BITFLIPS || err == -EBADMSG) {
+ /*
+- * We try to erase the first physical eraseblock from the @head
+- * list and pick it if we succeed, or try to erase the
+- * next one if not. And so forth. We don't want to take care
+- * about bad eraseblocks here - they'll be handled later.
++ * Bit-flips or integrity errors while reading the data area.
++ * It is difficult to say for sure what type of corruption is
++ * this, but presumably a power cut happened while this PEB was
++ * erased, so it became unstable and corrupted, and should be
++ * erased.
+ */
+- list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
+- if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+- seb->ec = si->mean_ec;
++ err = 0;
++ goto out_unlock;
++ }
+
+- err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
+- if (err)
+- continue;
++ if (err)
++ goto out_unlock;
+
+- seb->ec += 1;
+- list_del(&seb->u.list);
+- dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
+- return seb;
+- }
+- }
++ if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size))
++ goto out_unlock;
+
+- ubi_err("no eraseblocks found");
+- return ERR_PTR(-ENOSPC);
++ ubi_err("PEB %d contains corrupted VID header, and the data does not "
++ "contain all 0xFF, this may be a non-UBI PEB or a severe VID "
++ "header corruption which requires manual inspection", pnum);
++ ubi_dbg_dump_vid_hdr(vid_hdr);
++ dbg_msg("hexdump of PEB %d offset %d, length %d",
++ pnum, ubi->leb_start, ubi->leb_size);
++ ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
++ ubi->peb_buf1, ubi->leb_size, 1);
++ err = 1;
++
++out_unlock:
++ mutex_unlock(&ubi->buf_mutex);
++ return err;
+ }
+
+ /**
+@@ -718,7 +838,7 @@ static int process_eb(struct ubi_device
+ int pnum)
+ {
+ long long uninitialized_var(ec);
+- int err, bitflips = 0, vol_id, ec_corr = 0;
++ int err, bitflips = 0, vol_id, ec_err = 0;
+
+ dbg_bld("scan PEB %d", pnum);
+
+@@ -739,24 +859,39 @@ static int process_eb(struct ubi_device
+ err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);
+ if (err < 0)
+ return err;
+- else if (err == UBI_IO_BITFLIPS)
++ switch (err) {
++ case 0:
++ break;
++ case UBI_IO_BITFLIPS:
+ bitflips = 1;
+- else if (err == UBI_IO_PEB_EMPTY)
+- return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase);
+- else if (err == UBI_IO_BAD_EC_HDR) {
++ break;
++ case UBI_IO_FF:
++ si->empty_peb_count += 1;
++ return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 0,
++ &si->erase);
++ case UBI_IO_FF_BITFLIPS:
++ si->empty_peb_count += 1;
++ return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 1,
++ &si->erase);
++ case UBI_IO_BAD_HDR_EBADMSG:
++ case UBI_IO_BAD_HDR:
+ /*
+ * We have to also look at the VID header, possibly it is not
+ * corrupted. Set %bitflips flag in order to make this PEB be
+ * moved and EC be re-created.
+ */
+- ec_corr = 1;
++ ec_err = err;
+ ec = UBI_SCAN_UNKNOWN_EC;
+ bitflips = 1;
++ break;
++ default:
++ ubi_err("'ubi_io_read_ec_hdr()' returned unknown code %d", err);
++ return -EINVAL;
+ }
+
+- si->is_empty = 0;
++ if (!ec_err) {
++ int image_seq;
+
+- if (!ec_corr) {
+ /* Make sure UBI version is OK */
+ if (ech->version != UBI_VERSION) {
+ ubi_err("this UBI version is %d, image version is %d",
+@@ -778,6 +913,28 @@ static int process_eb(struct ubi_device
+ ubi_dbg_dump_ec_hdr(ech);
+ return -EINVAL;
+ }
++
++ /*
++ * Make sure that all PEBs have the same image sequence number.
++ * This allows us to detect situations when users flash UBI
++ * images incorrectly, so that the flash has the new UBI image
++ * and leftovers from the old one. This feature was added
++ * relatively recently, and the sequence number was always
++ * zero, because old UBI implementations always set it to zero.
++ * For this reasons, we do not panic if some PEBs have zero
++ * sequence number, while other PEBs have non-zero sequence
++ * number.
++ */
++ image_seq = be32_to_cpu(ech->image_seq);
++ if (!ubi->image_seq && image_seq)
++ ubi->image_seq = image_seq;
++ if (ubi->image_seq && image_seq &&
++ ubi->image_seq != image_seq) {
++ ubi_err("bad image sequence number %d in PEB %d, "
++ "expected %d", image_seq, pnum, ubi->image_seq);
++ ubi_dbg_dump_ec_hdr(ech);
++ return -EINVAL;
++ }
+ }
+
+ /* OK, we've done with the EC header, let's look at the VID header */
+@@ -785,21 +942,71 @@ static int process_eb(struct ubi_device
+ err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);
+ if (err < 0)
+ return err;
+- else if (err == UBI_IO_BITFLIPS)
++ switch (err) {
++ case 0:
++ break;
++ case UBI_IO_BITFLIPS:
+ bitflips = 1;
+- else if (err == UBI_IO_BAD_VID_HDR ||
+- (err == UBI_IO_PEB_FREE && ec_corr)) {
+- /* VID header is corrupted */
+- err = add_to_list(si, pnum, ec, &si->corr);
++ break;
++ case UBI_IO_BAD_HDR_EBADMSG:
++ if (ec_err == UBI_IO_BAD_HDR_EBADMSG)
++ /*
++ * Both EC and VID headers are corrupted and were read
++ * with data integrity error, probably this is a bad
++ * PEB, but it is not marked as bad yet. This may also
++ * be a result of power cut during erasure.
++ */
++ si->maybe_bad_peb_count += 1;
++ case UBI_IO_BAD_HDR:
++ if (ec_err)
++ /*
++ * Both headers are corrupted. There is a possibility
++ * that this is a valid UBI PEB which has corresponding
++ * LEB, but the headers are corrupted. However, it is
++ * impossible to distinguish it from a PEB which just
++ * contains garbage because of a power cut during erase
++ * operation. So we just schedule this PEB for erasure.
++ *
++ * Besides, in case of NOR flash, we deliberately
++ * corrupt both headers because NOR flash erasure is
++ * slow and can start from the end.
++ */
++ err = 0;
++ else
++ /*
++ * The EC was OK, but the VID header is corrupted. We
++ * have to check what is in the data area.
++ */
++ err = check_corruption(ubi, vidh, pnum);
++
++ if (err < 0)
++ return err;
++ else if (!err)
++ /* This corruption is caused by a power cut */
++ err = add_to_list(si, pnum, ec, 1, &si->erase);
++ else
++ /* This is an unexpected corruption */
++ err = add_corrupted(si, pnum, ec);
+ if (err)
+ return err;
+ goto adjust_mean_ec;
+- } else if (err == UBI_IO_PEB_FREE) {
+- /* No VID header - the physical eraseblock is free */
+- err = add_to_list(si, pnum, ec, &si->free);
++ case UBI_IO_FF_BITFLIPS:
++ err = add_to_list(si, pnum, ec, 1, &si->erase);
+ if (err)
+ return err;
+ goto adjust_mean_ec;
++ case UBI_IO_FF:
++ if (ec_err)
++ err = add_to_list(si, pnum, ec, 1, &si->erase);
++ else
++ err = add_to_list(si, pnum, ec, 0, &si->free);
++ if (err)
++ return err;
++ goto adjust_mean_ec;
++ default:
++ ubi_err("'ubi_io_read_vid_hdr()' returned unknown code %d",
++ err);
++ return -EINVAL;
+ }
+
+ vol_id = be32_to_cpu(vidh->vol_id);
+@@ -810,11 +1017,11 @@ static int process_eb(struct ubi_device
+ switch (vidh->compat) {
+ case UBI_COMPAT_DELETE:
+ ubi_msg("\"delete\" compatible internal volume %d:%d"
+- " found, remove it", vol_id, lnum);
+- err = add_to_list(si, pnum, ec, &si->corr);
++ " found, will remove it", vol_id, lnum);
++ err = add_to_list(si, pnum, ec, 1, &si->erase);
+ if (err)
+ return err;
+- break;
++ return 0;
+
+ case UBI_COMPAT_RO:
+ ubi_msg("read-only compatible internal volume %d:%d"
+@@ -826,10 +1033,9 @@ static int process_eb(struct ubi_device
+ case UBI_COMPAT_PRESERVE:
+ ubi_msg("\"preserve\" compatible internal volume %d:%d"
+ " found", vol_id, lnum);
+- err = add_to_list(si, pnum, ec, &si->alien);
++ err = add_to_list(si, pnum, ec, 0, &si->alien);
+ if (err)
+ return err;
+- si->alien_peb_count += 1;
+ return 0;
+
+ case UBI_COMPAT_REJECT:
+@@ -839,13 +1045,15 @@ static int process_eb(struct ubi_device
+ }
+ }
+
+- /* Both UBI headers seem to be fine */
++ if (ec_err)
++ ubi_warn("valid VID header but corrupted EC header at PEB %d",
++ pnum);
+ err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips);
+ if (err)
+ return err;
+
+ adjust_mean_ec:
+- if (!ec_corr) {
++ if (!ec_err) {
+ si->ec_sum += ec;
+ si->ec_count += 1;
+ if (ec > si->max_ec)
+@@ -858,6 +1066,80 @@ adjust_mean_ec:
+ }
+
+ /**
++ * check_what_we_have - check what PEB were found by scanning.
++ * @ubi: UBI device description object
++ * @si: scanning information
++ *
++ * This is a helper function which takes a look what PEBs were found by
++ * scanning, and decides whether the flash is empty and should be formatted and
++ * whether there are too many corrupted PEBs and we should not attach this
++ * MTD device. Returns zero if we should proceed with attaching the MTD device,
++ * and %-EINVAL if we should not.
++ */
++static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si)
++{
++ struct ubi_scan_leb *seb;
++ int max_corr, peb_count;
++
++ peb_count = ubi->peb_count - si->bad_peb_count - si->alien_peb_count;
++ max_corr = peb_count / 20 ?: 8;
++
++ /*
++ * Few corrupted PEBs is not a problem and may be just a result of
++ * unclean reboots. However, many of them may indicate some problems
++ * with the flash HW or driver.
++ */
++ if (si->corr_peb_count) {
++ ubi_err("%d PEBs are corrupted and preserved",
++ si->corr_peb_count);
++ printk(KERN_ERR "Corrupted PEBs are:");
++ list_for_each_entry(seb, &si->corr, u.list)
++ printk(KERN_CONT " %d", seb->pnum);
++ printk(KERN_CONT "\n");
++
++ /*
++ * If too many PEBs are corrupted, we refuse attaching,
++ * otherwise, only print a warning.
++ */
++ if (si->corr_peb_count >= max_corr) {
++ ubi_err("too many corrupted PEBs, refusing");
++ return -EINVAL;
++ }
++ }
++
++ if (si->empty_peb_count + si->maybe_bad_peb_count == peb_count) {
++ /*
++ * All PEBs are empty, or almost all - a couple PEBs look like
++ * they may be bad PEBs which were not marked as bad yet.
++ *
++ * This piece of code basically tries to distinguish between
++ * the following situations:
++ *
++ * 1. Flash is empty, but there are few bad PEBs, which are not
++ * marked as bad so far, and which were read with error. We
++ * want to go ahead and format this flash. While formatting,
++ * the faulty PEBs will probably be marked as bad.
++ *
++ * 2. Flash contains non-UBI data and we do not want to format
++ * it and destroy possibly important information.
++ */
++ if (si->maybe_bad_peb_count <= 2) {
++ si->is_empty = 1;
++ ubi_msg("empty MTD device detected");
++ get_random_bytes(&ubi->image_seq,
++ sizeof(ubi->image_seq));
++ } else {
++ ubi_err("MTD device is not UBI-formatted and possibly "
++ "contains non-UBI data - refusing it");
++ return -EINVAL;
++ }
++
++ }
++
++ return 0;
++}
++
++/**
+ * ubi_scan - scan an MTD device.
+ * @ubi: UBI device description object
+ *
+@@ -881,12 +1163,17 @@ struct ubi_scan_info *ubi_scan(struct ub
+ INIT_LIST_HEAD(&si->erase);
+ INIT_LIST_HEAD(&si->alien);
+ si->volumes = RB_ROOT;
+- si->is_empty = 1;
+
+ err = -ENOMEM;
++ si->scan_leb_slab = kmem_cache_create("ubi_scan_leb_slab",
++ sizeof(struct ubi_scan_leb),
++ 0, 0, NULL);
++ if (!si->scan_leb_slab)
++ goto out_si;
++
+ ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+ if (!ech)
+- goto out_si;
++ goto out_slab;
+
+ vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+ if (!vidh)
+@@ -904,15 +1191,12 @@ struct ubi_scan_info *ubi_scan(struct ub
+ dbg_msg("scanning is finished");
+
+ /* Calculate mean erase counter */
+- if (si->ec_count) {
+- do_div(si->ec_sum, si->ec_count);
+- si->mean_ec = si->ec_sum;
+- }
++ if (si->ec_count)
++ si->mean_ec = div_u64(si->ec_sum, si->ec_count);
+
+- if (si->is_empty)
+- ubi_msg("empty MTD device detected");
+-
+- ubi->image_seq_set = 1;
++ err = check_what_we_have(ubi, si);
++ if (err)
++ goto out_vidh;
+
+ /*
+ * In case of unknown erase counter we use the mean erase counter
+@@ -938,11 +1222,8 @@ struct ubi_scan_info *ubi_scan(struct ub
+ seb->ec = si->mean_ec;
+
+ err = paranoid_check_si(ubi, si);
+- if (err) {
+- if (err > 0)
+- err = -EINVAL;
++ if (err)
+ goto out_vidh;
+- }
+
+ ubi_free_vid_hdr(ubi, vidh);
+ kfree(ech);
+@@ -953,6 +1234,8 @@ out_vidh:
+ ubi_free_vid_hdr(ubi, vidh);
+ out_ech:
+ kfree(ech);
++out_slab:
++ kmem_cache_destroy(si->scan_leb_slab);
+ out_si:
+ ubi_scan_destroy_si(si);
+ return ERR_PTR(err);
+@@ -961,11 +1244,12 @@ out_si:
+ /**
+ * destroy_sv - free the scanning volume information
+ * @sv: scanning volume information
++ * @si: scanning information
+ *
+ * This function destroys the volume RB-tree (@sv->root) and the scanning
+ * volume information.
+ */
+-static void destroy_sv(struct ubi_scan_volume *sv)
++static void destroy_sv(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
+ {
+ struct ubi_scan_leb *seb;
+ struct rb_node *this = sv->root.rb_node;
+@@ -985,7 +1269,7 @@ static void destroy_sv(struct ubi_scan_v
+ this->rb_right = NULL;
+ }
+
+- kfree(seb);
++ kmem_cache_free(si->scan_leb_slab, seb);
+ }
+ }
+ kfree(sv);
+@@ -1003,19 +1287,19 @@ void ubi_scan_destroy_si(struct ubi_scan
+
+ list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) {
+ list_del(&seb->u.list);
+- kfree(seb);
++ kmem_cache_free(si->scan_leb_slab, seb);
+ }
+ list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) {
+ list_del(&seb->u.list);
+- kfree(seb);
++ kmem_cache_free(si->scan_leb_slab, seb);
+ }
+ list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) {
+ list_del(&seb->u.list);
+- kfree(seb);
++ kmem_cache_free(si->scan_leb_slab, seb);
+ }
+ list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) {
+ list_del(&seb->u.list);
+- kfree(seb);
++ kmem_cache_free(si->scan_leb_slab, seb);
+ }
+
+ /* Destroy the volume RB-tree */
+@@ -1036,22 +1320,23 @@ void ubi_scan_destroy_si(struct ubi_scan
+ rb->rb_right = NULL;
+ }
+
+- destroy_sv(sv);
++ destroy_sv(si, sv);
+ }
+ }
+
++ kmem_cache_destroy(si->scan_leb_slab);
+ kfree(si);
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+
+ /**
+ * paranoid_check_si - check the scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+- * This function returns zero if the scanning information is all right, %1 if
+- * not and a negative error code if an error occurred.
++ * This function returns zero if the scanning information is all right, and a
++ * negative error code if not or if an error occurred.
+ */
+ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
+ {
+@@ -1061,6 +1346,9 @@ static int paranoid_check_si(struct ubi_
+ struct ubi_scan_leb *seb, *last_seb;
+ uint8_t *buf;
+
++ if (!(ubi_chk_flags & UBI_CHK_GEN))
++ return 0;
++
+ /*
+ * At first, check that scanning information is OK.
+ */
+@@ -1310,7 +1598,7 @@ bad_vid_hdr:
+
+ out:
+ ubi_dbg_dump_stack();
+- return 1;
++ return -EINVAL;
+ }
+
+-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
++#endif /* CONFIG_MTD_UBI_DEBUG */
+diff -uprN linux-2.6.28/drivers/mtd/ubi/scan.h ubifs-v2.6.28/drivers/mtd/ubi/scan.h
+--- linux-2.6.28/drivers/mtd/ubi/scan.h 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/scan.h 2011-06-15 14:22:07.000000000 -0400
+@@ -30,6 +30,7 @@
+ * @pnum: physical eraseblock number
+ * @lnum: logical eraseblock number
+ * @scrub: if this physical eraseblock needs scrubbing
++ * @copy_flag: this LEB is a copy (@copy_flag is set in VID header of this LEB)
+ * @sqnum: sequence number
+ * @u: unions RB-tree or @list links
+ * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
+@@ -42,7 +43,8 @@ struct ubi_scan_leb {
+ int ec;
+ int pnum;
+ int lnum;
+- int scrub;
++ unsigned int scrub:1;
++ unsigned int copy_flag:1;
+ unsigned long long sqnum;
+ union {
+ struct rb_node rb;
+@@ -91,10 +93,15 @@ struct ubi_scan_volume {
+ * @erase: list of physical eraseblocks which have to be erased
+ * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ * those belonging to "preserve"-compatible internal volumes)
++ * @corr_peb_count: count of PEBs in the @corr list
++ * @empty_peb_count: count of PEBs which are presumably empty (contain only
++ * 0xFF bytes)
++ * @alien_peb_count: count of PEBs in the @alien list
+ * @bad_peb_count: count of bad physical eraseblocks
++ * @maybe_bad_peb_count: count of bad physical eraseblocks which are not marked
++ * as bad yet, but which look like bad
+ * @vols_found: number of volumes found during scanning
+ * @highest_vol_id: highest volume ID
+- * @alien_peb_count: count of physical eraseblocks in the @alien list
+ * @is_empty: flag indicating whether the MTD device is empty or not
+ * @min_ec: lowest erase counter value
+ * @max_ec: highest erase counter value
+@@ -102,6 +109,7 @@ struct ubi_scan_volume {
+ * @mean_ec: mean erase counter value
+ * @ec_sum: a temporary variable used when calculating @mean_ec
+ * @ec_count: a temporary variable used when calculating @mean_ec
++ * @scan_leb_slab: slab cache for &struct ubi_scan_leb objects
+ *
+ * This data structure contains the result of scanning and may be used by other
+ * UBI sub-systems to build final UBI data structures, further error-recovery
+@@ -113,10 +121,13 @@ struct ubi_scan_info {
+ struct list_head free;
+ struct list_head erase;
+ struct list_head alien;
++ int corr_peb_count;
++ int empty_peb_count;
++ int alien_peb_count;
+ int bad_peb_count;
++ int maybe_bad_peb_count;
+ int vols_found;
+ int highest_vol_id;
+- int alien_peb_count;
+ int is_empty;
+ int min_ec;
+ int max_ec;
+@@ -124,6 +135,7 @@ struct ubi_scan_info {
+ int mean_ec;
+ uint64_t ec_sum;
+ int ec_count;
++ struct kmem_cache *scan_leb_slab;
+ };
+
+ struct ubi_device;
+@@ -133,7 +145,7 @@ struct ubi_vid_hdr;
+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
+ *
+ * @sv: volume scanning information
+- * @seb: scanning eraseblock infprmation
++ * @seb: scanning eraseblock information
+ * @list: the list to move to
+ */
+ static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv,
+diff -uprN linux-2.6.28/drivers/mtd/ubi/ubi.h ubifs-v2.6.28/drivers/mtd/ubi/ubi.h
+--- linux-2.6.28/drivers/mtd/ubi/ubi.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/ubi.h 2011-06-15 14:22:07.000000000 -0400
+@@ -36,8 +36,10 @@
+ #include <linux/device.h>
+ #include <linux/string.h>
+ #include <linux/vmalloc.h>
++#include <linux/notifier.h>
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/ubi.h>
++#include <asm/pgtable.h>
+
+ #include "ubi-media.h"
+ #include "scan.h"
+@@ -83,21 +85,26 @@
+ /*
+ * Error codes returned by the I/O sub-system.
+ *
+- * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
+- * %0xFF bytes
+- * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
+- * valid erase counter header, and the rest are %0xFF bytes
+- * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
+- * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
+- * CRC)
++ * UBI_IO_FF: the read region of flash contains only 0xFFs
++ * UBI_IO_FF_BITFLIPS: the same as %UBI_IO_FF, but also there was a data
++ * integrity error reported by the MTD driver
++ * (uncorrectable ECC error in case of NAND)
++ * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC)
++ * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a
++ * data integrity error reported by the MTD driver
++ * (uncorrectable ECC error in case of NAND)
+ * UBI_IO_BITFLIPS: bit-flips were detected and corrected
++ *
++ * Note, it is probably better to have bit-flip and ebadmsg as flags which can
++ * be or'ed with other error code. But this is a big change because there are
++ * many callers, so it is not worth the risk of introducing a bug
+ */
+ enum {
+- UBI_IO_PEB_EMPTY = 1,
+- UBI_IO_PEB_FREE,
+- UBI_IO_BAD_EC_HDR,
+- UBI_IO_BAD_VID_HDR,
+- UBI_IO_BITFLIPS
++ UBI_IO_FF = 1,
++ UBI_IO_FF_BITFLIPS,
++ UBI_IO_BAD_HDR,
++ UBI_IO_BAD_HDR_EBADMSG,
++ UBI_IO_BITFLIPS,
+ };
+
+ /*
+@@ -228,10 +235,7 @@ struct ubi_volume_desc;
+ * @upd_marker: %1 if the update marker is set for this volume
+ * @updating: %1 if the volume is being updated
+ * @changing_leb: %1 if the atomic LEB change ioctl command is in progress
+- *
+- * @gluebi_desc: gluebi UBI volume descriptor
+- * @gluebi_refcount: reference count of the gluebi MTD device
+- * @gluebi_mtd: MTD device description object of the gluebi MTD device
++ * @direct_writes: %1 if direct writes are enabled for this volume
+ *
+ * The @corrupted field indicates that the volume's contents is corrupted.
+ * Since UBI protects only static volumes, this field is not relevant to
+@@ -275,17 +279,7 @@ struct ubi_volume {
+ unsigned int upd_marker:1;
+ unsigned int updating:1;
+ unsigned int changing_leb:1;
+-
+-#ifdef CONFIG_MTD_UBI_GLUEBI
+- /*
+- * Gluebi-related stuff may be compiled out.
+- * Note: this should not be built into UBI but should be a separate
+- * ubimtd driver which works on top of UBI and emulates MTD devices.
+- */
+- struct ubi_volume_desc *gluebi_desc;
+- int gluebi_refcount;
+- struct mtd_info gluebi_mtd;
+-#endif
++ unsigned int direct_writes:1;
+ };
+
+ /**
+@@ -314,7 +308,6 @@ struct ubi_wl_entry;
+ * @vol->ref_count, @vol->mapping and @vol->eba_tbl.
+ * @ref_count: count of references on the UBI device
+ * @image_seq: image sequence number recorded on EC headers
+- * @image_seq_set: indicates @image_seq is known
+ *
+ * @rsvd_pebs: count of reserved physical eraseblocks
+ * @avail_pebs: count of available physical eraseblocks
+@@ -327,8 +320,9 @@ struct ubi_wl_entry;
+ * @vtbl_slots: how many slots are available in the volume table
+ * @vtbl_size: size of the volume table in bytes
+ * @vtbl: in-RAM volume table copy
+- * @volumes_mutex: protects on-flash volume table and serializes volume
+- * changes, like creation, deletion, update, re-size and re-name
++ * @device_mutex: protects on-flash volume table and serializes volume
++ * creation, deletion, update, re-size, re-name and set
++ * property
+ *
+ * @max_ec: current highest erase counter value
+ * @mean_ec: current mean erase counter value
+@@ -346,8 +340,8 @@ struct ubi_wl_entry;
+ * protected from the wear-leveling worker)
+ * @pq_head: protection queue head
+ * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from,
+- * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works,
+- * @erroneous, and @erroneous_peb_count fields
++ * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works,
++ * @erroneous, and @erroneous_peb_count fields
+ * @move_mutex: serializes eraseblock moves
+ * @work_sem: synchronizes the WL worker with use tasks
+ * @wl_scheduled: non-zero if the wear-leveling was scheduled
+@@ -367,6 +361,8 @@ struct ubi_wl_entry;
+ * @peb_size: physical eraseblock size
+ * @bad_peb_count: count of bad physical eraseblocks
+ * @good_peb_count: count of good physical eraseblocks
++ * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not
++ * used by UBI)
+ * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
+ * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
+ * @min_io_size: minimal input/output unit size of the underlying MTD device
+@@ -384,15 +380,15 @@ struct ubi_wl_entry;
+ * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
+ * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or
+ * not
++ * @nor_flash: non-zero if working on top of NOR flash
++ * @max_write_size: maximum amount of bytes the underlying flash can write at a
++ * time (MTD write buffer size)
+ * @mtd: MTD device descriptor
+ *
+ * @peb_buf1: a buffer of PEB size used for different purposes
+ * @peb_buf2: another buffer of PEB size used for different purposes
+ * @buf_mutex: protects @peb_buf1 and @peb_buf2
+ * @ckvol_mutex: serializes static volume checking when opening
+- * @mult_mutex: serializes operations on multiple volumes, like re-naming
+- * @dbg_peb_buf: buffer of PEB size used for debugging
+- * @dbg_buf_mutex: protects @dbg_peb_buf
+ */
+ struct ubi_device {
+ struct cdev cdev;
+@@ -404,7 +400,6 @@ struct ubi_device {
+ spinlock_t volumes_lock;
+ int ref_count;
+ int image_seq;
+- int image_seq_set;
+
+ int rsvd_pebs;
+ int avail_pebs;
+@@ -415,7 +410,7 @@ struct ubi_device {
+ int vtbl_slots;
+ int vtbl_size;
+ struct ubi_vtbl_record *vtbl;
+- struct mutex volumes_mutex;
++ struct mutex device_mutex;
+
+ int max_ec;
+ /* Note, mean_ec is not updated run-time - should be fixed */
+@@ -454,6 +449,7 @@ struct ubi_device {
+ int peb_size;
+ int bad_peb_count;
+ int good_peb_count;
++ int corr_peb_count;
+ int erroneous_peb_count;
+ int max_erroneous;
+ int min_io_size;
+@@ -466,26 +462,24 @@ struct ubi_device {
+ int vid_hdr_offset;
+ int vid_hdr_aloffset;
+ int vid_hdr_shift;
+- int bad_allowed;
++ unsigned int bad_allowed:1;
++ unsigned int nor_flash:1;
++ int max_write_size;
+ struct mtd_info *mtd;
+
+ void *peb_buf1;
+ void *peb_buf2;
+ struct mutex buf_mutex;
+ struct mutex ckvol_mutex;
+- struct mutex mult_mutex;
+-#ifdef CONFIG_MTD_UBI_DEBUG
+- void *dbg_peb_buf;
+- struct mutex dbg_buf_mutex;
+-#endif
+ };
+
+ extern struct kmem_cache *ubi_wl_entry_slab;
+-extern struct file_operations ubi_ctrl_cdev_operations;
+-extern struct file_operations ubi_cdev_operations;
+-extern struct file_operations ubi_vol_cdev_operations;
++extern const struct file_operations ubi_ctrl_cdev_operations;
++extern const struct file_operations ubi_cdev_operations;
++extern const struct file_operations ubi_vol_cdev_operations;
+ extern struct class *ubi_class;
+ extern struct mutex ubi_devices_mutex;
++extern struct blocking_notifier_head ubi_notifiers;
+
+ /* vtbl.c */
+ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
+@@ -517,17 +511,7 @@ int ubi_calc_data_len(const struct ubi_d
+ int length);
+ int ubi_check_volume(struct ubi_device *ubi, int vol_id);
+ void ubi_calculate_reserved(struct ubi_device *ubi);
+-
+-/* gluebi.c */
+-#ifdef CONFIG_MTD_UBI_GLUEBI
+-int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol);
+-int ubi_destroy_gluebi(struct ubi_volume *vol);
+-void ubi_gluebi_updated(struct ubi_volume *vol);
+-#else
+-#define ubi_create_gluebi(ubi, vol) 0
+-#define ubi_destroy_gluebi(vol) 0
+-#define ubi_gluebi_updated(vol)
+-#endif
++int ubi_check_pattern(const void *buf, uint8_t patt, int size);
+
+ /* eba.c */
+ int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
+@@ -578,6 +562,16 @@ struct ubi_device *ubi_get_device(int ub
+ void ubi_put_device(struct ubi_device *ubi);
+ struct ubi_device *ubi_get_by_major(int major);
+ int ubi_major2num(int major);
++int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol,
++ int ntype);
++int ubi_notify_all(struct ubi_device *ubi, int ntype,
++ struct notifier_block *nb);
++int ubi_enumerate_volumes(struct notifier_block *nb);
++
++/* kapi.c */
++void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di);
++void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
++ struct ubi_volume_info *vi);
+
+ /*
+ * ubi_rb_for_each_entry - walk an RB-tree.
+@@ -590,7 +584,8 @@ int ubi_major2num(int major);
+ for (rb = rb_first(root), \
+ pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \
+ rb; \
+- rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member))
++ rb = rb_next(rb), \
++ pos = (rb ? container_of(rb, typeof(*pos), member) : NULL))
+
+ /**
+ * ubi_zalloc_vid_hdr - allocate a volume identifier header object.
+diff -uprN linux-2.6.28/drivers/mtd/ubi/ubi-media.h ubifs-v2.6.28/drivers/mtd/ubi/ubi-media.h
+--- linux-2.6.28/drivers/mtd/ubi/ubi-media.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/ubi-media.h 2011-06-15 14:22:07.000000000 -0400
+@@ -136,7 +136,7 @@ enum {
+ * The erase counter header takes 64 bytes and has a plenty of unused space for
+ * future usage. The unused fields are zeroed. The @version field is used to
+ * indicate the version of UBI implementation which is supposed to be able to
+- * work with this UBI image. If @version is greater then the current UBI
++ * work with this UBI image. If @version is greater than the current UBI
+ * version, the image is rejected. This may be useful in future if something
+ * is changed radically. This field is duplicated in the volume identifier
+ * header.
+@@ -164,7 +164,7 @@ struct ubi_ec_hdr {
+ __be32 image_seq;
+ __u8 padding2[32];
+ __be32 hdr_crc;
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubi_vid_hdr - on-flash UBI volume identifier header.
+@@ -197,7 +197,7 @@ struct ubi_ec_hdr {
+ * (sequence number) is used to distinguish between older and newer versions of
+ * logical eraseblocks.
+ *
+- * There are 2 situations when there may be more then one physical eraseblock
++ * There are 2 situations when there may be more than one physical eraseblock
+ * corresponding to the same logical eraseblock, i.e., having the same @vol_id
+ * and @lnum values in the volume identifier header. Suppose we have a logical
+ * eraseblock L and it is mapped to the physical eraseblock P.
+@@ -292,7 +292,7 @@ struct ubi_vid_hdr {
+ __be64 sqnum;
+ __u8 padding3[12];
+ __be32 hdr_crc;
+-} __attribute__ ((packed));
++} __packed;
+
+ /* Internal UBI volumes count */
+ #define UBI_INT_VOL_COUNT 1
+@@ -373,6 +373,6 @@ struct ubi_vtbl_record {
+ __u8 flags;
+ __u8 padding[23];
+ __be32 crc;
+-} __attribute__ ((packed));
++} __packed;
+
+ #endif /* !__UBI_MEDIA_H__ */
+diff -uprN linux-2.6.28/drivers/mtd/ubi/upd.c ubifs-v2.6.28/drivers/mtd/ubi/upd.c
+--- linux-2.6.28/drivers/mtd/ubi/upd.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/upd.c 2011-06-15 14:22:07.000000000 -0400
+@@ -40,7 +40,7 @@
+
+ #include <linux/err.h>
+ #include <linux/uaccess.h>
+-#include <asm/div64.h>
++#include <linux/math64.h>
+ #include "ubi.h"
+
+ /**
+@@ -68,10 +68,10 @@ static int set_update_marker(struct ubi_
+ sizeof(struct ubi_vtbl_record));
+ vtbl_rec.upd_marker = 1;
+
+- mutex_lock(&ubi->volumes_mutex);
++ mutex_lock(&ubi->device_mutex);
+ err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec);
+- mutex_unlock(&ubi->volumes_mutex);
+ vol->upd_marker = 1;
++ mutex_unlock(&ubi->device_mutex);
+ return err;
+ }
+
+@@ -89,7 +89,6 @@ static int clear_update_marker(struct ub
+ long long bytes)
+ {
+ int err;
+- uint64_t tmp;
+ struct ubi_vtbl_record vtbl_rec;
+
+ dbg_gen("clear update marker for volume %d", vol->vol_id);
+@@ -101,19 +100,19 @@ static int clear_update_marker(struct ub
+
+ if (vol->vol_type == UBI_STATIC_VOLUME) {
+ vol->corrupted = 0;
+- vol->used_bytes = tmp = bytes;
+- vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size);
+- vol->used_ebs = tmp;
++ vol->used_bytes = bytes;
++ vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size,
++ &vol->last_eb_bytes);
+ if (vol->last_eb_bytes)
+ vol->used_ebs += 1;
+ else
+ vol->last_eb_bytes = vol->usable_leb_size;
+ }
+
+- mutex_lock(&ubi->volumes_mutex);
++ mutex_lock(&ubi->device_mutex);
+ err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec);
+- mutex_unlock(&ubi->volumes_mutex);
+ vol->upd_marker = 0;
++ mutex_unlock(&ubi->device_mutex);
+ return err;
+ }
+
+@@ -131,7 +130,6 @@ int ubi_start_update(struct ubi_device *
+ long long bytes)
+ {
+ int i, err;
+- uint64_t tmp;
+
+ dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
+ ubi_assert(!vol->updating && !vol->changing_leb);
+@@ -149,21 +147,23 @@ int ubi_start_update(struct ubi_device *
+ }
+
+ if (bytes == 0) {
++ err = ubi_wl_flush(ubi);
++ if (err)
++ return err;
++
+ err = clear_update_marker(ubi, vol, 0);
+ if (err)
+ return err;
+- err = ubi_wl_flush(ubi);
+- if (!err)
+- vol->updating = 0;
++ vol->updating = 0;
++ return 0;
+ }
+
+ vol->upd_buf = vmalloc(ubi->leb_size);
+ if (!vol->upd_buf)
+ return -ENOMEM;
+
+- tmp = bytes;
+- vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size);
+- vol->upd_ebs += tmp;
++ vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1,
++ vol->usable_leb_size);
+ vol->upd_bytes = bytes;
+ vol->upd_received = 0;
+ return 0;
+@@ -282,7 +282,6 @@ static int write_leb(struct ubi_device *
+ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
+ const void __user *buf, int count)
+ {
+- uint64_t tmp;
+ int lnum, offs, err = 0, len, to_write = count;
+
+ dbg_gen("write %d of %lld bytes, %lld already passed",
+@@ -291,10 +290,7 @@ int ubi_more_update_data(struct ubi_devi
+ if (ubi->ro_mode)
+ return -EROFS;
+
+- tmp = vol->upd_received;
+- offs = do_div(tmp, vol->usable_leb_size);
+- lnum = tmp;
+-
++ lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs);
+ if (vol->upd_received + count > vol->upd_bytes)
+ to_write = count = vol->upd_bytes - vol->upd_received;
+
+@@ -369,16 +365,16 @@ int ubi_more_update_data(struct ubi_devi
+
+ ubi_assert(vol->upd_received <= vol->upd_bytes);
+ if (vol->upd_received == vol->upd_bytes) {
++ err = ubi_wl_flush(ubi);
++ if (err)
++ return err;
+ /* The update is finished, clear the update marker */
+ err = clear_update_marker(ubi, vol, vol->upd_bytes);
+ if (err)
+ return err;
+- err = ubi_wl_flush(ubi);
+- if (err == 0) {
+- vol->updating = 0;
+- err = to_write;
+- vfree(vol->upd_buf);
+- }
++ vol->updating = 0;
++ err = to_write;
++ vfree(vol->upd_buf);
+ }
+
+ return err;
+diff -uprN linux-2.6.28/drivers/mtd/ubi/vmt.c ubifs-v2.6.28/drivers/mtd/ubi/vmt.c
+--- linux-2.6.28/drivers/mtd/ubi/vmt.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/vmt.c 2011-06-15 14:22:07.000000000 -0400
+@@ -24,10 +24,10 @@
+ */
+
+ #include <linux/err.h>
+-#include <asm/div64.h>
++#include <linux/math64.h>
+ #include "ubi.h"
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+ static int paranoid_check_volumes(struct ubi_device *ubi);
+ #else
+ #define paranoid_check_volumes(ubi) 0
+@@ -198,14 +198,13 @@ static void volume_sysfs_close(struct ub
+ * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume
+ * and saves it in @req->vol_id. Returns zero in case of success and a negative
+ * error code in case of failure. Note, the caller has to have the
+- * @ubi->volumes_mutex locked.
++ * @ubi->device_mutex locked.
+ */
+ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
+ {
+ int i, err, vol_id = req->vol_id, do_free = 1;
+ struct ubi_volume *vol;
+ struct ubi_vtbl_record vtbl_rec;
+- uint64_t bytes;
+ dev_t dev;
+
+ if (ubi->ro_mode)
+@@ -233,8 +232,8 @@ int ubi_create_volume(struct ubi_device
+ req->vol_id = vol_id;
+ }
+
+- dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
+- vol_id, (unsigned long long)req->bytes,
++ dbg_gen("create device %d, volume %d, %llu bytes, type %d, name %s",
++ ubi->ubi_num, vol_id, (unsigned long long)req->bytes,
+ (int)req->vol_type, req->name);
+
+ /* Ensure that this volume does not exist */
+@@ -255,14 +254,15 @@ int ubi_create_volume(struct ubi_device
+
+ /* Calculate how many eraseblocks are requested */
+ vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
+- bytes = req->bytes;
+- if (do_div(bytes, vol->usable_leb_size))
+- vol->reserved_pebs = 1;
+- vol->reserved_pebs += bytes;
++ vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1,
++ vol->usable_leb_size);
+
+ /* Reserve physical eraseblocks */
+ if (vol->reserved_pebs > ubi->avail_pebs) {
+ dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
++ if (ubi->corr_peb_count)
++ dbg_err("%d PEBs are corrupted and not used",
++ ubi->corr_peb_count);
+ err = -ENOSPC;
+ goto out_unlock;
+ }
+@@ -301,10 +301,10 @@ int ubi_create_volume(struct ubi_device
+ vol->used_bytes =
+ (long long)vol->used_ebs * vol->usable_leb_size;
+ } else {
+- bytes = vol->used_bytes;
+- vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size);
+- vol->used_ebs = bytes;
+- if (vol->last_eb_bytes)
++ vol->used_ebs = div_u64_rem(vol->used_bytes,
++ vol->usable_leb_size,
++ &vol->last_eb_bytes);
++ if (vol->last_eb_bytes != 0)
+ vol->used_ebs += 1;
+ else
+ vol->last_eb_bytes = vol->usable_leb_size;
+@@ -320,10 +320,6 @@ int ubi_create_volume(struct ubi_device
+ goto out_mapping;
+ }
+
+- err = ubi_create_gluebi(ubi, vol);
+- if (err)
+- goto out_cdev;
+-
+ vol->dev.release = vol_release;
+ vol->dev.parent = &ubi->dev;
+ vol->dev.devt = dev;
+@@ -333,7 +329,7 @@ int ubi_create_volume(struct ubi_device
+ err = device_register(&vol->dev);
+ if (err) {
+ ubi_err("cannot register device");
+- goto out_gluebi;
++ goto out_cdev;
+ }
+
+ err = volume_sysfs_init(ubi, vol);
+@@ -361,7 +357,9 @@ int ubi_create_volume(struct ubi_device
+ ubi->vol_count += 1;
+ spin_unlock(&ubi->volumes_lock);
+
+- err = paranoid_check_volumes(ubi);
++ ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED);
++ if (paranoid_check_volumes(ubi))
++ dbg_err("check failed while creating volume %d", vol_id);
+ return err;
+
+ out_sysfs:
+@@ -376,10 +374,6 @@ out_sysfs:
+ do_free = 0;
+ get_device(&vol->dev);
+ volume_sysfs_close(vol);
+-out_gluebi:
+- if (ubi_destroy_gluebi(vol))
+- dbg_err("cannot destroy gluebi for volume %d:%d",
+- ubi->ubi_num, vol_id);
+ out_cdev:
+ cdev_del(&vol->cdev);
+ out_mapping:
+@@ -406,7 +400,7 @@ out_unlock:
+ *
+ * This function removes volume described by @desc. The volume has to be opened
+ * in "exclusive" mode. Returns zero in case of success and a negative error
+- * code in case of failure. The caller has to have the @ubi->volumes_mutex
++ * code in case of failure. The caller has to have the @ubi->device_mutex
+ * locked.
+ */
+ int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
+@@ -415,7 +409,7 @@ int ubi_remove_volume(struct ubi_volume_
+ struct ubi_device *ubi = vol->ubi;
+ int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
+
+- dbg_gen("remove UBI volume %d", vol_id);
++ dbg_gen("remove device %d, volume %d", ubi->ubi_num, vol_id);
+ ubi_assert(desc->mode == UBI_EXCLUSIVE);
+ ubi_assert(vol == ubi->volumes[vol_id]);
+
+@@ -434,10 +428,6 @@ int ubi_remove_volume(struct ubi_volume_
+ ubi->volumes[vol_id] = NULL;
+ spin_unlock(&ubi->volumes_lock);
+
+- err = ubi_destroy_gluebi(vol);
+- if (err)
+- goto out_err;
+-
+ if (!no_vtbl) {
+ err = ubi_change_vtbl_record(ubi, vol_id, NULL);
+ if (err)
+@@ -468,8 +458,10 @@ int ubi_remove_volume(struct ubi_volume_
+ ubi->vol_count -= 1;
+ spin_unlock(&ubi->volumes_lock);
+
+- if (!no_vtbl)
+- err = paranoid_check_volumes(ubi);
++ ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED);
++ if (!no_vtbl && paranoid_check_volumes(ubi))
++ dbg_err("check failed while removing volume %d", vol_id);
++
+ return err;
+
+ out_err:
+@@ -488,7 +480,7 @@ out_unlock:
+ *
+ * This function re-sizes the volume and returns zero in case of success, and a
+ * negative error code in case of failure. The caller has to have the
+- * @ubi->volumes_mutex locked.
++ * @ubi->device_mutex locked.
+ */
+ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
+ {
+@@ -501,8 +493,8 @@ int ubi_resize_volume(struct ubi_volume_
+ if (ubi->ro_mode)
+ return -EROFS;
+
+- dbg_gen("re-size volume %d to from %d to %d PEBs",
+- vol_id, vol->reserved_pebs, reserved_pebs);
++ dbg_gen("re-size device %d, volume %d to from %d to %d PEBs",
++ ubi->ubi_num, vol_id, vol->reserved_pebs, reserved_pebs);
+
+ if (vol->vol_type == UBI_STATIC_VOLUME &&
+ reserved_pebs < vol->used_ebs) {
+@@ -537,6 +529,9 @@ int ubi_resize_volume(struct ubi_volume_
+ if (pebs > ubi->avail_pebs) {
+ dbg_err("not enough PEBs: requested %d, available %d",
+ pebs, ubi->avail_pebs);
++ if (ubi->corr_peb_count)
++ dbg_err("%d PEBs are corrupted and not used",
++ ubi->corr_peb_count);
+ spin_unlock(&ubi->volumes_lock);
+ err = -ENOSPC;
+ goto out_free;
+@@ -590,7 +585,9 @@ int ubi_resize_volume(struct ubi_volume_
+ (long long)vol->used_ebs * vol->usable_leb_size;
+ }
+
+- err = paranoid_check_volumes(ubi);
++ ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED);
++ if (paranoid_check_volumes(ubi))
++ dbg_err("check failed while re-sizing volume %d", vol_id);
+ return err;
+
+ out_acc:
+@@ -635,11 +632,12 @@ int ubi_rename_volumes(struct ubi_device
+ vol->name_len = re->new_name_len;
+ memcpy(vol->name, re->new_name, re->new_name_len + 1);
+ spin_unlock(&ubi->volumes_lock);
++ ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED);
+ }
+ }
+
+- if (!err)
+- err = paranoid_check_volumes(ubi);
++ if (!err && paranoid_check_volumes(ubi))
++ dbg_err("check failed while re-naming volumes");
+ return err;
+ }
+
+@@ -670,10 +668,6 @@ int ubi_add_volume(struct ubi_device *ub
+ return err;
+ }
+
+- err = ubi_create_gluebi(ubi, vol);
+- if (err)
+- goto out_cdev;
+-
+ vol->dev.release = vol_release;
+ vol->dev.parent = &ubi->dev;
+ vol->dev.devt = dev;
+@@ -681,21 +675,19 @@ int ubi_add_volume(struct ubi_device *ub
+ sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id);
+ err = device_register(&vol->dev);
+ if (err)
+- goto out_gluebi;
++ goto out_cdev;
+
+ err = volume_sysfs_init(ubi, vol);
+ if (err) {
+ cdev_del(&vol->cdev);
+- err = ubi_destroy_gluebi(vol);
+ volume_sysfs_close(vol);
+ return err;
+ }
+
+- err = paranoid_check_volumes(ubi);
++ if (paranoid_check_volumes(ubi))
++ dbg_err("check failed while adding volume %d", vol_id);
+ return err;
+
+-out_gluebi:
+- err = ubi_destroy_gluebi(vol);
+ out_cdev:
+ cdev_del(&vol->cdev);
+ return err;
+@@ -711,17 +703,14 @@ out_cdev:
+ */
+ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
+ {
+- int err;
+-
+ dbg_gen("free volume %d", vol->vol_id);
+
+ ubi->volumes[vol->vol_id] = NULL;
+- err = ubi_destroy_gluebi(vol);
+ cdev_del(&vol->cdev);
+ volume_sysfs_close(vol);
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+
+ /**
+ * paranoid_check_volume - check volume information.
+@@ -800,11 +789,6 @@ static int paranoid_check_volume(struct
+ goto fail;
+ }
+
+- if (!vol->name) {
+- ubi_err("NULL volume name");
+- goto fail;
+- }
+-
+ n = strnlen(vol->name, vol->name_len + 1);
+ if (n != vol->name_len) {
+ ubi_err("bad name_len %lld", n);
+@@ -871,6 +855,7 @@ fail:
+ if (vol)
+ ubi_dbg_dump_vol_info(vol);
+ ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
++ dump_stack();
+ spin_unlock(&ubi->volumes_lock);
+ return -EINVAL;
+ }
+@@ -885,6 +870,9 @@ static int paranoid_check_volumes(struct
+ {
+ int i, err = 0;
+
++ if (!(ubi_chk_flags & UBI_CHK_GEN))
++ return 0;
++
+ for (i = 0; i < ubi->vtbl_slots; i++) {
+ err = paranoid_check_volume(ubi, i);
+ if (err)
+diff -uprN linux-2.6.28/drivers/mtd/ubi/vtbl.c ubifs-v2.6.28/drivers/mtd/ubi/vtbl.c
+--- linux-2.6.28/drivers/mtd/ubi/vtbl.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/drivers/mtd/ubi/vtbl.c 2011-06-15 14:22:07.000000000 -0400
+@@ -61,7 +61,7 @@
+ #include <asm/div64.h>
+ #include "ubi.h"
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+ static void paranoid_vtbl_check(const struct ubi_device *ubi);
+ #else
+ #define paranoid_vtbl_check(ubi)
+@@ -365,7 +365,7 @@ write_error:
+ * Probably this physical eraseblock went bad, try to pick
+ * another one.
+ */
+- list_add_tail(&new_seb->u.list, &si->corr);
++ list_add(&new_seb->u.list, &si->erase);
+ goto retry;
+ }
+ kfree(new_seb);
+@@ -413,7 +413,7 @@ static struct ubi_vtbl_record *process_l
+ * 0 contains more recent information.
+ *
+ * So the plan is to first check LEB 0. Then
+- * a. if LEB 0 is OK, it must be containing the most resent data; then
++ * a. if LEB 0 is OK, it must be containing the most recent data; then
+ * we compare it with LEB 1, and if they are different, we copy LEB
+ * 0 to LEB 1;
+ * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1
+@@ -566,6 +566,7 @@ static int init_volumes(struct ubi_devic
+ vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
+ vol->alignment = be32_to_cpu(vtbl[i].alignment);
+ vol->data_pad = be32_to_cpu(vtbl[i].data_pad);
++ vol->upd_marker = vtbl[i].upd_marker;
+ vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ?
+ UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
+ vol->name_len = be16_to_cpu(vtbl[i].name_len);
+@@ -577,7 +578,7 @@ static int init_volumes(struct ubi_devic
+ if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) {
+ /* Auto re-size flag may be set only for one volume */
+ if (ubi->autoresize_vol_id != -1) {
+- ubi_err("more then one auto-resize volume (%d "
++ ubi_err("more than one auto-resize volume (%d "
+ "and %d)", ubi->autoresize_vol_id, i);
+ kfree(vol);
+ return -EINVAL;
+@@ -660,9 +661,13 @@ static int init_volumes(struct ubi_devic
+ ubi->vol_count += 1;
+ vol->ubi = ubi;
+
+- if (reserved_pebs > ubi->avail_pebs)
++ if (reserved_pebs > ubi->avail_pebs) {
+ ubi_err("not enough PEBs, required %d, available %d",
+ reserved_pebs, ubi->avail_pebs);
++ if (ubi->corr_peb_count)
++ ubi_err("%d PEBs are corrupted and not used",
++ ubi->corr_peb_count);
++ }
+ ubi->rsvd_pebs += reserved_pebs;
+ ubi->avail_pebs -= reserved_pebs;
+
+@@ -835,7 +840,7 @@ int ubi_read_volume_table(struct ubi_dev
+ return PTR_ERR(ubi->vtbl);
+ }
+
+- ubi->avail_pebs = ubi->good_peb_count;
++ ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;
+
+ /*
+ * The layout volume is OK, initialize the corresponding in-RAM data
+@@ -846,7 +851,7 @@ int ubi_read_volume_table(struct ubi_dev
+ goto out_free;
+
+ /*
+- * Get sure that the scanning information is consistent to the
++ * Make sure that the scanning information is consistent to the
+ * information stored in the volume table.
+ */
+ err = check_scanning_info(ubi, si);
+@@ -864,7 +869,7 @@ out_free:
+ return err;
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+
+ /**
+ * paranoid_vtbl_check - check volume table.
+@@ -872,10 +877,13 @@ out_free:
+ */
+ static void paranoid_vtbl_check(const struct ubi_device *ubi)
+ {
++ if (!(ubi_chk_flags & UBI_CHK_GEN))
++ return;
++
+ if (vtbl_check(ubi, ubi->vtbl)) {
+ ubi_err("paranoid check failed");
+ BUG();
+ }
+ }
+
+-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
++#endif /* CONFIG_MTD_UBI_DEBUG */
+diff -uprN linux-2.6.28/drivers/mtd/ubi/wl.c ubifs-v2.6.28/drivers/mtd/ubi/wl.c
+--- linux-2.6.28/drivers/mtd/ubi/wl.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/drivers/mtd/ubi/wl.c 2011-06-15 14:22:07.000000000 -0400
+@@ -130,7 +130,7 @@
+ * situation when the picked physical eraseblock is constantly erased after the
+ * data is written to it. So, we have a constant which limits the highest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system
+- * does not pick eraseblocks with erase counter greater then the lowest erase
++ * does not pick eraseblocks with erase counter greater than the lowest erase
+ * counter plus %WL_FREE_MAX_DIFF.
+ */
+ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
+@@ -161,7 +161,7 @@ struct ubi_work {
+ int torture;
+ };
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
+ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
+ struct rb_root *root);
+@@ -350,7 +350,7 @@ static void prot_queue_add(struct ubi_de
+ * @max: highest possible erase counter
+ *
+ * This function looks for a wear leveling entry with erase counter closest to
+- * @max and less then @max.
++ * @max and less than @max.
+ */
+ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
+ {
+@@ -459,6 +459,14 @@ retry:
+ dbg_wl("PEB %d EC %d", e->pnum, e->ec);
+ prot_queue_add(ubi, e);
+ spin_unlock(&ubi->wl_lock);
++
++ err = ubi_dbg_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
++ ubi->peb_size - ubi->vid_hdr_aloffset);
++ if (err) {
++ ubi_err("new PEB %d does not contain all 0xFF bytes", e->pnum);
++ return err;
++ }
++
+ return e->pnum;
+ }
+
+@@ -505,7 +513,7 @@ static int sync_erase(struct ubi_device
+ dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
+
+ err = paranoid_check_ec(ubi, e->pnum, e->ec);
+- if (err > 0)
++ if (err)
+ return -EINVAL;
+
+ ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
+@@ -605,7 +613,7 @@ static void schedule_ubi_work(struct ubi
+ list_add_tail(&wrk->list, &ubi->works);
+ ubi_assert(ubi->works_count >= 0);
+ ubi->works_count += 1;
+- if (ubi->thread_enabled)
++ if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled())
+ wake_up_process(ubi->bgt_thread);
+ spin_unlock(&ubi->wl_lock);
+ }
+@@ -656,6 +664,7 @@ static int wear_leveling_worker(struct u
+ int cancel)
+ {
+ int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
++ int vol_id = -1, uninitialized_var(lnum);
+ struct ubi_wl_entry *e1, *e2;
+ struct ubi_vid_hdr *vid_hdr;
+
+@@ -736,7 +745,7 @@ static int wear_leveling_worker(struct u
+
+ err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
+ if (err && err != UBI_IO_BITFLIPS) {
+- if (err == UBI_IO_PEB_FREE) {
++ if (err == UBI_IO_FF) {
+ /*
+ * We are trying to move PEB without a VID header. UBI
+ * always write VID headers shortly after the PEB was
+@@ -750,6 +759,16 @@ static int wear_leveling_worker(struct u
+ dbg_wl("PEB %d has no VID header", e1->pnum);
+ protect = 1;
+ goto out_not_moved;
++ } else if (err == UBI_IO_FF_BITFLIPS) {
++ /*
++ * The same situation as %UBI_IO_FF, but bit-flips were
++ * detected. It is better to schedule this PEB for
++ * scrubbing.
++ */
++ dbg_wl("PEB %d has no VID header but has bit-flips",
++ e1->pnum);
++ scrubbing = 1;
++ goto out_not_moved;
+ }
+
+ ubi_err("error %d while reading VID header from PEB %d",
+@@ -757,6 +776,9 @@ static int wear_leveling_worker(struct u
+ goto out_error;
+ }
+
++ vol_id = be32_to_cpu(vid_hdr->vol_id);
++ lnum = be32_to_cpu(vid_hdr->lnum);
++
+ err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
+ if (err) {
+ if (err == MOVE_CANCEL_RACE) {
+@@ -773,7 +795,9 @@ static int wear_leveling_worker(struct u
+
+ if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
+ err == MOVE_TARGET_RD_ERR) {
+- /* Target PEB bit-flips or write error, torture it */
++ /*
++ * Target PEB had bit-flips or write error - torture it.
++ */
+ torture = 1;
+ goto out_not_moved;
+ }
+@@ -803,10 +827,10 @@ static int wear_leveling_worker(struct u
+ }
+
+ /* The PEB has been successfully moved */
+- ubi_free_vid_hdr(ubi, vid_hdr);
+ if (scrubbing)
+- ubi_msg("scrubbed PEB %d, data moved to PEB %d",
+- e1->pnum, e2->pnum);
++ ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
++ e1->pnum, vol_id, lnum, e2->pnum);
++ ubi_free_vid_hdr(ubi, vid_hdr);
+
+ spin_lock(&ubi->wl_lock);
+ if (!ubi->move_to_put) {
+@@ -830,7 +854,8 @@ static int wear_leveling_worker(struct u
+ * Well, the target PEB was put meanwhile, schedule it for
+ * erasure.
+ */
+- dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
++ dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
++ e2->pnum, vol_id, lnum);
+ err = schedule_erase(ubi, e2, 0);
+ if (err) {
+ kmem_cache_free(ubi_wl_entry_slab, e2);
+@@ -848,8 +873,12 @@ static int wear_leveling_worker(struct u
+ * have been changed, schedule it for erasure.
+ */
+ out_not_moved:
+- dbg_wl("cancel moving PEB %d to PEB %d (%d)",
+- e1->pnum, e2->pnum, err);
++ if (vol_id != -1)
++ dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
++ e1->pnum, vol_id, lnum, e2->pnum, err);
++ else
++ dbg_wl("cancel moving PEB %d to PEB %d (%d)",
++ e1->pnum, e2->pnum, err);
+ spin_lock(&ubi->wl_lock);
+ if (protect)
+ prot_queue_add(ubi, e1);
+@@ -875,8 +904,12 @@ out_not_moved:
+ return 0;
+
+ out_error:
+- ubi_err("error %d while moving PEB %d to PEB %d",
+- err, e1->pnum, e2->pnum);
++ if (vol_id != -1)
++ ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d",
++ err, e1->pnum, vol_id, lnum, e2->pnum);
++ else
++ ubi_err("error %d while moving PEB %d to PEB %d",
++ err, e1->pnum, e2->pnum);
+ spin_lock(&ubi->wl_lock);
+ ubi->move_from = ubi->move_to = NULL;
+ ubi->move_to_put = ubi->wl_scheduled = 0;
+@@ -932,7 +965,7 @@ static int ensure_wear_leveling(struct u
+ /*
+ * We schedule wear-leveling only if the difference between the
+ * lowest erase counter of used physical eraseblocks and a high
+- * erase counter of free physical eraseblocks is greater then
++ * erase counter of free physical eraseblocks is greater than
+ * %UBI_WL_THRESHOLD.
+ */
+ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
+@@ -1058,10 +1091,9 @@ static int erase_worker(struct ubi_devic
+ ubi_err("no reserved physical eraseblocks");
+ goto out_ro;
+ }
+-
+ spin_unlock(&ubi->volumes_lock);
+- ubi_msg("mark PEB %d as bad", pnum);
+
++ ubi_msg("mark PEB %d as bad", pnum);
+ err = ubi_io_mark_bad(ubi, pnum);
+ if (err)
+ goto out_ro;
+@@ -1071,7 +1103,9 @@ static int erase_worker(struct ubi_devic
+ ubi->bad_peb_count += 1;
+ ubi->good_peb_count -= 1;
+ ubi_calculate_reserved(ubi);
+- if (ubi->beb_rsvd_pebs == 0)
++ if (ubi->beb_rsvd_pebs)
++ ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs);
++ else
+ ubi_warn("last PEB from the reserved pool was used");
+ spin_unlock(&ubi->volumes_lock);
+
+@@ -1188,7 +1222,8 @@ int ubi_wl_scrub_peb(struct ubi_device *
+ retry:
+ spin_lock(&ubi->wl_lock);
+ e = ubi->lookuptbl[pnum];
+- if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) {
++ if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) ||
++ in_wl_tree(e, &ubi->erroneous)) {
+ spin_unlock(&ubi->wl_lock);
+ return 0;
+ }
+@@ -1329,7 +1364,7 @@ int ubi_thread(void *u)
+
+ spin_lock(&ubi->wl_lock);
+ if (list_empty(&ubi->works) || ubi->ro_mode ||
+- !ubi->thread_enabled) {
++ !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock(&ubi->wl_lock);
+ schedule();
+@@ -1443,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *
+ ubi->lookuptbl[e->pnum] = e;
+ }
+
+- list_for_each_entry(seb, &si->corr, u.list) {
+- cond_resched();
+-
+- e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+- if (!e)
+- goto out_free;
+-
+- e->pnum = seb->pnum;
+- e->ec = seb->ec;
+- ubi->lookuptbl[e->pnum] = e;
+- if (schedule_erase(ubi, e, 0)) {
+- kmem_cache_free(ubi_wl_entry_slab, e);
+- goto out_free;
+- }
+- }
+-
+ ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+ ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
+ cond_resched();
+@@ -1485,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *
+ if (ubi->avail_pebs < WL_RESERVED_PEBS) {
+ ubi_err("no enough physical eraseblocks (%d, need %d)",
+ ubi->avail_pebs, WL_RESERVED_PEBS);
++ if (ubi->corr_peb_count)
++ ubi_err("%d PEBs are corrupted and not used",
++ ubi->corr_peb_count);
+ goto out_free;
+ }
+ ubi->avail_pebs -= WL_RESERVED_PEBS;
+@@ -1539,7 +1561,7 @@ void ubi_wl_close(struct ubi_device *ubi
+ kfree(ubi->lookuptbl);
+ }
+
+-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
++#ifdef CONFIG_MTD_UBI_DEBUG
+
+ /**
+ * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
+@@ -1548,7 +1570,7 @@ void ubi_wl_close(struct ubi_device *ubi
+ * @ec: the erase counter to check
+ *
+ * This function returns zero if the erase counter of physical eraseblock @pnum
+- * is equivalent to @ec, %1 if not, and a negative error code if an error
++ * is equivalent to @ec, and a negative error code if not or if an error
+ * occurred.
+ */
+ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
+@@ -1557,6 +1579,9 @@ static int paranoid_check_ec(struct ubi_
+ long long read_ec;
+ struct ubi_ec_hdr *ec_hdr;
+
++ if (!(ubi_chk_flags & UBI_CHK_GEN))
++ return 0;
++
+ ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
+ if (!ec_hdr)
+ return -ENOMEM;
+@@ -1587,19 +1612,22 @@ out_free:
+ * @e: the wear-leveling entry to check
+ * @root: the root of the tree
+ *
+- * This function returns zero if @e is in the @root RB-tree and %1 if it is
+- * not.
++ * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
++ * is not.
+ */
+ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
+ struct rb_root *root)
+ {
++ if (!(ubi_chk_flags & UBI_CHK_GEN))
++ return 0;
++
+ if (in_wl_tree(e, root))
+ return 0;
+
+ ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
+ e->pnum, e->ec, root);
+ ubi_dbg_dump_stack();
+- return 1;
++ return -EINVAL;
+ }
+
+ /**
+@@ -1608,13 +1636,16 @@ static int paranoid_check_in_wl_tree(str
+ * @ubi: UBI device description object
+ * @e: the wear-leveling entry to check
+ *
+- * This function returns zero if @e is in @ubi->pq and %1 if it is not.
++ * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
+ */
+ static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e)
+ {
+ struct ubi_wl_entry *p;
+ int i;
+
++ if (!(ubi_chk_flags & UBI_CHK_GEN))
++ return 0;
++
+ for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
+ list_for_each_entry(p, &ubi->pq[i], u.list)
+ if (p == e)
+@@ -1623,6 +1654,7 @@ static int paranoid_check_in_pq(struct u
+ ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue",
+ e->pnum, e->ec);
+ ubi_dbg_dump_stack();
+- return 1;
++ return -EINVAL;
+ }
+-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
++
++#endif /* CONFIG_MTD_UBI_DEBUG */
+diff -uprN linux-2.6.28/fs/ubifs/budget.c ubifs-v2.6.28/fs/ubifs/budget.c
+--- linux-2.6.28/fs/ubifs/budget.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/budget.c 2011-06-15 14:22:09.000000000 -0400
+@@ -91,7 +91,6 @@ static int shrink_liability(struct ubifs
+ return nr_written;
+ }
+
+-
+ /**
+ * run_gc - run garbage collector.
+ * @c: UBIFS file-system description object
+@@ -131,7 +130,7 @@ static long long get_liability(struct ub
+ long long liab;
+
+ spin_lock(&c->space_lock);
+- liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
++ liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
+ spin_unlock(&c->space_lock);
+ return liab;
+ }
+@@ -142,7 +141,7 @@ static long long get_liability(struct ub
+ *
+ * This function is called when an operation cannot be budgeted because there
+ * is supposedly no free space. But in most cases there is some free space:
+- * o budgeting is pessimistic, so it always budgets more then it is actually
++ * o budgeting is pessimistic, so it always budgets more than it is actually
+ * needed, so shrinking the liability is one way to make free space - the
+ * cached data will take less space then it was budgeted for;
+ * o GC may turn some dark space into free space (budgeting treats dark space
+@@ -194,29 +193,26 @@ static int make_free_space(struct ubifs_
+ }
+
+ /**
+- * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
++ * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
+ * @c: UBIFS file-system description object
+ *
+- * This function calculates and returns the number of eraseblocks which should
+- * be kept for index usage.
++ * This function calculates and returns the number of LEBs which should be kept
++ * for index usage.
+ */
+ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
+ {
+- int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
++ int idx_lebs;
+ long long idx_size;
+
+- idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+-
++ idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
+ /* And make sure we have thrice the index size of space reserved */
+- idx_size = idx_size + (idx_size << 1);
+-
++ idx_size += idx_size << 1;
+ /*
+ * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
+ * pair, nor similarly the two variables for the new index size, so we
+ * have to do this costly 64-bit division on fast-path.
+ */
+- idx_size += eff_leb_size - 1;
+- idx_lebs = div_u64(idx_size, eff_leb_size);
++ idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
+ /*
+ * The index head is not available for the in-the-gaps method, so add an
+ * extra LEB to compensate.
+@@ -300,7 +296,7 @@ long long ubifs_calc_available(const str
+ */
+ static int can_use_rp(struct ubifs_info *c)
+ {
+- if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
++ if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
+ (c->rp_gid != 0 && in_group_p(c->rp_gid)))
+ return 1;
+ return 0;
+@@ -310,23 +306,23 @@ static int can_use_rp(struct ubifs_info
+ * do_budget_space - reserve flash space for index and data growth.
+ * @c: UBIFS file-system description object
+ *
+- * This function makes sure UBIFS has enough free eraseblocks for index growth
+- * and data.
++ * This function makes sure UBIFS has enough free LEBs for index growth and
++ * data.
+ *
+ * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
+ * would take if it was consolidated and written to the flash. This guarantees
+ * that the "in-the-gaps" commit method always succeeds and UBIFS will always
+ * be able to commit dirty index. So this function basically adds amount of
+ * budgeted index space to the size of the current index, multiplies this by 3,
+- * and makes sure this does not exceed the amount of free eraseblocks.
++ * and makes sure this does not exceed the amount of free LEBs.
+ *
+- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
++ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
+ * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
+ * be large, because UBIFS does not do any index consolidation as long as
+ * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
+ * will contain a lot of dirt.
+- * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
+- * consolidated to take up to @c->min_idx_lebs LEBs.
++ * o @c->bi.min_idx_lebs is the number of LEBs the index presumably takes. IOW,
++ * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
+ *
+ * This function returns zero in case of success, and %-ENOSPC in case of
+ * failure.
+@@ -371,13 +367,13 @@ static int do_budget_space(struct ubifs_
+ c->lst.taken_empty_lebs;
+ if (unlikely(rsvd_idx_lebs > lebs)) {
+ dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
+- "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
++ "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
+ rsvd_idx_lebs);
+ return -ENOSPC;
+ }
+
+ available = ubifs_calc_available(c, min_idx_lebs);
+- outstanding = c->budg_data_growth + c->budg_dd_growth;
++ outstanding = c->bi.data_growth + c->bi.dd_growth;
+
+ if (unlikely(available < outstanding)) {
+ dbg_budg("out of data space: available %lld, outstanding %lld",
+@@ -388,7 +384,7 @@ static int do_budget_space(struct ubifs_
+ if (available - outstanding <= c->rp_size && !can_use_rp(c))
+ return -ENOSPC;
+
+- c->min_idx_lebs = min_idx_lebs;
++ c->bi.min_idx_lebs = min_idx_lebs;
+ return 0;
+ }
+
+@@ -421,11 +417,11 @@ static int calc_data_growth(const struct
+ {
+ int data_growth;
+
+- data_growth = req->new_ino ? c->inode_budget : 0;
++ data_growth = req->new_ino ? c->bi.inode_budget : 0;
+ if (req->new_page)
+- data_growth += c->page_budget;
++ data_growth += c->bi.page_budget;
+ if (req->new_dent)
+- data_growth += c->dent_budget;
++ data_growth += c->bi.dent_budget;
+ data_growth += req->new_ino_d;
+ return data_growth;
+ }
+@@ -441,12 +437,12 @@ static int calc_dd_growth(const struct u
+ {
+ int dd_growth;
+
+- dd_growth = req->dirtied_page ? c->page_budget : 0;
++ dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
+
+ if (req->dirtied_ino)
+- dd_growth += c->inode_budget << (req->dirtied_ino - 1);
++ dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
+ if (req->mod_dent)
+- dd_growth += c->dent_budget;
++ dd_growth += c->bi.dent_budget;
+ dd_growth += req->dirtied_ino_d;
+ return dd_growth;
+ }
+@@ -488,19 +484,19 @@ int ubifs_budget_space(struct ubifs_info
+
+ again:
+ spin_lock(&c->space_lock);
+- ubifs_assert(c->budg_idx_growth >= 0);
+- ubifs_assert(c->budg_data_growth >= 0);
+- ubifs_assert(c->budg_dd_growth >= 0);
++ ubifs_assert(c->bi.idx_growth >= 0);
++ ubifs_assert(c->bi.data_growth >= 0);
++ ubifs_assert(c->bi.dd_growth >= 0);
+
+- if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
++ if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
+ dbg_budg("no space");
+ spin_unlock(&c->space_lock);
+ return -ENOSPC;
+ }
+
+- c->budg_idx_growth += idx_growth;
+- c->budg_data_growth += data_growth;
+- c->budg_dd_growth += dd_growth;
++ c->bi.idx_growth += idx_growth;
++ c->bi.data_growth += data_growth;
++ c->bi.dd_growth += dd_growth;
+
+ err = do_budget_space(c);
+ if (likely(!err)) {
+@@ -512,9 +508,9 @@ again:
+ }
+
+ /* Restore the old values */
+- c->budg_idx_growth -= idx_growth;
+- c->budg_data_growth -= data_growth;
+- c->budg_dd_growth -= dd_growth;
++ c->bi.idx_growth -= idx_growth;
++ c->bi.data_growth -= data_growth;
++ c->bi.dd_growth -= dd_growth;
+ spin_unlock(&c->space_lock);
+
+ if (req->fast) {
+@@ -534,9 +530,9 @@ again:
+ goto again;
+ }
+ dbg_budg("FS is full, -ENOSPC");
+- c->nospace = 1;
++ c->bi.nospace = 1;
+ if (can_use_rp(c) || c->rp_size == 0)
+- c->nospace_rp = 1;
++ c->bi.nospace_rp = 1;
+ smp_wmb();
+ } else
+ ubifs_err("cannot budget space, error %d", err);
+@@ -551,8 +547,8 @@ again:
+ * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
+ * since the index changes (which were budgeted for in @req->idx_growth) will
+ * only be written to the media on commit, this function moves the index budget
+- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
+- * zeroed by the commit operation.
++ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
++ * by the commit operation.
+ */
+ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
+ {
+@@ -581,23 +577,23 @@ void ubifs_release_budget(struct ubifs_i
+ if (!req->data_growth && !req->dd_growth)
+ return;
+
+- c->nospace = c->nospace_rp = 0;
++ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+
+ spin_lock(&c->space_lock);
+- c->budg_idx_growth -= req->idx_growth;
+- c->budg_uncommitted_idx += req->idx_growth;
+- c->budg_data_growth -= req->data_growth;
+- c->budg_dd_growth -= req->dd_growth;
+- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+-
+- ubifs_assert(c->budg_idx_growth >= 0);
+- ubifs_assert(c->budg_data_growth >= 0);
+- ubifs_assert(c->budg_dd_growth >= 0);
+- ubifs_assert(c->min_idx_lebs < c->main_lebs);
+- ubifs_assert(!(c->budg_idx_growth & 7));
+- ubifs_assert(!(c->budg_data_growth & 7));
+- ubifs_assert(!(c->budg_dd_growth & 7));
++ c->bi.idx_growth -= req->idx_growth;
++ c->bi.uncommitted_idx += req->idx_growth;
++ c->bi.data_growth -= req->data_growth;
++ c->bi.dd_growth -= req->dd_growth;
++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++
++ ubifs_assert(c->bi.idx_growth >= 0);
++ ubifs_assert(c->bi.data_growth >= 0);
++ ubifs_assert(c->bi.dd_growth >= 0);
++ ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
++ ubifs_assert(!(c->bi.idx_growth & 7));
++ ubifs_assert(!(c->bi.data_growth & 7));
++ ubifs_assert(!(c->bi.dd_growth & 7));
+ spin_unlock(&c->space_lock);
+ }
+
+@@ -606,7 +602,7 @@ void ubifs_release_budget(struct ubifs_i
+ * @c: UBIFS file-system description object
+ *
+ * This function converts budget which was allocated for a new page of data to
+- * the budget of changing an existing page of data. The latter is smaller then
++ * the budget of changing an existing page of data. The latter is smaller than
+ * the former, so this function only does simple re-calculation and does not
+ * involve any write-back.
+ */
+@@ -614,13 +610,13 @@ void ubifs_convert_page_budget(struct ub
+ {
+ spin_lock(&c->space_lock);
+ /* Release the index growth reservation */
+- c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
++ c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ /* Release the data growth reservation */
+- c->budg_data_growth -= c->page_budget;
++ c->bi.data_growth -= c->bi.page_budget;
+ /* Increase the dirty data growth reservation instead */
+- c->budg_dd_growth += c->page_budget;
++ c->bi.dd_growth += c->bi.page_budget;
+ /* And re-calculate the indexing space reservation */
+- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ spin_unlock(&c->space_lock);
+ }
+
+@@ -640,7 +636,7 @@ void ubifs_release_dirty_inode_budget(st
+
+ memset(&req, 0, sizeof(struct ubifs_budget_req));
+ /* The "no space" flags will be cleared because dd_growth is > 0 */
+- req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
++ req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
+ ubifs_release_budget(c, &req);
+ }
+
+@@ -696,12 +692,12 @@ long long ubifs_reported_space(const str
+ * This function calculates amount of free space to report to user-space.
+ *
+ * Because UBIFS may introduce substantial overhead (the index, node headers,
+- * alignment, wastage at the end of eraseblocks, etc), it cannot report real
+- * amount of free flash space it has (well, because not all dirty space is
+- * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
+- * it would bread user expectations about what free space is. Users seem to
+- * accustomed to assume that if the file-system reports N bytes of free space,
+- * they would be able to fit a file of N bytes to the FS. This almost works for
++ * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
++ * free flash space it has (well, because not all dirty space is reclaimable,
++ * UBIFS does not actually know the real amount). If UBIFS did so, it would
++ * bread user expectations about what free space is. Users seem to accustomed
++ * to assume that if the file-system reports N bytes of free space, they would
++ * be able to fit a file of N bytes to the FS. This almost works for
+ * traditional file-systems, because they have way less overhead than UBIFS.
+ * So, to keep users happy, UBIFS tries to take the overhead into account.
+ */
+@@ -710,9 +706,9 @@ long long ubifs_get_free_space_nolock(st
+ int rsvd_idx_lebs, lebs;
+ long long available, outstanding, free;
+
+- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+- outstanding = c->budg_data_growth + c->budg_dd_growth;
+- available = ubifs_calc_available(c, c->min_idx_lebs);
++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
++ outstanding = c->bi.data_growth + c->bi.dd_growth;
++ available = ubifs_calc_available(c, c->bi.min_idx_lebs);
+
+ /*
+ * When reporting free space to user-space, UBIFS guarantees that it is
+@@ -725,8 +721,8 @@ long long ubifs_get_free_space_nolock(st
+ * Note, the calculations below are similar to what we have in
+ * 'do_budget_space()', so refer there for comments.
+ */
+- if (c->min_idx_lebs > c->lst.idx_lebs)
+- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
++ if (c->bi.min_idx_lebs > c->lst.idx_lebs)
++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
+ else
+ rsvd_idx_lebs = 0;
+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+@@ -745,7 +741,7 @@ long long ubifs_get_free_space_nolock(st
+ * ubifs_get_free_space - return amount of free space.
+ * @c: UBIFS file-system description object
+ *
+- * This function calculates and retuns amount of free space to report to
++ * This function calculates and returns amount of free space to report to
+ * user-space.
+ */
+ long long ubifs_get_free_space(struct ubifs_info *c)
+diff -uprN linux-2.6.28/fs/ubifs/commit.c ubifs-v2.6.28/fs/ubifs/commit.c
+--- linux-2.6.28/fs/ubifs/commit.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/commit.c 2011-06-15 14:22:09.000000000 -0400
+@@ -47,6 +47,56 @@
+ #include <linux/kthread.h>
+ #include "ubifs.h"
+
++/*
++ * nothing_to_commit - check if there is nothing to commit.
++ * @c: UBIFS file-system description object
++ *
++ * This is a helper function which checks if there is anything to commit. It is
++ * used as an optimization to avoid starting the commit if it is not really
++ * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
++ * writing the commit start node to the log), and it is better to avoid doing
++ * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
++ * nothing to commit, it is more optimal to avoid any flash I/O.
++ *
++ * This function has to be called with @c->commit_sem locked for writing -
++ * this function does not take LPT/TNC locks because the @c->commit_sem
++ * guarantees that we have exclusive access to the TNC and LPT data structures.
++ *
++ * This function returns %1 if there is nothing to commit and %0 otherwise.
++ */
++static int nothing_to_commit(struct ubifs_info *c)
++{
++ /*
++ * During mounting or remounting from R/O mode to R/W mode we may
++ * commit for various recovery-related reasons.
++ */
++ if (c->mounting || c->remounting_rw)
++ return 0;
++
++ /*
++ * If the root TNC node is dirty, we definitely have something to
++ * commit.
++ */
++ if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
++ return 0;
++
++ /*
++ * Even though the TNC is clean, the LPT tree may have dirty nodes. For
++ * example, this may happen if the budgeting subsystem invoked GC to
++ * make some free space, and the GC found an LEB with only dirty and
++ * free space. In this case GC would just change the lprops of this
++ * LEB (by turning all space into free space) and unmap it.
++ */
++ if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
++ return 0;
++
++ ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
++ ubifs_assert(c->dirty_pn_cnt == 0);
++ ubifs_assert(c->dirty_nn_cnt == 0);
++
++ return 1;
++}
++
+ /**
+ * do_commit - commit the journal.
+ * @c: UBIFS file-system description object
+@@ -62,11 +112,19 @@ static int do_commit(struct ubifs_info *
+ struct ubifs_lp_stats lst;
+
+ dbg_cmt("start");
+- if (c->ro_media) {
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++
++ if (c->ro_error) {
+ err = -EROFS;
+ goto out_up;
+ }
+
++ if (nothing_to_commit(c)) {
++ up_write(&c->commit_sem);
++ err = 0;
++ goto out_cancel;
++ }
++
+ /* Sync all write buffers (necessary for recovery) */
+ for (i = 0; i < c->jhead_cnt; i++) {
+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+@@ -123,7 +181,7 @@ static int do_commit(struct ubifs_info *
+ c->mst_node->root_len = cpu_to_le32(zroot.len);
+ c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
+ c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
+- c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
++ c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
+ c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
+ c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
+ c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
+@@ -159,12 +217,12 @@ static int do_commit(struct ubifs_info *
+ if (err)
+ goto out;
+
++out_cancel:
+ spin_lock(&c->cs_lock);
+ c->cmt_state = COMMIT_RESTING;
+ wake_up(&c->cmt_wq);
+ dbg_cmt("commit end");
+ spin_unlock(&c->cs_lock);
+-
+ return 0;
+
+ out_up:
+@@ -510,7 +568,7 @@ int dbg_check_old_index(struct ubifs_inf
+ int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
+ int first = 1, iip;
+ struct ubifs_debug_info *d = c->dbg;
+- union ubifs_key lower_key, upper_key, l_key, u_key;
++ union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
+ unsigned long long uninitialized_var(last_sqnum);
+ struct ubifs_idx_node *idx;
+ struct list_head list;
+@@ -518,7 +576,7 @@ int dbg_check_old_index(struct ubifs_inf
+ size_t sz;
+
+ if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
+- goto out;
++ return 0;
+
+ INIT_LIST_HEAD(&list);
+
+diff -uprN linux-2.6.28/fs/ubifs/compress.c ubifs-v2.6.28/fs/ubifs/compress.c
+--- linux-2.6.28/fs/ubifs/compress.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/compress.c 2011-06-15 14:22:09.000000000 -0400
+@@ -46,24 +46,11 @@ static struct ubifs_compressor lzo_compr
+ .name = "lzo",
+ .capi_name = "lzo",
+ };
+-
+-static DEFINE_MUTEX(lzo999_mutex);
+-
+-static struct ubifs_compressor lzo999_compr = {
+- .compr_type = UBIFS_COMPR_LZO999,
+- .comp_mutex = &lzo999_mutex,
+- .name = "lzo999",
+- .capi_name = "lzo999",
+-};
+ #else
+ static struct ubifs_compressor lzo_compr = {
+ .compr_type = UBIFS_COMPR_LZO,
+ .name = "lzo",
+ };
+-static struct ubifs_compressor lzo_compr = {
+- .compr_type = UBIFS_COMPR_LZO999,
+- .name = "lzo999",
+-};
+ #endif
+
+ #ifdef CONFIG_UBIFS_FS_ZLIB
+@@ -138,9 +125,6 @@ void ubifs_compress(const void *in_buf,
+ if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
+ goto no_compr;
+
+- if (*compr_type == UBIFS_COMPR_LZO999)
+- *compr_type = UBIFS_COMPR_LZO;
+-
+ return;
+
+ no_compr:
+@@ -245,19 +229,13 @@ int __init ubifs_compressors_init(void)
+ if (err)
+ return err;
+
+- err = compr_init(&lzo999_compr);
+- if (err)
+- goto out_lzo;
+-
+ err = compr_init(&zlib_compr);
+ if (err)
+- goto out_lzo999;
++ goto out_lzo;
+
+ ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
+ return 0;
+
+-out_lzo999:
+- compr_exit(&lzo999_compr);
+ out_lzo:
+ compr_exit(&lzo_compr);
+ return err;
+@@ -268,7 +246,6 @@ out_lzo:
+ */
+ void ubifs_compressors_exit(void)
+ {
+- compr_exit(&lzo999_compr);
+ compr_exit(&lzo_compr);
+ compr_exit(&zlib_compr);
+ }
+diff -uprN linux-2.6.28/fs/ubifs/debug.c ubifs-v2.6.28/fs/ubifs/debug.c
+--- linux-2.6.28/fs/ubifs/debug.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/debug.c 2011-06-15 14:22:09.000000000 -0400
+@@ -42,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
+ static char dbg_key_buf0[128];
+ static char dbg_key_buf1[128];
+
+-unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
+-unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
++unsigned int ubifs_chk_flags;
+ unsigned int ubifs_tst_flags;
+
+-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
+ module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
+ module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
+
+-MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
+ MODULE_PARM_DESC(debug_chks, "Debug check flags");
+ MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
+
+@@ -210,6 +207,20 @@ const char *dbg_cstate(int cmt_state)
+ }
+ }
+
++const char *dbg_jhead(int jhead)
++{
++ switch (jhead) {
++ case GCHD:
++ return "0 (GC)";
++ case BASEHD:
++ return "1 (base)";
++ case DATAHD:
++ return "2 (data)";
++ default:
++ return "unknown journal head";
++ }
++}
++
+ static void dump_ch(const struct ubifs_ch *ch)
+ {
+ printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
+@@ -302,6 +313,8 @@ void dbg_dump_node(const struct ubifs_in
+ printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
+ printk(KERN_DEBUG "\t big_lpt %u\n",
+ !!(sup_flags & UBIFS_FLG_BIGLPT));
++ printk(KERN_DEBUG "\t space_fixup %u\n",
++ !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
+ printk(KERN_DEBUG "\tmin_io_size %u\n",
+ le32_to_cpu(sup->min_io_size));
+ printk(KERN_DEBUG "\tleb_size %u\n",
+@@ -479,9 +492,9 @@ void dbg_dump_node(const struct ubifs_in
+ "bad or corrupted node)");
+ else {
+ for (i = 0; i < nlen && dent->name[i]; i++)
+- printk("%c", dent->name[i]);
++ printk(KERN_CONT "%c", dent->name[i]);
+ }
+- printk("\n");
++ printk(KERN_CONT "\n");
+
+ break;
+ }
+@@ -592,7 +605,7 @@ void dbg_dump_lstats(const struct ubifs_
+ spin_unlock(&dbg_lock);
+ }
+
+-void dbg_dump_budg(struct ubifs_info *c)
++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
+ {
+ int i;
+ struct rb_node *rb;
+@@ -600,31 +613,48 @@ void dbg_dump_budg(struct ubifs_info *c)
+ struct ubifs_gced_idx_leb *idx_gc;
+ long long available, outstanding, free;
+
+- ubifs_assert(spin_is_locked(&c->space_lock));
++ spin_lock(&c->space_lock);
+ spin_lock(&dbg_lock);
+- printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
+- "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
+- c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
+- printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
+- "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
+- c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
+- c->freeable_cnt);
+- printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
+- "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
+- c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
++ printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
++ "total budget sum %lld\n", current->pid,
++ bi->data_growth + bi->dd_growth,
++ bi->data_growth + bi->dd_growth + bi->idx_growth);
++ printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
++ "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
++ bi->idx_growth);
++ printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
++ "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
++ bi->uncommitted_idx);
++ printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
++ bi->page_budget, bi->inode_budget, bi->dent_budget);
++ printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
++ bi->nospace, bi->nospace_rp);
++ printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
++ c->dark_wm, c->dead_wm, c->max_idx_node_sz);
++
++ if (bi != &c->bi)
++ /*
++ * If we are dumping saved budgeting data, do not print
++ * additional information which is about the current state, not
++ * the old one which corresponded to the saved budgeting data.
++ */
++ goto out_unlock;
++
++ printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
++ c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
+ printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
+ "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
+ atomic_long_read(&c->dirty_zn_cnt),
+ atomic_long_read(&c->clean_zn_cnt));
+- printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+- c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+ printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
+ c->gc_lnum, c->ihead_lnum);
++
+ /* If we are in R/O mode, journal heads do not exist */
+ if (c->jheads)
+ for (i = 0; i < c->jhead_cnt; i++)
+- printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
+- c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
++ printk(KERN_DEBUG "\tjhead %s\t LEB %d\n",
++ dbg_jhead(c->jheads[i].wbuf.jhead),
++ c->jheads[i].wbuf.lnum);
+ for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
+ bud = rb_entry(rb, struct ubifs_bud, rb);
+ printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
+@@ -637,20 +667,109 @@ void dbg_dump_budg(struct ubifs_info *c)
+ printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
+
+ /* Print budgeting predictions */
+- available = ubifs_calc_available(c, c->min_idx_lebs);
+- outstanding = c->budg_data_growth + c->budg_dd_growth;
++ available = ubifs_calc_available(c, c->bi.min_idx_lebs);
++ outstanding = c->bi.data_growth + c->bi.dd_growth;
+ free = ubifs_get_free_space_nolock(c);
+ printk(KERN_DEBUG "Budgeting predictions:\n");
+ printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
+ available, outstanding, free);
++out_unlock:
+ spin_unlock(&dbg_lock);
++ spin_unlock(&c->space_lock);
+ }
+
+ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
+ {
+- printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
+- "flags %#x\n", lp->lnum, lp->free, lp->dirty,
+- c->leb_size - lp->free - lp->dirty, lp->flags);
++ int i, spc, dark = 0, dead = 0;
++ struct rb_node *rb;
++ struct ubifs_bud *bud;
++
++ spc = lp->free + lp->dirty;
++ if (spc < c->dead_wm)
++ dead = spc;
++ else
++ dark = ubifs_calc_dark(c, spc);
++
++ if (lp->flags & LPROPS_INDEX)
++ printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
++ "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
++ lp->dirty, c->leb_size - spc, spc, lp->flags);
++ else
++ printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
++ "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
++ "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
++ c->leb_size - spc, spc, dark, dead,
++ (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
++
++ if (lp->flags & LPROPS_TAKEN) {
++ if (lp->flags & LPROPS_INDEX)
++ printk(KERN_CONT "index, taken");
++ else
++ printk(KERN_CONT "taken");
++ } else {
++ const char *s;
++
++ if (lp->flags & LPROPS_INDEX) {
++ switch (lp->flags & LPROPS_CAT_MASK) {
++ case LPROPS_DIRTY_IDX:
++ s = "dirty index";
++ break;
++ case LPROPS_FRDI_IDX:
++ s = "freeable index";
++ break;
++ default:
++ s = "index";
++ }
++ } else {
++ switch (lp->flags & LPROPS_CAT_MASK) {
++ case LPROPS_UNCAT:
++ s = "not categorized";
++ break;
++ case LPROPS_DIRTY:
++ s = "dirty";
++ break;
++ case LPROPS_FREE:
++ s = "free";
++ break;
++ case LPROPS_EMPTY:
++ s = "empty";
++ break;
++ case LPROPS_FREEABLE:
++ s = "freeable";
++ break;
++ default:
++ s = NULL;
++ break;
++ }
++ }
++ printk(KERN_CONT "%s", s);
++ }
++
++ for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
++ bud = rb_entry(rb, struct ubifs_bud, rb);
++ if (bud->lnum == lp->lnum) {
++ int head = 0;
++ for (i = 0; i < c->jhead_cnt; i++) {
++ /*
++ * Note, if we are in R/O mode or in the middle
++ * of mounting/re-mounting, the write-buffers do
++ * not exist.
++ */
++ if (c->jheads &&
++ lp->lnum == c->jheads[i].wbuf.lnum) {
++ printk(KERN_CONT ", jhead %s",
++ dbg_jhead(i));
++ head = 1;
++ }
++ }
++ if (!head)
++ printk(KERN_CONT ", bud of jhead %s",
++ dbg_jhead(bud->jhead));
++ }
++ }
++ if (lp->lnum == c->gc_lnum)
++ printk(KERN_CONT ", GC LEB");
++ printk(KERN_CONT ")\n");
+ }
+
+ void dbg_dump_lprops(struct ubifs_info *c)
+@@ -718,16 +837,24 @@ void dbg_dump_leb(const struct ubifs_inf
+ {
+ struct ubifs_scan_leb *sleb;
+ struct ubifs_scan_node *snod;
++ void *buf;
+
+ if (dbg_failure_mode)
+ return;
+
+ printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
+ current->pid, lnum);
+- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
++
++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
++ if (!buf) {
++ ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
++ return;
++ }
++
++ sleb = ubifs_scan(c, lnum, 0, buf, 0);
+ if (IS_ERR(sleb)) {
+ ubifs_err("scan error %d", (int)PTR_ERR(sleb));
+- return;
++ goto out;
+ }
+
+ printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
+@@ -743,6 +870,9 @@ void dbg_dump_leb(const struct ubifs_inf
+ printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
+ current->pid, lnum);
+ ubifs_scan_destroy(sleb);
++
++out:
++ vfree(buf);
+ return;
+ }
+
+@@ -869,11 +999,41 @@ void dbg_dump_index(struct ubifs_info *c
+ void dbg_save_space_info(struct ubifs_info *c)
+ {
+ struct ubifs_debug_info *d = c->dbg;
+-
+- ubifs_get_lp_stats(c, &d->saved_lst);
++ int freeable_cnt;
+
+ spin_lock(&c->space_lock);
++ memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
++ memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
++ d->saved_idx_gc_cnt = c->idx_gc_cnt;
++
++ /*
++ * We use a dirty hack here and zero out @c->freeable_cnt, because it
++ * affects the free space calculations, and UBIFS might not know about
++ * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
++ * only when we read their lprops, and we do this only lazily, upon the
++ * need. So at any given point of time @c->freeable_cnt might be not
++ * exactly accurate.
++ *
++ * Just one example about the issue we hit when we did not zero
++ * @c->freeable_cnt.
++ * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the
++ * amount of free space in @d->saved_free
++ * 2. We re-mount R/W, which makes UBIFS to read the "lsave"
++ * information from flash, where we cache LEBs from various
++ * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
++ * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
++ * -> 'ubifs_get_pnode()' -> 'update_cats()'
++ * -> 'ubifs_add_to_cat()').
++ * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
++ * becomes %1.
++ * 4. We calculate the amount of free space when the re-mount is
++ * finished in 'dbg_check_space_info()' and it does not match
++ * @d->saved_free.
++ */
++ freeable_cnt = c->freeable_cnt;
++ c->freeable_cnt = 0;
+ d->saved_free = ubifs_get_free_space_nolock(c);
++ c->freeable_cnt = freeable_cnt;
+ spin_unlock(&c->space_lock);
+ }
+
+@@ -890,12 +1050,15 @@ int dbg_check_space_info(struct ubifs_in
+ {
+ struct ubifs_debug_info *d = c->dbg;
+ struct ubifs_lp_stats lst;
+- long long avail, free;
++ long long free;
++ int freeable_cnt;
+
+ spin_lock(&c->space_lock);
+- avail = ubifs_calc_available(c, c->min_idx_lebs);
++ freeable_cnt = c->freeable_cnt;
++ c->freeable_cnt = 0;
++ free = ubifs_get_free_space_nolock(c);
++ c->freeable_cnt = freeable_cnt;
+ spin_unlock(&c->space_lock);
+- free = ubifs_get_free_space(c);
+
+ if (free != d->saved_free) {
+ ubifs_err("free space changed from %lld to %lld",
+@@ -908,12 +1071,14 @@ int dbg_check_space_info(struct ubifs_in
+ out:
+ ubifs_msg("saved lprops statistics dump");
+ dbg_dump_lstats(&d->saved_lst);
+- ubifs_get_lp_stats(c, &lst);
++ ubifs_msg("saved budgeting info dump");
++ dbg_dump_budg(c, &d->saved_bi);
++ ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
+ ubifs_msg("current lprops statistics dump");
+- dbg_dump_lstats(&d->saved_lst);
+- spin_lock(&c->space_lock);
+- dbg_dump_budg(c);
+- spin_unlock(&c->space_lock);
++ ubifs_get_lp_stats(c, &lst);
++ dbg_dump_lstats(&lst);
++ ubifs_msg("current budgeting info dump");
++ dbg_dump_budg(c, &c->bi);
+ dump_stack();
+ return -EINVAL;
+ }
+@@ -1214,7 +1379,7 @@ static int dbg_check_znode(struct ubifs_
+
+ /*
+ * Make sure the last key in our znode is less or
+- * equivalent than the the key in zbranch which goes
++ * equivalent than the key in the zbranch which goes
+ * after our pointing zbranch.
+ */
+ cmp = keys_cmp(c, max,
+@@ -1657,6 +1822,8 @@ static struct fsck_inode *add_inode(stru
+ struct rb_node **p, *parent = NULL;
+ struct fsck_inode *fscki;
+ ino_t inum = key_inum_flash(c, &ino->key);
++ struct inode *inode;
++ struct ubifs_inode *ui;
+
+ p = &fsckd->inodes.rb_node;
+ while (*p) {
+@@ -1680,19 +1847,46 @@ static struct fsck_inode *add_inode(stru
+ if (!fscki)
+ return ERR_PTR(-ENOMEM);
+
++ inode = ilookup(c->vfs_sb, inum);
++
+ fscki->inum = inum;
+- fscki->nlink = le32_to_cpu(ino->nlink);
+- fscki->size = le64_to_cpu(ino->size);
+- fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+- fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+- fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+- fscki->mode = le32_to_cpu(ino->mode);
++ /*
++ * If the inode is present in the VFS inode cache, use it instead of
++ * the on-flash inode which might be out-of-date. E.g., the size might
++ * be out-of-date. If we do not do this, the following may happen, for
++ * example:
++ * 1. A power cut happens
++ * 2. We mount the file-system R/O, the replay process fixes up the
++ * inode size in the VFS cache, but not on-flash.
++ * 3. 'check_leaf()' fails because it hits a data node beyond inode
++ * size.
++ */
++ if (!inode) {
++ fscki->nlink = le32_to_cpu(ino->nlink);
++ fscki->size = le64_to_cpu(ino->size);
++ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
++ fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
++ fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
++ fscki->mode = le32_to_cpu(ino->mode);
++ } else {
++ ui = ubifs_inode(inode);
++ fscki->nlink = inode->i_nlink;
++ fscki->size = inode->i_size;
++ fscki->xattr_cnt = ui->xattr_cnt;
++ fscki->xattr_sz = ui->xattr_size;
++ fscki->xattr_nms = ui->xattr_names;
++ fscki->mode = inode->i_mode;
++ iput(inode);
++ }
++
+ if (S_ISDIR(fscki->mode)) {
+ fscki->calc_sz = UBIFS_INO_NODE_SZ;
+ fscki->calc_cnt = 2;
+ }
++
+ rb_link_node(&fscki->rb, parent, p);
+ rb_insert_color(&fscki->rb, &fsckd->inodes);
++
+ return fscki;
+ }
+
+@@ -1916,7 +2110,7 @@ static int check_leaf(struct ubifs_info
+ inum = key_inum_flash(c, &dent->key);
+ fscki1 = read_add_inode(c, priv, inum);
+ if (IS_ERR(fscki1)) {
+- err = PTR_ERR(fscki);
++ err = PTR_ERR(fscki1);
+ ubifs_err("error %d while processing entry node and "
+ "trying to find parent inode node %lu",
+ err, (unsigned long)inum);
+@@ -2145,14 +2339,169 @@ out_free:
+ return err;
+ }
+
+-static int invocation_cnt;
++/**
++ * dbg_check_data_nodes_order - check that list of data nodes is sorted.
++ * @c: UBIFS file-system description object
++ * @head: the list of nodes ('struct ubifs_scan_node' objects)
++ *
++ * This function returns zero if the list of data nodes is sorted correctly,
++ * and %-EINVAL if not.
++ */
++int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
++{
++ struct list_head *cur;
++ struct ubifs_scan_node *sa, *sb;
++
++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
++ return 0;
++
++ for (cur = head->next; cur->next != head; cur = cur->next) {
++ ino_t inuma, inumb;
++ uint32_t blka, blkb;
++
++ cond_resched();
++ sa = container_of(cur, struct ubifs_scan_node, list);
++ sb = container_of(cur->next, struct ubifs_scan_node, list);
++
++ if (sa->type != UBIFS_DATA_NODE) {
++ ubifs_err("bad node type %d", sa->type);
++ dbg_dump_node(c, sa->node);
++ return -EINVAL;
++ }
++ if (sb->type != UBIFS_DATA_NODE) {
++ ubifs_err("bad node type %d", sb->type);
++ dbg_dump_node(c, sb->node);
++ return -EINVAL;
++ }
++
++ inuma = key_inum(c, &sa->key);
++ inumb = key_inum(c, &sb->key);
++
++ if (inuma < inumb)
++ continue;
++ if (inuma > inumb) {
++ ubifs_err("larger inum %lu goes before inum %lu",
++ (unsigned long)inuma, (unsigned long)inumb);
++ goto error_dump;
++ }
++
++ blka = key_block(c, &sa->key);
++ blkb = key_block(c, &sb->key);
++
++ if (blka > blkb) {
++ ubifs_err("larger block %u goes before %u", blka, blkb);
++ goto error_dump;
++ }
++ if (blka == blkb) {
++ ubifs_err("two data nodes for the same block");
++ goto error_dump;
++ }
++ }
++
++ return 0;
++
++error_dump:
++ dbg_dump_node(c, sa->node);
++ dbg_dump_node(c, sb->node);
++ return -EINVAL;
++}
++
++/**
++ * dbg_check_nondata_nodes_order - check that list of non-data nodes is sorted.
++ * @c: UBIFS file-system description object
++ * @head: the list of nodes ('struct ubifs_scan_node' objects)
++ *
++ * This function returns zero if the list of non-data nodes is sorted correctly,
++ * and %-EINVAL if not.
++ */
++int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
++{
++ struct list_head *cur;
++ struct ubifs_scan_node *sa, *sb;
++
++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
++ return 0;
++
++ for (cur = head->next; cur->next != head; cur = cur->next) {
++ ino_t inuma, inumb;
++ uint32_t hasha, hashb;
++
++ cond_resched();
++ sa = container_of(cur, struct ubifs_scan_node, list);
++ sb = container_of(cur->next, struct ubifs_scan_node, list);
++
++ if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
++ sa->type != UBIFS_XENT_NODE) {
++ ubifs_err("bad node type %d", sa->type);
++ dbg_dump_node(c, sa->node);
++ return -EINVAL;
++ }
++ if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
++ sb->type != UBIFS_XENT_NODE) {
++ ubifs_err("bad node type %d", sb->type);
++ dbg_dump_node(c, sb->node);
++ return -EINVAL;
++ }
++
++ if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
++ ubifs_err("non-inode node goes before inode node");
++ goto error_dump;
++ }
++
++ if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE)
++ continue;
++
++ if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
++ /* Inode nodes are sorted in descending size order */
++ if (sa->len < sb->len) {
++ ubifs_err("smaller inode node goes first");
++ goto error_dump;
++ }
++ continue;
++ }
++
++ /*
++ * This is either a dentry or xentry, which should be sorted in
++ * ascending (parent ino, hash) order.
++ */
++ inuma = key_inum(c, &sa->key);
++ inumb = key_inum(c, &sb->key);
++
++ if (inuma < inumb)
++ continue;
++ if (inuma > inumb) {
++ ubifs_err("larger inum %lu goes before inum %lu",
++ (unsigned long)inuma, (unsigned long)inumb);
++ goto error_dump;
++ }
++
++ hasha = key_block(c, &sa->key);
++ hashb = key_block(c, &sb->key);
++
++ if (hasha > hashb) {
++ ubifs_err("larger hash %u goes before %u",
++ hasha, hashb);
++ goto error_dump;
++ }
++ }
++
++ return 0;
++
++error_dump:
++ ubifs_msg("dumping first node");
++ dbg_dump_node(c, sa->node);
++ ubifs_msg("dumping second node");
++ dbg_dump_node(c, sb->node);
++ return -EINVAL;
++
++}
+
+ int dbg_force_in_the_gaps(void)
+ {
+- if (!dbg_force_in_the_gaps_enabled)
++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ return 0;
+- /* Force in-the-gaps every 8th commit */
+- return !((invocation_cnt++) & 0x7);
++
++ return !(random32() & 7);
+ }
+
+ /* Failure mode for recovery testing */
+@@ -2340,7 +2689,7 @@ int dbg_leb_read(struct ubi_volume_desc
+ int len, int check)
+ {
+ if (in_failure_mode(desc))
+- return -EIO;
++ return -EROFS;
+ return ubi_leb_read(desc, lnum, buf, offset, len, check);
+ }
+
+@@ -2350,7 +2699,7 @@ int dbg_leb_write(struct ubi_volume_desc
+ int err, failing;
+
+ if (in_failure_mode(desc))
+- return -EIO;
++ return -EROFS;
+ failing = do_fail(desc, lnum, 1);
+ if (failing)
+ cut_data(buf, len);
+@@ -2358,7 +2707,7 @@ int dbg_leb_write(struct ubi_volume_desc
+ if (err)
+ return err;
+ if (failing)
+- return -EIO;
++ return -EROFS;
+ return 0;
+ }
+
+@@ -2368,12 +2717,12 @@ int dbg_leb_change(struct ubi_volume_des
+ int err;
+
+ if (do_fail(desc, lnum, 1))
+- return -EIO;
++ return -EROFS;
+ err = ubi_leb_change(desc, lnum, buf, len, dtype);
+ if (err)
+ return err;
+ if (do_fail(desc, lnum, 1))
+- return -EIO;
++ return -EROFS;
+ return 0;
+ }
+
+@@ -2382,12 +2731,12 @@ int dbg_leb_erase(struct ubi_volume_desc
+ int err;
+
+ if (do_fail(desc, lnum, 0))
+- return -EIO;
++ return -EROFS;
+ err = ubi_leb_erase(desc, lnum);
+ if (err)
+ return err;
+ if (do_fail(desc, lnum, 0))
+- return -EIO;
++ return -EROFS;
+ return 0;
+ }
+
+@@ -2396,19 +2745,19 @@ int dbg_leb_unmap(struct ubi_volume_desc
+ int err;
+
+ if (do_fail(desc, lnum, 0))
+- return -EIO;
++ return -EROFS;
+ err = ubi_leb_unmap(desc, lnum);
+ if (err)
+ return err;
+ if (do_fail(desc, lnum, 0))
+- return -EIO;
++ return -EROFS;
+ return 0;
+ }
+
+ int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
+ {
+ if (in_failure_mode(desc))
+- return -EIO;
++ return -EROFS;
+ return ubi_is_mapped(desc, lnum);
+ }
+
+@@ -2417,12 +2766,12 @@ int dbg_leb_map(struct ubi_volume_desc *
+ int err;
+
+ if (do_fail(desc, lnum, 0))
+- return -EIO;
++ return -EROFS;
+ err = ubi_leb_map(desc, lnum, dtype);
+ if (err)
+ return err;
+ if (do_fail(desc, lnum, 0))
+- return -EIO;
++ return -EROFS;
+ return 0;
+ }
+
+@@ -2440,16 +2789,8 @@ int ubifs_debugging_init(struct ubifs_in
+ if (!c->dbg)
+ return -ENOMEM;
+
+- c->dbg->buf = vmalloc(c->leb_size);
+- if (!c->dbg->buf)
+- goto out;
+-
+ failure_mode_init(c);
+ return 0;
+-
+-out:
+- kfree(c->dbg);
+- return -ENOMEM;
+ }
+
+ /**
+@@ -2459,7 +2800,6 @@ out:
+ void ubifs_debugging_exit(struct ubifs_info *c)
+ {
+ failure_mode_exit(c);
+- vfree(c->dbg->buf);
+ kfree(c->dbg);
+ }
+
+@@ -2501,7 +2841,7 @@ void dbg_debugfs_exit(void)
+ static int open_debugfs_file(struct inode *inode, struct file *file)
+ {
+ file->private_data = inode->i_private;
+- return 0;
++ return nonseekable_open(inode, file);
+ }
+
+ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
+@@ -2512,18 +2852,15 @@ static ssize_t write_debugfs_file(struct
+
+ if (file->f_path.dentry == d->dfs_dump_lprops)
+ dbg_dump_lprops(c);
+- else if (file->f_path.dentry == d->dfs_dump_budg) {
+- spin_lock(&c->space_lock);
+- dbg_dump_budg(c);
+- spin_unlock(&c->space_lock);
+- } else if (file->f_path.dentry == d->dfs_dump_tnc) {
++ else if (file->f_path.dentry == d->dfs_dump_budg)
++ dbg_dump_budg(c, &c->bi);
++ else if (file->f_path.dentry == d->dfs_dump_tnc) {
+ mutex_lock(&c->tnc_mutex);
+ dbg_dump_tnc(c);
+ mutex_unlock(&c->tnc_mutex);
+ } else
+ return -EINVAL;
+
+- *ppos += count;
+ return count;
+ }
+
+@@ -2531,6 +2868,7 @@ static const struct file_operations dfs_
+ .open = open_debugfs_file,
+ .write = write_debugfs_file,
+ .owner = THIS_MODULE,
++ .llseek = no_llseek,
+ };
+
+ /**
+@@ -2553,40 +2891,38 @@ int dbg_debugfs_init_fs(struct ubifs_inf
+ struct ubifs_debug_info *d = c->dbg;
+
+ sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
+- d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
+- if (IS_ERR(d->dfs_dir)) {
+- err = PTR_ERR(d->dfs_dir);
+- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+- d->dfs_dir_name, err);
++ fname = d->dfs_dir_name;
++ dent = debugfs_create_dir(fname, dfs_rootdir);
++ if (!dent || IS_ERR(dent))
+ goto out;
+- }
++ d->dfs_dir = dent;
+
+ fname = "dump_lprops";
+- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
+- if (IS_ERR(dent))
++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
++ if (!dent || IS_ERR(dent))
+ goto out_remove;
+ d->dfs_dump_lprops = dent;
+
+ fname = "dump_budg";
+- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
+- if (IS_ERR(dent))
++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
++ if (!dent || IS_ERR(dent))
+ goto out_remove;
+ d->dfs_dump_budg = dent;
+
+ fname = "dump_tnc";
+- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
+- if (IS_ERR(dent))
++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
++ if (!dent || IS_ERR(dent))
+ goto out_remove;
+ d->dfs_dump_tnc = dent;
+
+ return 0;
+
+ out_remove:
+- err = PTR_ERR(dent);
+- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+- fname, err);
+ debugfs_remove_recursive(d->dfs_dir);
+ out:
++ err = dent ? PTR_ERR(dent) : -ENODEV;
++ ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
++ fname, err);
+ return err;
+ }
+
+diff -uprN linux-2.6.28/fs/ubifs/debug.h ubifs-v2.6.28/fs/ubifs/debug.h
+--- linux-2.6.28/fs/ubifs/debug.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/debug.h 2011-06-15 14:22:09.000000000 -0400
+@@ -23,11 +23,18 @@
+ #ifndef __UBIFS_DEBUG_H__
+ #define __UBIFS_DEBUG_H__
+
++/* Checking helper functions */
++typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
++ struct ubifs_zbranch *zbr, void *priv);
++typedef int (*dbg_znode_callback)(struct ubifs_info *c,
++ struct ubifs_znode *znode, void *priv);
++
+ #ifdef CONFIG_UBIFS_FS_DEBUG
+
++#include <linux/random.h>
++
+ /**
+ * ubifs_debug_info - per-FS debugging information.
+- * @buf: a buffer of LEB size, used for various purposes
+ * @old_zroot: old index root - used by 'dbg_check_old_index()'
+ * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
+ * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
+@@ -45,16 +52,17 @@
+ * @new_ihead_offs: used by debugging to check @c->ihead_offs
+ *
+ * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
+- * @saved_free: saved free space (used by 'dbg_save_space_info()')
++ * @saved_bi: saved budgeting information
++ * @saved_free: saved amount of free space
++ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
+ *
+- * dfs_dir_name: name of debugfs directory containing this file-system's files
+- * dfs_dir: direntry object of the file-system debugfs directory
+- * dfs_dump_lprops: "dump lprops" debugfs knob
+- * dfs_dump_budg: "dump budgeting information" debugfs knob
+- * dfs_dump_tnc: "dump TNC" debugfs knob
++ * @dfs_dir_name: name of debugfs directory containing this file-system's files
++ * @dfs_dir: direntry object of the file-system debugfs directory
++ * @dfs_dump_lprops: "dump lprops" debugfs knob
++ * @dfs_dump_budg: "dump budgeting information" debugfs knob
++ * @dfs_dump_tnc: "dump TNC" debugfs knob
+ */
+ struct ubifs_debug_info {
+- void *buf;
+ struct ubifs_zbranch old_zroot;
+ int old_zroot_level;
+ unsigned long long old_zroot_sqnum;
+@@ -72,7 +80,9 @@ struct ubifs_debug_info {
+ int new_ihead_offs;
+
+ struct ubifs_lp_stats saved_lst;
++ struct ubifs_budg_info saved_bi;
+ long long saved_free;
++ int saved_idx_gc_cnt;
+
+ char dfs_dir_name[100];
+ struct dentry *dfs_dir;
+@@ -97,23 +107,7 @@ struct ubifs_debug_info {
+ } \
+ } while (0)
+
+-#define dbg_dump_stack() do { \
+- if (!dbg_failure_mode) \
+- dump_stack(); \
+-} while (0)
+-
+-/* Generic debugging messages */
+-#define dbg_msg(fmt, ...) do { \
+- spin_lock(&dbg_lock); \
+- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
+- __func__, ##__VA_ARGS__); \
+- spin_unlock(&dbg_lock); \
+-} while (0)
+-
+-#define dbg_do_msg(typ, fmt, ...) do { \
+- if (ubifs_msg_flags & typ) \
+- dbg_msg(fmt, ##__VA_ARGS__); \
+-} while (0)
++#define dbg_dump_stack() dump_stack()
+
+ #define dbg_err(fmt, ...) do { \
+ spin_lock(&dbg_lock); \
+@@ -133,86 +127,43 @@ const char *dbg_key_str1(const struct ub
+ #define DBGKEY(key) dbg_key_str0(c, (key))
+ #define DBGKEY1(key) dbg_key_str1(c, (key))
+
+-/* General messages */
+-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
++#define ubifs_dbg_msg(type, fmt, ...) do { \
++ spin_lock(&dbg_lock); \
++ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
++ spin_unlock(&dbg_lock); \
++} while (0)
+
++/* Just a debugging messages not related to any specific UBIFS subsystem */
++#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
++/* General messages */
++#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
+ /* Additional journal messages */
+-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
+-
++#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
+ /* Additional TNC messages */
+-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
+-
++#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
+ /* Additional lprops messages */
+-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
+-
++#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
+ /* Additional LEB find messages */
+-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
+-
++#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
+ /* Additional mount messages */
+-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
+-
++#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
+ /* Additional I/O messages */
+-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
+-
++#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
+ /* Additional commit messages */
+-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
+-
++#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
+ /* Additional budgeting messages */
+-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
+-
++#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
+ /* Additional log messages */
+-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
+-
++#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
+ /* Additional gc messages */
+-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
+-
++#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
+ /* Additional scan messages */
+-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
+-
++#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
+ /* Additional recovery messages */
+-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
++#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
+
+ /*
+- * Debugging message type flags (must match msg_type_names in debug.c).
+- *
+- * UBIFS_MSG_GEN: general messages
+- * UBIFS_MSG_JNL: journal messages
+- * UBIFS_MSG_MNT: mount messages
+- * UBIFS_MSG_CMT: commit messages
+- * UBIFS_MSG_FIND: LEB find messages
+- * UBIFS_MSG_BUDG: budgeting messages
+- * UBIFS_MSG_GC: garbage collection messages
+- * UBIFS_MSG_TNC: TNC messages
+- * UBIFS_MSG_LP: lprops messages
+- * UBIFS_MSG_IO: I/O messages
+- * UBIFS_MSG_LOG: log messages
+- * UBIFS_MSG_SCAN: scan messages
+- * UBIFS_MSG_RCVRY: recovery messages
+- */
+-enum {
+- UBIFS_MSG_GEN = 0x1,
+- UBIFS_MSG_JNL = 0x2,
+- UBIFS_MSG_MNT = 0x4,
+- UBIFS_MSG_CMT = 0x8,
+- UBIFS_MSG_FIND = 0x10,
+- UBIFS_MSG_BUDG = 0x20,
+- UBIFS_MSG_GC = 0x40,
+- UBIFS_MSG_TNC = 0x80,
+- UBIFS_MSG_LP = 0x100,
+- UBIFS_MSG_IO = 0x200,
+- UBIFS_MSG_LOG = 0x400,
+- UBIFS_MSG_SCAN = 0x800,
+- UBIFS_MSG_RCVRY = 0x1000,
+-};
+-
+-/* Debugging message type flags for each default debug message level */
+-#define UBIFS_MSG_LVL_0 0
+-#define UBIFS_MSG_LVL_1 0x1
+-#define UBIFS_MSG_LVL_2 0x7f
+-#define UBIFS_MSG_LVL_3 0xffff
+-
+-/*
+- * Debugging check flags (must match chk_names in debug.c).
++ * Debugging check flags.
+ *
+ * UBIFS_CHK_GEN: general checks
+ * UBIFS_CHK_TNC: check TNC
+@@ -233,32 +184,14 @@ enum {
+ };
+
+ /*
+- * Special testing flags (must match tst_names in debug.c).
++ * Special testing flags.
+ *
+- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
+ * UBIFS_TST_RCVRY: failure mode for recovery testing
+ */
+ enum {
+- UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
+ UBIFS_TST_RCVRY = 0x4,
+ };
+
+-#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
+-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
+-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
+-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
+-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
+-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
+-#else
+-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
+-#endif
+-
+-#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
+-#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
+-#else
+-#define UBIFS_CHK_FLAGS_DEFAULT 0
+-#endif
+-
+ extern spinlock_t dbg_lock;
+
+ extern unsigned int ubifs_msg_flags;
+@@ -271,6 +204,7 @@ void ubifs_debugging_exit(struct ubifs_i
+ /* Dump functions */
+ const char *dbg_ntype(int type);
+ const char *dbg_cstate(int cmt_state);
++const char *dbg_jhead(int jhead);
+ const char *dbg_get_key_dump(const struct ubifs_info *c,
+ const union ubifs_key *key);
+ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
+@@ -279,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubif
+ int offs);
+ void dbg_dump_budget_req(const struct ubifs_budget_req *req);
+ void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
+-void dbg_dump_budg(struct ubifs_info *c);
++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
+ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
+ void dbg_dump_lprops(struct ubifs_info *c);
+ void dbg_dump_lpt_info(struct ubifs_info *c);
+@@ -293,11 +227,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
+ void dbg_dump_index(struct ubifs_info *c);
+ void dbg_dump_lpt_lebs(const struct ubifs_info *c);
+
+-/* Checking helper functions */
+-typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+- struct ubifs_zbranch *zbr, void *priv);
+-typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+- struct ubifs_znode *znode, void *priv);
+ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
+ dbg_znode_callback znode_cb, void *priv);
+
+@@ -318,23 +247,24 @@ int dbg_check_idx_size(struct ubifs_info
+ int dbg_check_filesystem(struct ubifs_info *c);
+ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
+ int add_pos);
+-int dbg_check_lprops(struct ubifs_info *c);
+ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
+ int row, int col);
++int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
++ loff_t size);
++int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
++int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
+
+ /* Force the use of in-the-gaps method for testing */
+-
+-#define dbg_force_in_the_gaps_enabled \
+- (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
+-
++static inline int dbg_force_in_the_gaps_enabled(void)
++{
++ return ubifs_chk_flags & UBIFS_CHK_GEN;
++}
+ int dbg_force_in_the_gaps(void);
+
+ /* Failure mode for recovery testing */
+-
+ #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
+
+ #ifndef UBIFS_DBG_PRESERVE_UBI
+-
+ #define ubi_leb_read dbg_leb_read
+ #define ubi_leb_write dbg_leb_write
+ #define ubi_leb_change dbg_leb_change
+@@ -342,7 +272,6 @@ int dbg_force_in_the_gaps(void);
+ #define ubi_leb_unmap dbg_leb_unmap
+ #define ubi_is_mapped dbg_is_mapped
+ #define ubi_leb_map dbg_leb_map
+-
+ #endif
+
+ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+@@ -389,85 +318,127 @@ void dbg_debugfs_exit_fs(struct ubifs_in
+ __func__, __LINE__, current->pid); \
+ } while (0)
+
+-#define dbg_err(fmt, ...) do { \
+- if (0) \
+- ubifs_err(fmt, ##__VA_ARGS__); \
++#define dbg_err(fmt, ...) do { \
++ if (0) \
++ ubifs_err(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+-#define dbg_msg(fmt, ...) do { \
+- if (0) \
+- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
+- current->pid, __func__, ##__VA_ARGS__); \
++#define ubifs_dbg_msg(fmt, ...) do { \
++ if (0) \
++ pr_debug(fmt "\n", ##__VA_ARGS__); \
+ } while (0)
+
+ #define dbg_dump_stack()
+ #define ubifs_assert_cmt_locked(c)
+
+-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+-#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
++#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+
+ #define DBGKEY(key) ((char *)(key))
+ #define DBGKEY1(key) ((char *)(key))
+
+-#define ubifs_debugging_init(c) 0
+-#define ubifs_debugging_exit(c) ({})
+-
+-#define dbg_ntype(type) ""
+-#define dbg_cstate(cmt_state) ""
+-#define dbg_get_key_dump(c, key) ({})
+-#define dbg_dump_inode(c, inode) ({})
+-#define dbg_dump_node(c, node) ({})
+-#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
+-#define dbg_dump_budget_req(req) ({})
+-#define dbg_dump_lstats(lst) ({})
+-#define dbg_dump_budg(c) ({})
+-#define dbg_dump_lprop(c, lp) ({})
+-#define dbg_dump_lprops(c) ({})
+-#define dbg_dump_lpt_info(c) ({})
+-#define dbg_dump_leb(c, lnum) ({})
+-#define dbg_dump_znode(c, znode) ({})
+-#define dbg_dump_heap(c, heap, cat) ({})
+-#define dbg_dump_pnode(c, pnode, parent, iip) ({})
+-#define dbg_dump_tnc(c) ({})
+-#define dbg_dump_index(c) ({})
+-#define dbg_dump_lpt_lebs(c) ({})
+-
+-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
+-#define dbg_old_index_check_init(c, zroot) 0
+-#define dbg_save_space_info(c) ({})
+-#define dbg_check_space_info(c) 0
+-#define dbg_check_old_index(c, zroot) 0
+-#define dbg_check_cats(c) 0
+-#define dbg_check_ltab(c) 0
+-#define dbg_chk_lpt_free_spc(c) 0
+-#define dbg_chk_lpt_sz(c, action, len) 0
+-#define dbg_check_synced_i_size(inode) 0
+-#define dbg_check_dir_size(c, dir) 0
+-#define dbg_check_tnc(c, x) 0
+-#define dbg_check_idx_size(c, idx_size) 0
+-#define dbg_check_filesystem(c) 0
+-#define dbg_check_heap(c, heap, cat, add_pos) ({})
+-#define dbg_check_lprops(c) 0
+-#define dbg_check_lpt_nodes(c, cnode, row, col) 0
+-#define dbg_force_in_the_gaps_enabled 0
+-#define dbg_force_in_the_gaps() 0
+-#define dbg_failure_mode 0
+-
+-#define dbg_debugfs_init() 0
+-#define dbg_debugfs_exit()
+-#define dbg_debugfs_init_fs(c) 0
+-#define dbg_debugfs_exit_fs(c) 0
++static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
++static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
++static inline const char *dbg_ntype(int type) { return ""; }
++static inline const char *dbg_cstate(int cmt_state) { return ""; }
++static inline const char *dbg_jhead(int jhead) { return ""; }
++static inline const char *
++dbg_get_key_dump(const struct ubifs_info *c,
++ const union ubifs_key *key) { return ""; }
++static inline void dbg_dump_inode(const struct ubifs_info *c,
++ const struct inode *inode) { return; }
++static inline void dbg_dump_node(const struct ubifs_info *c,
++ const void *node) { return; }
++static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
++ void *node, int lnum,
++ int offs) { return; }
++static inline void
++dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
++static inline void
++dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
++static inline void
++dbg_dump_budg(struct ubifs_info *c,
++ const struct ubifs_budg_info *bi) { return; }
++static inline void dbg_dump_lprop(const struct ubifs_info *c,
++ const struct ubifs_lprops *lp) { return; }
++static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
++static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; }
++static inline void dbg_dump_leb(const struct ubifs_info *c,
++ int lnum) { return; }
++static inline void
++dbg_dump_znode(const struct ubifs_info *c,
++ const struct ubifs_znode *znode) { return; }
++static inline void dbg_dump_heap(struct ubifs_info *c,
++ struct ubifs_lpt_heap *heap,
++ int cat) { return; }
++static inline void dbg_dump_pnode(struct ubifs_info *c,
++ struct ubifs_pnode *pnode,
++ struct ubifs_nnode *parent,
++ int iip) { return; }
++static inline void dbg_dump_tnc(struct ubifs_info *c) { return; }
++static inline void dbg_dump_index(struct ubifs_info *c) { return; }
++static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; }
++
++static inline int dbg_walk_index(struct ubifs_info *c,
++ dbg_leaf_callback leaf_cb,
++ dbg_znode_callback znode_cb,
++ void *priv) { return 0; }
++static inline void dbg_save_space_info(struct ubifs_info *c) { return; }
++static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; }
++static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; }
++static inline int
++dbg_old_index_check_init(struct ubifs_info *c,
++ struct ubifs_zbranch *zroot) { return 0; }
++static inline int
++dbg_check_old_index(struct ubifs_info *c,
++ struct ubifs_zbranch *zroot) { return 0; }
++static inline int dbg_check_cats(struct ubifs_info *c) { return 0; }
++static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
++static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
++static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
++ int action, int len) { return 0; }
++static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
++static inline int dbg_check_dir_size(struct ubifs_info *c,
++ const struct inode *dir) { return 0; }
++static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
++static inline int dbg_check_idx_size(struct ubifs_info *c,
++ long long idx_size) { return 0; }
++static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; }
++static inline void dbg_check_heap(struct ubifs_info *c,
++ struct ubifs_lpt_heap *heap,
++ int cat, int add_pos) { return; }
++static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
++ struct ubifs_cnode *cnode, int row, int col) { return 0; }
++static inline int dbg_check_inode_size(struct ubifs_info *c,
++ const struct inode *inode,
++ loff_t size) { return 0; }
++static inline int
++dbg_check_data_nodes_order(struct ubifs_info *c,
++ struct list_head *head) { return 0; }
++static inline int
++dbg_check_nondata_nodes_order(struct ubifs_info *c,
++ struct list_head *head) { return 0; }
++
++static inline int dbg_force_in_the_gaps(void) { return 0; }
++#define dbg_force_in_the_gaps_enabled() 0
++#define dbg_failure_mode 0
++
++static inline int dbg_debugfs_init(void) { return 0; }
++static inline void dbg_debugfs_exit(void) { return; }
++static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; }
++static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; }
+
+ #endif /* !CONFIG_UBIFS_FS_DEBUG */
+ #endif /* !__UBIFS_DEBUG_H__ */
+diff -uprN linux-2.6.28/fs/ubifs/dir.c ubifs-v2.6.28/fs/ubifs/dir.c
+--- linux-2.6.28/fs/ubifs/dir.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/dir.c 2011-06-15 14:22:09.000000000 -0400
+@@ -104,13 +104,13 @@ struct inode *ubifs_new_inode(struct ubi
+ */
+ inode->i_flags |= (S_NOCMTIME);
+
+- inode->i_uid = current->fsuid;
++ inode->i_uid = current_fsuid();
+ if (dir->i_mode & S_ISGID) {
+ inode->i_gid = dir->i_gid;
+ if (S_ISDIR(mode))
+ mode |= S_ISGID;
+ } else
+- inode->i_gid = current->fsgid;
++ inode->i_gid = current_fsgid();
+ inode->i_mode = mode;
+ inode->i_mtime = inode->i_atime = inode->i_ctime =
+ ubifs_current_time(inode);
+@@ -628,7 +628,7 @@ static int ubifs_unlink(struct inode *di
+ ubifs_release_budget(c, &req);
+ else {
+ /* We've deleted something - clean the "no space" flags */
+- c->nospace = c->nospace_rp = 0;
++ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+ }
+ return 0;
+@@ -718,7 +718,7 @@ static int ubifs_rmdir(struct inode *dir
+ ubifs_release_budget(c, &req);
+ else {
+ /* We've deleted something - clean the "no space" flags */
+- c->nospace = c->nospace_rp = 0;
++ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+ }
+ return 0;
+diff -uprN linux-2.6.28/fs/ubifs/file.c ubifs-v2.6.28/fs/ubifs/file.c
+--- linux-2.6.28/fs/ubifs/file.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/file.c 2011-06-15 14:22:09.000000000 -0400
+@@ -21,34 +21,32 @@
+ */
+
+ /*
+- * This file implements VFS file and inode operations of regular files, device
++ * This file implements VFS file and inode operations for regular files, device
+ * nodes and symlinks as well as address space operations.
+ *
+- * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
+- * page is dirty and is used for budgeting purposes - dirty pages should not be
+- * budgeted. The PG_checked flag is set if full budgeting is required for the
+- * page e.g., when it corresponds to a file hole or it is just beyond the file
+- * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
+- * fail in this function, and the budget is released in 'ubifs_write_end()'. So
+- * the PG_private and PG_checked flags carry the information about how the page
+- * was budgeted, to make it possible to release the budget properly.
+- *
+- * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
+- * we implement. However, this is not true for '->writepage()', which might be
+- * called with 'i_mutex' unlocked. For example, when pdflush is performing
+- * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
+- * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
+- * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
+- * path'. So, in '->writepage()' we are only guaranteed that the page is
+- * locked.
+- *
+- * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
+- * readahead path does not have it locked ("sys_read -> generic_file_aio_read
+- * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
+- * not set as well. However, UBIFS disables readahead.
+- *
+- * This, for example means that there might be 2 concurrent '->writepage()'
+- * calls for the same inode, but different inode dirty pages.
++ * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
++ * the page is dirty and is used for optimization purposes - dirty pages are
++ * not budgeted so the flag shows that 'ubifs_write_end()' should not release
++ * the budget for this page. The @PG_checked flag is set if full budgeting is
++ * required for the page e.g., when it corresponds to a file hole or it is
++ * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
++ * it is OK to fail in this function, and the budget is released in
++ * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
++ * information about how the page was budgeted, to make it possible to release
++ * the budget properly.
++ *
++ * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
++ * implement. However, this is not true for 'ubifs_writepage()', which may be
++ * called with @i_mutex unlocked. For example, when pdflush is doing background
++ * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
++ * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
++ * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
++ * we are only guaranteed that the page is locked.
++ *
++ * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
++ * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
++ * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
++ * set as well. However, UBIFS disables readahead.
+ */
+
+ #include "ubifs.h"
+@@ -213,7 +211,7 @@ static void release_new_page_budget(stru
+ */
+ static void release_existing_page_budget(struct ubifs_info *c)
+ {
+- struct ubifs_budget_req req = { .dd_growth = c->page_budget};
++ struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
+
+ ubifs_release_budget(c, &req);
+ }
+@@ -433,8 +431,9 @@ static int ubifs_write_begin(struct file
+ struct page *page;
+
+ ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
++ ubifs_assert(!c->ro_media && !c->ro_mount);
+
+- if (unlikely(c->ro_media))
++ if (unlikely(c->ro_error))
+ return -EROFS;
+
+ /* Try out the fast-path part first */
+@@ -447,10 +446,12 @@ static int ubifs_write_begin(struct file
+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
+ /*
+ * We change whole page so no need to load it. But we
+- * have to set the @PG_checked flag to make the further
+- * code the page is new. This might be not true, but it
+- * is better to budget more that to read the page from
+- * the media.
++ * do not know whether this page exists on the media or
++ * not, so we assume the latter because it requires
++ * larger budget. The assumption is that it is better
++ * to budget a bit more than to read the page from the
++ * media. Thus, we are setting the @PG_checked flag
++ * here.
+ */
+ SetPageChecked(page);
+ skipped_read = 1;
+@@ -496,8 +497,8 @@ static int ubifs_write_begin(struct file
+ }
+
+ /*
+- * Whee, we aquired budgeting quickly - without involving
+- * garbage-collection, committing or forceing write-back. We return
++ * Whee, we acquired budgeting quickly - without involving
++ * garbage-collection, committing or forcing write-back. We return
+ * with @ui->ui_mutex locked if we are appending pages, and unlocked
+ * otherwise. This is an optimization (slightly hacky though).
+ */
+@@ -558,10 +559,11 @@ static int ubifs_write_end(struct file *
+ dbg_gen("copied %d instead of %d, read page and repeat",
+ copied, len);
+ cancel_budget(c, page, ui, appending);
++ ClearPageChecked(page);
+
+ /*
+ * Return 0 to force VFS to repeat the whole operation, or the
+- * error code if 'do_readpage()' failes.
++ * error code if 'do_readpage()' fails.
+ */
+ copied = do_readpage(page);
+ goto out;
+@@ -958,7 +960,7 @@ static int do_writepage(struct page *pag
+ * whole index and correct all inode sizes, which is long an unacceptable.
+ *
+ * To prevent situations like this, UBIFS writes pages back only if they are
+- * within last synchronized inode size, i.e. the the size which has been
++ * within the last synchronized inode size, i.e. the size which has been
+ * written to the flash media last time. Otherwise, UBIFS forces inode
+ * write-back, thus making sure the on-flash inode contains current inode size,
+ * and then keeps writing pages back.
+@@ -1174,16 +1176,16 @@ static int do_truncation(struct ubifs_in
+ ui->ui_size = inode->i_size;
+ /* Truncation changes inode [mc]time */
+ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
+- /* The other attributes may be changed at the same time as well */
++ /* Other attributes may be changed at the same time as well */
+ do_attr_changes(inode, attr);
+-
+ err = ubifs_jnl_truncate(c, inode, old_size, new_size);
+ mutex_unlock(&ui->ui_mutex);
++
+ out_budg:
+ if (budgeted)
+ ubifs_release_budget(c, &req);
+ else {
+- c->nospace = c->nospace_rp = 0;
++ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+ }
+ return err;
+@@ -1312,6 +1314,13 @@ int ubifs_fsync(struct file *file, struc
+
+ dbg_gen("syncing inode %lu", inode->i_ino);
+
++ if (c->ro_mount)
++ /*
++ * For some really strange reasons VFS does not filter out
++ * 'fsync()' for R/O mounted file-systems as per 2.6.39.
++ */
++ return 0;
++
+ /*
+ * VFS has already synchronized dirty pages for this inode. Synchronize
+ * the inode unless this is a 'datasync()' call.
+@@ -1440,8 +1449,8 @@ static int ubifs_releasepage(struct page
+ }
+
+ /*
+- * mmap()d file has taken write protection fault and is being made
+- * writable. UBIFS must ensure page is budgeted for.
++ * mmap()d file has taken write protection fault and is being made writable.
++ * UBIFS must ensure page is budgeted for.
+ */
+ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+ {
+@@ -1453,9 +1462,9 @@ static int ubifs_vm_page_mkwrite(struct
+
+ dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
+ i_size_read(inode));
+- ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
++ ubifs_assert(!c->ro_media && !c->ro_mount);
+
+- if (unlikely(c->ro_media))
++ if (unlikely(c->ro_error))
+ return -EROFS;
+
+ /*
+@@ -1541,7 +1550,6 @@ static int ubifs_file_mmap(struct file *
+ {
+ int err;
+
+- /* 'generic_file_mmap()' takes care of NOMMU case */
+ err = generic_file_mmap(file, vma);
+ if (err)
+ return err;
+diff -uprN linux-2.6.28/fs/ubifs/find.c ubifs-v2.6.28/fs/ubifs/find.c
+--- linux-2.6.28/fs/ubifs/find.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/find.c 2011-06-15 14:22:09.000000000 -0400
+@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_in
+ * But if the index takes fewer LEBs than it is reserved for it,
+ * this function must avoid picking those reserved LEBs.
+ */
+- if (c->min_idx_lebs >= c->lst.idx_lebs) {
+- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
++ if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
+ exclude_index = 1;
+ }
+ spin_unlock(&c->space_lock);
+@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_in
+ pick_free = 0;
+ } else {
+ spin_lock(&c->space_lock);
+- exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
++ exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
+ spin_unlock(&c->space_lock);
+ }
+
+@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_
+ * ubifs_find_free_space - find a data LEB with free space.
+ * @c: the UBIFS file-system description object
+ * @min_space: minimum amount of required free space
+- * @free: contains amount of free space in the LEB on exit
++ * @offs: contains offset of where free space starts on exit
+ * @squeeze: whether to try to find space in a non-empty LEB first
+ *
+ * This function looks for an LEB with at least @min_space bytes of free space.
+@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_
+ * failed to find a LEB with @min_space bytes of free space and other a negative
+ * error codes in case of failure.
+ */
+-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
++int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
+ int squeeze)
+ {
+ const struct ubifs_lprops *lprops;
+@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_i
+
+ /* Check if there are enough empty LEBs for commit */
+ spin_lock(&c->space_lock);
+- if (c->min_idx_lebs > c->lst.idx_lebs)
+- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
++ if (c->bi.min_idx_lebs > c->lst.idx_lebs)
++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
+ else
+ rsvd_idx_lebs = 0;
+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_i
+ spin_unlock(&c->space_lock);
+ }
+
+- *free = lprops->free;
++ *offs = c->leb_size - lprops->free;
+ ubifs_release_lprops(c);
+
+- if (*free == c->leb_size) {
++ if (*offs == 0) {
+ /*
+ * Ensure that empty LEBs have been unmapped. They may not have
+ * been, for example, because of an unclean unmount. Also
+@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_i
+ return err;
+ }
+
+- dbg_find("found LEB %d, free %d", lnum, *free);
+- ubifs_assert(*free >= min_space);
++ dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
++ ubifs_assert(*offs <= c->leb_size - min_space);
+ return lnum;
+
+ out:
+diff -uprN linux-2.6.28/fs/ubifs/gc.c ubifs-v2.6.28/fs/ubifs/gc.c
+--- linux-2.6.28/fs/ubifs/gc.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/gc.c 2011-06-15 14:22:09.000000000 -0400
+@@ -47,25 +47,18 @@
+ * have to waste large pieces of free space at the end of LEB B, because nodes
+ * from LEB A would not fit. And the worst situation is when all nodes are of
+ * maximum size. So dark watermark is the amount of free + dirty space in LEB
+- * which are guaranteed to be reclaimable. If LEB has less space, the GC migh
++ * which are guaranteed to be reclaimable. If LEB has less space, the GC might
+ * be unable to reclaim it. So, LEBs with free + dirty greater than dark
+ * watermark are "good" LEBs from GC's point of few. The other LEBs are not so
+ * good, and GC takes extra care when moving them.
+ */
+
+ #include <linux/pagemap.h>
++#include <linux/list_sort.h>
+ #include "ubifs.h"
+
+ /*
+- * GC tries to optimize the way it fit nodes to available space, and it sorts
+- * nodes a little. The below constants are watermarks which define "large",
+- * "medium", and "small" nodes.
+- */
+-#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
+-#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
+-
+-/*
+- * GC may need to move more then one LEB to make progress. The below constants
++ * GC may need to move more than one LEB to make progress. The below constants
+ * define "soft" and "hard" limits on the number of LEBs the garbage collector
+ * may move.
+ */
+@@ -106,6 +99,10 @@ static int switch_gc_head(struct ubifs_i
+ if (err)
+ return err;
+
++ err = ubifs_wbuf_sync_nolock(wbuf);
++ if (err)
++ return err;
++
+ err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
+ if (err)
+ return err;
+@@ -116,138 +113,243 @@ static int switch_gc_head(struct ubifs_i
+ }
+
+ /**
+- * joinup - bring data nodes for an inode together.
+- * @c: UBIFS file-system description object
+- * @sleb: describes scanned LEB
+- * @inum: inode number
+- * @blk: block number
+- * @data: list to which to add data nodes
+- *
+- * This function looks at the first few nodes in the scanned LEB @sleb and adds
+- * them to @data if they are data nodes from @inum and have a larger block
+- * number than @blk. This function returns %0 on success and a negative error
+- * code on failure.
++ * data_nodes_cmp - compare 2 data nodes.
++ * @priv: UBIFS file-system description object
++ * @a: first data node
++ * @b: second data node
++ *
++ * This function compares data nodes @a and @b. Returns %1 if @a has greater
++ * inode or block number, and %-1 otherwise.
+ */
+-static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
+- unsigned int blk, struct list_head *data)
++static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+ {
+- int err, cnt = 6, lnum = sleb->lnum, offs;
+- struct ubifs_scan_node *snod, *tmp;
+- union ubifs_key *key;
++ ino_t inuma, inumb;
++ struct ubifs_info *c = priv;
++ struct ubifs_scan_node *sa, *sb;
++
++ cond_resched();
++ if (a == b)
++ return 0;
++
++ sa = list_entry(a, struct ubifs_scan_node, list);
++ sb = list_entry(b, struct ubifs_scan_node, list);
++
++ ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
++ ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
++ ubifs_assert(sa->type == UBIFS_DATA_NODE);
++ ubifs_assert(sb->type == UBIFS_DATA_NODE);
++
++ inuma = key_inum(c, &sa->key);
++ inumb = key_inum(c, &sb->key);
++
++ if (inuma == inumb) {
++ unsigned int blka = key_block(c, &sa->key);
++ unsigned int blkb = key_block(c, &sb->key);
++
++ if (blka <= blkb)
++ return -1;
++ } else if (inuma <= inumb)
++ return -1;
+
+- list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+- key = &snod->key;
+- if (key_inum(c, key) == inum &&
+- key_type(c, key) == UBIFS_DATA_KEY &&
+- key_block(c, key) > blk) {
+- offs = snod->offs;
+- err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
+- if (err < 0)
+- return err;
+- list_del(&snod->list);
+- if (err) {
+- list_add_tail(&snod->list, data);
+- blk = key_block(c, key);
+- } else
+- kfree(snod);
+- cnt = 6;
+- } else if (--cnt == 0)
+- break;
+- }
+- return 0;
++ return 1;
++}
++
++/**
++ * nondata_nodes_cmp - compare 2 non-data nodes.
++ * @priv: UBIFS file-system description object
++ * @a: first node
++ * @b: second node
++ *
++ * This function compares nodes @a and @b. It makes sure that inode nodes go
++ * first and sorted by length in descending order. Directory entry nodes go
++ * after inode nodes and are sorted in ascending hash value order.
++ */
++static int nondata_nodes_cmp(void *priv, struct list_head *a,
++ struct list_head *b)
++{
++ ino_t inuma, inumb;
++ struct ubifs_info *c = priv;
++ struct ubifs_scan_node *sa, *sb;
++
++ cond_resched();
++ if (a == b)
++ return 0;
++
++ sa = list_entry(a, struct ubifs_scan_node, list);
++ sb = list_entry(b, struct ubifs_scan_node, list);
++
++ ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY &&
++ key_type(c, &sb->key) != UBIFS_DATA_KEY);
++ ubifs_assert(sa->type != UBIFS_DATA_NODE &&
++ sb->type != UBIFS_DATA_NODE);
++
++ /* Inodes go before directory entries */
++ if (sa->type == UBIFS_INO_NODE) {
++ if (sb->type == UBIFS_INO_NODE)
++ return sb->len - sa->len;
++ return -1;
++ }
++ if (sb->type == UBIFS_INO_NODE)
++ return 1;
++
++ ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY ||
++ key_type(c, &sa->key) == UBIFS_XENT_KEY);
++ ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY ||
++ key_type(c, &sb->key) == UBIFS_XENT_KEY);
++ ubifs_assert(sa->type == UBIFS_DENT_NODE ||
++ sa->type == UBIFS_XENT_NODE);
++ ubifs_assert(sb->type == UBIFS_DENT_NODE ||
++ sb->type == UBIFS_XENT_NODE);
++
++ inuma = key_inum(c, &sa->key);
++ inumb = key_inum(c, &sb->key);
++
++ if (inuma == inumb) {
++ uint32_t hasha = key_hash(c, &sa->key);
++ uint32_t hashb = key_hash(c, &sb->key);
++
++ if (hasha <= hashb)
++ return -1;
++ } else if (inuma <= inumb)
++ return -1;
++
++ return 1;
+ }
+
+ /**
+- * move_nodes - move nodes.
++ * sort_nodes - sort nodes for GC.
+ * @c: UBIFS file-system description object
+- * @sleb: describes nodes to move
+- *
+- * This function moves valid nodes from data LEB described by @sleb to the GC
+- * journal head. The obsolete nodes are dropped.
+- *
+- * When moving nodes we have to deal with classical bin-packing problem: the
+- * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
+- * where the nodes in the @sleb->nodes list are the elements which should be
+- * fit optimally to the bins. This function uses the "first fit decreasing"
+- * strategy, although it does not really sort the nodes but just split them on
+- * 3 classes - large, medium, and small, so they are roughly sorted.
++ * @sleb: describes nodes to sort and contains the result on exit
++ * @nondata: contains non-data nodes on exit
++ * @min: minimum node size is returned here
++ *
++ * This function sorts the list of inodes to garbage collect. First of all, it
++ * kills obsolete nodes and separates data and non-data nodes to the
++ * @sleb->nodes and @nondata lists correspondingly.
++ *
++ * Data nodes are then sorted in block number order - this is important for
++ * bulk-read; data nodes with lower inode number go before data nodes with
++ * higher inode number, and data nodes with lower block number go before data
++ * nodes with higher block number;
++ *
++ * Non-data nodes are sorted as follows.
++ * o First go inode nodes - they are sorted in descending length order.
++ * o Then go directory entry nodes - they are sorted in hash order, which
++ * should supposedly optimize 'readdir()'. Direntry nodes with lower parent
++ * inode number go before direntry nodes with higher parent inode number,
++ * and direntry nodes with lower name hash values go before direntry nodes
++ * with higher name hash values.
+ *
+- * This function returns zero in case of success, %-EAGAIN if commit is
+- * required, and other negative error codes in case of other failures.
++ * This function returns zero in case of success and a negative error code in
++ * case of failure.
+ */
+-static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
++static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
++ struct list_head *nondata, int *min)
+ {
++ int err;
+ struct ubifs_scan_node *snod, *tmp;
+- struct list_head data, large, medium, small;
+- struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+- int avail, err, min = INT_MAX;
+- unsigned int blk = 0;
+- ino_t inum = 0;
+-
+- INIT_LIST_HEAD(&data);
+- INIT_LIST_HEAD(&large);
+- INIT_LIST_HEAD(&medium);
+- INIT_LIST_HEAD(&small);
+-
+- while (!list_empty(&sleb->nodes)) {
+- struct list_head *lst = sleb->nodes.next;
+-
+- snod = list_entry(lst, struct ubifs_scan_node, list);
+-
+- ubifs_assert(snod->type != UBIFS_IDX_NODE);
+- ubifs_assert(snod->type != UBIFS_REF_NODE);
+- ubifs_assert(snod->type != UBIFS_CS_NODE);
++
++ *min = INT_MAX;
++
++ /* Separate data nodes and non-data nodes */
++ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
++ ubifs_assert(snod->type == UBIFS_INO_NODE ||
++ snod->type == UBIFS_DATA_NODE ||
++ snod->type == UBIFS_DENT_NODE ||
++ snod->type == UBIFS_XENT_NODE ||
++ snod->type == UBIFS_TRUN_NODE);
++
++ if (snod->type != UBIFS_INO_NODE &&
++ snod->type != UBIFS_DATA_NODE &&
++ snod->type != UBIFS_DENT_NODE &&
++ snod->type != UBIFS_XENT_NODE) {
++ /* Probably truncation node, zap it */
++ list_del(&snod->list);
++ kfree(snod);
++ continue;
++ }
++
++ ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY ||
++ key_type(c, &snod->key) == UBIFS_INO_KEY ||
++ key_type(c, &snod->key) == UBIFS_DENT_KEY ||
++ key_type(c, &snod->key) == UBIFS_XENT_KEY);
+
+ err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
+ snod->offs, 0);
+ if (err < 0)
+- goto out;
++ return err;
+
+- list_del(lst);
+ if (!err) {
+ /* The node is obsolete, remove it from the list */
++ list_del(&snod->list);
+ kfree(snod);
+ continue;
+ }
+
+- /*
+- * Sort the list of nodes so that data nodes go first, large
+- * nodes go second, and small nodes go last.
+- */
+- if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
+- if (inum != key_inum(c, &snod->key)) {
+- if (inum) {
+- /*
+- * Try to move data nodes from the same
+- * inode together.
+- */
+- err = joinup(c, sleb, inum, blk, &data);
+- if (err)
+- goto out;
+- }
+- inum = key_inum(c, &snod->key);
+- blk = key_block(c, &snod->key);
+- }
+- list_add_tail(lst, &data);
+- } else if (snod->len > MEDIUM_NODE_WM)
+- list_add_tail(lst, &large);
+- else if (snod->len > SMALL_NODE_WM)
+- list_add_tail(lst, &medium);
+- else
+- list_add_tail(lst, &small);
+-
+- /* And find the smallest node */
+- if (snod->len < min)
+- min = snod->len;
++ if (snod->len < *min)
++ *min = snod->len;
++
++ if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
++ list_move_tail(&snod->list, nondata);
+ }
+
+- /*
+- * Join the tree lists so that we'd have one roughly sorted list
+- * ('large' will be the head of the joined list).
+- */
+- list_splice(&data, &large);
+- list_splice(&medium, large.prev);
+- list_splice(&small, large.prev);
++ /* Sort data and non-data nodes */
++ list_sort(c, &sleb->nodes, &data_nodes_cmp);
++ list_sort(c, nondata, &nondata_nodes_cmp);
++
++ err = dbg_check_data_nodes_order(c, &sleb->nodes);
++ if (err)
++ return err;
++ err = dbg_check_nondata_nodes_order(c, nondata);
++ if (err)
++ return err;
++ return 0;
++}
++
++/**
++ * move_node - move a node.
++ * @c: UBIFS file-system description object
++ * @sleb: describes the LEB to move nodes from
++ * @snod: the node to move
++ * @wbuf: write-buffer to move node to
++ *
++ * This function moves node @snod to @wbuf, changes TNC correspondingly, and
++ * destroys @snod. Returns zero in case of success and a negative error code in
++ * case of failure.
++ */
++static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
++ struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
++{
++ int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
++
++ cond_resched();
++ err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
++ if (err)
++ return err;
++
++ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
++ snod->offs, new_lnum, new_offs,
++ snod->len);
++ list_del(&snod->list);
++ kfree(snod);
++ return err;
++}
++
++/**
++ * move_nodes - move nodes.
++ * @c: UBIFS file-system description object
++ * @sleb: describes the LEB to move nodes from
++ *
++ * This function moves valid nodes from data LEB described by @sleb to the GC
++ * journal head. This function returns zero in case of success, %-EAGAIN if
++ * commit is required, and other negative error codes in case of other
++ * failures.
++ */
++static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
++{
++ int err, min;
++ LIST_HEAD(nondata);
++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+
+ if (wbuf->lnum == -1) {
+ /*
+@@ -256,42 +358,59 @@ static int move_nodes(struct ubifs_info
+ */
+ err = switch_gc_head(c);
+ if (err)
+- goto out;
++ return err;
+ }
+
++ err = sort_nodes(c, sleb, &nondata, &min);
++ if (err)
++ goto out;
++
+ /* Write nodes to their new location. Use the first-fit strategy */
+ while (1) {
+- avail = c->leb_size - wbuf->offs - wbuf->used;
+- list_for_each_entry_safe(snod, tmp, &large, list) {
+- int new_lnum, new_offs;
++ int avail;
++ struct ubifs_scan_node *snod, *tmp;
++
++ /* Move data nodes */
++ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
++ avail = c->leb_size - wbuf->offs - wbuf->used;
++ if (snod->len > avail)
++ /*
++ * Do not skip data nodes in order to optimize
++ * bulk-read.
++ */
++ break;
++
++ err = move_node(c, sleb, snod, wbuf);
++ if (err)
++ goto out;
++ }
+
++ /* Move non-data nodes */
++ list_for_each_entry_safe(snod, tmp, &nondata, list) {
++ avail = c->leb_size - wbuf->offs - wbuf->used;
+ if (avail < min)
+ break;
+
+- if (snod->len > avail)
+- /* This node does not fit */
++ if (snod->len > avail) {
++ /*
++ * Keep going only if this is an inode with
++ * some data. Otherwise stop and switch the GC
++ * head. IOW, we assume that data-less inode
++ * nodes and direntry nodes are roughly of the
++ * same size.
++ */
++ if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
++ snod->len == UBIFS_INO_NODE_SZ)
++ break;
+ continue;
++ }
+
+- cond_resched();
+-
+- new_lnum = wbuf->lnum;
+- new_offs = wbuf->offs + wbuf->used;
+- err = ubifs_wbuf_write_nolock(wbuf, snod->node,
+- snod->len);
++ err = move_node(c, sleb, snod, wbuf);
+ if (err)
+ goto out;
+- err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
+- snod->offs, new_lnum, new_offs,
+- snod->len);
+- if (err)
+- goto out;
+-
+- avail = c->leb_size - wbuf->offs - wbuf->used;
+- list_del(&snod->list);
+- kfree(snod);
+ }
+
+- if (list_empty(&large))
++ if (list_empty(&sleb->nodes) && list_empty(&nondata))
+ break;
+
+ /*
+@@ -306,10 +425,7 @@ static int move_nodes(struct ubifs_info
+ return 0;
+
+ out:
+- list_for_each_entry_safe(snod, tmp, &large, list) {
+- list_del(&snod->list);
+- kfree(snod);
+- }
++ list_splice_tail(&nondata, &sleb->nodes);
+ return err;
+ }
+
+@@ -361,11 +477,42 @@ int ubifs_garbage_collect_leb(struct ubi
+ ubifs_assert(c->gc_lnum != lnum);
+ ubifs_assert(wbuf->lnum != lnum);
+
++ if (lp->free + lp->dirty == c->leb_size) {
++ /* Special case - a free LEB */
++ dbg_gc("LEB %d is free, return it", lp->lnum);
++ ubifs_assert(!(lp->flags & LPROPS_INDEX));
++
++ if (lp->free != c->leb_size) {
++ /*
++ * Write buffers must be sync'd before unmapping
++ * freeable LEBs, because one of them may contain data
++ * which obsoletes something in 'lp->pnum'.
++ */
++ err = gc_sync_wbufs(c);
++ if (err)
++ return err;
++ err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
++ 0, 0, 0, 0);
++ if (err)
++ return err;
++ }
++ err = ubifs_leb_unmap(c, lp->lnum);
++ if (err)
++ return err;
++
++ if (c->gc_lnum == -1) {
++ c->gc_lnum = lnum;
++ return LEB_RETAINED;
++ }
++
++ return LEB_FREED;
++ }
++
+ /*
+ * We scan the entire LEB even though we only really need to scan up to
+ * (c->leb_size - lp->free).
+ */
+- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
+ if (IS_ERR(sleb))
+ return PTR_ERR(sleb);
+
+@@ -504,13 +651,14 @@ int ubifs_garbage_collect(struct ubifs_i
+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+
+ ubifs_assert_cmt_locked(c);
++ ubifs_assert(!c->ro_media && !c->ro_mount);
+
+ if (ubifs_gc_should_commit(c))
+ return -EAGAIN;
+
+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+
+- if (c->ro_media) {
++ if (c->ro_error) {
+ ret = -EROFS;
+ goto out_unlock;
+ }
+@@ -569,51 +717,18 @@ int ubifs_garbage_collect(struct ubifs_i
+ "(min. space %d)", lp.lnum, lp.free, lp.dirty,
+ lp.free + lp.dirty, min_space);
+
+- if (lp.free + lp.dirty == c->leb_size) {
+- /* An empty LEB was returned */
+- dbg_gc("LEB %d is free, return it", lp.lnum);
+- /*
+- * ubifs_find_dirty_leb() doesn't return freeable index
+- * LEBs.
+- */
+- ubifs_assert(!(lp.flags & LPROPS_INDEX));
+- if (lp.free != c->leb_size) {
+- /*
+- * Write buffers must be sync'd before
+- * unmapping freeable LEBs, because one of them
+- * may contain data which obsoletes something
+- * in 'lp.pnum'.
+- */
+- ret = gc_sync_wbufs(c);
+- if (ret)
+- goto out;
+- ret = ubifs_change_one_lp(c, lp.lnum,
+- c->leb_size, 0, 0, 0,
+- 0);
+- if (ret)
+- goto out;
+- }
+- ret = ubifs_leb_unmap(c, lp.lnum);
+- if (ret)
+- goto out;
+- ret = lp.lnum;
+- break;
+- }
+-
+ space_before = c->leb_size - wbuf->offs - wbuf->used;
+ if (wbuf->lnum == -1)
+ space_before = 0;
+
+ ret = ubifs_garbage_collect_leb(c, &lp);
+ if (ret < 0) {
+- if (ret == -EAGAIN || ret == -ENOSPC) {
++ if (ret == -EAGAIN) {
+ /*
+- * These codes are not errors, so we have to
+- * return the LEB to lprops. But if the
+- * 'ubifs_return_leb()' function fails, its
+- * failure code is propagated to the caller
+- * instead of the original '-EAGAIN' or
+- * '-ENOSPC'.
++ * This is not error, so we have to return the
++ * LEB to lprops. But if 'ubifs_return_leb()'
++ * fails, its failure code is propagated to the
++ * caller instead of the original '-EAGAIN'.
+ */
+ err = ubifs_return_leb(c, lp.lnum);
+ if (err)
+@@ -703,8 +818,8 @@ out_unlock:
+ out:
+ ubifs_assert(ret < 0);
+ ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
+- ubifs_ro_mode(c, ret);
+ ubifs_wbuf_sync_nolock(wbuf);
++ ubifs_ro_mode(c, ret);
+ mutex_unlock(&wbuf->io_mutex);
+ ubifs_return_leb(c, lp.lnum);
+ return ret;
+diff -uprN linux-2.6.28/fs/ubifs/io.c ubifs-v2.6.28/fs/ubifs/io.c
+--- linux-2.6.28/fs/ubifs/io.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/io.c 2011-06-15 14:22:09.000000000 -0400
+@@ -31,6 +31,26 @@
+ * buffer is full or when it is not used for some time (by timer). This is
+ * similar to the mechanism is used by JFFS2.
+ *
++ * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
++ * write size (@c->max_write_size). The latter is the maximum amount of bytes
++ * the underlying flash is able to program at a time, and writing in
++ * @c->max_write_size units should presumably be faster. Obviously,
++ * @c->min_io_size <= @c->max_write_size. Write-buffers are of
++ * @c->max_write_size bytes in size for maximum performance. However, when a
++ * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
++ * boundary) which contains data is written, not the whole write-buffer,
++ * because this is more space-efficient.
++ *
++ * This optimization adds a few complications to the code. Indeed, on the one
++ * hand, we want to write in optimal @c->max_write_size bytes chunks, which
++ * also means aligning writes at the @c->max_write_size bytes offsets. On the
++ * other hand, we do not want to waste space when synchronizing the write
++ * buffer, so during synchronization we writes in smaller chunks. And this makes
++ * the next write offset to be not aligned to @c->max_write_size bytes. So the
++ * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
++ * to @c->max_write_size bytes again. We do this by temporarily shrinking
++ * write-buffer size (@wbuf->size).
++ *
+ * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
+ * mutexes defined inside these objects. Since sometimes upper-level code
+ * has to lock the write-buffer (e.g. journal space reservation code), many
+@@ -46,8 +66,8 @@
+ * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
+ * uses padding nodes or padding bytes, if the padding node does not fit.
+ *
+- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
+- * every time they are read from the flash media.
++ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
++ * they are read from the flash media.
+ */
+
+ #include <linux/crc32.h>
+@@ -60,9 +80,10 @@
+ */
+ void ubifs_ro_mode(struct ubifs_info *c, int err)
+ {
+- if (!c->ro_media) {
+- c->ro_media = 1;
++ if (!c->ro_error) {
++ c->ro_error = 1;
+ c->no_chk_data_crc = 0;
++ c->vfs_sb->s_flags |= MS_RDONLY;
+ ubifs_warn("switched to read-only mode, error %d", err);
+ dbg_dump_stack();
+ }
+@@ -86,8 +107,12 @@ void ubifs_ro_mode(struct ubifs_info *c,
+ * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
+ * true, which is controlled by corresponding UBIFS mount option. However, if
+ * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
+- * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is
+- * ignored and CRC is checked.
++ * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
++ * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
++ * is checked. This is because during mounting or re-mounting from R/O mode to
++ * R/W mode we may read journal nodes (when replaying the journal or doing the
++ * recovery) and the journal nodes may potentially be corrupted, so checking is
++ * required.
+ *
+ * This function returns zero in case of success and %-EUCLEAN in case of bad
+ * CRC or magic.
+@@ -129,8 +154,8 @@ int ubifs_check_node(const struct ubifs_
+ node_len > c->ranges[type].max_len)
+ goto out_len;
+
+- if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc &&
+- c->no_chk_data_crc)
++ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
++ !c->remounting_rw && c->no_chk_data_crc)
+ return 0;
+
+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
+@@ -297,7 +322,7 @@ static enum hrtimer_restart wbuf_timer_c
+ {
+ struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
+
+- dbg_io("jhead %d", wbuf->jhead);
++ dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
+ wbuf->need_sync = 1;
+ wbuf->c->need_wbuf_sync = 1;
+ ubifs_wake_up_bgt(wbuf->c);
+@@ -314,7 +339,8 @@ static void new_wbuf_timer_nolock(struct
+
+ if (wbuf->no_timer)
+ return;
+- dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
++ dbg_io("set timer for jhead %s, %llu-%llu millisecs",
++ dbg_jhead(wbuf->jhead),
+ div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
+ div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
+ USEC_PER_SEC));
+@@ -340,41 +366,73 @@ static void cancel_wbuf_timer_nolock(str
+ *
+ * This function synchronizes write-buffer @buf and returns zero in case of
+ * success or a negative error code in case of failure.
++ *
++ * Note, although write-buffers are of @c->max_write_size, this function does
++ * not necessarily write all @c->max_write_size bytes to the flash. Instead,
++ * if the write-buffer is only partially filled with data, only the used part
++ * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
++ * This way we waste less space.
+ */
+ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
+ {
+ struct ubifs_info *c = wbuf->c;
+- int err, dirt;
++ int err, dirt, sync_len;
+
+ cancel_wbuf_timer_nolock(wbuf);
+ if (!wbuf->used || wbuf->lnum == -1)
+ /* Write-buffer is empty or not seeked */
+ return 0;
+
+- dbg_io("LEB %d:%d, %d bytes, jhead %d",
+- wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead);
+- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
++ dbg_io("LEB %d:%d, %d bytes, jhead %s",
++ wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
+ ubifs_assert(!(wbuf->avail & 7));
+- ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
++ ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
++ ubifs_assert(wbuf->size >= c->min_io_size);
++ ubifs_assert(wbuf->size <= c->max_write_size);
++ ubifs_assert(wbuf->size % c->min_io_size == 0);
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ if (c->leb_size - wbuf->offs >= c->max_write_size)
++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
+
+- if (c->ro_media)
++ if (c->ro_error)
+ return -EROFS;
+
+- ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
++ /*
++ * Do not write the whole write-buffer, but write only the minimum necessary
++ * amount of min. I/O units.
++ */
++ sync_len = ALIGN(wbuf->used, c->min_io_size);
++ dirt = sync_len - wbuf->used;
++ if (dirt)
++ ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
+ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+- c->min_io_size, wbuf->dtype);
++ sync_len, wbuf->dtype);
+ if (err) {
+ ubifs_err("cannot write %d bytes to LEB %d:%d",
+- c->min_io_size, wbuf->lnum, wbuf->offs);
++ sync_len, wbuf->lnum, wbuf->offs);
+ dbg_dump_stack();
+ return err;
+ }
+
+- dirt = wbuf->avail;
+-
+ spin_lock(&wbuf->lock);
+- wbuf->offs += c->min_io_size;
+- wbuf->avail = c->min_io_size;
++ wbuf->offs += sync_len;
++ /*
++ * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
++ * But our goal is to optimize writes and make sure we write in
++ * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
++ * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
++ * sure that @wbuf->offs + @wbuf->size is aligned to
++ * @c->max_write_size. This way we make sure that after next
++ * write-buffer flush we are again at the optimal offset (aligned to
++ * @c->max_write_size).
++ */
++ if (c->leb_size - wbuf->offs < c->max_write_size)
++ wbuf->size = c->leb_size - wbuf->offs;
++ else if (wbuf->offs & (c->max_write_size - 1))
++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
++ else
++ wbuf->size = c->max_write_size;
++ wbuf->avail = wbuf->size;
+ wbuf->used = 0;
+ wbuf->next_ino = 0;
+ spin_unlock(&wbuf->lock);
+@@ -393,31 +451,31 @@ int ubifs_wbuf_sync_nolock(struct ubifs_
+ * @dtype: data type
+ *
+ * This function targets the write-buffer to logical eraseblock @lnum:@offs.
+- * The write-buffer is synchronized if it is not empty. Returns zero in case of
+- * success and a negative error code in case of failure.
++ * The write-buffer has to be empty. Returns zero in case of success and a
++ * negative error code in case of failure.
+ */
+ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
+ int dtype)
+ {
+ const struct ubifs_info *c = wbuf->c;
+
+- dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead);
++ dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
+ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
+ ubifs_assert(offs >= 0 && offs <= c->leb_size);
+ ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
+ ubifs_assert(lnum != wbuf->lnum);
+-
+- if (wbuf->used > 0) {
+- int err = ubifs_wbuf_sync_nolock(wbuf);
+-
+- if (err)
+- return err;
+- }
++ ubifs_assert(wbuf->used == 0);
+
+ spin_lock(&wbuf->lock);
+ wbuf->lnum = lnum;
+ wbuf->offs = offs;
+- wbuf->avail = c->min_io_size;
++ if (c->leb_size - wbuf->offs < c->max_write_size)
++ wbuf->size = c->leb_size - wbuf->offs;
++ else if (wbuf->offs & (c->max_write_size - 1))
++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
++ else
++ wbuf->size = c->max_write_size;
++ wbuf->avail = wbuf->size;
+ wbuf->used = 0;
+ spin_unlock(&wbuf->lock);
+ wbuf->dtype = dtype;
+@@ -437,11 +495,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_inf
+ {
+ int err, i;
+
++ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (!c->need_wbuf_sync)
+ return 0;
+ c->need_wbuf_sync = 0;
+
+- if (c->ro_media) {
++ if (c->ro_error) {
+ err = -EROFS;
+ goto out_timers;
+ }
+@@ -496,8 +555,9 @@ out_timers:
+ *
+ * This function writes data to flash via write-buffer @wbuf. This means that
+ * the last piece of the node won't reach the flash media immediately if it
+- * does not take whole minimal I/O unit. Instead, the node will sit in RAM
+- * until the write-buffer is synchronized (e.g., by timer).
++ * does not take whole max. write unit (@c->max_write_size). Instead, the node
++ * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
++ * because more data are appended to the write-buffer).
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If the node cannot be written because there is no more
+@@ -506,16 +566,23 @@ out_timers:
+ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
+ {
+ struct ubifs_info *c = wbuf->c;
+- int err, written, n, aligned_len = ALIGN(len, 8), offs;
++ int err, written, n, aligned_len = ALIGN(len, 8);
+
+- dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len,
+- dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead,
+- wbuf->lnum, wbuf->offs + wbuf->used);
++ dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
++ dbg_ntype(((struct ubifs_ch *)buf)->node_type),
++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
+ ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
+ ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
+ ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
+- ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
++ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
++ ubifs_assert(wbuf->size >= c->min_io_size);
++ ubifs_assert(wbuf->size <= c->max_write_size);
++ ubifs_assert(wbuf->size % c->min_io_size == 0);
+ ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ ubifs_assert(!c->space_fixup);
++ if (c->leb_size - wbuf->offs >= c->max_write_size)
++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
+
+ if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
+ err = -ENOSPC;
+@@ -524,7 +591,7 @@ int ubifs_wbuf_write_nolock(struct ubifs
+
+ cancel_wbuf_timer_nolock(wbuf);
+
+- if (c->ro_media)
++ if (c->ro_error)
+ return -EROFS;
+
+ if (aligned_len <= wbuf->avail) {
+@@ -535,17 +602,21 @@ int ubifs_wbuf_write_nolock(struct ubifs
+ memcpy(wbuf->buf + wbuf->used, buf, len);
+
+ if (aligned_len == wbuf->avail) {
+- dbg_io("flush jhead %d wbuf to LEB %d:%d",
+- wbuf->jhead, wbuf->lnum, wbuf->offs);
++ dbg_io("flush jhead %s wbuf to LEB %d:%d",
++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
+ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
+- wbuf->offs, c->min_io_size,
++ wbuf->offs, wbuf->size,
+ wbuf->dtype);
+ if (err)
+ goto out;
+
+ spin_lock(&wbuf->lock);
+- wbuf->offs += c->min_io_size;
+- wbuf->avail = c->min_io_size;
++ wbuf->offs += wbuf->size;
++ if (c->leb_size - wbuf->offs >= c->max_write_size)
++ wbuf->size = c->max_write_size;
++ else
++ wbuf->size = c->leb_size - wbuf->offs;
++ wbuf->avail = wbuf->size;
+ wbuf->used = 0;
+ wbuf->next_ino = 0;
+ spin_unlock(&wbuf->lock);
+@@ -559,39 +630,63 @@ int ubifs_wbuf_write_nolock(struct ubifs
+ goto exit;
+ }
+
+- /*
+- * The node is large enough and does not fit entirely within current
+- * minimal I/O unit. We have to fill and flush write-buffer and switch
+- * to the next min. I/O unit.
+- */
+- dbg_io("flush jhead %d wbuf to LEB %d:%d",
+- wbuf->jhead, wbuf->lnum, wbuf->offs);
+- memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
+- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+- c->min_io_size, wbuf->dtype);
+- if (err)
+- goto out;
++ written = 0;
+
+- offs = wbuf->offs + c->min_io_size;
+- len -= wbuf->avail;
+- aligned_len -= wbuf->avail;
+- written = wbuf->avail;
++ if (wbuf->used) {
++ /*
++ * The node is large enough and does not fit entirely within
++ * current available space. We have to fill and flush
++ * write-buffer and switch to the next max. write unit.
++ */
++ dbg_io("flush jhead %s wbuf to LEB %d:%d",
++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
++ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
++ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
++ wbuf->size, wbuf->dtype);
++ if (err)
++ goto out;
++
++ wbuf->offs += wbuf->size;
++ len -= wbuf->avail;
++ aligned_len -= wbuf->avail;
++ written += wbuf->avail;
++ } else if (wbuf->offs & (c->max_write_size - 1)) {
++ /*
++ * The write-buffer offset is not aligned to
++ * @c->max_write_size and @wbuf->size is less than
++ * @c->max_write_size. Write @wbuf->size bytes to make sure the
++ * following writes are done in optimal @c->max_write_size
++ * chunks.
++ */
++ dbg_io("write %d bytes to LEB %d:%d",
++ wbuf->size, wbuf->lnum, wbuf->offs);
++ err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
++ wbuf->size, wbuf->dtype);
++ if (err)
++ goto out;
++
++ wbuf->offs += wbuf->size;
++ len -= wbuf->size;
++ aligned_len -= wbuf->size;
++ written += wbuf->size;
++ }
+
+ /*
+- * The remaining data may take more whole min. I/O units, so write the
+- * remains multiple to min. I/O unit size directly to the flash media.
++ * The remaining data may take more whole max. write units, so write the
++ * remains multiple to max. write unit size directly to the flash media.
+ * We align node length to 8-byte boundary because we anyway flash wbuf
+ * if the remaining space is less than 8 bytes.
+ */
+- n = aligned_len >> c->min_io_shift;
++ n = aligned_len >> c->max_write_shift;
+ if (n) {
+- n <<= c->min_io_shift;
+- dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
+- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
+- wbuf->dtype);
++ n <<= c->max_write_shift;
++ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
++ wbuf->offs);
++ err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
++ wbuf->offs, n, wbuf->dtype);
+ if (err)
+ goto out;
+- offs += n;
++ wbuf->offs += n;
+ aligned_len -= n;
+ len -= n;
+ written += n;
+@@ -601,14 +696,17 @@ int ubifs_wbuf_write_nolock(struct ubifs
+ if (aligned_len)
+ /*
+ * And now we have what's left and what does not take whole
+- * min. I/O unit, so write it to the write-buffer and we are
++ * max. write unit, so write it to the write-buffer and we are
+ * done.
+ */
+ memcpy(wbuf->buf, buf + written, len);
+
+- wbuf->offs = offs;
++ if (c->leb_size - wbuf->offs >= c->max_write_size)
++ wbuf->size = c->max_write_size;
++ else
++ wbuf->size = c->leb_size - wbuf->offs;
++ wbuf->avail = wbuf->size - aligned_len;
+ wbuf->used = aligned_len;
+- wbuf->avail = c->min_io_size - aligned_len;
+ wbuf->next_ino = 0;
+ spin_unlock(&wbuf->lock);
+
+@@ -660,8 +758,10 @@ int ubifs_write_node(struct ubifs_info *
+ buf_len);
+ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ ubifs_assert(!c->space_fixup);
+
+- if (c->ro_media)
++ if (c->ro_error)
+ return -EROFS;
+
+ ubifs_prepare_node(c, buf, len, 1);
+@@ -698,8 +798,8 @@ int ubifs_read_node_wbuf(struct ubifs_wb
+ int err, rlen, overlap;
+ struct ubifs_ch *ch = buf;
+
+- dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs,
+- dbg_ntype(type), len, wbuf->jhead);
++ dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
++ dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
+ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(!(offs & 7) && offs < c->leb_size);
+ ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
+@@ -812,7 +912,8 @@ int ubifs_read_node(const struct ubifs_i
+ return 0;
+
+ out:
+- ubifs_err("bad node at LEB %d:%d", lnum, offs);
++ ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
++ ubi_is_mapped(c->ubi, lnum));
+ dbg_dump_node(c, buf);
+ dbg_dump_stack();
+ return -EINVAL;
+@@ -830,11 +931,11 @@ int ubifs_wbuf_init(struct ubifs_info *c
+ {
+ size_t size;
+
+- wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
++ wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
+ if (!wbuf->buf)
+ return -ENOMEM;
+
+- size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
++ size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
+ wbuf->inodes = kmalloc(size, GFP_KERNEL);
+ if (!wbuf->inodes) {
+ kfree(wbuf->buf);
+@@ -844,7 +945,14 @@ int ubifs_wbuf_init(struct ubifs_info *c
+
+ wbuf->used = 0;
+ wbuf->lnum = wbuf->offs = -1;
+- wbuf->avail = c->min_io_size;
++ /*
++ * If the LEB starts at the max. write size aligned address, then
++ * write-buffer size has to be set to @c->max_write_size. Otherwise,
++ * set it to something smaller so that it ends at the closest max.
++ * write size boundary.
++ */
++ size = c->max_write_size - (c->leb_start % c->max_write_size);
++ wbuf->avail = wbuf->size = size;
+ wbuf->dtype = UBI_UNKNOWN;
+ wbuf->sync_callback = NULL;
+ mutex_init(&wbuf->io_mutex);
+diff -uprN linux-2.6.28/fs/ubifs/journal.c ubifs-v2.6.28/fs/ubifs/journal.c
+--- linux-2.6.28/fs/ubifs/journal.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/journal.c 2011-06-15 14:22:09.000000000 -0400
+@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused
+ */
+ static int reserve_space(struct ubifs_info *c, int jhead, int len)
+ {
+- int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
++ int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
+ struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
+
+ /*
+@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_in
+ * better to try to allocate space at the ends of eraseblocks. This is
+ * what the squeeze parameter does.
+ */
++ ubifs_assert(!c->ro_media && !c->ro_mount);
+ squeeze = (jhead == BASEHD);
+ again:
+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+
+- if (c->ro_media) {
++ if (c->ro_error) {
+ err = -EROFS;
+ goto out_unlock;
+ }
+@@ -139,16 +140,9 @@ again:
+ * Write buffer wasn't seek'ed or there is no enough space - look for an
+ * LEB with some empty space.
+ */
+- lnum = ubifs_find_free_space(c, len, &free, squeeze);
+- if (lnum >= 0) {
+- /* Found an LEB, add it to the journal head */
+- offs = c->leb_size - free;
+- err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
+- if (err)
+- goto out_return;
+- /* A new bud was successfully allocated and added to the log */
++ lnum = ubifs_find_free_space(c, len, &offs, squeeze);
++ if (lnum >= 0)
+ goto out;
+- }
+
+ err = lnum;
+ if (err != -ENOSPC)
+@@ -159,7 +153,7 @@ again:
+ * some. But the write-buffer mutex has to be unlocked because
+ * GC also takes it.
+ */
+- dbg_jnl("no free space jhead %d, run GC", jhead);
++ dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
+ mutex_unlock(&wbuf->io_mutex);
+
+ lnum = ubifs_garbage_collect(c, 0);
+@@ -174,7 +168,8 @@ again:
+ * because we dropped @wbuf->io_mutex, so try once
+ * again.
+ */
+- dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
++ dbg_jnl("GC couldn't make a free LEB for jhead %s",
++ dbg_jhead(jhead));
+ if (retries++ < 2) {
+ dbg_jnl("retry (%d)", retries);
+ goto again;
+@@ -185,13 +180,13 @@ again:
+ }
+
+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+- dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
++ dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
+ avail = c->leb_size - wbuf->offs - wbuf->used;
+
+ if (wbuf->lnum != -1 && avail >= len) {
+ /*
+ * Someone else has switched the journal head and we have
+- * enough space now. This happens when more then one process is
++ * enough space now. This happens when more than one process is
+ * trying to write to the same journal head at the same time.
+ */
+ dbg_jnl("return LEB %d back, already have LEB %d:%d",
+@@ -202,12 +197,23 @@ again:
+ return 0;
+ }
+
+- err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
+- if (err)
+- goto out_return;
+ offs = 0;
+
+ out:
++ /*
++ * Make sure we synchronize the write-buffer before we add the new bud
++ * to the log. Otherwise we may have a power cut after the log
++ * reference node for the last bud (@lnum) is written but before the
++ * write-buffer data are written to the next-to-last bud
++ * (@wbuf->lnum). And the effect would be that the recovery would see
++ * that there is corruption in the next-to-last bud.
++ */
++ err = ubifs_wbuf_sync_nolock(wbuf);
++ if (err)
++ goto out_return;
++ err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
++ if (err)
++ goto out_return;
+ err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
+ if (err)
+ goto out_unlock;
+@@ -256,7 +262,8 @@ static int write_node(struct ubifs_info
+ *lnum = c->jheads[jhead].wbuf.lnum;
+ *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
+
+- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
++ dbg_jnl("jhead %s, LEB %d:%d, len %d",
++ dbg_jhead(jhead), *lnum, *offs, len);
+ ubifs_prepare_node(c, node, len, 0);
+
+ return ubifs_wbuf_write_nolock(wbuf, node, len);
+@@ -286,7 +293,8 @@ static int write_head(struct ubifs_info
+
+ *lnum = c->jheads[jhead].wbuf.lnum;
+ *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
+- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
++ dbg_jnl("jhead %s, LEB %d:%d, len %d",
++ dbg_jhead(jhead), *lnum, *offs, len);
+
+ err = ubifs_wbuf_write_nolock(wbuf, buf, len);
+ if (err)
+@@ -377,10 +385,8 @@ out:
+ if (err == -ENOSPC) {
+ /* This are some budgeting problems, print useful information */
+ down_write(&c->commit_sem);
+- spin_lock(&c->space_lock);
+ dbg_dump_stack();
+- dbg_dump_budg(c);
+- spin_unlock(&c->space_lock);
++ dbg_dump_budg(c, &c->bi);
+ dbg_dump_lprops(c);
+ cmt_retries = dbg_check_lprops(c);
+ up_write(&c->commit_sem);
+@@ -469,10 +475,7 @@ static void pack_inode(struct ubifs_info
+ ino->flags = cpu_to_le32(ui->flags);
+ ino->size = cpu_to_le64(ui->ui_size);
+ ino->nlink = cpu_to_le32(inode->i_nlink);
+- if (ui->compr_type == UBIFS_COMPR_LZO999)
+- ino->compr_type = cpu_to_le16(UBIFS_COMPR_LZO);
+- else
+- ino->compr_type = cpu_to_le16(ui->compr_type);
++ ino->compr_type = cpu_to_le16(ui->compr_type);
+ ino->data_len = cpu_to_le32(ui->data_len);
+ ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt);
+ ino->xattr_size = cpu_to_le32(ui->xattr_size);
+@@ -666,6 +669,7 @@ out_free:
+
+ out_release:
+ release_head(c, BASEHD);
++ kfree(dent);
+ out_ro:
+ ubifs_ro_mode(c, err);
+ if (last_reference)
+@@ -690,7 +694,7 @@ int ubifs_jnl_write_data(struct ubifs_in
+ {
+ struct ubifs_data_node *data;
+ int err, lnum, offs, compr_type, out_len;
+- int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
++ int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
+ struct ubifs_inode *ui = ubifs_inode(inode);
+
+ dbg_jnl("ino %lu, blk %u, len %d, key %s",
+@@ -698,9 +702,19 @@ int ubifs_jnl_write_data(struct ubifs_in
+ DBGKEY(key));
+ ubifs_assert(len <= UBIFS_BLOCK_SIZE);
+
+- data = kmalloc(dlen, GFP_NOFS);
+- if (!data)
+- return -ENOMEM;
++ data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
++ if (!data) {
++ /*
++ * Fall-back to the write reserve buffer. Note, we might be
++ * currently on the memory reclaim path, when the kernel is
++ * trying to free some memory by writing out dirty pages. The
++ * write reserve buffer helps us to guarantee that we are
++ * always able to write the data.
++ */
++ allocated = 0;
++ mutex_lock(&c->write_reserve_mutex);
++ data = c->write_reserve_buf;
++ }
+
+ data->ch.node_type = UBIFS_DATA_NODE;
+ key_write(c, key, &data->key);
+@@ -736,7 +750,10 @@ int ubifs_jnl_write_data(struct ubifs_in
+ goto out_ro;
+
+ finish_reservation(c);
+- kfree(data);
++ if (!allocated)
++ mutex_unlock(&c->write_reserve_mutex);
++ else
++ kfree(data);
+ return 0;
+
+ out_release:
+@@ -745,7 +762,10 @@ out_ro:
+ ubifs_ro_mode(c, err);
+ finish_reservation(c);
+ out_free:
+- kfree(data);
++ if (!allocated)
++ mutex_unlock(&c->write_reserve_mutex);
++ else
++ kfree(data);
+ return err;
+ }
+
+@@ -1369,7 +1389,7 @@ out_ro:
+ * @host: host inode
+ *
+ * This function writes the updated version of an extended attribute inode and
+- * the host inode tho the journal (to the base head). The host inode is written
++ * the host inode to the journal (to the base head). The host inode is written
+ * after the extended attribute inode in order to guarantee that the extended
+ * attribute will be flushed when the inode is synchronized by 'fsync()' and
+ * consequently, the write-buffer is synchronized. This function returns zero
+diff -uprN linux-2.6.28/fs/ubifs/Kconfig ubifs-v2.6.28/fs/ubifs/Kconfig
+--- linux-2.6.28/fs/ubifs/Kconfig 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/Kconfig 2011-06-15 14:22:09.000000000 -0400
+@@ -40,33 +40,21 @@ config UBIFS_FS_ZLIB
+ depends on UBIFS_FS
+ default y
+ help
+- Zlib copresses better then LZO but it is slower. Say 'Y' if unsure.
++ Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
+
+ # Debugging-related stuff
+ config UBIFS_FS_DEBUG
+- bool "Enable debugging"
++ bool "Enable debugging support"
+ depends on UBIFS_FS
+ select DEBUG_FS
+- select KALLSYMS_ALL
++ select KALLSYMS
+ help
+- This option enables UBIFS debugging.
+-
+-config UBIFS_FS_DEBUG_MSG_LVL
+- int "Default message level (0 = no extra messages, 3 = lots)"
+- depends on UBIFS_FS_DEBUG
+- default "0"
+- help
+- This controls the amount of debugging messages produced by UBIFS.
+- If reporting bugs, please try to have available a full dump of the
+- messages at level 1 while the misbehaviour was occurring. Level 2
+- may become necessary if level 1 messages were not enough to find the
+- bug. Generally Level 3 should be avoided.
+-
+-config UBIFS_FS_DEBUG_CHKS
+- bool "Enable extra checks"
+- depends on UBIFS_FS_DEBUG
+- help
+- If extra checks are enabled UBIFS will check the consistency of its
+- internal data structures during operation. However, UBIFS performance
+- is dramatically slower when this option is selected especially if the
+- file system is large.
++ This option enables UBIFS debugging support. It makes sure various
++ assertions, self-checks, debugging messages and test modes are compiled
++ in (this all is compiled out otherwise). Assertions are light-weight
++ and this option also enables them. Self-checks, debugging messages and
++ test modes are switched off by default. Thus, it is safe and actually
++ recommended to have debugging support enabled, and it should not slow
++ down UBIFS. You can then further enable / disable individual debugging
++ features using UBIFS module parameters and the corresponding sysfs
++ interfaces.
+diff -uprN linux-2.6.28/fs/ubifs/key.h ubifs-v2.6.28/fs/ubifs/key.h
+--- linux-2.6.28/fs/ubifs/key.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/key.h 2011-06-15 14:22:09.000000000 -0400
+@@ -229,23 +229,6 @@ static inline void xent_key_init(const s
+ }
+
+ /**
+- * xent_key_init_hash - initialize extended attribute entry key without
+- * re-calculating hash function.
+- * @c: UBIFS file-system description object
+- * @key: key to initialize
+- * @inum: host inode number
+- * @hash: extended attribute entry name hash
+- */
+-static inline void xent_key_init_hash(const struct ubifs_info *c,
+- union ubifs_key *key, ino_t inum,
+- uint32_t hash)
+-{
+- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+- key->u32[0] = inum;
+- key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
+-}
+-
+-/**
+ * xent_key_init_flash - initialize on-flash extended attribute entry key.
+ * @c: UBIFS file-system description object
+ * @k: key to initialize
+@@ -295,22 +278,15 @@ static inline void data_key_init(const s
+ }
+
+ /**
+- * data_key_init_flash - initialize on-flash data key.
++ * highest_data_key - get the highest possible data key for an inode.
+ * @c: UBIFS file-system description object
+- * @k: key to initialize
++ * @key: key to initialize
+ * @inum: inode number
+- * @block: block number
+ */
+-static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
+- ino_t inum, unsigned int block)
++static inline void highest_data_key(const struct ubifs_info *c,
++ union ubifs_key *key, ino_t inum)
+ {
+- union ubifs_key *key = k;
+-
+- ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
+- key->j32[0] = cpu_to_le32(inum);
+- key->j32[1] = cpu_to_le32(block |
+- (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
+- memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
++ data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK);
+ }
+
+ /**
+@@ -330,6 +306,20 @@ static inline void trun_key_init(const s
+ }
+
+ /**
++ * invalid_key_init - initialize invalid node key.
++ * @c: UBIFS file-system description object
++ * @key: key to initialize
++ *
++ * This is a helper function which marks a @key object as invalid.
++ */
++static inline void invalid_key_init(const struct ubifs_info *c,
++ union ubifs_key *key)
++{
++ key->u32[0] = 0xDEADBEAF;
++ key->u32[1] = UBIFS_INVALID_KEY;
++}
++
++/**
+ * key_type - get key type.
+ * @c: UBIFS file-system description object
+ * @key: key to get type of
+@@ -381,8 +371,8 @@ static inline ino_t key_inum_flash(const
+ * @c: UBIFS file-system description object
+ * @key: the key to get hash from
+ */
+-static inline int key_hash(const struct ubifs_info *c,
+- const union ubifs_key *key)
++static inline uint32_t key_hash(const struct ubifs_info *c,
++ const union ubifs_key *key)
+ {
+ return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
+ }
+@@ -392,7 +382,7 @@ static inline int key_hash(const struct
+ * @c: UBIFS file-system description object
+ * @k: the key to get hash from
+ */
+-static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
++static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
+ {
+ const union ubifs_key *key = k;
+
+@@ -554,4 +544,5 @@ static inline unsigned long long key_max
+ return 0;
+ }
+ }
++
+ #endif /* !__UBIFS_KEY_H__ */
+diff -uprN linux-2.6.28/fs/ubifs/log.c ubifs-v2.6.28/fs/ubifs/log.c
+--- linux-2.6.28/fs/ubifs/log.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/log.c 2011-06-15 14:22:09.000000000 -0400
+@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct
+ }
+
+ /**
+- * next_log_lnum - switch to the next log LEB.
+- * @c: UBIFS file-system description object
+- * @lnum: current log LEB
+- */
+-static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
+-{
+- lnum += 1;
+- if (lnum > c->log_last)
+- lnum = UBIFS_LOG_LNUM;
+-
+- return lnum;
+-}
+-
+-/**
+ * empty_log_bytes - calculate amount of empty space in the log.
+ * @c: UBIFS file-system description object
+ */
+@@ -159,7 +145,7 @@ void ubifs_add_bud(struct ubifs_info *c,
+ jhead = &c->jheads[bud->jhead];
+ list_add_tail(&bud->list, &jhead->buds_list);
+ } else
+- ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
++ ubifs_assert(c->replaying && c->ro_mount);
+
+ /*
+ * Note, although this is a new bud, we anyway account this space now,
+@@ -169,28 +155,8 @@ void ubifs_add_bud(struct ubifs_info *c,
+ */
+ c->bud_bytes += c->leb_size - bud->start;
+
+- dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
+- bud->start, bud->jhead, c->bud_bytes);
+- spin_unlock(&c->buds_lock);
+-}
+-
+-/**
+- * ubifs_create_buds_lists - create journal head buds lists for remount rw.
+- * @c: UBIFS file-system description object
+- */
+-void ubifs_create_buds_lists(struct ubifs_info *c)
+-{
+- struct rb_node *p;
+-
+- spin_lock(&c->buds_lock);
+- p = rb_first(&c->buds);
+- while (p) {
+- struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
+- struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
+-
+- list_add_tail(&bud->list, &jhead->buds_list);
+- p = rb_next(p);
+- }
++ dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
++ bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
+ spin_unlock(&c->buds_lock);
+ }
+
+@@ -223,8 +189,8 @@ int ubifs_add_bud_to_log(struct ubifs_in
+ }
+
+ mutex_lock(&c->log_mutex);
+-
+- if (c->ro_media) {
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ if (c->ro_error) {
+ err = -EROFS;
+ goto out_unlock;
+ }
+@@ -239,7 +205,7 @@ int ubifs_add_bud_to_log(struct ubifs_in
+ }
+
+ /*
+- * Make sure the the amount of space in buds will not exceed
++ * Make sure the amount of space in buds will not exceed the
+ * 'c->max_bud_bytes' limit, because we want to guarantee mount time
+ * limits.
+ *
+@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_in
+ ref->jhead = cpu_to_le32(jhead);
+
+ if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
+- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ c->lhead_offs = 0;
+ }
+
+@@ -355,19 +321,18 @@ static void remove_buds(struct ubifs_inf
+ * heads (non-closed buds).
+ */
+ c->cmt_bud_bytes += wbuf->offs - bud->start;
+- dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
++ dbg_log("preserve %d:%d, jhead %s, bud bytes %d, "
+ "cmt_bud_bytes %lld", bud->lnum, bud->start,
+- bud->jhead, wbuf->offs - bud->start,
++ dbg_jhead(bud->jhead), wbuf->offs - bud->start,
+ c->cmt_bud_bytes);
+ bud->start = wbuf->offs;
+ } else {
+ c->cmt_bud_bytes += c->leb_size - bud->start;
+- dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
++ dbg_log("remove %d:%d, jhead %s, bud bytes %d, "
+ "cmt_bud_bytes %lld", bud->lnum, bud->start,
+- bud->jhead, c->leb_size - bud->start,
++ dbg_jhead(bud->jhead), c->leb_size - bud->start,
+ c->cmt_bud_bytes);
+ rb_erase(p1, &c->buds);
+- list_del(&bud->list);
+ /*
+ * If the commit does not finish, the recovery will need
+ * to replay the journal, in which case the old buds
+@@ -375,7 +340,7 @@ static void remove_buds(struct ubifs_inf
+ * commit i.e. do not allow them to be garbage
+ * collected.
+ */
+- list_add(&bud->list, &c->old_buds);
++ list_move(&bud->list, &c->old_buds);
+ }
+ }
+ spin_unlock(&c->buds_lock);
+@@ -430,7 +395,8 @@ int ubifs_log_start_commit(struct ubifs_
+ if (lnum == -1 || offs == c->leb_size)
+ continue;
+
+- dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
++ dbg_log("add ref to LEB %d:%d for jhead %s",
++ lnum, offs, dbg_jhead(i));
+ ref = buf + len;
+ ref->ch.node_type = UBIFS_REF_NODE;
+ ref->lnum = cpu_to_le32(lnum);
+@@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_
+
+ /* Switch to the next log LEB */
+ if (c->lhead_offs) {
+- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ c->lhead_offs = 0;
+ }
+
+@@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_
+
+ c->lhead_offs += len;
+ if (c->lhead_offs == c->leb_size) {
+- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ c->lhead_offs = 0;
+ }
+
+@@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_i
+ }
+ mutex_lock(&c->log_mutex);
+ for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
+- lnum = next_log_lnum(c, lnum)) {
++ lnum = ubifs_next_log_lnum(c, lnum)) {
+ dbg_log("unmap log LEB %d", lnum);
+ err = ubifs_leb_unmap(c, lnum);
+ if (err)
+@@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c
+ err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
+ if (err)
+ return err;
+- *lnum = next_log_lnum(c, *lnum);
++ *lnum = ubifs_next_log_lnum(c, *lnum);
+ *offs = 0;
+ }
+ memcpy(buf + *offs, node, len);
+@@ -696,7 +662,7 @@ int ubifs_consolidate_log(struct ubifs_i
+ lnum = c->ltail_lnum;
+ write_lnum = lnum;
+ while (1) {
+- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
+ if (IS_ERR(sleb)) {
+ err = PTR_ERR(sleb);
+ goto out_free;
+@@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_i
+ ubifs_scan_destroy(sleb);
+ if (lnum == c->lhead_lnum)
+ break;
+- lnum = next_log_lnum(c, lnum);
++ lnum = ubifs_next_log_lnum(c, lnum);
+ }
+ if (offs) {
+ int sz = ALIGN(offs, c->min_io_size);
+@@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_i
+ /* Unmap remaining LEBs */
+ lnum = write_lnum;
+ do {
+- lnum = next_log_lnum(c, lnum);
++ lnum = ubifs_next_log_lnum(c, lnum);
+ err = ubifs_leb_unmap(c, lnum);
+ if (err)
+ return err;
+diff -uprN linux-2.6.28/fs/ubifs/lprops.c ubifs-v2.6.28/fs/ubifs/lprops.c
+--- linux-2.6.28/fs/ubifs/lprops.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/lprops.c 2011-06-15 14:22:09.000000000 -0400
+@@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info
+ case LPROPS_FREE:
+ if (add_to_lpt_heap(c, lprops, cat))
+ break;
+- /* No more room on heap so make it uncategorized */
++ /* No more room on heap so make it un-categorized */
+ cat = LPROPS_UNCAT;
+ /* Fall through */
+ case LPROPS_UNCAT:
+@@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info
+ * @lprops: LEB properties
+ *
+ * A LEB may have fallen off of the bottom of a heap, and ended up as
+- * uncategorized even though it has enough space for us now. If that is the case
+- * this function will put the LEB back onto a heap.
++ * un-categorized even though it has enough space for us now. If that is the
++ * case this function will put the LEB back onto a heap.
+ */
+ void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
+ {
+@@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct
+ /**
+ * change_category - change LEB properties category.
+ * @c: UBIFS file-system description object
+- * @lprops: LEB properties to recategorize
++ * @lprops: LEB properties to re-categorize
+ *
+ * LEB properties are categorized to enable fast find operations. When the LEB
+- * properties change they must be recategorized.
++ * properties change they must be re-categorized.
+ */
+ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
+ {
+@@ -461,21 +461,18 @@ static void change_category(struct ubifs
+ }
+
+ /**
+- * calc_dark - calculate LEB dark space size.
++ * ubifs_calc_dark - calculate LEB dark space size.
+ * @c: the UBIFS file-system description object
+ * @spc: amount of free and dirty space in the LEB
+ *
+- * This function calculates amount of dark space in an LEB which has @spc bytes
+- * of free and dirty space. Returns the calculations result.
++ * This function calculates and returns amount of dark space in an LEB which
++ * has @spc bytes of free and dirty space.
+ *
+- * Dark space is the space which is not always usable - it depends on which
+- * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
+- * it is dark space, because it cannot fit a large data node. So UBIFS cannot
+- * count on this LEB and treat these 512 bytes as usable because it is not true
+- * if, for example, only big chunks of uncompressible data will be written to
+- * the FS.
++ * UBIFS is trying to account the space which might not be usable, and this
++ * space is called "dark space". For example, if an LEB has only %512 free
++ * bytes, it is dark space, because it cannot fit a large data node.
+ */
+-static int calc_dark(struct ubifs_info *c, int spc)
++int ubifs_calc_dark(const struct ubifs_info *c, int spc)
+ {
+ ubifs_assert(!(spc & 7));
+
+@@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_
+ * @free: new free space amount
+ * @dirty: new dirty space amount
+ * @flags: new flags
+- * @idx_gc_cnt: change to the count of idx_gc list
++ * @idx_gc_cnt: change to the count of @idx_gc list
+ *
+ * This function changes LEB properties (@free, @dirty or @flag). However, the
+ * property which has the %LPROPS_NC value is not changed. Returns a pointer to
+@@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_
+ {
+ /*
+ * This is the only function that is allowed to change lprops, so we
+- * discard the const qualifier.
++ * discard the "const" qualifier.
+ */
+ struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
+
+@@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_
+ if (old_spc < c->dead_wm)
+ c->lst.total_dead -= old_spc;
+ else
+- c->lst.total_dark -= calc_dark(c, old_spc);
++ c->lst.total_dark -= ubifs_calc_dark(c, old_spc);
+
+ c->lst.total_used -= c->leb_size - old_spc;
+ }
+@@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_
+ if (new_spc < c->dead_wm)
+ c->lst.total_dead += new_spc;
+ else
+- c->lst.total_dark += calc_dark(c, new_spc);
++ c->lst.total_dark += ubifs_calc_dark(c, new_spc);
+
+ c->lst.total_used += c->leb_size - new_spc;
+ }
+@@ -1010,21 +1007,11 @@ out:
+ }
+
+ /**
+- * struct scan_check_data - data provided to scan callback function.
+- * @lst: LEB properties statistics
+- * @err: error code
+- */
+-struct scan_check_data {
+- struct ubifs_lp_stats lst;
+- int err;
+-};
+-
+-/**
+ * scan_check_cb - scan callback.
+ * @c: the UBIFS file-system description object
+ * @lp: LEB properties to scan
+ * @in_tree: whether the LEB properties are in main memory
+- * @data: information passed to and from the caller of the scan
++ * @lst: lprops statistics to update
+ *
+ * This function returns a code that indicates whether the scan should continue
+ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
+@@ -1033,12 +1020,12 @@ struct scan_check_data {
+ */
+ static int scan_check_cb(struct ubifs_info *c,
+ const struct ubifs_lprops *lp, int in_tree,
+- struct scan_check_data *data)
++ struct ubifs_lp_stats *lst)
+ {
+ struct ubifs_scan_leb *sleb;
+ struct ubifs_scan_node *snod;
+- struct ubifs_lp_stats *lst = &data->lst;
+- int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;
++ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
++ void *buf = NULL;
+
+ cat = lp->flags & LPROPS_CAT_MASK;
+ if (cat != LPROPS_UNCAT) {
+@@ -1046,7 +1033,7 @@ static int scan_check_cb(struct ubifs_in
+ if (cat != (lp->flags & LPROPS_CAT_MASK)) {
+ ubifs_err("bad LEB category %d expected %d",
+ (lp->flags & LPROPS_CAT_MASK), cat);
+- goto out;
++ return -EINVAL;
+ }
+ }
+
+@@ -1080,7 +1067,7 @@ static int scan_check_cb(struct ubifs_in
+ }
+ if (!found) {
+ ubifs_err("bad LPT list (category %d)", cat);
+- goto out;
++ return -EINVAL;
+ }
+ }
+ }
+@@ -1092,36 +1079,40 @@ static int scan_check_cb(struct ubifs_in
+ if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
+ lp != heap->arr[lp->hpos]) {
+ ubifs_err("bad LPT heap (category %d)", cat);
+- goto out;
++ return -EINVAL;
+ }
+ }
+
+- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
++ if (!buf)
++ return -ENOMEM;
++
++ /*
++ * After an unclean unmount, empty and freeable LEBs
++ * may contain garbage - do not scan them.
++ */
++ if (lp->free == c->leb_size) {
++ lst->empty_lebs += 1;
++ lst->total_free += c->leb_size;
++ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
++ return LPT_SCAN_CONTINUE;
++ }
++ if (lp->free + lp->dirty == c->leb_size &&
++ !(lp->flags & LPROPS_INDEX)) {
++ lst->total_free += lp->free;
++ lst->total_dirty += lp->dirty;
++ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
++ return LPT_SCAN_CONTINUE;
++ }
++
++ sleb = ubifs_scan(c, lnum, 0, buf, 0);
+ if (IS_ERR(sleb)) {
+- /*
+- * After an unclean unmount, empty and freeable LEBs
+- * may contain garbage.
+- */
+- if (lp->free == c->leb_size) {
+- ubifs_err("scan errors were in empty LEB "
+- "- continuing checking");
+- lst->empty_lebs += 1;
+- lst->total_free += c->leb_size;
+- lst->total_dark += calc_dark(c, c->leb_size);
+- return LPT_SCAN_CONTINUE;
+- }
+-
+- if (lp->free + lp->dirty == c->leb_size &&
+- !(lp->flags & LPROPS_INDEX)) {
+- ubifs_err("scan errors were in freeable LEB "
+- "- continuing checking");
+- lst->total_free += lp->free;
+- lst->total_dirty += lp->dirty;
+- lst->total_dark += calc_dark(c, c->leb_size);
+- return LPT_SCAN_CONTINUE;
++ ret = PTR_ERR(sleb);
++ if (ret == -EUCLEAN) {
++ dbg_dump_lprops(c);
++ dbg_dump_budg(c, &c->bi);
+ }
+- data->err = PTR_ERR(sleb);
+- return LPT_SCAN_STOP;
++ goto out;
+ }
+
+ is_idx = -1;
+@@ -1235,10 +1226,11 @@ static int scan_check_cb(struct ubifs_in
+ if (spc < c->dead_wm)
+ lst->total_dead += spc;
+ else
+- lst->total_dark += calc_dark(c, spc);
++ lst->total_dark += ubifs_calc_dark(c, spc);
+ }
+
+ ubifs_scan_destroy(sleb);
++ vfree(buf);
+ return LPT_SCAN_CONTINUE;
+
+ out_print:
+@@ -1248,9 +1240,10 @@ out_print:
+ dbg_dump_leb(c, lnum);
+ out_destroy:
+ ubifs_scan_destroy(sleb);
++ ret = -EINVAL;
+ out:
+- data->err = -EINVAL;
+- return LPT_SCAN_STOP;
++ vfree(buf);
++ return ret;
+ }
+
+ /**
+@@ -1267,8 +1260,7 @@ out:
+ int dbg_check_lprops(struct ubifs_info *c)
+ {
+ int i, err;
+- struct scan_check_data data;
+- struct ubifs_lp_stats *lst = &data.lst;
++ struct ubifs_lp_stats lst;
+
+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ return 0;
+@@ -1283,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *
+ return err;
+ }
+
+- memset(lst, 0, sizeof(struct ubifs_lp_stats));
+-
+- data.err = 0;
++ memset(&lst, 0, sizeof(struct ubifs_lp_stats));
+ err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
+ (ubifs_lpt_scan_callback)scan_check_cb,
+- &data);
++ &lst);
+ if (err && err != -ENOSPC)
+ goto out;
+- if (data.err) {
+- err = data.err;
+- goto out;
+- }
+
+- if (lst->empty_lebs != c->lst.empty_lebs ||
+- lst->idx_lebs != c->lst.idx_lebs ||
+- lst->total_free != c->lst.total_free ||
+- lst->total_dirty != c->lst.total_dirty ||
+- lst->total_used != c->lst.total_used) {
++ if (lst.empty_lebs != c->lst.empty_lebs ||
++ lst.idx_lebs != c->lst.idx_lebs ||
++ lst.total_free != c->lst.total_free ||
++ lst.total_dirty != c->lst.total_dirty ||
++ lst.total_used != c->lst.total_used) {
+ ubifs_err("bad overall accounting");
+ ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
+ "total_free %lld, total_dirty %lld, total_used %lld",
+- lst->empty_lebs, lst->idx_lebs, lst->total_free,
+- lst->total_dirty, lst->total_used);
++ lst.empty_lebs, lst.idx_lebs, lst.total_free,
++ lst.total_dirty, lst.total_used);
+ ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
+ "total_free %lld, total_dirty %lld, total_used %lld",
+ c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
+@@ -1314,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *
+ goto out;
+ }
+
+- if (lst->total_dead != c->lst.total_dead ||
+- lst->total_dark != c->lst.total_dark) {
++ if (lst.total_dead != c->lst.total_dead ||
++ lst.total_dark != c->lst.total_dark) {
+ ubifs_err("bad dead/dark space accounting");
+ ubifs_err("calculated: total_dead %lld, total_dark %lld",
+- lst->total_dead, lst->total_dark);
++ lst.total_dead, lst.total_dark);
+ ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
+ c->lst.total_dead, c->lst.total_dark);
+ err = -EINVAL;
+diff -uprN linux-2.6.28/fs/ubifs/lpt.c ubifs-v2.6.28/fs/ubifs/lpt.c
+--- linux-2.6.28/fs/ubifs/lpt.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/lpt.c 2011-06-15 14:22:09.000000000 -0400
+@@ -1269,10 +1269,9 @@ static int read_pnode(struct ubifs_info
+ lnum = branch->lnum;
+ offs = branch->offs;
+ pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
+- if (!pnode) {
+- err = -ENOMEM;
+- goto out;
+- }
++ if (!pnode)
++ return -ENOMEM;
++
+ if (lnum == 0) {
+ /*
+ * This pnode was not written which just means that the LEB
+@@ -1362,6 +1361,7 @@ static int read_lsave(struct ubifs_info
+ goto out;
+ for (i = 0; i < c->lsave_cnt; i++) {
+ int lnum = c->lsave[i];
++ struct ubifs_lprops *lprops;
+
+ /*
+ * Due to automatic resizing, the values in the lsave table
+@@ -1369,7 +1369,11 @@ static int read_lsave(struct ubifs_info
+ */
+ if (lnum >= c->leb_cnt)
+ continue;
+- ubifs_lpt_lookup(c, lnum);
++ lprops = ubifs_lpt_lookup(c, lnum);
++ if (IS_ERR(lprops)) {
++ err = PTR_ERR(lprops);
++ goto out;
++ }
+ }
+ out:
+ vfree(buf);
+@@ -1456,13 +1460,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(st
+ shft -= UBIFS_LPT_FANOUT_SHIFT;
+ nnode = ubifs_get_nnode(c, nnode, iip);
+ if (IS_ERR(nnode))
+- return ERR_PTR(PTR_ERR(nnode));
++ return ERR_CAST(nnode);
+ }
+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+ shft -= UBIFS_LPT_FANOUT_SHIFT;
+ pnode = ubifs_get_pnode(c, nnode, iip);
+ if (IS_ERR(pnode))
+- return ERR_PTR(PTR_ERR(pnode));
++ return ERR_CAST(pnode);
+ iip = (i & (UBIFS_LPT_FANOUT - 1));
+ dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
+ pnode->lprops[iip].free, pnode->lprops[iip].dirty,
+@@ -1585,7 +1589,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_di
+ nnode = c->nroot;
+ nnode = dirty_cow_nnode(c, nnode);
+ if (IS_ERR(nnode))
+- return ERR_PTR(PTR_ERR(nnode));
++ return ERR_CAST(nnode);
+ i = lnum - c->main_first;
+ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
+ for (h = 1; h < c->lpt_hght; h++) {
+@@ -1593,19 +1597,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_di
+ shft -= UBIFS_LPT_FANOUT_SHIFT;
+ nnode = ubifs_get_nnode(c, nnode, iip);
+ if (IS_ERR(nnode))
+- return ERR_PTR(PTR_ERR(nnode));
++ return ERR_CAST(nnode);
+ nnode = dirty_cow_nnode(c, nnode);
+ if (IS_ERR(nnode))
+- return ERR_PTR(PTR_ERR(nnode));
++ return ERR_CAST(nnode);
+ }
+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+ shft -= UBIFS_LPT_FANOUT_SHIFT;
+ pnode = ubifs_get_pnode(c, nnode, iip);
+ if (IS_ERR(pnode))
+- return ERR_PTR(PTR_ERR(pnode));
++ return ERR_CAST(pnode);
+ pnode = dirty_cow_pnode(c, pnode);
+ if (IS_ERR(pnode))
+- return ERR_PTR(PTR_ERR(pnode));
++ return ERR_CAST(pnode);
+ iip = (i & (UBIFS_LPT_FANOUT - 1));
+ dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
+ pnode->lprops[iip].free, pnode->lprops[iip].dirty,
+diff -uprN linux-2.6.28/fs/ubifs/lpt_commit.c ubifs-v2.6.28/fs/ubifs/lpt_commit.c
+--- linux-2.6.28/fs/ubifs/lpt_commit.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/lpt_commit.c 2011-06-15 14:22:09.000000000 -0400
+@@ -28,6 +28,12 @@
+ #include <linux/crc16.h>
+ #include "ubifs.h"
+
++#ifdef CONFIG_UBIFS_FS_DEBUG
++static int dbg_populate_lsave(struct ubifs_info *c);
++#else
++#define dbg_populate_lsave(c) 0
++#endif
++
+ /**
+ * first_dirty_cnode - find first dirty cnode.
+ * @c: UBIFS file-system description object
+@@ -585,7 +591,7 @@ static struct ubifs_pnode *next_pnode_to
+ if (nnode->nbranch[iip].lnum)
+ break;
+ }
+- } while (iip >= UBIFS_LPT_FANOUT);
++ } while (iip >= UBIFS_LPT_FANOUT);
+
+ /* Go right */
+ nnode = ubifs_get_nnode(c, nnode, iip);
+@@ -645,7 +651,7 @@ static struct ubifs_pnode *pnode_lookup(
+ shft -= UBIFS_LPT_FANOUT_SHIFT;
+ nnode = ubifs_get_nnode(c, nnode, iip);
+ if (IS_ERR(nnode))
+- return ERR_PTR(PTR_ERR(nnode));
++ return ERR_CAST(nnode);
+ }
+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+ return ubifs_get_pnode(c, nnode, iip);
+@@ -704,6 +710,9 @@ static int make_tree_dirty(struct ubifs_
+ struct ubifs_pnode *pnode;
+
+ pnode = pnode_lookup(c, 0);
++ if (IS_ERR(pnode))
++ return PTR_ERR(pnode);
++
+ while (pnode) {
+ do_make_pnode_dirty(c, pnode);
+ pnode = next_pnode_to_dirty(c, pnode);
+@@ -811,6 +820,10 @@ static void populate_lsave(struct ubifs_
+ c->lpt_drty_flgs |= LSAVE_DIRTY;
+ ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
+ }
++
++ if (dbg_populate_lsave(c))
++ return;
++
+ list_for_each_entry(lprops, &c->empty_list, list) {
+ c->lsave[cnt++] = lprops->lnum;
+ if (cnt >= c->lsave_cnt)
+@@ -1624,29 +1637,35 @@ static int dbg_check_ltab_lnum(struct ub
+ {
+ int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
+ int ret;
+- void *buf = c->dbg->buf;
++ void *buf, *p;
+
+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ return 0;
+
++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
++ if (!buf) {
++ ubifs_err("cannot allocate memory for ltab checking");
++ return 0;
++ }
++
+ dbg_lp("LEB %d", lnum);
+ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
+ if (err) {
+ dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
+- return err;
++ goto out;
+ }
+ while (1) {
+- if (!is_a_node(c, buf, len)) {
++ if (!is_a_node(c, p, len)) {
+ int i, pad_len;
+
+- pad_len = get_pad_len(c, buf, len);
++ pad_len = get_pad_len(c, p, len);
+ if (pad_len) {
+- buf += pad_len;
++ p += pad_len;
+ len -= pad_len;
+ dirty += pad_len;
+ continue;
+ }
+- if (!dbg_is_all_ff(buf, len)) {
++ if (!dbg_is_all_ff(p, len)) {
+ dbg_msg("invalid empty space in LEB %d at %d",
+ lnum, c->leb_size - len);
+ err = -EINVAL;
+@@ -1664,16 +1683,21 @@ static int dbg_check_ltab_lnum(struct ub
+ lnum, dirty, c->ltab[i].dirty);
+ err = -EINVAL;
+ }
+- return err;
++ goto out;
+ }
+- node_type = get_lpt_node_type(c, buf, &node_num);
++ node_type = get_lpt_node_type(c, p, &node_num);
+ node_len = get_lpt_node_len(c, node_type);
+ ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
+ if (ret == 1)
+ dirty += node_len;
+- buf += node_len;
++ p += node_len;
+ len -= node_len;
+ }
++
++ err = 0;
++out:
++ vfree(buf);
++ return err;
+ }
+
+ /**
+@@ -1866,25 +1890,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c,
+ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
+ {
+ int err, len = c->leb_size, node_type, node_num, node_len, offs;
+- void *buf = c->dbg->buf;
++ void *buf, *p;
+
+ printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
+ current->pid, lnum);
++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
++ if (!buf) {
++ ubifs_err("cannot allocate memory to dump LPT");
++ return;
++ }
++
+ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
+ if (err) {
+ ubifs_err("cannot read LEB %d, error %d", lnum, err);
+- return;
++ goto out;
+ }
+ while (1) {
+ offs = c->leb_size - len;
+- if (!is_a_node(c, buf, len)) {
++ if (!is_a_node(c, p, len)) {
+ int pad_len;
+
+- pad_len = get_pad_len(c, buf, len);
++ pad_len = get_pad_len(c, p, len);
+ if (pad_len) {
+ printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
+ lnum, offs, pad_len);
+- buf += pad_len;
++ p += pad_len;
+ len -= pad_len;
+ continue;
+ }
+@@ -1894,7 +1924,7 @@ static void dump_lpt_leb(const struct ub
+ break;
+ }
+
+- node_type = get_lpt_node_type(c, buf, &node_num);
++ node_type = get_lpt_node_type(c, p, &node_num);
+ switch (node_type) {
+ case UBIFS_LPT_PNODE:
+ {
+@@ -1919,14 +1949,14 @@ static void dump_lpt_leb(const struct ub
+ else
+ printk(KERN_DEBUG "LEB %d:%d, nnode, ",
+ lnum, offs);
+- err = ubifs_unpack_nnode(c, buf, &nnode);
++ err = ubifs_unpack_nnode(c, p, &nnode);
+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+- printk("%d:%d", nnode.nbranch[i].lnum,
++ printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
+ nnode.nbranch[i].offs);
+ if (i != UBIFS_LPT_FANOUT - 1)
+- printk(", ");
++ printk(KERN_CONT ", ");
+ }
+- printk("\n");
++ printk(KERN_CONT "\n");
+ break;
+ }
+ case UBIFS_LPT_LTAB:
+@@ -1940,15 +1970,18 @@ static void dump_lpt_leb(const struct ub
+ break;
+ default:
+ ubifs_err("LPT node type %d not recognized", node_type);
+- return;
++ goto out;
+ }
+
+- buf += node_len;
++ p += node_len;
+ len -= node_len;
+ }
+
+ printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
+ current->pid, lnum);
++out:
++ vfree(buf);
++ return;
+ }
+
+ /**
+@@ -1970,4 +2003,47 @@ void dbg_dump_lpt_lebs(const struct ubif
+ current->pid);
+ }
+
++/**
++ * dbg_populate_lsave - debugging version of 'populate_lsave()'
++ * @c: UBIFS file-system description object
++ *
++ * This is a debugging version for 'populate_lsave()' which populates lsave
++ * with random LEBs instead of useful LEBs, which is good for test coverage.
++ * Returns zero if lsave has not been populated (this debugging feature is
++ * disabled) and non-zero if lsave has been populated.
++ */
++static int dbg_populate_lsave(struct ubifs_info *c)
++{
++ struct ubifs_lprops *lprops;
++ struct ubifs_lpt_heap *heap;
++ int i;
++
++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
++ return 0;
++ if (random32() & 3)
++ return 0;
++
++ for (i = 0; i < c->lsave_cnt; i++)
++ c->lsave[i] = c->main_first;
++
++ list_for_each_entry(lprops, &c->empty_list, list)
++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
++ list_for_each_entry(lprops, &c->freeable_list, list)
++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
++ list_for_each_entry(lprops, &c->frdi_idx_list, list)
++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
++
++ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
++ for (i = 0; i < heap->cnt; i++)
++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
++ heap = &c->lpt_heap[LPROPS_DIRTY - 1];
++ for (i = 0; i < heap->cnt; i++)
++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
++ heap = &c->lpt_heap[LPROPS_FREE - 1];
++ for (i = 0; i < heap->cnt; i++)
++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
++
++ return 1;
++}
++
+ #endif /* CONFIG_UBIFS_FS_DEBUG */
+diff -uprN linux-2.6.28/fs/ubifs/master.c ubifs-v2.6.28/fs/ubifs/master.c
+--- linux-2.6.28/fs/ubifs/master.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/master.c 2011-06-15 14:22:09.000000000 -0400
+@@ -29,7 +29,8 @@
+ * @c: UBIFS file-system description object
+ *
+ * This function scans the master node LEBs and search for the latest master
+- * node. Returns zero in case of success and a negative error code in case of
++ * node. Returns zero in case of success, %-EUCLEAN if the master area is
++ * corrupted and requires recovery, and a negative error code in case of
+ * failure.
+ */
+ static int scan_for_master(struct ubifs_info *c)
+@@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_
+
+ lnum = UBIFS_MST_LNUM;
+
+- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
+ if (IS_ERR(sleb))
+ return PTR_ERR(sleb);
+ nodes_cnt = sleb->nodes_cnt;
+@@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_
+ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
+ list);
+ if (snod->type != UBIFS_MST_NODE)
+- goto out;
++ goto out_dump;
+ memcpy(c->mst_node, snod->node, snod->len);
+ offs = snod->offs;
+ }
+@@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_
+
+ lnum += 1;
+
+- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
+ if (IS_ERR(sleb))
+ return PTR_ERR(sleb);
+ if (sleb->nodes_cnt != nodes_cnt)
+@@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_
+ goto out;
+ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
+ if (snod->type != UBIFS_MST_NODE)
+- goto out;
++ goto out_dump;
+ if (snod->offs != offs)
+ goto out;
+ if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
+@@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_
+
+ out:
+ ubifs_scan_destroy(sleb);
++ return -EUCLEAN;
++
++out_dump:
++ ubifs_err("unexpected node type %d master LEB %d:%d",
++ snod->type, lnum, snod->offs);
++ ubifs_scan_destroy(sleb);
+ return -EINVAL;
+ }
+
+@@ -141,7 +148,7 @@ static int validate_master(const struct
+ }
+
+ main_sz = (long long)c->main_lebs * c->leb_size;
+- if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
++ if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
+ err = 9;
+ goto out;
+ }
+@@ -211,7 +218,7 @@ static int validate_master(const struct
+ }
+
+ if (c->lst.total_dead + c->lst.total_dark +
+- c->lst.total_used + c->old_idx_sz > main_sz) {
++ c->lst.total_used + c->bi.old_idx_sz > main_sz) {
+ err = 21;
+ goto out;
+ }
+@@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info
+
+ err = scan_for_master(c);
+ if (err) {
+- err = ubifs_recover_master_node(c);
++ if (err == -EUCLEAN)
++ err = ubifs_recover_master_node(c);
+ if (err)
+ /*
+ * Note, we do not free 'c->mst_node' here because the
+@@ -278,7 +286,7 @@ int ubifs_read_master(struct ubifs_info
+ c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
+ c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
+ c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
+- c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
++ c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
+ c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
+ c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
+ c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
+@@ -297,7 +305,7 @@ int ubifs_read_master(struct ubifs_info
+ c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
+ c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
+
+- c->calc_idx_sz = c->old_idx_sz;
++ c->calc_idx_sz = c->bi.old_idx_sz;
+
+ if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
+ c->no_orphs = 1;
+@@ -353,7 +361,8 @@ int ubifs_write_master(struct ubifs_info
+ {
+ int err, lnum, offs, len;
+
+- if (c->ro_media)
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ if (c->ro_error)
+ return -EROFS;
+
+ lnum = UBIFS_MST_LNUM;
+diff -uprN linux-2.6.28/fs/ubifs/misc.h ubifs-v2.6.28/fs/ubifs/misc.h
+--- linux-2.6.28/fs/ubifs/misc.h 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/misc.h 2011-06-15 14:22:09.000000000 -0400
+@@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const
+ {
+ int err;
+
+- if (c->ro_media)
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ if (c->ro_error)
+ return -EROFS;
+ err = ubi_leb_unmap(c->ubi, lnum);
+ if (err) {
+@@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const
+ {
+ int err;
+
+- if (c->ro_media)
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ if (c->ro_error)
+ return -EROFS;
+ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
+ if (err) {
+@@ -186,7 +188,8 @@ static inline int ubifs_leb_change(const
+ {
+ int err;
+
+- if (c->ro_media)
++ ubifs_assert(!c->ro_media && !c->ro_mount);
++ if (c->ro_error)
+ return -EROFS;
+ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
+ if (err) {
+@@ -337,4 +340,21 @@ static inline void ubifs_release_lprops(
+ mutex_unlock(&c->lp_mutex);
+ }
+
++/**
++ * ubifs_next_log_lnum - switch to the next log LEB.
++ * @c: UBIFS file-system description object
++ * @lnum: current log LEB
++ *
++ * This helper function returns the log LEB number which goes next after LEB
++ * 'lnum'.
++ */
++static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
++{
++ lnum += 1;
++ if (lnum > c->log_last)
++ lnum = UBIFS_LOG_LNUM;
++
++ return lnum;
++}
++
+ #endif /* __UBIFS_MISC_H__ */
+diff -uprN linux-2.6.28/fs/ubifs/orphan.c ubifs-v2.6.28/fs/ubifs/orphan.c
+--- linux-2.6.28/fs/ubifs/orphan.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/orphan.c 2011-06-15 14:22:09.000000000 -0400
+@@ -670,9 +670,11 @@ static int kill_orphans(struct ubifs_inf
+ struct ubifs_scan_leb *sleb;
+
+ dbg_rcvry("LEB %d", lnum);
+- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
+ if (IS_ERR(sleb)) {
+- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
++ if (PTR_ERR(sleb) == -EUCLEAN)
++ sleb = ubifs_recover_leb(c, lnum, 0,
++ c->sbuf, -1);
+ if (IS_ERR(sleb)) {
+ err = PTR_ERR(sleb);
+ break;
+@@ -891,15 +893,22 @@ static int dbg_read_orphans(struct check
+ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
+ {
+ int lnum, err = 0;
++ void *buf;
+
+ /* Check no-orphans flag and skip this if no orphans */
+ if (c->no_orphs)
+ return 0;
+
++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
++ if (!buf) {
++ ubifs_err("cannot allocate memory to check orphans");
++ return 0;
++ }
++
+ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
+ struct ubifs_scan_leb *sleb;
+
+- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
++ sleb = ubifs_scan(c, lnum, 0, buf, 0);
+ if (IS_ERR(sleb)) {
+ err = PTR_ERR(sleb);
+ break;
+@@ -911,6 +920,7 @@ static int dbg_scan_orphans(struct ubifs
+ break;
+ }
+
++ vfree(buf);
+ return err;
+ }
+
+diff -uprN linux-2.6.28/fs/ubifs/recovery.c ubifs-v2.6.28/fs/ubifs/recovery.c
+--- linux-2.6.28/fs/ubifs/recovery.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/recovery.c 2011-06-15 14:22:09.000000000 -0400
+@@ -24,10 +24,27 @@
+ * This file implements functions needed to recover from unclean un-mounts.
+ * When UBIFS is mounted, it checks a flag on the master node to determine if
+ * an un-mount was completed sucessfully. If not, the process of mounting
+- * incorparates additional checking and fixing of on-flash data structures.
++ * incorporates additional checking and fixing of on-flash data structures.
+ * UBIFS always cleans away all remnants of an unclean un-mount, so that
+ * errors do not accumulate. However UBIFS defers recovery if it is mounted
+ * read-only, and the flash is not modified in that case.
++ *
++ * The general UBIFS approach to the recovery is that it recovers from
++ * corruptions which could be caused by power cuts, but it refuses to recover
++ * from corruption caused by other reasons. And UBIFS tries to distinguish
++ * between these 2 reasons of corruptions and silently recover in the former
++ * case and loudly complain in the latter case.
++ *
++ * UBIFS writes only to erased LEBs, so it writes only to the flash space
++ * containing only 0xFFs. UBIFS also always writes strictly from the beginning
++ * of the LEB to the end. And UBIFS assumes that the underlying flash media
++ * writes in @c->max_write_size bytes at a time.
++ *
++ * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
++ * I/O unit corresponding to offset X to contain corrupted data, all the
++ * following min. I/O units have to contain empty space (all 0xFFs). If this is
++ * not true, the corruption cannot be the result of a power cut, and UBIFS
++ * refuses to mount.
+ */
+
+ #include <linux/crc32.h>
+@@ -53,6 +70,25 @@ static int is_empty(void *buf, int len)
+ }
+
+ /**
++ * first_non_ff - find offset of the first non-0xff byte.
++ * @buf: buffer to search in
++ * @len: length of buffer
++ *
++ * This function returns offset of the first non-0xff byte in @buf or %-1 if
++ * the buffer contains only 0xff bytes.
++ */
++static int first_non_ff(void *buf, int len)
++{
++ uint8_t *p = buf;
++ int i;
++
++ for (i = 0; i < len; i++)
++ if (*p++ != 0xff)
++ return i;
++ return -1;
++}
++
++/**
+ * get_master_node - get the last valid master node allowing for corruption.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+@@ -267,12 +303,12 @@ int ubifs_recover_master_node(struct ubi
+ mst = mst2;
+ }
+
+- dbg_rcvry("recovered master node from LEB %d",
++ ubifs_msg("recovered master node from LEB %d",
+ (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
+
+ memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
+
+- if ((c->vfs_sb->s_flags & MS_RDONLY)) {
++ if (c->ro_mount) {
+ /* Read-only mode. Keep a copy for switching to rw mode */
+ c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
+ if (!c->rcvrd_mst_node) {
+@@ -280,6 +316,32 @@ int ubifs_recover_master_node(struct ubi
+ goto out_free;
+ }
+ memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
++
++ /*
++ * We had to recover the master node, which means there was an
++ * unclean reboot. However, it is possible that the master node
++ * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
++ * E.g., consider the following chain of events:
++ *
++ * 1. UBIFS was cleanly unmounted, so the master node is clean
++ * 2. UBIFS is being mounted R/W and starts changing the master
++ * node in the first (%UBIFS_MST_LNUM). A power cut happens,
++ * so this LEB ends up with some amount of garbage at the
++ * end.
++ * 3. UBIFS is being mounted R/O. We reach this place and
++ * recover the master node from the second LEB
++ * (%UBIFS_MST_LNUM + 1). But we cannot update the media
++ * because we are being mounted R/O. We have to defer the
++ * operation.
++ * 4. However, this master node (@c->mst_node) is marked as
++ * clean (since the step 1). And if we just return, the
++ * mount code will be confused and won't recover the master
++ * node when it is re-mounted R/W later.
++ *
++ * Thus, to force the recovery by marking the master node as
++ * dirty.
++ */
++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+ } else {
+ /* Write the recovered master node */
+ c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
+@@ -342,44 +404,23 @@ int ubifs_write_rcvrd_mst_node(struct ub
+ * @offs: offset to check
+ *
+ * This function returns %1 if @offs was in the last write to the LEB whose data
+- * is in @buf, otherwise %0 is returned. The determination is made by checking
+- * for subsequent empty space starting from the next min_io_size boundary (or a
+- * bit less than the common header size if min_io_size is one).
++ * is in @buf, otherwise %0 is returned. The determination is made by checking
++ * for subsequent empty space starting from the next @c->max_write_size
++ * boundary.
+ */
+ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
+ {
+- int empty_offs;
+- int check_len;
++ int empty_offs, check_len;
+ uint8_t *p;
+
+- if (c->min_io_size == 1) {
+- check_len = c->leb_size - offs;
+- p = buf + check_len;
+- for (; check_len > 0; check_len--)
+- if (*--p != 0xff)
+- break;
+- /*
+- * 'check_len' is the size of the corruption which cannot be
+- * more than the size of 1 node if it was caused by an unclean
+- * unmount.
+- */
+- if (check_len > UBIFS_MAX_NODE_SZ)
+- return 0;
+- return 1;
+- }
+-
+ /*
+- * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
+- * last wbuf written. After that should be empty space.
++ * Round up to the next @c->max_write_size boundary i.e. @offs is in
++ * the last wbuf written. After that should be empty space.
+ */
+- empty_offs = ALIGN(offs + 1, c->min_io_size);
++ empty_offs = ALIGN(offs + 1, c->max_write_size);
+ check_len = c->leb_size - empty_offs;
+ p = buf + empty_offs - offs;
+-
+- for (; check_len > 0; check_len--)
+- if (*p++ != 0xff)
+- return 0;
+- return 1;
++ return is_empty(p, check_len);
+ }
+
+ /**
+@@ -392,7 +433,7 @@ static int is_last_write(const struct ub
+ *
+ * This function pads up to the next min_io_size boundary (if there is one) and
+ * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
+- * min_io_size boundary (if there is one).
++ * @c->min_io_size boundary.
+ */
+ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
+ int *offs, int *len)
+@@ -402,11 +443,6 @@ static void clean_buf(const struct ubifs
+ lnum = lnum;
+ dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
+
+- if (c->min_io_size == 1) {
+- memset(*buf, 0xff, c->leb_size - *offs);
+- return;
+- }
+-
+ ubifs_assert(!(*offs & 7));
+ empty_offs = ALIGN(*offs, c->min_io_size);
+ pad_len = empty_offs - *offs;
+@@ -436,7 +472,7 @@ static int no_more_nodes(const struct ub
+ int skip, dlen = le32_to_cpu(ch->len);
+
+ /* Check for empty space after the corrupt node's common header */
+- skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
++ skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
+ if (is_empty(buf + skip, len - skip))
+ return 1;
+ /*
+@@ -448,7 +484,7 @@ static int no_more_nodes(const struct ub
+ return 0;
+ }
+ /* Now we know the corrupt node's length we can skip over it */
+- skip = ALIGN(offs + dlen, c->min_io_size) - offs;
++ skip = ALIGN(offs + dlen, c->max_write_size) - offs;
+ /* After which there should be empty space */
+ if (is_empty(buf + skip, len - skip))
+ return 1;
+@@ -476,7 +512,7 @@ static int fix_unclean_leb(struct ubifs_
+ endpt = snod->offs + snod->len;
+ }
+
+- if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
++ if (c->ro_mount && !c->remounting_rw) {
+ /* Add to recovery list */
+ struct ubifs_unclean_leb *ucleb;
+
+@@ -527,16 +563,15 @@ static int fix_unclean_leb(struct ubifs_
+ }
+
+ /**
+- * drop_incomplete_group - drop nodes from an incomplete group.
++ * drop_last_group - drop the last group of nodes.
+ * @sleb: scanned LEB information
+ * @offs: offset of dropped nodes is returned here
+ *
+- * This function returns %1 if nodes are dropped and %0 otherwise.
++ * This is a helper function for 'ubifs_recover_leb()' which drops the last
++ * group of nodes of the scanned LEB.
+ */
+-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
++static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
+ {
+- int dropped = 0;
+-
+ while (!list_empty(&sleb->nodes)) {
+ struct ubifs_scan_node *snod;
+ struct ubifs_ch *ch;
+@@ -545,15 +580,40 @@ static int drop_incomplete_group(struct
+ list);
+ ch = snod->node;
+ if (ch->group_type != UBIFS_IN_NODE_GROUP)
+- return dropped;
+- dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
++ break;
++
++ dbg_rcvry("dropping grouped node at %d:%d",
++ sleb->lnum, snod->offs);
++ *offs = snod->offs;
++ list_del(&snod->list);
++ kfree(snod);
++ sleb->nodes_cnt -= 1;
++ }
++}
++
++/**
++ * drop_last_node - drop the last node.
++ * @sleb: scanned LEB information
++ * @offs: offset of dropped nodes is returned here
++ * @grouped: non-zero if whole group of nodes have to be dropped
++ *
++ * This is a helper function for 'ubifs_recover_leb()' which drops the last
++ * node of the scanned LEB.
++ */
++static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
++{
++ struct ubifs_scan_node *snod;
++
++ if (!list_empty(&sleb->nodes)) {
++ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
++ list);
++
++ dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
+ *offs = snod->offs;
+ list_del(&snod->list);
+ kfree(snod);
+ sleb->nodes_cnt -= 1;
+- dropped = 1;
+ }
+- return dropped;
+ }
+
+ /**
+@@ -562,33 +622,30 @@ static int drop_incomplete_group(struct
+ * @lnum: LEB number
+ * @offs: offset
+ * @sbuf: LEB-sized buffer to use
+- * @grouped: nodes may be grouped for recovery
++ * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
++ * belong to any journal head)
+ *
+ * This function does a scan of a LEB, but caters for errors that might have
+ * been caused by the unclean unmount from which we are attempting to recover.
+- *
+- * This function returns %0 on success and a negative error code on failure.
++ * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
++ * found, and a negative error code in case of failure.
+ */
+ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
+- int offs, void *sbuf, int grouped)
++ int offs, void *sbuf, int jhead)
+ {
+- int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
+- int empty_chkd = 0, start = offs;
++ int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
++ int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
+ struct ubifs_scan_leb *sleb;
+ void *buf = sbuf + offs;
+
+- dbg_rcvry("%d:%d", lnum, offs);
++ dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
+
+ sleb = ubifs_start_scan(c, lnum, offs, sbuf);
+ if (IS_ERR(sleb))
+ return sleb;
+
+- if (sleb->ecc)
+- need_clean = 1;
+-
++ ubifs_assert(len >= 8);
+ while (len >= 8) {
+- int ret;
+-
+ dbg_scan("look at LEB %d:%d (%d bytes left)",
+ lnum, offs, len);
+
+@@ -598,8 +655,7 @@ struct ubifs_scan_leb *ubifs_recover_leb
+ * Scan quietly until there is an error from which we cannot
+ * recover
+ */
+- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+-
++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
+ if (ret == SCANNED_A_NODE) {
+ /* A valid node, and not a padding node */
+ struct ubifs_ch *ch = buf;
+@@ -612,98 +668,126 @@ struct ubifs_scan_leb *ubifs_recover_leb
+ offs += node_len;
+ buf += node_len;
+ len -= node_len;
+- continue;
+- }
+-
+- if (ret > 0) {
++ } else if (ret > 0) {
+ /* Padding bytes or a valid padding node */
+ offs += ret;
+ buf += ret;
+ len -= ret;
+- continue;
+- }
+-
+- if (ret == SCANNED_EMPTY_SPACE) {
+- if (!is_empty(buf, len)) {
+- if (!is_last_write(c, buf, offs))
+- break;
+- clean_buf(c, &buf, lnum, &offs, &len);
+- need_clean = 1;
+- }
+- empty_chkd = 1;
++ } else if (ret == SCANNED_EMPTY_SPACE ||
++ ret == SCANNED_GARBAGE ||
++ ret == SCANNED_A_BAD_PAD_NODE ||
++ ret == SCANNED_A_CORRUPT_NODE) {
++ dbg_rcvry("found corruption - %d", ret);
+ break;
++ } else {
++ dbg_err("unexpected return value %d", ret);
++ err = -EINVAL;
++ goto error;
+ }
++ }
+
+- if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
+- if (is_last_write(c, buf, offs)) {
+- clean_buf(c, &buf, lnum, &offs, &len);
+- need_clean = 1;
+- empty_chkd = 1;
+- break;
+- }
+-
+- if (ret == SCANNED_A_CORRUPT_NODE)
+- if (no_more_nodes(c, buf, len, lnum, offs)) {
+- clean_buf(c, &buf, lnum, &offs, &len);
+- need_clean = 1;
+- empty_chkd = 1;
+- break;
+- }
+-
+- if (quiet) {
+- /* Redo the last scan but noisily */
+- quiet = 0;
+- continue;
+- }
++ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
++ if (!is_last_write(c, buf, offs))
++ goto corrupted_rescan;
++ } else if (ret == SCANNED_A_CORRUPT_NODE) {
++ if (!no_more_nodes(c, buf, len, lnum, offs))
++ goto corrupted_rescan;
++ } else if (!is_empty(buf, len)) {
++ if (!is_last_write(c, buf, offs)) {
++ int corruption = first_non_ff(buf, len);
+
+- switch (ret) {
+- case SCANNED_GARBAGE:
+- dbg_err("garbage");
+- goto corrupted;
+- case SCANNED_A_CORRUPT_NODE:
+- case SCANNED_A_BAD_PAD_NODE:
+- dbg_err("bad node");
+- goto corrupted;
+- default:
+- dbg_err("unknown");
++ /*
++ * See header comment for this file for more
++ * explanations about the reasons we have this check.
++ */
++ ubifs_err("corrupt empty space LEB %d:%d, corruption "
++ "starts at %d", lnum, offs, corruption);
++ /* Make sure we dump interesting non-0xFF data */
++ offs += corruption;
++ buf += corruption;
+ goto corrupted;
+ }
+ }
+
+- if (!empty_chkd && !is_empty(buf, len)) {
+- if (is_last_write(c, buf, offs)) {
+- clean_buf(c, &buf, lnum, &offs, &len);
+- need_clean = 1;
+- } else {
+- ubifs_err("corrupt empty space at LEB %d:%d",
+- lnum, offs);
+- goto corrupted;
+- }
+- }
++ min_io_unit = round_down(offs, c->min_io_size);
++ if (grouped)
++ /*
++ * If nodes are grouped, always drop the incomplete group at
++ * the end.
++ */
++ drop_last_group(sleb, &offs);
+
+- /* Drop nodes from incomplete group */
+- if (grouped && drop_incomplete_group(sleb, &offs)) {
+- buf = sbuf + offs;
+- len = c->leb_size - offs;
+- clean_buf(c, &buf, lnum, &offs, &len);
+- need_clean = 1;
++ if (jhead == GCHD) {
++ /*
++ * If this LEB belongs to the GC head then while we are in the
++ * middle of the same min. I/O unit keep dropping nodes. So
++ * basically, what we want is to make sure that the last min.
++ * I/O unit where we saw the corruption is dropped completely
++ * with all the uncorrupted nodes which may possibly sit there.
++ *
++ * In other words, let's name the min. I/O unit where the
++ * corruption starts B, and the previous min. I/O unit A. The
++ * below code tries to deal with a situation when half of B
++ * contains valid nodes or the end of a valid node, and the
++ * second half of B contains corrupted data or garbage. This
++ * means that UBIFS had been writing to B just before the power
++ * cut happened. I do not know how realistic is this scenario
++ * that half of the min. I/O unit had been written successfully
++ * and the other half not, but this is possible in our 'failure
++ * mode emulation' infrastructure at least.
++ *
++ * So what is the problem, why we need to drop those nodes? Why
++ * can't we just clean-up the second half of B by putting a
++ * padding node there? We can, and this works fine with one
++ * exception which was reproduced with power cut emulation
++ * testing and happens extremely rarely.
++ *
++ * Imagine the file-system is full, we run GC which starts
++ * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
++ * the current GC head LEB). The @c->gc_lnum is -1, which means
++ * that GC will retain LEB X and will try to continue. Imagine
++ * that LEB X is currently the dirtiest LEB, and the amount of
++ * used space in LEB Y is exactly the same as amount of free
++ * space in LEB X.
++ *
++ * And a power cut happens when nodes are moved from LEB X to
++ * LEB Y. We are here trying to recover LEB Y which is the GC
++ * head LEB. We find the min. I/O unit B as described above.
++ * Then we clean-up LEB Y by padding min. I/O unit. And later
++ * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
++ * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
++ * does not match because the amount of valid nodes there does
++ * not fit the free space in LEB Y any more! And this is
++ * because of the padding node which we added to LEB Y. The
++ * user-visible effect of this which I once observed and
++ * analysed is that we cannot mount the file-system with
++ * -ENOSPC error.
++ *
++ * So obviously, to make sure that situation does not happen we
++ * should free min. I/O unit B in LEB Y completely and the last
++ * used min. I/O unit in LEB Y should be A. This is basically
++ * what the below code tries to do.
++ */
++ while (offs > min_io_unit)
++ drop_last_node(sleb, &offs);
+ }
+
+- if (offs % c->min_io_size) {
+- clean_buf(c, &buf, lnum, &offs, &len);
+- need_clean = 1;
+- }
++ buf = sbuf + offs;
++ len = c->leb_size - offs;
+
++ clean_buf(c, &buf, lnum, &offs, &len);
+ ubifs_end_scan(c, sleb, lnum, offs);
+
+- if (need_clean) {
+- err = fix_unclean_leb(c, sleb, start);
+- if (err)
+- goto error;
+- }
++ err = fix_unclean_leb(c, sleb, start);
++ if (err)
++ goto error;
+
+ return sleb;
+
++corrupted_rescan:
++ /* Re-scan the corrupted data with verbose messages */
++ dbg_err("corruptio %d", ret);
++ ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
+ corrupted:
+ ubifs_scanned_corruption(c, lnum, offs, buf);
+ err = -EUCLEAN;
+@@ -773,7 +857,8 @@ out_free:
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function does a scan of a LEB, but caters for errors that might have
+- * been caused by the unclean unmount from which we are attempting to recover.
++ * been caused by unclean reboots from which we are attempting to recover
++ * (assume that only the last log LEB can be corrupted by an unclean reboot).
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+@@ -792,7 +877,7 @@ struct ubifs_scan_leb *ubifs_recover_log
+ * We can only recover at the end of the log, so check that the
+ * next log LEB is empty or out of date.
+ */
+- sleb = ubifs_scan(c, next_lnum, 0, sbuf);
++ sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
+ if (IS_ERR(sleb))
+ return sleb;
+ if (sleb->nodes_cnt) {
+@@ -819,7 +904,7 @@ struct ubifs_scan_leb *ubifs_recover_log
+ }
+ ubifs_scan_destroy(sleb);
+ }
+- return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
++ return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
+ }
+
+ /**
+@@ -836,12 +921,8 @@ struct ubifs_scan_leb *ubifs_recover_log
+ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
+ void *sbuf)
+ {
+- int len, err, need_clean = 0;
++ int len = c->max_write_size, err;
+
+- if (c->min_io_size > 1)
+- len = c->min_io_size;
+- else
+- len = 512;
+ if (offs + len > c->leb_size)
+ len = c->leb_size - offs;
+
+@@ -850,19 +931,7 @@ static int recover_head(const struct ubi
+
+ /* Read at the head location and check it is empty flash */
+ err = ubi_read(c->ubi, lnum, sbuf, offs, len);
+- if (err)
+- need_clean = 1;
+- else {
+- uint8_t *p = sbuf;
+-
+- while (len--)
+- if (*p++ != 0xff) {
+- need_clean = 1;
+- break;
+- }
+- }
+-
+- if (need_clean) {
++ if (err || !is_empty(sbuf, len)) {
+ dbg_rcvry("cleaning head at %d:%d", lnum, offs);
+ if (offs == 0)
+ return ubifs_leb_unmap(c, lnum);
+@@ -896,7 +965,7 @@ int ubifs_recover_inl_heads(const struct
+ {
+ int err;
+
+- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
++ ubifs_assert(!c->ro_mount || c->remounting_rw);
+
+ dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
+ err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
+@@ -1038,6 +1107,53 @@ int ubifs_clean_lebs(const struct ubifs_
+ }
+
+ /**
++ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
++ * @c: UBIFS file-system description object
++ *
++ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
++ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
++ * zero in case of success and a negative error code in case of failure.
++ */
++static int grab_empty_leb(struct ubifs_info *c)
++{
++ int lnum, err;
++
++ /*
++ * Note, it is very important to first search for an empty LEB and then
++ * run the commit, not vice-versa. The reason is that there might be
++ * only one empty LEB at the moment, the one which has been the
++ * @c->gc_lnum just before the power cut happened. During the regular
++ * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
++ * one but GC can grab it. But at this moment this single empty LEB is
++ * not marked as taken, so if we run commit - what happens? Right, the
++ * commit will grab it and write the index there. Remember that the
++ * index always expands as long as there is free space, and it only
++ * starts consolidating when we run out of space.
++ *
++ * IOW, if we run commit now, we might not be able to find a free LEB
++ * after this.
++ */
++ lnum = ubifs_find_free_leb_for_idx(c);
++ if (lnum < 0) {
++ dbg_err("could not find an empty LEB");
++ dbg_dump_lprops(c);
++ dbg_dump_budg(c, &c->bi);
++ return lnum;
++ }
++
++ /* Reset the index flag */
++ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
++ LPROPS_INDEX, 0);
++ if (err)
++ return err;
++
++ c->gc_lnum = lnum;
++ dbg_rcvry("found empty LEB %d, run commit", lnum);
++
++ return ubifs_run_commit(c);
++}
++
++/**
+ * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
+ * @c: UBIFS file-system description object
+ *
+@@ -1059,58 +1175,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_i
+ {
+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+ struct ubifs_lprops lp;
+- int lnum, err;
++ int err;
++
++ dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
+
+ c->gc_lnum = -1;
+- if (wbuf->lnum == -1) {
+- dbg_rcvry("no GC head LEB");
+- goto find_free;
+- }
+- /*
+- * See whether the used space in the dirtiest LEB fits in the GC head
+- * LEB.
+- */
+- if (wbuf->offs == c->leb_size) {
+- dbg_rcvry("no room in GC head LEB");
+- goto find_free;
+- }
++ if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
++ return grab_empty_leb(c);
++
+ err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
+ if (err) {
+- if (err == -ENOSPC)
+- dbg_err("could not find a dirty LEB");
+- return err;
+- }
+- ubifs_assert(!(lp.flags & LPROPS_INDEX));
+- lnum = lp.lnum;
+- if (lp.free + lp.dirty == c->leb_size) {
+- /* An empty LEB was returned */
+- if (lp.free != c->leb_size) {
+- err = ubifs_change_one_lp(c, lnum, c->leb_size,
+- 0, 0, 0, 0);
+- if (err)
+- return err;
+- }
+- err = ubifs_leb_unmap(c, lnum);
+- if (err)
++ if (err != -ENOSPC)
+ return err;
+- c->gc_lnum = lnum;
+- dbg_rcvry("allocated LEB %d for GC", lnum);
+- /* Run the commit */
+- dbg_rcvry("committing");
+- return ubifs_run_commit(c);
+- }
+- /*
+- * There was no empty LEB so the used space in the dirtiest LEB must fit
+- * in the GC head LEB.
+- */
+- if (lp.free + lp.dirty < wbuf->offs) {
+- dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
+- lnum, wbuf->lnum, wbuf->offs);
+- err = ubifs_return_leb(c, lnum);
+- if (err)
+- return err;
+- goto find_free;
++
++ dbg_rcvry("could not find a dirty LEB");
++ return grab_empty_leb(c);
+ }
++
++ ubifs_assert(!(lp.flags & LPROPS_INDEX));
++ ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
++
+ /*
+ * We run the commit before garbage collection otherwise subsequent
+ * mounts will see the GC and orphan deletion in a different order.
+@@ -1119,11 +1203,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_i
+ err = ubifs_run_commit(c);
+ if (err)
+ return err;
+- /*
+- * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
+- * - use locking to keep 'ubifs_assert()' happy.
+- */
+- dbg_rcvry("GC'ing LEB %d", lnum);
++
++ dbg_rcvry("GC'ing LEB %d", lp.lnum);
+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+ err = ubifs_garbage_collect_leb(c, &lp);
+ if (err >= 0) {
+@@ -1139,37 +1220,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_i
+ err = -EINVAL;
+ return err;
+ }
+- if (err != LEB_RETAINED) {
+- dbg_err("GC returned %d", err);
++
++ ubifs_assert(err == LEB_RETAINED);
++ if (err != LEB_RETAINED)
+ return -EINVAL;
+- }
++
+ err = ubifs_leb_unmap(c, c->gc_lnum);
+ if (err)
+ return err;
+- dbg_rcvry("allocated LEB %d for GC", lnum);
+- return 0;
+
+-find_free:
+- /*
+- * There is no GC head LEB or the free space in the GC head LEB is too
+- * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
+- * GC is not run.
+- */
+- lnum = ubifs_find_free_leb_for_idx(c);
+- if (lnum < 0) {
+- dbg_err("could not find an empty LEB");
+- return lnum;
+- }
+- /* And reset the index flag */
+- err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+- LPROPS_INDEX, 0);
+- if (err)
+- return err;
+- c->gc_lnum = lnum;
+- dbg_rcvry("allocated LEB %d for GC", lnum);
+- /* Run the commit */
+- dbg_rcvry("committing");
+- return ubifs_run_commit(c);
++ dbg_rcvry("allocated LEB %d for GC", lp.lnum);
++ return 0;
+ }
+
+ /**
+@@ -1411,7 +1472,7 @@ static int fix_size_in_place(struct ubif
+ err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+ if (err)
+ goto out;
+- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
++ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
+ (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
+ return 0;
+
+@@ -1460,20 +1521,27 @@ int ubifs_recover_size(struct ubifs_info
+ e->i_size = le64_to_cpu(ino->size);
+ }
+ }
++
+ if (e->exists && e->i_size < e->d_size) {
+- if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
++ if (c->ro_mount) {
+ /* Fix the inode size and pin it in memory */
+ struct inode *inode;
++ struct ubifs_inode *ui;
++
++ ubifs_assert(!e->inode);
+
+ inode = ubifs_iget(c->vfs_sb, e->inum);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
++
++ ui = ubifs_inode(inode);
+ if (inode->i_size < e->d_size) {
+ dbg_rcvry("ino %lu size %lld -> %lld",
+ (unsigned long)e->inum,
+- e->d_size, inode->i_size);
++ inode->i_size, e->d_size);
+ inode->i_size = e->d_size;
+- ubifs_inode(inode)->ui_size = e->d_size;
++ ui->ui_size = e->d_size;
++ ui->synced_i_size = e->d_size;
+ e->inode = inode;
+ this = rb_next(this);
+ continue;
+@@ -1488,9 +1556,11 @@ int ubifs_recover_size(struct ubifs_info
+ iput(e->inode);
+ }
+ }
++
+ this = rb_next(this);
+ rb_erase(&e->rb, &c->size_tree);
+ kfree(e);
+ }
++
+ return 0;
+ }
+diff -uprN linux-2.6.28/fs/ubifs/replay.c ubifs-v2.6.28/fs/ubifs/replay.c
+--- linux-2.6.28/fs/ubifs/replay.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/replay.c 2011-06-15 14:22:09.000000000 -0400
+@@ -33,43 +33,32 @@
+ */
+
+ #include "ubifs.h"
+-
+-/*
+- * Replay flags.
+- *
+- * REPLAY_DELETION: node was deleted
+- * REPLAY_REF: node is a reference node
+- */
+-enum {
+- REPLAY_DELETION = 1,
+- REPLAY_REF = 2,
+-};
++#include <linux/list_sort.h>
+
+ /**
+- * struct replay_entry - replay tree entry.
++ * struct replay_entry - replay list entry.
+ * @lnum: logical eraseblock number of the node
+ * @offs: node offset
+ * @len: node length
++ * @deletion: non-zero if this entry corresponds to a node deletion
+ * @sqnum: node sequence number
+- * @flags: replay flags
+- * @rb: links the replay tree
++ * @list: links the replay list
+ * @key: node key
+ * @nm: directory entry name
+ * @old_size: truncation old size
+ * @new_size: truncation new size
+- * @free: amount of free space in a bud
+- * @dirty: amount of dirty space in a bud from padding and deletion nodes
+ *
+- * UBIFS journal replay must compare node sequence numbers, which means it must
+- * build a tree of node information to insert into the TNC.
++ * The replay process first scans all buds and builds the replay list, then
++ * sorts the replay list in nodes sequence number order, and then inserts all
++ * the replay entries to the TNC.
+ */
+ struct replay_entry {
+ int lnum;
+ int offs;
+ int len;
++ unsigned int deletion:1;
+ unsigned long long sqnum;
+- int flags;
+- struct rb_node rb;
++ struct list_head list;
+ union ubifs_key key;
+ union {
+ struct qstr nm;
+@@ -77,10 +66,6 @@ struct replay_entry {
+ loff_t old_size;
+ loff_t new_size;
+ };
+- struct {
+- int free;
+- int dirty;
+- };
+ };
+ };
+
+@@ -88,83 +73,117 @@ struct replay_entry {
+ * struct bud_entry - entry in the list of buds to replay.
+ * @list: next bud in the list
+ * @bud: bud description object
+- * @free: free bytes in the bud
+ * @sqnum: reference node sequence number
++ * @free: free bytes in the bud
++ * @dirty: dirty bytes in the bud
+ */
+ struct bud_entry {
+ struct list_head list;
+ struct ubifs_bud *bud;
+- int free;
+ unsigned long long sqnum;
++ int free;
++ int dirty;
+ };
+
+ /**
+ * set_bud_lprops - set free and dirty space used by a bud.
+ * @c: UBIFS file-system description object
+- * @r: replay entry of bud
++ * @b: bud entry which describes the bud
++ *
++ * This function makes sure the LEB properties of bud @b are set correctly
++ * after the replay. Returns zero in case of success and a negative error code
++ * in case of failure.
+ */
+-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
++static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
+ {
+ const struct ubifs_lprops *lp;
+ int err = 0, dirty;
+
+ ubifs_get_lprops(c);
+
+- lp = ubifs_lpt_lookup_dirty(c, r->lnum);
++ lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
+ if (IS_ERR(lp)) {
+ err = PTR_ERR(lp);
+ goto out;
+ }
+
+ dirty = lp->dirty;
+- if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
++ if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
+ /*
+ * The LEB was added to the journal with a starting offset of
+ * zero which means the LEB must have been empty. The LEB
+- * property values should be lp->free == c->leb_size and
+- * lp->dirty == 0, but that is not the case. The reason is that
+- * the LEB was garbage collected. The garbage collector resets
+- * the free and dirty space without recording it anywhere except
+- * lprops, so if there is not a commit then lprops does not have
+- * that information next time the file system is mounted.
++ * property values should be @lp->free == @c->leb_size and
++ * @lp->dirty == 0, but that is not the case. The reason is that
++ * the LEB had been garbage collected before it became the bud,
++ * and there was no commit in between. The garbage collector
++ * resets the free and dirty space without recording it
++ * anywhere except lprops, so if there was no commit then
++ * lprops does not have that information.
+ *
+ * We do not need to adjust free space because the scan has told
+ * us the exact value which is recorded in the replay entry as
+- * r->free.
++ * @b->free.
+ *
+ * However we do need to subtract from the dirty space the
+ * amount of space that the garbage collector reclaimed, which
+ * is the whole LEB minus the amount of space that was free.
+ */
+- dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
++ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
+ lp->free, lp->dirty);
+- dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
++ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
+ lp->free, lp->dirty);
+ dirty -= c->leb_size - lp->free;
+ /*
+ * If the replay order was perfect the dirty space would now be
+- * zero. The order is not perfect because the the journal heads
++ * zero. The order is not perfect because the journal heads
+ * race with each other. This is not a problem but is does mean
+ * that the dirty space may temporarily exceed c->leb_size
+ * during the replay.
+ */
+ if (dirty != 0)
+ dbg_msg("LEB %d lp: %d free %d dirty "
+- "replay: %d free %d dirty", r->lnum, lp->free,
+- lp->dirty, r->free, r->dirty);
++ "replay: %d free %d dirty", b->bud->lnum,
++ lp->free, lp->dirty, b->free, b->dirty);
+ }
+- lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
++ lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
+ lp->flags | LPROPS_TAKEN, 0);
+ if (IS_ERR(lp)) {
+ err = PTR_ERR(lp);
+ goto out;
+ }
++
++ /* Make sure the journal head points to the latest bud */
++ err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
++ b->bud->lnum, c->leb_size - b->free,
++ UBI_SHORTTERM);
++
+ out:
+ ubifs_release_lprops(c);
+ return err;
+ }
+
+ /**
++ * set_buds_lprops - set free and dirty space for all replayed buds.
++ * @c: UBIFS file-system description object
++ *
++ * This function sets LEB properties for all replayed buds. Returns zero in
++ * case of success and a negative error code in case of failure.
++ */
++static int set_buds_lprops(struct ubifs_info *c)
++{
++ struct bud_entry *b;
++ int err;
++
++ list_for_each_entry(b, &c->replay_buds, list) {
++ err = set_bud_lprops(c, b);
++ if (err)
++ return err;
++ }
++
++ return 0;
++}
++
++/**
+ * trun_remove_range - apply a replay entry for a truncation to the TNC.
+ * @c: UBIFS file-system description object
+ * @r: replay entry of truncation
+@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubif
+ */
+ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
+ {
+- int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
++ int err;
+
+- dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
+- r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
++ dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
++ r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
+
+ /* Set c->replay_sqnum to help deal with dangling branches. */
+ c->replay_sqnum = r->sqnum;
+
+- if (r->flags & REPLAY_REF)
+- err = set_bud_lprops(c, r);
+- else if (is_hash_key(c, &r->key)) {
+- if (deletion)
++ if (is_hash_key(c, &r->key)) {
++ if (r->deletion)
+ err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
+ else
+ err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
+ r->len, &r->nm);
+ } else {
+- if (deletion)
++ if (r->deletion)
+ switch (key_type(c, &r->key)) {
+ case UBIFS_INO_KEY:
+ {
+@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubi
+ return err;
+
+ if (c->need_recovery)
+- err = ubifs_recover_size_accum(c, &r->key, deletion,
++ err = ubifs_recover_size_accum(c, &r->key, r->deletion,
+ r->new_size);
+ }
+
+@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubi
+ }
+
+ /**
+- * destroy_replay_tree - destroy the replay.
+- * @c: UBIFS file-system description object
++ * replay_entries_cmp - compare 2 replay entries.
++ * @priv: UBIFS file-system description object
++ * @a: first replay entry
++ * @b: second replay entry
+ *
+- * Destroy the replay tree.
++ * This is a comparison function for 'list_sort()' which compares 2 replay
++ * entries @a and @b by comparing their sequence number. Returns %1 if @a has
++ * greater sequence number and %-1 otherwise.
+ */
+-static void destroy_replay_tree(struct ubifs_info *c)
++static int replay_entries_cmp(void *priv, struct list_head *a,
++ struct list_head *b)
+ {
+- struct rb_node *this = c->replay_tree.rb_node;
+- struct replay_entry *r;
++ struct replay_entry *ra, *rb;
+
+- while (this) {
+- if (this->rb_left) {
+- this = this->rb_left;
+- continue;
+- } else if (this->rb_right) {
+- this = this->rb_right;
+- continue;
+- }
+- r = rb_entry(this, struct replay_entry, rb);
+- this = rb_parent(this);
+- if (this) {
+- if (this->rb_left == &r->rb)
+- this->rb_left = NULL;
+- else
+- this->rb_right = NULL;
+- }
+- if (is_hash_key(c, &r->key))
+- kfree(r->nm.name);
+- kfree(r);
+- }
+- c->replay_tree = RB_ROOT;
++ cond_resched();
++ if (a == b)
++ return 0;
++
++ ra = list_entry(a, struct replay_entry, list);
++ rb = list_entry(b, struct replay_entry, list);
++ ubifs_assert(ra->sqnum != rb->sqnum);
++ if (ra->sqnum > rb->sqnum)
++ return 1;
++ return -1;
+ }
+
+ /**
+- * apply_replay_tree - apply the replay tree to the TNC.
++ * apply_replay_list - apply the replay list to the TNC.
+ * @c: UBIFS file-system description object
+ *
+- * Apply the replay tree.
+- * Returns zero in case of success and a negative error code in case of
+- * failure.
++ * Apply all entries in the replay list to the TNC. Returns zero in case of
++ * success and a negative error code in case of failure.
+ */
+-static int apply_replay_tree(struct ubifs_info *c)
++static int apply_replay_list(struct ubifs_info *c)
+ {
+- struct rb_node *this = rb_first(&c->replay_tree);
++ struct replay_entry *r;
++ int err;
+
+- while (this) {
+- struct replay_entry *r;
+- int err;
++ list_sort(c, &c->replay_list, &replay_entries_cmp);
+
++ list_for_each_entry(r, &c->replay_list, list) {
+ cond_resched();
+
+- r = rb_entry(this, struct replay_entry, rb);
+ err = apply_replay_entry(c, r);
+ if (err)
+ return err;
+- this = rb_next(this);
+ }
++
+ return 0;
+ }
+
+ /**
+- * insert_node - insert a node to the replay tree.
++ * destroy_replay_list - destroy the replay.
++ * @c: UBIFS file-system description object
++ *
++ * Destroy the replay list.
++ */
++static void destroy_replay_list(struct ubifs_info *c)
++{
++ struct replay_entry *r, *tmp;
++
++ list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
++ if (is_hash_key(c, &r->key))
++ kfree(r->nm.name);
++ list_del(&r->list);
++ kfree(r);
++ }
++}
++
++/**
++ * insert_node - insert a node to the replay list
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubif
+ * @old_size: truncation old size
+ * @new_size: truncation new size
+ *
+- * This function inserts a scanned non-direntry node to the replay tree. The
+- * replay tree is an RB-tree containing @struct replay_entry elements which are
+- * indexed by the sequence number. The replay tree is applied at the very end
+- * of the replay process. Since the tree is sorted in sequence number order,
+- * the older modifications are applied first. This function returns zero in
+- * case of success and a negative error code in case of failure.
++ * This function inserts a scanned non-direntry node to the replay list. The
++ * replay list contains @struct replay_entry elements, and we sort this list in
++ * sequence number order before applying it. The replay list is applied at the
++ * very end of the replay process. Since the list is sorted in sequence number
++ * order, the older modifications are applied first. This function returns zero
++ * in case of success and a negative error code in case of failure.
+ */
+ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
+ union ubifs_key *key, unsigned long long sqnum,
+ int deletion, int *used, loff_t old_size,
+ loff_t new_size)
+ {
+- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
+ struct replay_entry *r;
+
++ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
++
+ if (key_inum(c, key) >= c->highest_inum)
+ c->highest_inum = key_inum(c, key);
+
+- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+- while (*p) {
+- parent = *p;
+- r = rb_entry(parent, struct replay_entry, rb);
+- if (sqnum < r->sqnum) {
+- p = &(*p)->rb_left;
+- continue;
+- } else if (sqnum > r->sqnum) {
+- p = &(*p)->rb_right;
+- continue;
+- }
+- ubifs_err("duplicate sqnum in replay");
+- return -EINVAL;
+- }
+-
+ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+ if (!r)
+ return -ENOMEM;
+@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info
+ r->lnum = lnum;
+ r->offs = offs;
+ r->len = len;
++ r->deletion = !!deletion;
+ r->sqnum = sqnum;
+- r->flags = (deletion ? REPLAY_DELETION : 0);
++ key_copy(c, key, &r->key);
+ r->old_size = old_size;
+ r->new_size = new_size;
+- key_copy(c, key, &r->key);
+
+- rb_link_node(&r->rb, parent, p);
+- rb_insert_color(&r->rb, &c->replay_tree);
++ list_add_tail(&r->list, &c->replay_list);
+ return 0;
+ }
+
+ /**
+- * insert_dent - insert a directory entry node into the replay tree.
++ * insert_dent - insert a directory entry node into the replay list.
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info
+ * @deletion: non-zero if this is a deletion
+ * @used: number of bytes in use in a LEB
+ *
+- * This function inserts a scanned directory entry node to the replay tree.
+- * Returns zero in case of success and a negative error code in case of
+- * failure.
+- *
+- * This function is also used for extended attribute entries because they are
+- * implemented as directory entry nodes.
++ * This function inserts a scanned directory entry node or an extended
++ * attribute entry to the replay list. Returns zero in case of success and a
++ * negative error code in case of failure.
+ */
+ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
+ union ubifs_key *key, const char *name, int nlen,
+ unsigned long long sqnum, int deletion, int *used)
+ {
+- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
+ struct replay_entry *r;
+ char *nbuf;
+
++ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+ if (key_inum(c, key) >= c->highest_inum)
+ c->highest_inum = key_inum(c, key);
+
+- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+- while (*p) {
+- parent = *p;
+- r = rb_entry(parent, struct replay_entry, rb);
+- if (sqnum < r->sqnum) {
+- p = &(*p)->rb_left;
+- continue;
+- }
+- if (sqnum > r->sqnum) {
+- p = &(*p)->rb_right;
+- continue;
+- }
+- ubifs_err("duplicate sqnum in replay");
+- return -EINVAL;
+- }
+-
+ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+ if (!r)
+ return -ENOMEM;
++
+ nbuf = kmalloc(nlen + 1, GFP_KERNEL);
+ if (!nbuf) {
+ kfree(r);
+@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info
+ r->lnum = lnum;
+ r->offs = offs;
+ r->len = len;
++ r->deletion = !!deletion;
+ r->sqnum = sqnum;
++ key_copy(c, key, &r->key);
+ r->nm.len = nlen;
+ memcpy(nbuf, name, nlen);
+ nbuf[nlen] = '\0';
+ r->nm.name = nbuf;
+- r->flags = (deletion ? REPLAY_DELETION : 0);
+- key_copy(c, key, &r->key);
+
+- ubifs_assert(!*p);
+- rb_link_node(&r->rb, parent, p);
+- rb_insert_color(&r->rb, &c->replay_tree);
++ list_add_tail(&r->list, &c->replay_list);
+ return 0;
+ }
+
+@@ -482,31 +473,93 @@ int ubifs_validate_entry(struct ubifs_in
+ }
+
+ /**
++ * is_last_bud - check if the bud is the last in the journal head.
++ * @c: UBIFS file-system description object
++ * @bud: bud description object
++ *
++ * This function checks if bud @bud is the last bud in its journal head. This
++ * information is then used by 'replay_bud()' to decide whether the bud can
++ * have corruptions or not. Indeed, only last buds can be corrupted by power
++ * cuts. Returns %1 if this is the last bud, and %0 if not.
++ */
++static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
++{
++ struct ubifs_jhead *jh = &c->jheads[bud->jhead];
++ struct ubifs_bud *next;
++ uint32_t data;
++ int err;
++
++ if (list_is_last(&bud->list, &jh->buds_list))
++ return 1;
++
++ /*
++ * The following is a quirk to make sure we work correctly with UBIFS
++ * images used with older UBIFS.
++ *
++ * Normally, the last bud will be the last in the journal head's list
++ * of bud. However, there is one exception if the UBIFS image belongs
++ * to older UBIFS. This is fairly unlikely: one would need to use old
++ * UBIFS, then have a power cut exactly at the right point, and then
++ * try to mount this image with new UBIFS.
++ *
++ * The exception is: it is possible to have 2 buds A and B, A goes
++ * before B, and B is the last, bud B contains no data, and bud A is
++ * corrupted at the end. The reason is that in older versions when the
++ * journal code switched the next bud (from A to B), it first added a
++ * log reference node for the new bud (B), and only after this it
++ * synchronized the write-buffer of current bud (A). But later this was
++ * changed and UBIFS started to always synchronize the write-buffer of
++ * the bud (A) before writing the log reference for the new bud (B).
++ *
++ * But because older UBIFS always synchronized A's write-buffer before
++ * writing to B, we can recognize this exceptional situation by
++ * checking the contents of bud B - if it is empty, then A can be
++ * treated as the last and we can recover it.
++ *
++ * TODO: remove this piece of code in a couple of years (today it is
++ * 16.05.2011).
++ */
++ next = list_entry(bud->list.next, struct ubifs_bud, list);
++ if (!list_is_last(&next->list, &jh->buds_list))
++ return 0;
++
++ err = ubi_read(c->ubi, next->lnum, (char *)&data,
++ next->start, 4);
++ if (err)
++ return 0;
++
++ return data == 0xFFFFFFFF;
++}
++
++/**
+ * replay_bud - replay a bud logical eraseblock.
+ * @c: UBIFS file-system description object
+- * @lnum: bud logical eraseblock number to replay
+- * @offs: bud start offset
+- * @jhead: journal head to which this bud belongs
+- * @free: amount of free space in the bud is returned here
+- * @dirty: amount of dirty space from padding and deletion nodes is returned
+- * here
++ * @b: bud entry which describes the bud
+ *
+- * This function returns zero in case of success and a negative error code in
+- * case of failure.
++ * This function replays bud @b, recovers it if needed, and adds all nodes
++ * from this bud to the replay list. Returns zero in case of success and a
++ * negative error code in case of failure.
+ */
+-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
+- int *free, int *dirty)
++static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
+ {
+- int err = 0, used = 0;
++ int is_last = is_last_bud(c, b->bud);
++ int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
+ struct ubifs_scan_leb *sleb;
+ struct ubifs_scan_node *snod;
+- struct ubifs_bud *bud;
+
+- dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
+- if (c->need_recovery)
+- sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
++ dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
++ lnum, b->bud->jhead, offs, is_last);
++
++ if (c->need_recovery && is_last)
++ /*
++ * Recover only last LEBs in the journal heads, because power
++ * cuts may cause corruptions only in these LEBs, because only
++ * these LEBs could possibly be written to at the power cut
++ * time.
++ */
++ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
+ else
+- sleb = ubifs_scan(c, lnum, offs, c->sbuf);
++ sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
+ if (IS_ERR(sleb))
+ return PTR_ERR(sleb);
+
+@@ -620,20 +673,13 @@ static int replay_bud(struct ubifs_info
+ goto out;
+ }
+
+- bud = ubifs_search_bud(c, lnum);
+- if (!bud)
+- BUG();
+-
++ ubifs_assert(ubifs_search_bud(c, lnum));
+ ubifs_assert(sleb->endpt - offs >= used);
+ ubifs_assert(sleb->endpt % c->min_io_size == 0);
+
+- if (sleb->endpt + c->min_io_size <= c->leb_size &&
+- !(c->vfs_sb->s_flags & MS_RDONLY))
+- err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
+- sleb->endpt, UBI_SHORTTERM);
+-
+- *dirty = sleb->endpt - offs - used;
+- *free = c->leb_size - sleb->endpt;
++ b->dirty = sleb->endpt - offs - used;
++ b->free = c->leb_size - sleb->endpt;
++ dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
+
+ out:
+ ubifs_scan_destroy(sleb);
+@@ -647,55 +693,6 @@ out_dump:
+ }
+
+ /**
+- * insert_ref_node - insert a reference node to the replay tree.
+- * @c: UBIFS file-system description object
+- * @lnum: node logical eraseblock number
+- * @offs: node offset
+- * @sqnum: sequence number
+- * @free: amount of free space in bud
+- * @dirty: amount of dirty space from padding and deletion nodes
+- *
+- * This function inserts a reference node to the replay tree and returns zero
+- * in case of success or a negative error code in case of failure.
+- */
+-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
+- unsigned long long sqnum, int free, int dirty)
+-{
+- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
+- struct replay_entry *r;
+-
+- dbg_mnt("add ref LEB %d:%d", lnum, offs);
+- while (*p) {
+- parent = *p;
+- r = rb_entry(parent, struct replay_entry, rb);
+- if (sqnum < r->sqnum) {
+- p = &(*p)->rb_left;
+- continue;
+- } else if (sqnum > r->sqnum) {
+- p = &(*p)->rb_right;
+- continue;
+- }
+- ubifs_err("duplicate sqnum in replay tree");
+- return -EINVAL;
+- }
+-
+- r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+- if (!r)
+- return -ENOMEM;
+-
+- r->lnum = lnum;
+- r->offs = offs;
+- r->sqnum = sqnum;
+- r->flags = REPLAY_REF;
+- r->free = free;
+- r->dirty = dirty;
+-
+- rb_link_node(&r->rb, parent, p);
+- rb_insert_color(&r->rb, &c->replay_tree);
+- return 0;
+-}
+-
+-/**
+ * replay_buds - replay all buds.
+ * @c: UBIFS file-system description object
+ *
+@@ -705,17 +702,16 @@ static int insert_ref_node(struct ubifs_
+ static int replay_buds(struct ubifs_info *c)
+ {
+ struct bud_entry *b;
+- int err, uninitialized_var(free), uninitialized_var(dirty);
++ int err;
++ unsigned long long prev_sqnum = 0;
+
+ list_for_each_entry(b, &c->replay_buds, list) {
+- err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
+- &free, &dirty);
+- if (err)
+- return err;
+- err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
+- free, dirty);
++ err = replay_bud(c, b);
+ if (err)
+ return err;
++
++ ubifs_assert(b->sqnum > prev_sqnum);
++ prev_sqnum = b->sqnum;
+ }
+
+ return 0;
+@@ -836,10 +832,16 @@ static int replay_log_leb(struct ubifs_i
+ const struct ubifs_cs_node *node;
+
+ dbg_mnt("replay log LEB %d:%d", lnum, offs);
+- sleb = ubifs_scan(c, lnum, offs, sbuf);
++ sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
+ if (IS_ERR(sleb)) {
+- if (c->need_recovery)
+- sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
++ if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
++ return PTR_ERR(sleb);
++ /*
++ * Note, the below function will recover this log LEB only if
++ * it is the last, because unclean reboots can possibly corrupt
++ * only the tail of the log.
++ */
++ sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
+ if (IS_ERR(sleb))
+ return PTR_ERR(sleb);
+ }
+@@ -850,7 +852,6 @@ static int replay_log_leb(struct ubifs_i
+ }
+
+ node = sleb->buf;
+-
+ snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
+ if (c->cs_sqnum == 0) {
+ /*
+@@ -897,7 +898,6 @@ static int replay_log_leb(struct ubifs_i
+ }
+
+ list_for_each_entry(snod, &sleb->nodes, list) {
+-
+ cond_resched();
+
+ if (snod->sqnum >= SQNUM_WATERMARK) {
+@@ -1010,7 +1010,6 @@ out:
+ int ubifs_replay_journal(struct ubifs_info *c)
+ {
+ int err, i, lnum, offs, free;
+- void *sbuf = NULL;
+
+ BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
+
+@@ -1025,14 +1024,8 @@ int ubifs_replay_journal(struct ubifs_in
+ return -EINVAL;
+ }
+
+- sbuf = vmalloc(c->leb_size);
+- if (!sbuf)
+- return -ENOMEM;
+-
+ dbg_mnt("start replaying the journal");
+-
+ c->replaying = 1;
+-
+ lnum = c->ltail_lnum = c->lhead_lnum;
+ offs = c->lhead_offs;
+
+@@ -1045,7 +1038,7 @@ int ubifs_replay_journal(struct ubifs_in
+ lnum = UBIFS_LOG_LNUM;
+ offs = 0;
+ }
+- err = replay_log_leb(c, lnum, offs, sbuf);
++ err = replay_log_leb(c, lnum, offs, c->sbuf);
+ if (err == 1)
+ /* We hit the end of the log */
+ break;
+@@ -1058,27 +1051,30 @@ int ubifs_replay_journal(struct ubifs_in
+ if (err)
+ goto out;
+
+- err = apply_replay_tree(c);
++ err = apply_replay_list(c);
++ if (err)
++ goto out;
++
++ err = set_buds_lprops(c);
+ if (err)
+ goto out;
+
+ /*
+- * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
+- * to roughly estimate index growth. Things like @c->min_idx_lebs
++ * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
++ * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
+ * depend on it. This means we have to initialize it to make sure
+ * budgeting works properly.
+ */
+- c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
+- c->budg_uncommitted_idx *= c->max_idx_node_sz;
++ c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
++ c->bi.uncommitted_idx *= c->max_idx_node_sz;
+
+ ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
+ dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
+ "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
+ (unsigned long)c->highest_inum);
+ out:
+- destroy_replay_tree(c);
++ destroy_replay_list(c);
+ destroy_bud_list(c);
+- vfree(sbuf);
+ c->replaying = 0;
+ return err;
+ }
+diff -uprN linux-2.6.28/fs/ubifs/sb.c ubifs-v2.6.28/fs/ubifs/sb.c
+--- linux-2.6.28/fs/ubifs/sb.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/sb.c 2011-06-15 14:22:09.000000000 -0400
+@@ -181,12 +181,9 @@ static int create_default_filesystem(str
+ sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
+ sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
+ sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
+- if (c->mount_opts.override_compr) {
+- if (c->mount_opts.compr_type == UBIFS_COMPR_LZO999)
+- sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
+- else
+- sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
+- } else
++ if (c->mount_opts.override_compr)
++ sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
++ else
+ sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
+
+ generate_random_uuid(sup->uuid);
+@@ -196,6 +193,7 @@ static int create_default_filesystem(str
+ if (tmp64 > DEFAULT_MAX_RP_SIZE)
+ tmp64 = DEFAULT_MAX_RP_SIZE;
+ sup->rp_size = cpu_to_le64(tmp64);
++ sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
+
+ err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
+ kfree(sup);
+@@ -476,7 +474,8 @@ failed:
+ * @c: UBIFS file-system description object
+ *
+ * This function returns a pointer to the superblock node or a negative error
+- * code.
++ * code. Note, the user of this function is responsible for kfree()'ing the
++ * returned superblock buffer.
+ */
+ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
+ {
+@@ -535,17 +534,36 @@ int ubifs_read_superblock(struct ubifs_i
+ if (IS_ERR(sup))
+ return PTR_ERR(sup);
+
++ c->fmt_version = le32_to_cpu(sup->fmt_version);
++ c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
++
+ /*
+ * The software supports all previous versions but not future versions,
+ * due to the unavailability of time-travelling equipment.
+ */
+- c->fmt_version = le32_to_cpu(sup->fmt_version);
+ if (c->fmt_version > UBIFS_FORMAT_VERSION) {
+- ubifs_err("on-flash format version is %d, but software only "
+- "supports up to version %d", c->fmt_version,
+- UBIFS_FORMAT_VERSION);
+- err = -EINVAL;
+- goto out;
++ ubifs_assert(!c->ro_media || c->ro_mount);
++ if (!c->ro_mount ||
++ c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
++ ubifs_err("on-flash format version is w%d/r%d, but "
++ "software only supports up to version "
++ "w%d/r%d", c->fmt_version,
++ c->ro_compat_version, UBIFS_FORMAT_VERSION,
++ UBIFS_RO_COMPAT_VERSION);
++ if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
++ ubifs_msg("only R/O mounting is possible");
++ err = -EROFS;
++ } else
++ err = -EINVAL;
++ goto out;
++ }
++
++ /*
++ * The FS is mounted R/O, and the media format is
++ * R/O-compatible with the UBIFS implementation, so we can
++ * mount.
++ */
++ c->rw_incompat = 1;
+ }
+
+ if (c->fmt_version < 3) {
+@@ -598,12 +616,13 @@ int ubifs_read_superblock(struct ubifs_i
+ c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
+ memcpy(&c->uuid, &sup->uuid, 16);
+ c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
++ c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
+
+ /* Automatically increase file system size to the maximum size */
+ c->old_leb_cnt = c->leb_cnt;
+ if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
+ c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
+- if (c->vfs_sb->s_flags & MS_RDONLY)
++ if (c->ro_mount)
+ dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
+ c->old_leb_cnt, c->leb_cnt);
+ else {
+@@ -626,10 +645,158 @@ int ubifs_read_superblock(struct ubifs_i
+ c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
+ c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
+ c->main_first = c->leb_cnt - c->main_lebs;
+- c->report_rp_size = ubifs_reported_space(c, c->rp_size);
+
+ err = validate_sb(c, sup);
+ out:
+ kfree(sup);
+ return err;
+ }
++
++/**
++ * fixup_leb - fixup/unmap an LEB containing free space.
++ * @c: UBIFS file-system description object
++ * @lnum: the LEB number to fix up
++ * @len: number of used bytes in LEB (starting at offset 0)
++ *
++ * This function reads the contents of the given LEB number @lnum, then fixes
++ * it up, so that empty min. I/O units in the end of LEB are actually erased on
++ * flash (rather than being just all-0xff real data). If the LEB is completely
++ * empty, it is simply unmapped.
++ */
++static int fixup_leb(struct ubifs_info *c, int lnum, int len)
++{
++ int err;
++
++ ubifs_assert(len >= 0);
++ ubifs_assert(len % c->min_io_size == 0);
++ ubifs_assert(len < c->leb_size);
++
++ if (len == 0) {
++ dbg_mnt("unmap empty LEB %d", lnum);
++ return ubi_leb_unmap(c->ubi, lnum);
++ }
++
++ dbg_mnt("fixup LEB %d, data len %d", lnum, len);
++ err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
++ if (err)
++ return err;
++
++ return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
++}
++
++/**
++ * fixup_free_space - find & remap all LEBs containing free space.
++ * @c: UBIFS file-system description object
++ *
++ * This function walks through all LEBs in the filesystem and fixes up those
++ * containing free/empty space.
++ */
++static int fixup_free_space(struct ubifs_info *c)
++{
++ int lnum, err = 0;
++ struct ubifs_lprops *lprops;
++
++ ubifs_get_lprops(c);
++
++ /* Fixup LEBs in the master area */
++ for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
++ err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
++ if (err)
++ goto out;
++ }
++
++ /* Unmap unused log LEBs */
++ lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
++ while (lnum != c->ltail_lnum) {
++ err = fixup_leb(c, lnum, 0);
++ if (err)
++ goto out;
++ lnum = ubifs_next_log_lnum(c, lnum);
++ }
++
++ /* Fixup the current log head */
++ err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
++ if (err)
++ goto out;
++
++ /* Fixup LEBs in the LPT area */
++ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
++ int free = c->ltab[lnum - c->lpt_first].free;
++
++ if (free > 0) {
++ err = fixup_leb(c, lnum, c->leb_size - free);
++ if (err)
++ goto out;
++ }
++ }
++
++ /* Unmap LEBs in the orphans area */
++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
++ err = fixup_leb(c, lnum, 0);
++ if (err)
++ goto out;
++ }
++
++ /* Fixup LEBs in the main area */
++ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
++ lprops = ubifs_lpt_lookup(c, lnum);
++ if (IS_ERR(lprops)) {
++ err = PTR_ERR(lprops);
++ goto out;
++ }
++
++ if (lprops->free > 0) {
++ err = fixup_leb(c, lnum, c->leb_size - lprops->free);
++ if (err)
++ goto out;
++ }
++ }
++
++out:
++ ubifs_release_lprops(c);
++ return err;
++}
++
++/**
++ * ubifs_fixup_free_space - find & fix all LEBs with free space.
++ * @c: UBIFS file-system description object
++ *
++ * This function fixes up LEBs containing free space on first mount, if the
++ * appropriate flag was set when the FS was created. Each LEB with one or more
++ * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
++ * the free space is actually erased. E.g., this is necessary for some NAND
++ * chips, since the free space may have been programmed like real "0xff" data
++ * (generating a non-0xff ECC), causing future writes to the not-really-erased
++ * NAND pages to behave badly. After the space is fixed up, the superblock flag
++ * is cleared, so that this is skipped for all future mounts.
++ */
++int ubifs_fixup_free_space(struct ubifs_info *c)
++{
++ int err;
++ struct ubifs_sb_node *sup;
++
++ ubifs_assert(c->space_fixup);
++ ubifs_assert(!c->ro_mount);
++
++ ubifs_msg("start fixing up free space");
++
++ err = fixup_free_space(c);
++ if (err)
++ return err;
++
++ sup = ubifs_read_sb_node(c);
++ if (IS_ERR(sup))
++ return PTR_ERR(sup);
++
++ /* Free-space fixup is no longer required */
++ c->space_fixup = 0;
++ sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
++
++ err = ubifs_write_sb_node(c, sup);
++ kfree(sup);
++ if (err)
++ return err;
++
++ ubifs_msg("free space fixup complete");
++ return err;
++}
+diff -uprN linux-2.6.28/fs/ubifs/scan.c ubifs-v2.6.28/fs/ubifs/scan.c
+--- linux-2.6.28/fs/ubifs/scan.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/scan.c 2011-06-15 14:22:09.000000000 -0400
+@@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs
+
+ /* Make the node pads to 8-byte boundary */
+ if ((node_len + pad_len) & 7) {
+- if (!quiet) {
++ if (!quiet)
+ dbg_err("bad padding length %d - %d",
+ offs, offs + node_len + pad_len);
+- }
+ return SCANNED_A_BAD_PAD_NODE;
+ }
+
+@@ -198,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_in
+ struct ubifs_ino_node *ino = buf;
+ struct ubifs_scan_node *snod;
+
+- snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
++ snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
+ if (!snod)
+ return -ENOMEM;
+
+@@ -213,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_in
+ case UBIFS_DENT_NODE:
+ case UBIFS_XENT_NODE:
+ case UBIFS_DATA_NODE:
+- case UBIFS_TRUN_NODE:
+ /*
+ * The key is in the same place in all keyed
+ * nodes.
+ */
+ key_read(c, &ino->key, &snod->key);
+ break;
++ default:
++ invalid_key_init(c, &snod->key);
++ break;
+ }
+ list_add_tail(&snod->list, &sleb->nodes);
+ sleb->nodes_cnt += 1;
+@@ -238,12 +239,12 @@ void ubifs_scanned_corruption(const stru
+ {
+ int len;
+
+- ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
++ ubifs_err("corruption at LEB %d:%d", lnum, offs);
+ if (dbg_failure_mode)
+ return;
+ len = c->leb_size - offs;
+- if (len > 4096)
+- len = 4096;
++ if (len > 8192)
++ len = 8192;
+ dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
+ }
+@@ -253,13 +254,19 @@ void ubifs_scanned_corruption(const stru
+ * @c: UBIFS file-system description object
+ * @lnum: logical eraseblock number
+ * @offs: offset to start at (usually zero)
+- * @sbuf: scan buffer (must be c->leb_size)
++ * @sbuf: scan buffer (must be of @c->leb_size bytes in size)
++ * @quiet: print no messages
+ *
+ * This function scans LEB number @lnum and returns complete information about
+- * its contents. Returns an error code in case of failure.
++ * its contents. Returns the scanned information in case of success and,
++ * %-EUCLEAN if the LEB needs recovery, and other negative error codes in case
++ * of failure.
++ *
++ * If @quiet is non-zero, this function does not print large and scary
++ * error messages and flash dumps in case of errors.
+ */
+ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
+- int offs, void *sbuf)
++ int offs, void *sbuf, int quiet)
+ {
+ void *buf = sbuf + offs;
+ int err, len = c->leb_size - offs;
+@@ -278,8 +285,7 @@ struct ubifs_scan_leb *ubifs_scan(const
+
+ cond_resched();
+
+- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
+-
++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+ if (ret > 0) {
+ /* Padding bytes or a valid padding node */
+ offs += ret;
+@@ -304,7 +310,8 @@ struct ubifs_scan_leb *ubifs_scan(const
+ goto corrupted;
+ default:
+ dbg_err("unknown");
+- goto corrupted;
++ err = -EINVAL;
++ goto error;
+ }
+
+ err = ubifs_add_snod(c, sleb, buf, offs);
+@@ -317,8 +324,12 @@ struct ubifs_scan_leb *ubifs_scan(const
+ len -= node_len;
+ }
+
+- if (offs % c->min_io_size)
++ if (offs % c->min_io_size) {
++ if (!quiet)
++ ubifs_err("empty space starts at non-aligned offset %d",
++ offs);
+ goto corrupted;
++ }
+
+ ubifs_end_scan(c, sleb, lnum, offs);
+
+@@ -327,18 +338,25 @@ struct ubifs_scan_leb *ubifs_scan(const
+ break;
+ for (; len; offs++, buf++, len--)
+ if (*(uint8_t *)buf != 0xff) {
+- ubifs_err("corrupt empty space at LEB %d:%d",
+- lnum, offs);
++ if (!quiet)
++ ubifs_err("corrupt empty space at LEB %d:%d",
++ lnum, offs);
+ goto corrupted;
+ }
+
+ return sleb;
+
+ corrupted:
+- ubifs_scanned_corruption(c, lnum, offs, buf);
++ if (!quiet) {
++ ubifs_scanned_corruption(c, lnum, offs, buf);
++ ubifs_err("LEB %d scanning failed", lnum);
++ }
+ err = -EUCLEAN;
++ ubifs_scan_destroy(sleb);
++ return ERR_PTR(err);
++
+ error:
+- ubifs_err("LEB %d scanning failed", lnum);
++ ubifs_err("LEB %d scanning failed, error %d", lnum, err);
+ ubifs_scan_destroy(sleb);
+ return ERR_PTR(err);
+ }
+diff -uprN linux-2.6.28/fs/ubifs/shrinker.c ubifs-v2.6.28/fs/ubifs/shrinker.c
+--- linux-2.6.28/fs/ubifs/shrinker.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/shrinker.c 2011-06-15 14:22:09.000000000 -0400
+@@ -151,7 +151,7 @@ static int shrink_tnc(struct ubifs_info
+ * @contention: if any contention, this is set to %1
+ *
+ * This function walks the list of mounted UBIFS file-systems and frees clean
+- * znodes which are older then @age, until at least @nr znodes are freed.
++ * znodes which are older than @age, until at least @nr znodes are freed.
+ * Returns the number of freed znodes.
+ */
+ static int shrink_tnc_trees(int nr, int age, int *contention)
+@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int
+ * Move this one to the end of the list to provide some
+ * fairness.
+ */
+- list_del(&c->infos_list);
+- list_add_tail(&c->infos_list, &ubifs_infos);
++ list_move_tail(&c->infos_list, &ubifs_infos);
+ mutex_unlock(&c->umount_mutex);
+ if (freed >= nr)
+ break;
+@@ -251,7 +250,7 @@ static int kick_a_thread(void)
+ dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
+
+ if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
+- c->ro_media) {
++ c->ro_mount || c->ro_error) {
+ mutex_unlock(&c->umount_mutex);
+ continue;
+ }
+@@ -263,8 +262,7 @@ static int kick_a_thread(void)
+ }
+
+ if (i == 1) {
+- list_del(&c->infos_list);
+- list_add_tail(&c->infos_list, &ubifs_infos);
++ list_move_tail(&c->infos_list, &ubifs_infos);
+ spin_unlock(&ubifs_infos_lock);
+
+ ubifs_request_bg_commit(c);
+@@ -285,7 +283,11 @@ int ubifs_shrinker(int nr, gfp_t gfp_mas
+ long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
+
+ if (nr == 0)
+- return clean_zn_cnt;
++ /*
++ * Due to the way UBIFS updates the clean znode counter it may
++ * temporarily be negative.
++ */
++ return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
+
+ if (!clean_zn_cnt) {
+ /*
+diff -uprN linux-2.6.28/fs/ubifs/super.c ubifs-v2.6.28/fs/ubifs/super.c
+--- linux-2.6.28/fs/ubifs/super.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/super.c 2011-06-15 14:22:09.000000000 -0400
+@@ -317,6 +317,8 @@ static int ubifs_write_inode(struct inod
+ if (err)
+ ubifs_err("can't write inode %lu, error %d",
+ inode->i_ino, err);
++ else
++ err = dbg_check_inode_size(c, inode, ui->ui_size);
+ }
+
+ ui->dirty = 0;
+@@ -362,7 +364,7 @@ out:
+ ubifs_release_dirty_inode_budget(c, ui);
+ else {
+ /* We've deleted something - clean the "no space" flags */
+- c->nospace = c->nospace_rp = 0;
++ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+ }
+ clear_inode(inode);
+@@ -426,8 +428,8 @@ static int ubifs_show_options(struct seq
+ seq_printf(s, ",no_chk_data_crc");
+
+ if (c->mount_opts.override_compr) {
+- seq_printf(s, ",compr=");
+- seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
++ seq_printf(s, ",compr=%s",
++ ubifs_compr_name(c->mount_opts.compr_type));
+ }
+
+ return 0;
+@@ -511,9 +513,12 @@ static int init_constants_early(struct u
+
+ c->leb_cnt = c->vi.size;
+ c->leb_size = c->vi.usable_leb_size;
++ c->leb_start = c->di.leb_start;
+ c->half_leb_size = c->leb_size / 2;
+ c->min_io_size = c->di.min_io_size;
+ c->min_io_shift = fls(c->min_io_size) - 1;
++ c->max_write_size = c->di.max_write_size;
++ c->max_write_shift = fls(c->max_write_size) - 1;
+
+ if (c->leb_size < UBIFS_MIN_LEB_SZ) {
+ ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
+@@ -533,6 +538,18 @@ static int init_constants_early(struct u
+ }
+
+ /*
++ * Maximum write size has to be greater or equivalent to min. I/O
++ * size, and be multiple of min. I/O size.
++ */
++ if (c->max_write_size < c->min_io_size ||
++ c->max_write_size % c->min_io_size ||
++ !is_power_of_2(c->max_write_size)) {
++ ubifs_err("bad write buffer size %d for %d min. I/O unit",
++ c->max_write_size, c->min_io_size);
++ return -EINVAL;
++ }
++
++ /*
+ * UBIFS aligns all node to 8-byte boundary, so to make function in
+ * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
+ * less than 8.
+@@ -540,6 +557,10 @@ static int init_constants_early(struct u
+ if (c->min_io_size < 8) {
+ c->min_io_size = 8;
+ c->min_io_shift = 3;
++ if (c->max_write_size < c->min_io_size) {
++ c->max_write_size = c->min_io_size;
++ c->max_write_shift = c->min_io_shift;
++ }
+ }
+
+ c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
+@@ -674,11 +695,11 @@ static int init_constants_sb(struct ubif
+ * be compressed and direntries are of the maximum size.
+ *
+ * Note, data, which may be stored in inodes is budgeted separately, so
+- * it is not included into 'c->inode_budget'.
++ * it is not included into 'c->bi.inode_budget'.
+ */
+- c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
+- c->inode_budget = UBIFS_INO_NODE_SZ;
+- c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
++ c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
++ c->bi.inode_budget = UBIFS_INO_NODE_SZ;
++ c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
+
+ /*
+ * When the amount of flash space used by buds becomes
+@@ -705,6 +726,8 @@ static int init_constants_sb(struct ubif
+ if (err)
+ return err;
+
++ /* Initialize effective LEB size used in budgeting calculations */
++ c->idx_leb_size = c->leb_size - c->max_idx_node_sz;
+ return 0;
+ }
+
+@@ -720,7 +743,8 @@ static void init_constants_master(struct
+ {
+ long long tmp64;
+
+- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++ c->report_rp_size = ubifs_reported_space(c, c->rp_size);
+
+ /*
+ * Calculate total amount of FS blocks. This number is not used
+@@ -788,15 +812,18 @@ static int alloc_wbufs(struct ubifs_info
+
+ c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
+ c->jheads[i].wbuf.jhead = i;
++ c->jheads[i].grouped = 1;
+ }
+
+ c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
+ /*
+ * Garbage Collector head likely contains long-term data and
+- * does not need to be synchronized by timer.
++ * does not need to be synchronized by timer. Also GC head nodes are
++ * not grouped.
+ */
+ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
+ c->jheads[GCHD].wbuf.no_timer = 1;
++ c->jheads[GCHD].grouped = 0;
+
+ return 0;
+ }
+@@ -937,6 +964,27 @@ static const match_table_t tokens = {
+ };
+
+ /**
++ * parse_standard_option - parse a standard mount option.
++ * @option: the option to parse
++ *
++ * Normally, standard mount options like "sync" are passed to file-systems as
++ * flags. However, when a "rootflags=" kernel boot parameter is used, they may
++ * be present in the options string. This function tries to deal with this
++ * situation and parse standard options. Returns 0 if the option was not
++ * recognized, and the corresponding integer flag if it was.
++ *
++ * UBIFS is only interested in the "sync" option, so do not check for anything
++ * else.
++ */
++static int parse_standard_option(const char *option)
++{
++ ubifs_msg("parse %s", option);
++ if (!strcmp(option, "sync"))
++ return MS_SYNCHRONOUS;
++ return 0;
++}
++
++/**
+ * ubifs_parse_options - parse mount parameters.
+ * @c: UBIFS file-system description object
+ * @options: parameters to parse
+@@ -1001,8 +1049,6 @@ static int ubifs_parse_options(struct ub
+ c->mount_opts.compr_type = UBIFS_COMPR_LZO;
+ else if (!strcmp(name, "zlib"))
+ c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
+- else if (!strcmp(name, "lzo999"))
+- c->mount_opts.compr_type = UBIFS_COMPR_LZO999;
+ else {
+ ubifs_err("unknown compressor \"%s\"", name);
+ kfree(name);
+@@ -1014,9 +1060,19 @@ static int ubifs_parse_options(struct ub
+ break;
+ }
+ default:
+- ubifs_err("unrecognized mount option \"%s\" "
+- "or missing value", p);
+- return -EINVAL;
++ {
++ unsigned long flag;
++ struct super_block *sb = c->vfs_sb;
++
++ flag = parse_standard_option(p);
++ if (!flag) {
++ ubifs_err("unrecognized mount option \"%s\" "
++ "or missing value", p);
++ return -EINVAL;
++ }
++ sb->s_flags |= flag;
++ break;
++ }
+ }
+ }
+
+@@ -1092,8 +1148,8 @@ static int check_free_space(struct ubifs
+ {
+ ubifs_assert(c->dark_wm > 0);
+ if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
+- ubifs_err("insufficient free space to mount in read/write mode");
+- dbg_dump_budg(c);
++ ubifs_err("insufficient free space to mount in R/W mode");
++ dbg_dump_budg(c, &c->bi);
+ dbg_dump_lprops(c);
+ return -ENOSPC;
+ }
+@@ -1112,11 +1168,11 @@ static int check_free_space(struct ubifs
+ */
+ static int mount_ubifs(struct ubifs_info *c)
+ {
+- struct super_block *sb = c->vfs_sb;
+- int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
++ int err;
+ long long x;
+ size_t sz;
+
++ c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
+ err = init_constants_early(c);
+ if (err)
+ return err;
+@@ -1129,7 +1185,7 @@ static int mount_ubifs(struct ubifs_info
+ if (err)
+ goto out_free;
+
+- if (c->empty && (mounted_read_only || c->ro_media)) {
++ if (c->empty && (c->ro_mount || c->ro_media)) {
+ /*
+ * This UBI volume is empty, and read-only, or the file system
+ * is mounted read-only - we cannot format it.
+@@ -1140,7 +1196,7 @@ static int mount_ubifs(struct ubifs_info
+ goto out_free;
+ }
+
+- if (c->ro_media && !mounted_read_only) {
++ if (c->ro_media && !c->ro_mount) {
+ ubifs_err("cannot mount read-write - read-only media");
+ err = -EROFS;
+ goto out_free;
+@@ -1160,7 +1216,7 @@ static int mount_ubifs(struct ubifs_info
+ if (!c->sbuf)
+ goto out_free;
+
+- if (!mounted_read_only) {
++ if (!c->ro_mount) {
+ c->ileb_buf = vmalloc(c->leb_size);
+ if (!c->ileb_buf)
+ goto out_free;
+@@ -1169,11 +1225,14 @@ static int mount_ubifs(struct ubifs_info
+ if (c->bulk_read == 1)
+ bu_init(c);
+
+- /*
+- * We have to check all CRCs, even for data nodes, when we mount the FS
+- * (specifically, when we are replaying).
+- */
+- c->always_chk_crc = 1;
++ if (!c->ro_mount) {
++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
++ GFP_KERNEL);
++ if (!c->write_reserve_buf)
++ goto out_free;
++ }
++
++ c->mounting = 1;
+
+ err = ubifs_read_superblock(c);
+ if (err)
+@@ -1186,6 +1245,7 @@ static int mount_ubifs(struct ubifs_info
+ if (!ubifs_compr_present(c->default_compr)) {
+ ubifs_err("'compressor \"%s\" is not compiled in",
+ ubifs_compr_name(c->default_compr));
++ err = -ENOTSUPP;
+ goto out_free;
+ }
+
+@@ -1201,14 +1261,14 @@ static int mount_ubifs(struct ubifs_info
+ goto out_free;
+ }
+
+- sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
+- if (!mounted_read_only) {
+- err = alloc_wbufs(c);
+- if (err)
+- goto out_cbuf;
++ err = alloc_wbufs(c);
++ if (err)
++ goto out_cbuf;
+
++ sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
++ if (!c->ro_mount) {
+ /* Create background thread */
+- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
++ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
+ if (IS_ERR(c->bgt)) {
+ err = PTR_ERR(c->bgt);
+ c->bgt = NULL;
+@@ -1228,12 +1288,25 @@ static int mount_ubifs(struct ubifs_info
+ if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
+ ubifs_msg("recovery needed");
+ c->need_recovery = 1;
+- if (!mounted_read_only) {
+- err = ubifs_recover_inl_heads(c, c->sbuf);
+- if (err)
+- goto out_master;
+- }
+- } else if (!mounted_read_only) {
++ }
++
++ if (c->need_recovery && !c->ro_mount) {
++ err = ubifs_recover_inl_heads(c, c->sbuf);
++ if (err)
++ goto out_master;
++ }
++
++ err = ubifs_lpt_init(c, 1, !c->ro_mount);
++ if (err)
++ goto out_master;
++
++ if (!c->ro_mount && c->space_fixup) {
++ err = ubifs_fixup_free_space(c);
++ if (err)
++ goto out_master;
++ }
++
++ if (!c->ro_mount) {
+ /*
+ * Set the "dirty" flag so that if we reboot uncleanly we
+ * will notice this immediately on the next mount.
+@@ -1241,14 +1314,10 @@ static int mount_ubifs(struct ubifs_info
+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+ err = ubifs_write_master(c);
+ if (err)
+- goto out_master;
++ goto out_lpt;
+ }
+
+- err = ubifs_lpt_init(c, 1, !mounted_read_only);
+- if (err)
+- goto out_lpt;
+-
+- err = dbg_check_idx_size(c, c->old_idx_sz);
++ err = dbg_check_idx_size(c, c->bi.old_idx_sz);
+ if (err)
+ goto out_lpt;
+
+@@ -1256,11 +1325,14 @@ static int mount_ubifs(struct ubifs_info
+ if (err)
+ goto out_journal;
+
+- err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
++ /* Calculate 'min_idx_lebs' after journal replay */
++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++
++ err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
+ if (err)
+ goto out_orphans;
+
+- if (!mounted_read_only) {
++ if (!c->ro_mount) {
+ int lnum;
+
+ err = check_free_space(c);
+@@ -1282,6 +1354,8 @@ static int mount_ubifs(struct ubifs_info
+ if (err)
+ goto out_orphans;
+ err = ubifs_rcvry_gc_commit(c);
++ if (err)
++ goto out_orphans;
+ } else {
+ err = take_gc_lnum(c);
+ if (err)
+@@ -1293,7 +1367,7 @@ static int mount_ubifs(struct ubifs_info
+ */
+ err = ubifs_leb_unmap(c, c->gc_lnum);
+ if (err)
+- return err;
++ goto out_orphans;
+ }
+
+ err = dbg_check_lprops(c);
+@@ -1320,16 +1394,20 @@ static int mount_ubifs(struct ubifs_info
+ spin_unlock(&ubifs_infos_lock);
+
+ if (c->need_recovery) {
+- if (mounted_read_only)
++ if (c->ro_mount)
+ ubifs_msg("recovery deferred");
+ else {
+ c->need_recovery = 0;
+ ubifs_msg("recovery completed");
+- /* GC LEB has to be empty and taken at this point */
+- ubifs_assert(c->lst.taken_empty_lebs == 1);
++ /*
++ * GC LEB has to be empty and taken at this point. But
++ * the journal head LEBs may also be accounted as
++ * "empty taken" if they are empty.
++ */
++ ubifs_assert(c->lst.taken_empty_lebs > 0);
+ }
+ } else
+- ubifs_assert(c->lst.taken_empty_lebs == 1);
++ ubifs_assert(c->lst.taken_empty_lebs > 0);
+
+ err = dbg_check_filesystem(c);
+ if (err)
+@@ -1339,11 +1417,11 @@ static int mount_ubifs(struct ubifs_info
+ if (err)
+ goto out_infos;
+
+- c->always_chk_crc = 0;
++ c->mounting = 0;
+
+ ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
+ c->vi.ubi_num, c->vi.vol_id, c->vi.name);
+- if (mounted_read_only)
++ if (c->ro_mount)
+ ubifs_msg("mounted read-only");
+ x = (long long)c->main_lebs * c->leb_size;
+ ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d "
+@@ -1351,14 +1429,16 @@ static int mount_ubifs(struct ubifs_info
+ x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
+ ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d "
+ "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
+- ubifs_msg("media format: %d (latest is %d)",
+- c->fmt_version, UBIFS_FORMAT_VERSION);
++ ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)",
++ c->fmt_version, c->ro_compat_version,
++ UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
+ ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
+ ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
+ c->report_rp_size, c->report_rp_size >> 10);
+
+ dbg_msg("compiled on: " __DATE__ " at " __TIME__);
+ dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
++ dbg_msg("max. write size: %d bytes", c->max_write_size);
+ dbg_msg("LEB size: %d bytes (%d KiB)",
+ c->leb_size, c->leb_size >> 10);
+ dbg_msg("data journal heads: %d",
+@@ -1380,7 +1460,8 @@ static int mount_ubifs(struct ubifs_info
+ c->main_lebs, c->main_first, c->leb_cnt - 1);
+ dbg_msg("index LEBs: %d", c->lst.idx_lebs);
+ dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
+- c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
++ c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
++ c->bi.old_idx_sz >> 20);
+ dbg_msg("key hash type: %d", c->key_hash_type);
+ dbg_msg("tree fanout: %d", c->fanout);
+ dbg_msg("reserved GC LEB: %d", c->gc_lnum);
+@@ -1393,9 +1474,9 @@ static int mount_ubifs(struct ubifs_info
+ UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
+ dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
+ UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
+- dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu",
+- UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
+- UBIFS_MAX_DENT_NODE_SZ);
++ dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
++ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
++ UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
+ dbg_msg("dead watermark: %d", c->dead_wm);
+ dbg_msg("dark watermark: %d", c->dark_wm);
+ dbg_msg("LEB overhead: %d", c->leb_overhead);
+@@ -1435,6 +1516,7 @@ out_wbufs:
+ out_cbuf:
+ kfree(c->cbuf);
+ out_free:
++ kfree(c->write_reserve_buf);
+ kfree(c->bu.buf);
+ vfree(c->ileb_buf);
+ vfree(c->sbuf);
+@@ -1473,6 +1555,7 @@ static void ubifs_umount(struct ubifs_in
+ kfree(c->cbuf);
+ kfree(c->rcvrd_mst_node);
+ kfree(c->mst_node);
++ kfree(c->write_reserve_buf);
+ kfree(c->bu.buf);
+ vfree(c->ileb_buf);
+ vfree(c->sbuf);
+@@ -1492,10 +1575,19 @@ static int ubifs_remount_rw(struct ubifs
+ {
+ int err, lnum;
+
++ if (c->rw_incompat) {
++ ubifs_err("the file-system is not R/W-compatible");
++ ubifs_msg("on-flash format version is w%d/r%d, but software "
++ "only supports up to version w%d/r%d", c->fmt_version,
++ c->ro_compat_version, UBIFS_FORMAT_VERSION,
++ UBIFS_RO_COMPAT_VERSION);
++ return -EROFS;
++ }
++
+ mutex_lock(&c->umount_mutex);
+ dbg_save_space_info(c);
+ c->remounting_rw = 1;
+- c->always_chk_crc = 1;
++ c->ro_mount = 0;
+
+ err = check_free_space(c);
+ if (err)
+@@ -1511,6 +1603,7 @@ static int ubifs_remount_rw(struct ubifs
+ }
+ sup->leb_cnt = cpu_to_le32(c->leb_cnt);
+ err = ubifs_write_sb_node(c, sup);
++ kfree(sup);
+ if (err)
+ goto out;
+ }
+@@ -1550,18 +1643,16 @@ static int ubifs_remount_rw(struct ubifs
+ goto out;
+ }
+
+- err = ubifs_lpt_init(c, 0, 1);
+- if (err)
++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
++ if (!c->write_reserve_buf)
+ goto out;
+
+- err = alloc_wbufs(c);
++ err = ubifs_lpt_init(c, 0, 1);
+ if (err)
+ goto out;
+
+- ubifs_create_buds_lists(c);
+-
+ /* Create background thread */
+- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
++ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
+ if (IS_ERR(c->bgt)) {
+ err = PTR_ERR(c->bgt);
+ c->bgt = NULL;
+@@ -1594,20 +1685,37 @@ static int ubifs_remount_rw(struct ubifs
+ if (err)
+ goto out;
+
++ dbg_gen("re-mounted read-write");
++ c->remounting_rw = 0;
++
+ if (c->need_recovery) {
+ c->need_recovery = 0;
+ ubifs_msg("deferred recovery completed");
++ } else {
++ /*
++ * Do not run the debugging space check if we were doing
++ * recovery, because when we saved the information we had the
++ * file-system in a state where the TNC and lprops has been
++ * modified in memory, but all the I/O operations (including a
++ * commit) were deferred. So the file-system was in
++ * "non-committed" state. Now the file-system is in committed
++ * state, and of course the amount of free space will change
++ * because, for example, the old index size was imprecise.
++ */
++ err = dbg_check_space_info(c);
++ }
++
++ if (c->space_fixup) {
++ err = ubifs_fixup_free_space(c);
++ if (err)
++ goto out;
+ }
+
+- dbg_gen("re-mounted read-write");
+- c->vfs_sb->s_flags &= ~MS_RDONLY;
+- c->remounting_rw = 0;
+- c->always_chk_crc = 0;
+- err = dbg_check_space_info(c);
+ mutex_unlock(&c->umount_mutex);
+ return err;
+
+ out:
++ c->ro_mount = 1;
+ vfree(c->orph_buf);
+ c->orph_buf = NULL;
+ if (c->bgt) {
+@@ -1615,11 +1723,12 @@ out:
+ c->bgt = NULL;
+ }
+ free_wbufs(c);
++ kfree(c->write_reserve_buf);
++ c->write_reserve_buf = NULL;
+ vfree(c->ileb_buf);
+ c->ileb_buf = NULL;
+ ubifs_lpt_free(c, 1);
+ c->remounting_rw = 0;
+- c->always_chk_crc = 0;
+ mutex_unlock(&c->umount_mutex);
+ return err;
+ }
+@@ -1636,7 +1745,7 @@ static void ubifs_remount_ro(struct ubif
+ int i, err;
+
+ ubifs_assert(!c->need_recovery);
+- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
++ ubifs_assert(!c->ro_mount);
+
+ mutex_lock(&c->umount_mutex);
+ if (c->bgt) {
+@@ -1646,10 +1755,8 @@ static void ubifs_remount_ro(struct ubif
+
+ dbg_save_space_info(c);
+
+- for (i = 0; i < c->jhead_cnt; i++) {
++ for (i = 0; i < c->jhead_cnt; i++)
+ ubifs_wbuf_sync(&c->jheads[i].wbuf);
+- hrtimer_cancel(&c->jheads[i].wbuf.timer);
+- }
+
+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+@@ -1658,12 +1765,14 @@ static void ubifs_remount_ro(struct ubif
+ if (err)
+ ubifs_ro_mode(c, err);
+
+- free_wbufs(c);
+ vfree(c->orph_buf);
+ c->orph_buf = NULL;
++ kfree(c->write_reserve_buf);
++ c->write_reserve_buf = NULL;
+ vfree(c->ileb_buf);
+ c->ileb_buf = NULL;
+ ubifs_lpt_free(c, 1);
++ c->ro_mount = 1;
+ err = dbg_check_space_info(c);
+ if (err)
+ ubifs_ro_mode(c, err);
+@@ -1682,10 +1791,11 @@ static void ubifs_put_super(struct super
+ * of the media. For example, there will be dirty inodes if we failed
+ * to write them back because of I/O errors.
+ */
+- ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
+- ubifs_assert(c->budg_idx_growth == 0);
+- ubifs_assert(c->budg_dd_growth == 0);
+- ubifs_assert(c->budg_data_growth == 0);
++ if (!c->ro_error) {
++ ubifs_assert(c->bi.idx_growth == 0);
++ ubifs_assert(c->bi.dd_growth == 0);
++ ubifs_assert(c->bi.data_growth == 0);
++ }
+
+ /*
+ * The 'c->umount_lock' prevents races between UBIFS memory shrinker
+@@ -1694,7 +1804,7 @@ static void ubifs_put_super(struct super
+ * the mutex is locked.
+ */
+ mutex_lock(&c->umount_mutex);
+- if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
++ if (!c->ro_mount) {
+ /*
+ * First of all kill the background thread to make sure it does
+ * not interfere with un-mounting and freeing resources.
+@@ -1704,23 +1814,22 @@ static void ubifs_put_super(struct super
+ c->bgt = NULL;
+ }
+
+- /* Synchronize write-buffers */
+- if (c->jheads)
+- for (i = 0; i < c->jhead_cnt; i++)
+- ubifs_wbuf_sync(&c->jheads[i].wbuf);
+-
+ /*
+- * On fatal errors c->ro_media is set to 1, in which case we do
++ * On fatal errors c->ro_error is set to 1, in which case we do
+ * not write the master node.
+ */
+- if (!c->ro_media) {
++ if (!c->ro_error) {
++ int err;
++
++ /* Synchronize write-buffers */
++ for (i = 0; i < c->jhead_cnt; i++)
++ ubifs_wbuf_sync(&c->jheads[i].wbuf);
++
+ /*
+ * We are being cleanly unmounted which means the
+ * orphans were killed - indicate this in the master
+ * node. Also save the reserved GC LEB number.
+ */
+- int err;
+-
+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+ c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
+@@ -1733,6 +1842,10 @@ static void ubifs_put_super(struct super
+ */
+ ubifs_err("failed to write master node, "
+ "error %d", err);
++ } else {
++ for (i = 0; i < c->jhead_cnt; i++)
++ /* Make sure write-buffer timers are canceled */
++ hrtimer_cancel(&c->jheads[i].wbuf.timer);
+ }
+ }
+
+@@ -1756,17 +1869,21 @@ static int ubifs_remount_fs(struct super
+ return err;
+ }
+
+- if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
++ if (c->ro_mount && !(*flags & MS_RDONLY)) {
++ if (c->ro_error) {
++ ubifs_msg("cannot re-mount R/W due to prior errors");
++ return -EROFS;
++ }
+ if (c->ro_media) {
+- ubifs_msg("cannot re-mount due to prior errors");
++ ubifs_msg("cannot re-mount R/W - UBI volume is R/O");
+ return -EROFS;
+ }
+ err = ubifs_remount_rw(c);
+ if (err)
+ return err;
+- } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
+- if (c->ro_media) {
+- ubifs_msg("cannot re-mount due to prior errors");
++ } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
++ if (c->ro_error) {
++ ubifs_msg("cannot re-mount R/O due to prior errors");
+ return -EROFS;
+ }
+ ubifs_remount_ro(c);
+@@ -1780,7 +1897,7 @@ static int ubifs_remount_fs(struct super
+ c->bu.buf = NULL;
+ }
+
+- ubifs_assert(c->lst.taken_empty_lebs == 1);
++ ubifs_assert(c->lst.taken_empty_lebs > 0);
+ return 0;
+ }
+
+@@ -1802,22 +1919,32 @@ const struct super_operations ubifs_supe
+ * @name: UBI volume name
+ * @mode: UBI volume open mode
+ *
+- * There are several ways to specify UBI volumes when mounting UBIFS:
+- * o ubiX_Y - UBI device number X, volume Y;
+- * o ubiY - UBI device number 0, volume Y;
++ * The primary method of mounting UBIFS is by specifying the UBI volume
++ * character device node path. However, UBIFS may also be mounted without any
++ * character device node using one of the following methods:
++ *
++ * o ubiX_Y - mount UBI device number X, volume Y;
++ * o ubiY - mount UBI device number 0, volume Y;
+ * o ubiX:NAME - mount UBI device X, volume with name NAME;
+ * o ubi:NAME - mount UBI device 0, volume with name NAME.
+ *
+ * Alternative '!' separator may be used instead of ':' (because some shells
+ * like busybox may interpret ':' as an NFS host name separator). This function
+- * returns ubi volume object in case of success and a negative error code in
+- * case of failure.
++ * returns UBI volume description object in case of success and a negative
++ * error code in case of failure.
+ */
+ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
+ {
++ struct ubi_volume_desc *ubi;
+ int dev, vol;
+ char *endptr;
+
++ /* First, try to open using the device node path method */
++ ubi = ubi_open_volume_path(name, mode);
++ if (!IS_ERR(ubi))
++ return ubi;
++
++ /* Try the "nodev" method */
+ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
+ return ERR_PTR(-EINVAL);
+
+@@ -1872,6 +1999,7 @@ static int ubifs_fill_super(struct super
+ mutex_init(&c->mst_mutex);
+ mutex_init(&c->umount_mutex);
+ mutex_init(&c->bu_mutex);
++ mutex_init(&c->write_reserve_mutex);
+ init_waitqueue_head(&c->cmt_wq);
+ c->buds = RB_ROOT;
+ c->old_idx = RB_ROOT;
+@@ -1889,7 +2017,9 @@ static int ubifs_fill_super(struct super
+ INIT_LIST_HEAD(&c->old_buds);
+ INIT_LIST_HEAD(&c->orph_list);
+ INIT_LIST_HEAD(&c->orph_new);
++ c->no_chk_data_crc = 1;
+
++ c->vfs_sb = sb;
+ c->highest_inum = UBIFS_FIRST_INO;
+ c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
+
+@@ -1916,18 +2046,19 @@ static int ubifs_fill_super(struct super
+ err = bdi_init(&c->bdi);
+ if (err)
+ goto out_close;
++ err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
++ c->vi.ubi_num, c->vi.vol_id);
++ if (err)
++ goto out_bdi;
+
+ err = ubifs_parse_options(c, data, 0);
+ if (err)
+ goto out_bdi;
+
+- c->vfs_sb = sb;
+-
+ sb->s_fs_info = c;
+ sb->s_magic = UBIFS_SUPER_MAGIC;
+ sb->s_blocksize = UBIFS_BLOCK_SIZE;
+ sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
+- sb->s_dev = c->vi.cdev;
+ sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
+ if (c->max_inode_sz > MAX_LFS_FILESIZE)
+ sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
+@@ -1972,16 +2103,9 @@ out_free:
+ static int sb_test(struct super_block *sb, void *data)
+ {
+ dev_t *dev = data;
++ struct ubifs_info *c = sb->s_fs_info;
+
+- return sb->s_dev == *dev;
+-}
+-
+-static int sb_set(struct super_block *sb, void *data)
+-{
+- dev_t *dev = data;
+-
+- sb->s_dev = *dev;
+- return 0;
++ return c->vi.cdev == *dev;
+ }
+
+ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
+@@ -2001,24 +2125,26 @@ static int ubifs_get_sb(struct file_syst
+ */
+ ubi = open_ubi(name, UBI_READONLY);
+ if (IS_ERR(ubi)) {
+- ubifs_err("cannot open \"%s\", error %d",
+- name, (int)PTR_ERR(ubi));
++ dbg_err("cannot open \"%s\", error %d",
++ name, (int)PTR_ERR(ubi));
+ return PTR_ERR(ubi);
+ }
+ ubi_get_volume_info(ubi, &vi);
+
+ dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
+
+- sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
++ sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev);
+ if (IS_ERR(sb)) {
+ err = PTR_ERR(sb);
+ goto out_close;
+ }
+
+ if (sb->s_root) {
++ struct ubifs_info *c1 = sb->s_fs_info;
++
+ /* A new mount point for already mounted UBIFS */
+ dbg_gen("this ubi volume is already mounted");
+- if ((flags ^ sb->s_flags) & MS_RDONLY) {
++ if (!!(flags & MS_RDONLY) != c1->ro_mount) {
+ err = -EBUSY;
+ goto out_deact;
+ }
+@@ -2049,16 +2175,11 @@ out_close:
+ return err;
+ }
+
+-static void ubifs_kill_sb(struct super_block *sb)
+-{
+- generic_shutdown_super(sb);
+-}
+-
+ static struct file_system_type ubifs_fs_type = {
+ .name = "ubifs",
+ .owner = THIS_MODULE,
+ .get_sb = ubifs_get_sb,
+- .kill_sb = ubifs_kill_sb
++ .kill_sb = kill_anon_super,
+ };
+
+ /*
+diff -uprN linux-2.6.28/fs/ubifs/tnc.c ubifs-v2.6.28/fs/ubifs/tnc.c
+--- linux-2.6.28/fs/ubifs/tnc.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/tnc.c 2011-06-15 14:22:09.000000000 -0400
+@@ -446,8 +446,11 @@ static int tnc_read_node_nm(struct ubifs
+ *
+ * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
+ * is true (it is controlled by corresponding mount option). However, if
+- * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always
+- * checked.
++ * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
++ * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
++ * because during mounting or re-mounting from R/O mode to R/W mode we may read
+ * journal nodes (when replaying the journal or doing the recovery) and the
++ * journal nodes may potentially be corrupted, so checking is required.
+ */
+ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
+ int len, int lnum, int offs)
+@@ -475,7 +478,8 @@ static int try_read_node(const struct ub
+ if (node_len != len)
+ return 0;
+
+- if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc)
++ if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
++ !c->remounting_rw)
+ return 1;
+
+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
+@@ -1159,8 +1163,8 @@ static struct ubifs_znode *dirty_cow_bot
+ * o exact match, i.e. the found zero-level znode contains key @key, then %1
+ * is returned and slot number of the matched branch is stored in @n;
+ * o not exact match, which means that zero-level znode does not contain
+- * @key, then %0 is returned and slot number of the closed branch is stored
+- * in @n;
++ * @key, then %0 is returned and slot number of the closest branch is stored
++ * in @n;
+ * o @key is so small that it is even less than the lowest key of the
+ * leftmost zero-level node, then %0 is returned and %0 is stored in @n.
+ *
+@@ -1176,6 +1180,7 @@ int ubifs_lookup_level0(struct ubifs_inf
+ unsigned long time = get_seconds();
+
+ dbg_tnc("search key %s", DBGKEY(key));
++ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
+
+ znode = c->zroot.znode;
+ if (unlikely(!znode)) {
+@@ -1252,7 +1257,7 @@ int ubifs_lookup_level0(struct ubifs_inf
+ * splitting in the middle of the colliding sequence. Also, when
+ * removing the leftmost key, we would have to correct the key of the
+ * parent node, which would introduce additional complications. Namely,
+- * if we changed the the leftmost key of the parent znode, the garbage
++ * if we changed the leftmost key of the parent znode, the garbage
+ * collector would be unable to find it (GC is doing this when GC'ing
+ * indexing LEBs). Although we already have an additional RB-tree where
+ * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
+@@ -1433,7 +1438,7 @@ static int maybe_leb_gced(struct ubifs_i
+ * @lnum: LEB number is returned here
+ * @offs: offset is returned here
+ *
+- * This function look up and reads node with key @key. The caller has to make
++ * This function looks up and reads node with key @key. The caller has to make
+ * sure the @node buffer is large enough to fit the node. Returns zero in case
+ * of success, %-ENOENT if the node was not found, and a negative error code in
+ * case of failure. The node location can be returned in @lnum and @offs.
+@@ -2551,11 +2556,11 @@ int ubifs_tnc_remove_nm(struct ubifs_inf
+ if (err) {
+ /* Ensure the znode is dirtied */
+ if (znode->cnext || !ubifs_zn_dirty(znode)) {
+- znode = dirty_cow_bottom_up(c, znode);
+- if (IS_ERR(znode)) {
+- err = PTR_ERR(znode);
+- goto out_unlock;
+- }
++ znode = dirty_cow_bottom_up(c, znode);
++ if (IS_ERR(znode)) {
++ err = PTR_ERR(znode);
++ goto out_unlock;
++ }
+ }
+ err = tnc_delete(c, znode, n);
+ }
+@@ -2870,12 +2875,13 @@ static void tnc_destroy_cnext(struct ubi
+ */
+ void ubifs_tnc_close(struct ubifs_info *c)
+ {
+- long clean_freed;
+-
+ tnc_destroy_cnext(c);
+ if (c->zroot.znode) {
+- clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
+- atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt);
++ long n;
++
++ ubifs_destroy_tnc_subtree(c->zroot.znode);
++ n = atomic_long_read(&c->clean_zn_cnt);
++ atomic_long_sub(n, &ubifs_clean_zn_cnt);
+ }
+ kfree(c->gap_lebs);
+ kfree(c->ilebs);
+@@ -2965,7 +2971,7 @@ static struct ubifs_znode *right_znode(s
+ *
+ * This function searches an indexing node by its first key @key and its
+ * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
+- * nodes it traverses to TNC. This function is called fro indexing nodes which
++ * nodes it traverses to TNC. This function is called for indexing nodes which
+ * were found on the media by scanning, for example when garbage-collecting or
+ * when doing in-the-gaps commit. This means that the indexing node which is
+ * looked for does not have to have exactly the same leftmost key @key, because
+@@ -2987,6 +2993,8 @@ static struct ubifs_znode *lookup_znode(
+ struct ubifs_znode *znode, *zn;
+ int n, nn;
+
++ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
++
+ /*
+ * The arguments have probably been read off flash, so don't assume
+ * they are valid.
+@@ -3268,3 +3276,73 @@ out_unlock:
+ mutex_unlock(&c->tnc_mutex);
+ return err;
+ }
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++
++/**
++ * dbg_check_inode_size - check if inode size is correct.
++ * @c: UBIFS file-system description object
++ * @inum: inode number
++ * @size: inode size
++ *
++ * This function makes sure that the inode size (@size) is correct and it does
++ * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL
++ * if it has a data page beyond @size, and other negative error code in case of
++ * other errors.
++ */
++int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
++ loff_t size)
++{
++ int err, n;
++ union ubifs_key from_key, to_key, *key;
++ struct ubifs_znode *znode;
++ unsigned int block;
++
++ if (!S_ISREG(inode->i_mode))
++ return 0;
++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
++ return 0;
++
++ block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
++ data_key_init(c, &from_key, inode->i_ino, block);
++ highest_data_key(c, &to_key, inode->i_ino);
++
++ mutex_lock(&c->tnc_mutex);
++ err = ubifs_lookup_level0(c, &from_key, &znode, &n);
++ if (err < 0)
++ goto out_unlock;
++
++ if (err) {
++ err = -EINVAL;
++ key = &from_key;
++ goto out_dump;
++ }
++
++ err = tnc_next(c, &znode, &n);
++ if (err == -ENOENT) {
++ err = 0;
++ goto out_unlock;
++ }
++ if (err < 0)
++ goto out_unlock;
++
++ ubifs_assert(err == 0);
++ key = &znode->zbranch[n].key;
++ if (!key_in_range(c, key, &from_key, &to_key))
++ goto out_unlock;
++
++out_dump:
++ block = key_block(c, key);
++ ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
++ "(data key %s)", (unsigned long)inode->i_ino, size,
++ ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
++ dbg_dump_inode(c, inode);
++ dbg_dump_stack();
++ err = -EINVAL;
++
++out_unlock:
++ mutex_unlock(&c->tnc_mutex);
++ return err;
++}
++
++#endif /* CONFIG_UBIFS_FS_DEBUG */
+diff -uprN linux-2.6.28/fs/ubifs/tnc_commit.c ubifs-v2.6.28/fs/ubifs/tnc_commit.c
+--- linux-2.6.28/fs/ubifs/tnc_commit.c 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/tnc_commit.c 2011-06-15 14:22:09.000000000 -0400
+@@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubi
+ * it is more comprehensive and less efficient than is needed for this
+ * purpose.
+ */
+- sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
++ sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0);
+ c->ileb_len = 0;
+ if (IS_ERR(sleb))
+ return PTR_ERR(sleb);
+@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_i
+ c->gap_lebs = NULL;
+ return err;
+ }
+- if (!dbg_force_in_the_gaps_enabled) {
++ if (dbg_force_in_the_gaps_enabled()) {
+ /*
+ * Do not print scary warnings if the debugging
+ * option which forces in-the-gaps is enabled.
+ */
+- ubifs_err("out of space");
+- spin_lock(&c->space_lock);
+- dbg_dump_budg(c);
+- spin_unlock(&c->space_lock);
++ ubifs_warn("out of space");
++ dbg_dump_budg(c, &c->bi);
+ dbg_dump_lprops(c);
+ }
+ /* Try to commit anyway */
+@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_
+ spin_lock(&c->space_lock);
+ /*
+ * Although we have not finished committing yet, update size of the
+- * committed index ('c->old_idx_sz') and zero out the index growth
++ * committed index ('c->bi.old_idx_sz') and zero out the index growth
+ * budget. It is OK to do this now, because we've reserved all the
+ * space which is needed to commit the index, and it is save for the
+ * budgeting subsystem to assume the index is already committed,
+ * even though it is not.
+ */
+- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+- c->old_idx_sz = c->calc_idx_sz;
+- c->budg_uncommitted_idx = 0;
+- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
++ c->bi.old_idx_sz = c->calc_idx_sz;
++ c->bi.uncommitted_idx = 0;
++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ spin_unlock(&c->space_lock);
+ mutex_unlock(&c->tnc_mutex);
+
+diff -uprN linux-2.6.28/fs/ubifs/ubifs.h ubifs-v2.6.28/fs/ubifs/ubifs.h
+--- linux-2.6.28/fs/ubifs/ubifs.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/ubifs.h 2011-06-15 14:22:09.000000000 -0400
+@@ -105,12 +105,10 @@
+ /* Number of non-data journal heads */
+ #define NONDATA_JHEADS_CNT 2
+
+-/* Garbage collector head */
+-#define GCHD 0
+-/* Base journal head number */
+-#define BASEHD 1
+-/* First "general purpose" journal head */
+-#define DATAHD 2
++/* Shorter names for journal head numbers for internal usage */
++#define GCHD UBIFS_GC_HEAD
++#define BASEHD UBIFS_BASE_HEAD
++#define DATAHD UBIFS_DATA_HEAD
+
+ /* 'No change' value for 'ubifs_change_lp()' */
+ #define LPROPS_NC 0x80000001
+@@ -120,8 +118,12 @@
+ * in TNC. However, when replaying, it is handy to introduce fake "truncation"
+ * keys for truncation nodes because the code becomes simpler. So we define
+ * %UBIFS_TRUN_KEY type.
++ *
++ * But otherwise, out of the journal replay scope, the truncation keys are
++ * invalid.
+ */
+-#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
++#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
++#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT
+
+ /*
+ * How much a directory entry/extended attribute entry adds to the parent/host
+@@ -148,6 +150,12 @@
+ */
+ #define WORST_COMPR_FACTOR 2
+
++/*
++ * How much memory is needed for a buffer where we comress a data node.
++ */
++#define COMPRESSED_DATA_NODE_BUF_SZ \
++ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
++
+ /* Maximum expected tree height for use by bottom_up_buf */
+ #define BOTTOM_UP_HEIGHT 64
+
+@@ -643,6 +651,7 @@ typedef int (*ubifs_lpt_scan_callback)(s
+ * @offs: write-buffer offset in this logical eraseblock
+ * @avail: number of bytes available in the write-buffer
+ * @used: number of used bytes in the write-buffer
++ * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
+ * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
+ * %UBI_UNKNOWN)
+ * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
+@@ -677,6 +686,7 @@ struct ubifs_wbuf {
+ int offs;
+ int avail;
+ int used;
++ int size;
+ int dtype;
+ int jhead;
+ int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
+@@ -711,12 +721,14 @@ struct ubifs_bud {
+ * struct ubifs_jhead - journal head.
+ * @wbuf: head's write-buffer
+ * @buds_list: list of bud LEBs belonging to this journal head
++ * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
+ *
+ * Note, the @buds list is protected by the @c->buds_lock.
+ */
+ struct ubifs_jhead {
+ struct ubifs_wbuf wbuf;
+ struct list_head buds_list;
++ unsigned int grouped:1;
+ };
+
+ /**
+@@ -926,6 +938,40 @@ struct ubifs_mount_opts {
+ unsigned int compr_type:2;
+ };
+
++/**
++ * struct ubifs_budg_info - UBIFS budgeting information.
++ * @idx_growth: amount of bytes budgeted for index growth
++ * @data_growth: amount of bytes budgeted for cached data
++ * @dd_growth: amount of bytes budgeted for cached data that will make
++ * other data dirty
++ * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but
++ * which still have to be taken into account because the index
++ * has not been committed so far
++ * @old_idx_sz: size of index on flash
++ * @min_idx_lebs: minimum number of LEBs required for the index
++ * @nospace: non-zero if the file-system does not have flash space (used as
++ * optimization)
++ * @nospace_rp: the same as @nospace, but additionally means that even reserved
++ * pool is full
++ * @page_budget: budget for a page (constant, never changed after mount)
++ * @inode_budget: budget for an inode (constant, never changed after mount)
++ * @dent_budget: budget for a directory entry (constant, never changed after
++ * mount)
++ */
++struct ubifs_budg_info {
++ long long idx_growth;
++ long long data_growth;
++ long long dd_growth;
++ long long uncommitted_idx;
++ unsigned long long old_idx_sz;
++ int min_idx_lebs;
++ unsigned int nospace:1;
++ unsigned int nospace_rp:1;
++ int page_budget;
++ int inode_budget;
++ int dent_budget;
++};
++
+ struct ubifs_debug_info;
+
+ /**
+@@ -940,6 +986,7 @@ struct ubifs_debug_info;
+ * by @commit_sem
+ * @cnt_lock: protects @highest_inum and @max_sqnum counters
+ * @fmt_version: UBIFS on-flash format version
++ * @ro_compat_version: R/O compatibility version
+ * @uuid: UUID from super block
+ *
+ * @lhead_lnum: log head logical eraseblock number
+@@ -968,10 +1015,12 @@ struct ubifs_debug_info;
+ * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
+ *
+ * @big_lpt: flag that LPT is too big to write whole during commit
++ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
+ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
+ * recovery)
+ * @bulk_read: enable bulk-reads
+ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
++ * @rw_incompat: the media is not R/W compatible
+ *
+ * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
+ * @calc_idx_sz
+@@ -998,6 +1047,11 @@ struct ubifs_debug_info;
+ * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
+ * @bu: pre-allocated bulk-read information
+ *
++ * @write_reserve_mutex: protects @write_reserve_buf
++ * @write_reserve_buf: on the write path we allocate memory, which might
++ * sometimes be unavailable, in which case we use this
++ * write reserve buffer
++ *
+ * @log_lebs: number of logical eraseblocks in the log
+ * @log_bytes: log size in bytes
+ * @log_last: last LEB of the log
+@@ -1019,43 +1073,34 @@ struct ubifs_debug_info;
+ *
+ * @min_io_size: minimal input/output unit size
+ * @min_io_shift: number of bits in @min_io_size minus one
++ * @max_write_size: maximum amount of bytes the underlying flash can write at a
++ * time (MTD write buffer size)
++ * @max_write_shift: number of bits in @max_write_size minus one
+ * @leb_size: logical eraseblock size in bytes
++ * @leb_start: starting offset of logical eraseblocks within physical
++ * eraseblocks
+ * @half_leb_size: half LEB size
++ * @idx_leb_size: how many bytes of an LEB are effectively available when it is
++ * used to store indexing nodes (@leb_size - @max_idx_node_sz)
+ * @leb_cnt: count of logical eraseblocks
+ * @max_leb_cnt: maximum count of logical eraseblocks
+ * @old_leb_cnt: count of logical eraseblocks before re-size
+ * @ro_media: the underlying UBI volume is read-only
++ * @ro_mount: the file-system was mounted as read-only
++ * @ro_error: UBIFS switched to R/O mode because an error happened
+ *
+ * @dirty_pg_cnt: number of dirty pages (not used)
+ * @dirty_zn_cnt: number of dirty znodes
+ * @clean_zn_cnt: number of clean znodes
+ *
+- * @budg_idx_growth: amount of bytes budgeted for index growth
+- * @budg_data_growth: amount of bytes budgeted for cached data
+- * @budg_dd_growth: amount of bytes budgeted for cached data that will make
+- * other data dirty
+- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
+- * but which still have to be taken into account because
+- * the index has not been committed so far
+- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
+- * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
+- * @nospace, and @nospace_rp;
+- * @min_idx_lebs: minimum number of LEBs required for the index
+- * @old_idx_sz: size of index on flash
++ * @space_lock: protects @bi and @lst
++ * @lst: lprops statistics
++ * @bi: budgeting information
+ * @calc_idx_sz: temporary variable which is used to calculate new index size
+ * (contains accurate new index size at end of TNC commit start)
+- * @lst: lprops statistics
+- * @nospace: non-zero if the file-system does not have flash space (used as
+- * optimization)
+- * @nospace_rp: the same as @nospace, but additionally means that even reserved
+- * pool is full
+- *
+- * @page_budget: budget for a page
+- * @inode_budget: budget for an inode
+- * @dent_budget: budget for a directory entry
+ *
+ * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
+- * I/O unit
++ * I/O unit
+ * @mst_node_alsz: master node aligned size
+ * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
+ * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
+@@ -1138,8 +1183,8 @@ struct ubifs_debug_info;
+ * previous commit start
+ * @uncat_list: list of un-categorized LEBs
+ * @empty_list: list of empty LEBs
+- * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
+- * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
++ * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
++ * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
+ * @freeable_cnt: number of freeable LEBs in @freeable_list
+ *
+ * @ltab_lnum: LEB number of LPT's own lprops table
+@@ -1157,19 +1202,20 @@ struct ubifs_debug_info;
+ * @rp_uid: reserved pool user ID
+ * @rp_gid: reserved pool group ID
+ *
+- * @empty: if the UBI device is empty
+- * @replay_tree: temporary tree used during journal replay
++ * @empty: %1 if the UBI device is empty
++ * @need_recovery: %1 if the file-system needs recovery
++ * @replaying: %1 during journal replay
++ * @mounting: %1 while mounting
++ * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
+ * @replay_list: temporary list used during journal replay
+ * @replay_buds: list of buds to replay
+ * @cs_sqnum: sequence number of first node in the log (commit start node)
+ * @replay_sqnum: sequence number of node currently being replayed
+- * @need_recovery: file-system needs recovery
+- * @replaying: set to %1 during journal replay
+- * @unclean_leb_list: LEBs to recover when mounting ro to rw
+- * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
++ * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
++ * mode
++ * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
++ * FS to R/W mode
+ * @size_tree: inode size information for recovery
+- * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
+- * @always_chk_crc: always check CRCs (while mounting and remounting rw)
+ * @mount_opts: UBIFS-specific mount options
+ *
+ * @dbg: debugging-related information
+@@ -1183,6 +1229,7 @@ struct ubifs_info {
+ unsigned long long cmt_no;
+ spinlock_t cnt_lock;
+ int fmt_version;
++ int ro_compat_version;
+ unsigned char uuid[16];
+
+ int lhead_lnum;
+@@ -1208,9 +1255,11 @@ struct ubifs_info {
+ wait_queue_head_t cmt_wq;
+
+ unsigned int big_lpt:1;
++ unsigned int space_fixup:1;
+ unsigned int no_chk_data_crc:1;
+ unsigned int bulk_read:1;
+ unsigned int default_compr:2;
++ unsigned int rw_incompat:1;
+
+ struct mutex tnc_mutex;
+ struct ubifs_zbranch zroot;
+@@ -1236,6 +1285,9 @@ struct ubifs_info {
+ struct mutex bu_mutex;
+ struct bu_info bu;
+
++ struct mutex write_reserve_mutex;
++ void *write_reserve_buf;
++
+ int log_lebs;
+ long long log_bytes;
+ int log_last;
+@@ -1257,32 +1309,27 @@ struct ubifs_info {
+
+ int min_io_size;
+ int min_io_shift;
++ int max_write_size;
++ int max_write_shift;
+ int leb_size;
++ int leb_start;
+ int half_leb_size;
++ int idx_leb_size;
+ int leb_cnt;
+ int max_leb_cnt;
+ int old_leb_cnt;
+- int ro_media;
++ unsigned int ro_media:1;
++ unsigned int ro_mount:1;
++ unsigned int ro_error:1;
+
+ atomic_long_t dirty_pg_cnt;
+ atomic_long_t dirty_zn_cnt;
+ atomic_long_t clean_zn_cnt;
+
+- long long budg_idx_growth;
+- long long budg_data_growth;
+- long long budg_dd_growth;
+- long long budg_uncommitted_idx;
+ spinlock_t space_lock;
+- int min_idx_lebs;
+- unsigned long long old_idx_sz;
+- unsigned long long calc_idx_sz;
+ struct ubifs_lp_stats lst;
+- unsigned int nospace:1;
+- unsigned int nospace_rp:1;
+-
+- int page_budget;
+- int inode_budget;
+- int dent_budget;
++ struct ubifs_budg_info bi;
++ unsigned long long calc_idx_sz;
+
+ int ref_node_alsz;
+ int mst_node_alsz;
+@@ -1385,19 +1432,18 @@ struct ubifs_info {
+ gid_t rp_gid;
+
+ /* The below fields are used only during mounting and re-mounting */
+- int empty;
+- struct rb_root replay_tree;
++ unsigned int empty:1;
++ unsigned int need_recovery:1;
++ unsigned int replaying:1;
++ unsigned int mounting:1;
++ unsigned int remounting_rw:1;
+ struct list_head replay_list;
+ struct list_head replay_buds;
+ unsigned long long cs_sqnum;
+ unsigned long long replay_sqnum;
+- int need_recovery;
+- int replaying;
+ struct list_head unclean_leb_list;
+ struct ubifs_mst_node *rcvrd_mst_node;
+ struct rb_root size_tree;
+- int remounting_rw;
+- int always_chk_crc;
+ struct ubifs_mount_opts mount_opts;
+
+ #ifdef CONFIG_UBIFS_FS_DEBUG
+@@ -1444,7 +1490,7 @@ int ubifs_sync_wbufs_by_inode(struct ubi
+
+ /* scan.c */
+ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
+- int offs, void *sbuf);
++ int offs, void *sbuf, int quiet);
+ void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
+ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
+ int offs, int quiet);
+@@ -1506,7 +1552,7 @@ long long ubifs_reported_space(const str
+ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
+
+ /* find.c */
+-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
++int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
+ int squeeze);
+ int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
+ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
+@@ -1588,6 +1634,7 @@ int ubifs_write_master(struct ubifs_info
+ int ubifs_read_superblock(struct ubifs_info *c);
+ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
+ int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
++int ubifs_fixup_free_space(struct ubifs_info *c);
+
+ /* replay.c */
+ int ubifs_validate_entry(struct ubifs_info *c,
+@@ -1669,6 +1716,7 @@ const struct ubifs_lprops *ubifs_fast_fi
+ const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
+ const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
+ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
++int ubifs_calc_dark(const struct ubifs_info *c, int spc);
+
+ /* file.c */
+ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
+@@ -1695,7 +1743,7 @@ struct inode *ubifs_iget(struct super_bl
+ int ubifs_recover_master_node(struct ubifs_info *c);
+ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
+ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
+- int offs, void *sbuf, int grouped);
++ int offs, void *sbuf, int jhead);
+ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
+ int offs, void *sbuf);
+ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
+diff -uprN linux-2.6.28/fs/ubifs/ubifs-media.h ubifs-v2.6.28/fs/ubifs/ubifs-media.h
+--- linux-2.6.28/fs/ubifs/ubifs-media.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/fs/ubifs/ubifs-media.h 2011-06-15 14:22:09.000000000 -0400
+@@ -36,9 +36,31 @@
+ /* UBIFS node magic number (must not have the padding byte first or last) */
+ #define UBIFS_NODE_MAGIC 0x06101831
+
+-/* UBIFS on-flash format version */
++/*
++ * UBIFS on-flash format version. This version is increased when the on-flash
++ * format is changing. If this happens, UBIFS is will support older versions as
++ * well. But older UBIFS code will not support newer formats. Format changes
++ * will be rare and only when absolutely necessary, e.g. to fix a bug or to add
++ * a new feature.
++ *
++ * UBIFS went into mainline kernel with format version 4. The older formats
++ * were development formats.
++ */
+ #define UBIFS_FORMAT_VERSION 4
+
++/*
++ * Read-only compatibility version. If the UBIFS format is changed, older UBIFS
++ * implementations will not be able to mount newer formats in read-write mode.
++ * However, depending on the change, it may be possible to mount newer formats
++ * in R/O mode. This is indicated by the R/O compatibility version which is
++ * stored in the super-block.
++ *
++ * This is needed to support boot-loaders which only need R/O mounting. With
++ * this flag it is possible to do UBIFS format changes without a need to update
++ * boot-loaders.
++ */
++#define UBIFS_RO_COMPAT_VERSION 0
++
+ /* Minimum logical eraseblock size in bytes */
+ #define UBIFS_MIN_LEB_SZ (15*1024)
+
+@@ -53,7 +75,7 @@
+
+ /*
+ * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
+- * shorter than uncompressed data length, UBIFS preferes to leave this data
++ * shorter than uncompressed data length, UBIFS prefers to leave this data
+ * node uncompress, because it'll be read faster.
+ */
+ #define UBIFS_MIN_COMPRESS_DIFF 64
+@@ -113,6 +135,13 @@
+ /* The key is always at the same position in all keyed nodes */
+ #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
+
++/* Garbage collector journal head number */
++#define UBIFS_GC_HEAD 0
++/* Base journal head number */
++#define UBIFS_BASE_HEAD 1
++/* Data journal head number */
++#define UBIFS_DATA_HEAD 2
++
+ /*
+ * LEB Properties Tree node types.
+ *
+@@ -303,14 +332,12 @@ enum {
+ * UBIFS_COMPR_NONE: no compression
+ * UBIFS_COMPR_LZO: LZO compression
+ * UBIFS_COMPR_ZLIB: ZLIB compression
+- * UBIFS_COMPR_LZO999: LZO999 compression
+ * UBIFS_COMPR_TYPES_CNT: count of supported compression types
+ */
+ enum {
+ UBIFS_COMPR_NONE,
+ UBIFS_COMPR_LZO,
+ UBIFS_COMPR_ZLIB,
+- UBIFS_COMPR_LZO999,
+ UBIFS_COMPR_TYPES_CNT,
+ };
+
+@@ -381,9 +408,11 @@ enum {
+ * Superblock flags.
+ *
+ * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
++ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
+ */
+ enum {
+ UBIFS_FLG_BIGLPT = 0x02,
++ UBIFS_FLG_SPACE_FIXUP = 0x04,
+ };
+
+ /**
+@@ -407,7 +436,7 @@ struct ubifs_ch {
+ __u8 node_type;
+ __u8 group_type;
+ __u8 padding[2];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * union ubifs_dev_desc - device node descriptor.
+@@ -421,7 +450,7 @@ struct ubifs_ch {
+ union ubifs_dev_desc {
+ __le32 new;
+ __le64 huge;
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_ino_node - inode node.
+@@ -482,7 +511,7 @@ struct ubifs_ino_node {
+ __le16 compr_type;
+ __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
+ __u8 data[];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_dent_node - directory entry node.
+@@ -507,7 +536,7 @@ struct ubifs_dent_node {
+ __le16 nlen;
+ __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
+ __u8 name[];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_data_node - data node.
+@@ -528,7 +557,7 @@ struct ubifs_data_node {
+ __le16 compr_type;
+ __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
+ __u8 data[];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_trun_node - truncation node.
+@@ -548,7 +577,7 @@ struct ubifs_trun_node {
+ __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
+ __le64 old_size;
+ __le64 new_size;
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_pad_node - padding node.
+@@ -559,7 +588,7 @@ struct ubifs_trun_node {
+ struct ubifs_pad_node {
+ struct ubifs_ch ch;
+ __le32 pad_len;
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_sb_node - superblock node.
+@@ -588,6 +617,7 @@ struct ubifs_pad_node {
+ * @padding2: reserved for future, zeroes
+ * @time_gran: time granularity in nanoseconds
+ * @uuid: UUID generated when the file system image was created
++ * @ro_compat_version: UBIFS R/O compatibility version
+ */
+ struct ubifs_sb_node {
+ struct ubifs_ch ch;
+@@ -614,8 +644,9 @@ struct ubifs_sb_node {
+ __le64 rp_size;
+ __le32 time_gran;
+ __u8 uuid[16];
+- __u8 padding2[3972];
+-} __attribute__ ((packed));
++ __le32 ro_compat_version;
++ __u8 padding2[3968];
++} __packed;
+
+ /**
+ * struct ubifs_mst_node - master node.
+@@ -682,7 +713,7 @@ struct ubifs_mst_node {
+ __le32 idx_lebs;
+ __le32 leb_cnt;
+ __u8 padding[344];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_ref_node - logical eraseblock reference node.
+@@ -698,7 +729,7 @@ struct ubifs_ref_node {
+ __le32 offs;
+ __le32 jhead;
+ __u8 padding[28];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_branch - key/reference/length branch
+@@ -712,7 +743,7 @@ struct ubifs_branch {
+ __le32 offs;
+ __le32 len;
+ __u8 key[];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_idx_node - indexing node.
+@@ -726,7 +757,7 @@ struct ubifs_idx_node {
+ __le16 child_cnt;
+ __le16 level;
+ __u8 branches[];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_cs_node - commit start node.
+@@ -736,7 +767,7 @@ struct ubifs_idx_node {
+ struct ubifs_cs_node {
+ struct ubifs_ch ch;
+ __le64 cmt_no;
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubifs_orph_node - orphan node.
+@@ -748,6 +779,6 @@ struct ubifs_orph_node {
+ struct ubifs_ch ch;
+ __le64 cmt_no;
+ __le64 inos[];
+-} __attribute__ ((packed));
++} __packed;
+
+ #endif /* __UBIFS_MEDIA_H__ */
+diff -uprN linux-2.6.28/fs/ubifs/xattr.c ubifs-v2.6.28/fs/ubifs/xattr.c
+--- linux-2.6.28/fs/ubifs/xattr.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/fs/ubifs/xattr.c 2011-06-15 14:22:09.000000000 -0400
+@@ -78,9 +78,9 @@ enum {
+ SECURITY_XATTR,
+ };
+
+-static struct inode_operations none_inode_operations;
+-static struct address_space_operations none_address_operations;
+-static struct file_operations none_file_operations;
++static const struct inode_operations empty_iops;
++static const struct file_operations empty_fops;
++static struct address_space_operations empty_aops;
+
+ /**
+ * create_xattr - create an extended attribute.
+@@ -129,9 +129,9 @@ static int create_xattr(struct ubifs_inf
+ }
+
+ /* Re-define all operations to be "nothing" */
+- inode->i_mapping->a_ops = &none_address_operations;
+- inode->i_op = &none_inode_operations;
+- inode->i_fop = &none_file_operations;
++ inode->i_mapping->a_ops = &empty_aops;
++ inode->i_op = &empty_iops;
++ inode->i_fop = &empty_fops;
+
+ inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
+ ui = ubifs_inode(inode);
+diff -uprN linux-2.6.28/include/linux/kernel.h ubifs-v2.6.28/include/linux/kernel.h
+--- linux-2.6.28/include/linux/kernel.h 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/include/linux/kernel.h 2011-06-15 14:22:09.000000000 -0400
+@@ -45,6 +45,16 @@ extern const char linux_proc_banner[];
+
+ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
++/*
++ * This looks more complex than it should be. But we need to
++ * get the type for the ~ right in round_down (it needs to be
++ * as wide as the result!), and we want to evaluate the macro
++ * arguments just once each.
++ */
++#define __round_mask(x, y) ((__typeof__(x))((y)-1))
++#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
++#define round_down(x, y) ((x) & ~__round_mask(x, y))
++
+ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+ #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+ #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+diff -uprN linux-2.6.28/include/linux/list_sort.h ubifs-v2.6.28/include/linux/list_sort.h
+--- linux-2.6.28/include/linux/list_sort.h 1969-12-31 19:00:00.000000000 -0500
++++ ubifs-v2.6.28/include/linux/list_sort.h 2011-06-15 14:22:09.000000000 -0400
+@@ -0,0 +1,11 @@
++#ifndef _LINUX_LIST_SORT_H
++#define _LINUX_LIST_SORT_H
++
++#include <linux/types.h>
++
++struct list_head;
++
++void list_sort(void *priv, struct list_head *head,
++ int (*cmp)(void *priv, struct list_head *a,
++ struct list_head *b));
++#endif
+diff -uprN linux-2.6.28/include/linux/mtd/mtd.h ubifs-v2.6.28/include/linux/mtd/mtd.h
+--- linux-2.6.28/include/linux/mtd/mtd.h 2011-06-15 15:12:27.000000000 -0400
++++ ubifs-v2.6.28/include/linux/mtd/mtd.h 2011-06-15 15:16:03.000000000 -0400
+@@ -117,6 +117,17 @@ struct mtd_info {
+ */
+ u_int32_t writesize;
+
++ /*
++ * Size of the write buffer used by the MTD. MTD devices having a write
++ * buffer can write multiple writesize chunks at a time. E.g. while
++ * writing 4 * writesize bytes to a device with 2 * writesize bytes
++ * buffer the MTD driver can (but doesn't have to) do 2 writesize
++ * operations, but not 4. Currently, all NANDs have writebufsize
++ * equivalent to writesize (NAND page size). Some NOR flashes do have
++ * writebufsize greater than writesize.
++ */
++ uint32_t writebufsize;
++
+ u_int32_t oobsize; // Amount of OOB data per block (e.g. 16)
+ u_int32_t oobavail; // Available OOB bytes per block
+
+diff -uprN linux-2.6.28/include/linux/mtd/ubi.h ubifs-v2.6.28/include/linux/mtd/ubi.h
+--- linux-2.6.28/include/linux/mtd/ubi.h 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/include/linux/mtd/ubi.h 2011-06-15 14:22:09.000000000 -0400
+@@ -21,7 +21,7 @@
+ #ifndef __LINUX_UBI_H__
+ #define __LINUX_UBI_H__
+
+-#include <asm/ioctl.h>
++#include <linux/ioctl.h>
+ #include <linux/types.h>
+ #include <mtd/ubi-user.h>
+
+@@ -87,7 +87,7 @@ enum {
+ * physical eraseblock size and on how much bytes UBI headers consume. But
+ * because of the volume alignment (@alignment), the usable size of logical
+ * eraseblocks if a volume may be less. The following equation is true:
+- * @usable_leb_size = LEB size - (LEB size mod @alignment),
++ * @usable_leb_size = LEB size - (LEB size mod @alignment),
+ * where LEB size is the logical eraseblock size defined by the UBI device.
+ *
+ * The alignment is multiple to the minimal flash input/output unit size or %1
+@@ -116,22 +116,77 @@ struct ubi_volume_info {
+ * struct ubi_device_info - UBI device description data structure.
+ * @ubi_num: ubi device number
+ * @leb_size: logical eraseblock size on this UBI device
++ * @leb_start: starting offset of logical eraseblocks within physical
++ * eraseblocks
+ * @min_io_size: minimal I/O unit size
++ * @max_write_size: maximum amount of bytes the underlying flash can write at a
++ * time (MTD write buffer size)
+ * @ro_mode: if this device is in read-only mode
+ * @cdev: UBI character device major and minor numbers
+ *
+ * Note, @leb_size is the logical eraseblock size offered by the UBI device.
+ * Volumes of this UBI device may have smaller logical eraseblock size if their
+ * alignment is not equivalent to %1.
++ *
++ * The @max_write_size field describes flash write maximum write unit. For
++ * example, NOR flash allows for changing individual bytes, so @min_io_size is
++ * %1. However, it does not mean that NOR flash has to write data byte-by-byte.
++ * Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when
++ * writing large chunks of data, they write 64-bytes at a time. Obviously, this
++ * improves write throughput.
++ *
++ * Also, the MTD device may have N interleaved (striped) flash chips
++ * underneath, in which case @min_io_size can be physical min. I/O size of
++ * single flash chip, while @max_write_size can be N * @min_io_size.
++ *
++ * The @max_write_size field is always greater or equivalent to @min_io_size.
++ * E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In
++ * contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND
++ * page size.
+ */
+ struct ubi_device_info {
+ int ubi_num;
+ int leb_size;
++ int leb_start;
+ int min_io_size;
++ int max_write_size;
+ int ro_mode;
+ dev_t cdev;
+ };
+
++/*
++ * enum - volume notification types.
++ * @UBI_VOLUME_ADDED: volume has been added
++ * @UBI_VOLUME_REMOVED: volume has been removed
++ * @UBI_VOLUME_RESIZED: volume size has been re-sized
++ * @UBI_VOLUME_RENAMED: volume name has been re-named
++ * @UBI_VOLUME_UPDATED: volume has been updated
++ *
++ * These constants define which type of event has happened when a volume
++ * notification function is invoked.
++ */
++enum {
++ UBI_VOLUME_ADDED,
++ UBI_VOLUME_REMOVED,
++ UBI_VOLUME_RESIZED,
++ UBI_VOLUME_RENAMED,
++ UBI_VOLUME_UPDATED,
++};
++
++/*
++ * struct ubi_notification - UBI notification description structure.
++ * @di: UBI device description object
++ * @vi: UBI volume description object
++ *
++ * UBI notifiers are called with a pointer to an object of this type. The
++ * object describes the notification. Namely, it provides a description of the
++ * UBI device and UBI volume the notification informs about.
++ */
++struct ubi_notification {
++ struct ubi_device_info di;
++ struct ubi_volume_info vi;
++};
++
+ /* UBI descriptor given to users when they open UBI volumes */
+ struct ubi_volume_desc;
+
+@@ -141,6 +196,12 @@ void ubi_get_volume_info(struct ubi_volu
+ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode);
+ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
+ int mode);
++struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode);
++
++int ubi_register_volume_notifier(struct notifier_block *nb,
++ int ignore_existing);
++int ubi_unregister_volume_notifier(struct notifier_block *nb);
++
+ void ubi_close_volume(struct ubi_volume_desc *desc);
+ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+ int len, int check);
+diff -uprN linux-2.6.28/include/mtd/ubi-user.h ubifs-v2.6.28/include/mtd/ubi-user.h
+--- linux-2.6.28/include/mtd/ubi-user.h 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/include/mtd/ubi-user.h 2011-06-15 14:22:09.000000000 -0400
+@@ -40,37 +40,37 @@
+ * UBI volume creation
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+- * UBI volumes are created via the %UBI_IOCMKVOL IOCTL command of UBI character
++ * UBI volumes are created via the %UBI_IOCMKVOL ioctl command of UBI character
+ * device. A &struct ubi_mkvol_req object has to be properly filled and a
+- * pointer to it has to be passed to the IOCTL.
++ * pointer to it has to be passed to the ioctl.
+ *
+ * UBI volume deletion
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+- * To delete a volume, the %UBI_IOCRMVOL IOCTL command of the UBI character
++ * To delete a volume, the %UBI_IOCRMVOL ioctl command of the UBI character
+ * device should be used. A pointer to the 32-bit volume ID hast to be passed
+- * to the IOCTL.
++ * to the ioctl.
+ *
+ * UBI volume re-size
+ * ~~~~~~~~~~~~~~~~~~
+ *
+- * To re-size a volume, the %UBI_IOCRSVOL IOCTL command of the UBI character
++ * To re-size a volume, the %UBI_IOCRSVOL ioctl command of the UBI character
+ * device should be used. A &struct ubi_rsvol_req object has to be properly
+- * filled and a pointer to it has to be passed to the IOCTL.
++ * filled and a pointer to it has to be passed to the ioctl.
+ *
+ * UBI volumes re-name
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
+ * of the UBI character device should be used. A &struct ubi_rnvol_req object
+- * has to be properly filled and a pointer to it has to be passed to the IOCTL.
++ * has to be properly filled and a pointer to it has to be passed to the ioctl.
+ *
+ * UBI volume update
+ * ~~~~~~~~~~~~~~~~~
+ *
+- * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the
++ * Volume update should be done via the %UBI_IOCVOLUP ioctl command of the
+ * corresponding UBI volume character device. A pointer to a 64-bit update
+- * size should be passed to the IOCTL. After this, UBI expects user to write
++ * size should be passed to the ioctl. After this, UBI expects user to write
+ * this number of bytes to the volume character device. The update is finished
+ * when the claimed number of bytes is passed. So, the volume update sequence
+ * is something like:
+@@ -80,14 +80,58 @@
+ * write(fd, buf, image_size);
+ * close(fd);
+ *
+- * Atomic eraseblock change
++ * Logical eraseblock erase
+ * ~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+- * Atomic eraseblock change operation is done via the %UBI_IOCEBCH IOCTL
+- * command of the corresponding UBI volume character device. A pointer to
+- * &struct ubi_leb_change_req has to be passed to the IOCTL. Then the user is
+- * expected to write the requested amount of bytes. This is similar to the
+- * "volume update" IOCTL.
++ * To erase a logical eraseblock, the %UBI_IOCEBER ioctl command of the
++ * corresponding UBI volume character device should be used. This command
++ * unmaps the requested logical eraseblock, makes sure the corresponding
++ * physical eraseblock is successfully erased, and returns.
++ *
++ * Atomic logical eraseblock change
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * Atomic logical eraseblock change operation is called using the %UBI_IOCEBCH
++ * ioctl command of the corresponding UBI volume character device. A pointer to
++ * a &struct ubi_leb_change_req object has to be passed to the ioctl. Then the
++ * user is expected to write the requested amount of bytes (similarly to what
++ * should be done in case of the "volume update" ioctl).
++ *
++ * Logical eraseblock map
++ * ~~~~~~~~~~~~~~~~~~~~~
++ *
++ * To map a logical eraseblock to a physical eraseblock, the %UBI_IOCEBMAP
++ * ioctl command should be used. A pointer to a &struct ubi_map_req object is
++ * expected to be passed. The ioctl maps the requested logical eraseblock to
++ * a physical eraseblock and returns. Only non-mapped logical eraseblocks can
++ * be mapped. If the logical eraseblock specified in the request is already
++ * mapped to a physical eraseblock, the ioctl fails and returns error.
++ *
++ * Logical eraseblock unmap
++ * ~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * To unmap a logical eraseblock to a physical eraseblock, the %UBI_IOCEBUNMAP
++ * ioctl command should be used. The ioctl unmaps the logical eraseblocks,
++ * schedules corresponding physical eraseblock for erasure, and returns. Unlike
++ * the "LEB erase" command, it does not wait for the physical eraseblock being
++ * erased. Note, the side effect of this is that if an unclean reboot happens
++ * after the unmap ioctl returns, you may find the LEB mapped again to the same
++ * physical eraseblock after the UBI is run again.
++ *
++ * Check if logical eraseblock is mapped
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * To check if a logical eraseblock is mapped to a physical eraseblock, the
++ * %UBI_IOCEBISMAP ioctl command should be used. It returns %0 if the LEB is
++ * not mapped, and %1 if it is mapped.
++ *
++ * Set an UBI volume property
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~
++ *
++ * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be
++ * used. A pointer to a &struct ubi_set_vol_prop_req object is expected to be
++ * passed. The object describes which property should be set, and to which value
++ * it should be set.
+ */
+
+ /*
+@@ -101,7 +145,7 @@
+ /* Maximum volume name length */
+ #define UBI_MAX_VOLUME_NAME 127
+
+-/* IOCTL commands of UBI character devices */
++/* ioctl commands of UBI character devices */
+
+ #define UBI_IOC_MAGIC 'o'
+
+@@ -114,7 +158,7 @@
+ /* Re-name volumes */
+ #define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
+
+-/* IOCTL commands of the UBI control character device */
++/* ioctl commands of the UBI control character device */
+
+ #define UBI_CTRL_IOC_MAGIC 'o'
+
+@@ -123,16 +167,25 @@
+ /* Detach an MTD device */
+ #define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t)
+
+-/* IOCTL commands of UBI volume character devices */
++/* ioctl commands of UBI volume character devices */
+
+ #define UBI_VOL_IOC_MAGIC 'O'
+
+ /* Start UBI volume update */
+ #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t)
+-/* An eraseblock erasure command, used for debugging, disabled by default */
++/* LEB erasure command, used for debugging, disabled by default */
+ #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t)
+-/* An atomic eraseblock change command */
++/* Atomic LEB change command */
+ #define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t)
++/* Map LEB command */
++#define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req)
++/* Unmap LEB command */
++#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, int32_t)
++/* Check if LEB is mapped command */
++#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t)
++/* Set an UBI volume property */
++#define UBI_IOCSETVOLPROP _IOW(UBI_VOL_IOC_MAGIC, 6, \
++ struct ubi_set_vol_prop_req)
+
+ /* Maximum MTD device name length supported by UBI */
+ #define MAX_UBI_MTD_NAME_LEN 127
+@@ -168,6 +221,17 @@ enum {
+ UBI_STATIC_VOLUME = 4,
+ };
+
++/*
++ * UBI set volume property ioctl constants.
++ *
++ * @UBI_VOL_PROP_DIRECT_WRITE: allow (any non-zero value) or disallow (value 0)
++ * user to directly write and erase individual
++ * eraseblocks on dynamic volumes
++ */
++enum {
++ UBI_VOL_PROP_DIRECT_WRITE = 1,
++};
++
+ /**
+ * struct ubi_attach_req - attach MTD device request.
+ * @ubi_num: UBI device number to create
+@@ -244,7 +308,7 @@ struct ubi_mkvol_req {
+ int16_t name_len;
+ int8_t padding2[4];
+ char name[UBI_MAX_VOLUME_NAME + 1];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubi_rsvol_req - a data structure used in volume re-size requests.
+@@ -260,7 +324,7 @@ struct ubi_mkvol_req {
+ struct ubi_rsvol_req {
+ int64_t bytes;
+ int32_t vol_id;
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+ * struct ubi_rnvol_req - volumes re-name request.
+@@ -302,11 +366,11 @@ struct ubi_rnvol_req {
+ int8_t padding2[2];
+ char name[UBI_MAX_VOLUME_NAME + 1];
+ } ents[UBI_MAX_RNVOL];
+-} __attribute__ ((packed));
++} __packed;
+
+ /**
+- * struct ubi_leb_change_req - a data structure used in atomic logical
+- * eraseblock change requests.
++ * struct ubi_leb_change_req - a data structure used in atomic LEB change
++ * requests.
+ * @lnum: logical eraseblock number to change
+ * @bytes: how many bytes will be written to the logical eraseblock
+ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
+@@ -317,6 +381,32 @@ struct ubi_leb_change_req {
+ int32_t bytes;
+ int8_t dtype;
+ int8_t padding[7];
+-} __attribute__ ((packed));
++} __packed;
++
++/**
++ * struct ubi_map_req - a data structure used in map LEB requests.
++ * @lnum: logical eraseblock number to map
++ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
++ * @padding: reserved for future, not used, has to be zeroed
++ */
++struct ubi_map_req {
++ int32_t lnum;
++ int8_t dtype;
++ int8_t padding[3];
++} __packed;
++
++
++/**
++ * struct ubi_set_vol_prop_req - a data structure used to set an UBI volume
++ * property.
++ * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE)
++ * @padding: reserved for future, not used, has to be zeroed
++ * @value: value to set
++ */
++struct ubi_set_vol_prop_req {
++ uint8_t property;
++ uint8_t padding[7];
++ uint64_t value;
++} __packed;
+
+ #endif /* __UBI_USER_H__ */
+diff -uprN linux-2.6.28/lib/list_sort.c ubifs-v2.6.28/lib/list_sort.c
+--- linux-2.6.28/lib/list_sort.c 1969-12-31 19:00:00.000000000 -0500
++++ ubifs-v2.6.28/lib/list_sort.c 2011-06-15 14:22:09.000000000 -0400
+@@ -0,0 +1,291 @@
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/list_sort.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++
++#define MAX_LIST_LENGTH_BITS 20
++
++/*
++ * Returns a list organized in an intermediate format suited
++ * to chaining of merge() calls: null-terminated, no reserved or
++ * sentinel head node, "prev" links not maintained.
++ */
++static struct list_head *merge(void *priv,
++ int (*cmp)(void *priv, struct list_head *a,
++ struct list_head *b),
++ struct list_head *a, struct list_head *b)
++{
++ struct list_head head, *tail = &head;
++
++ while (a && b) {
++ /* if equal, take 'a' -- important for sort stability */
++ if ((*cmp)(priv, a, b) <= 0) {
++ tail->next = a;
++ a = a->next;
++ } else {
++ tail->next = b;
++ b = b->next;
++ }
++ tail = tail->next;
++ }
++ tail->next = a?:b;
++ return head.next;
++}
++
++/*
++ * Combine final list merge with restoration of standard doubly-linked
++ * list structure. This approach duplicates code from merge(), but
++ * runs faster than the tidier alternatives of either a separate final
++ * prev-link restoration pass, or maintaining the prev links
++ * throughout.
++ */
++static void merge_and_restore_back_links(void *priv,
++ int (*cmp)(void *priv, struct list_head *a,
++ struct list_head *b),
++ struct list_head *head,
++ struct list_head *a, struct list_head *b)
++{
++ struct list_head *tail = head;
++
++ while (a && b) {
++ /* if equal, take 'a' -- important for sort stability */
++ if ((*cmp)(priv, a, b) <= 0) {
++ tail->next = a;
++ a->prev = tail;
++ a = a->next;
++ } else {
++ tail->next = b;
++ b->prev = tail;
++ b = b->next;
++ }
++ tail = tail->next;
++ }
++ tail->next = a ? : b;
++
++ do {
++ /*
++ * In worst cases this loop may run many iterations.
++ * Continue callbacks to the client even though no
++ * element comparison is needed, so the client's cmp()
++ * routine can invoke cond_resched() periodically.
++ */
++ (*cmp)(priv, tail->next, tail->next);
++
++ tail->next->prev = tail;
++ tail = tail->next;
++ } while (tail->next);
++
++ tail->next = head;
++ head->prev = tail;
++}
++
++/**
++ * list_sort - sort a list
++ * @priv: private data, opaque to list_sort(), passed to @cmp
++ * @head: the list to sort
++ * @cmp: the elements comparison function
++ *
++ * This function implements "merge sort", which has O(nlog(n))
++ * complexity.
++ *
++ * The comparison function @cmp must return a negative value if @a
++ * should sort before @b, and a positive value if @a should sort after
++ * @b. If @a and @b are equivalent, and their original relative
++ * ordering is to be preserved, @cmp must return 0.
++ */
++void list_sort(void *priv, struct list_head *head,
++ int (*cmp)(void *priv, struct list_head *a,
++ struct list_head *b))
++{
++ struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
++ -- last slot is a sentinel */
++ int lev; /* index into part[] */
++ int max_lev = 0;
++ struct list_head *list;
++
++ if (list_empty(head))
++ return;
++
++ memset(part, 0, sizeof(part));
++
++ head->prev->next = NULL;
++ list = head->next;
++
++ while (list) {
++ struct list_head *cur = list;
++ list = list->next;
++ cur->next = NULL;
++
++ for (lev = 0; part[lev]; lev++) {
++ cur = merge(priv, cmp, part[lev], cur);
++ part[lev] = NULL;
++ }
++ if (lev > max_lev) {
++ if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
++ printk(KERN_DEBUG "list passed to"
++ " list_sort() too long for"
++ " efficiency\n");
++ lev--;
++ }
++ max_lev = lev;
++ }
++ part[lev] = cur;
++ }
++
++ for (lev = 0; lev < max_lev; lev++)
++ if (part[lev])
++ list = merge(priv, cmp, part[lev], list);
++
++ merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
++}
++EXPORT_SYMBOL(list_sort);
++
++#ifdef CONFIG_TEST_LIST_SORT
++
++#include <linux/random.h>
++
++/*
++ * The pattern of set bits in the list length determines which cases
++ * are hit in list_sort().
++ */
++#define TEST_LIST_LEN (512+128+2) /* not including head */
++
++#define TEST_POISON1 0xDEADBEEF
++#define TEST_POISON2 0xA324354C
++
++struct debug_el {
++ unsigned int poison1;
++ struct list_head list;
++ unsigned int poison2;
++ int value;
++ unsigned serial;
++};
++
++/* Array, containing pointers to all elements in the test list */
++static struct debug_el **elts __initdata;
++
++static int __init check(struct debug_el *ela, struct debug_el *elb)
++{
++ if (ela->serial >= TEST_LIST_LEN) {
++ printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
++ ela->serial);
++ return -EINVAL;
++ }
++ if (elb->serial >= TEST_LIST_LEN) {
++ printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
++ elb->serial);
++ return -EINVAL;
++ }
++ if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
++ printk(KERN_ERR "list_sort_test: error: phantom element\n");
++ return -EINVAL;
++ }
++ if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
++ printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
++ ela->poison1, ela->poison2);
++ return -EINVAL;
++ }
++ if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
++ printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
++ elb->poison1, elb->poison2);
++ return -EINVAL;
++ }
++ return 0;
++}
++
++static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
++{
++ struct debug_el *ela, *elb;
++
++ ela = container_of(a, struct debug_el, list);
++ elb = container_of(b, struct debug_el, list);
++
++ check(ela, elb);
++ return ela->value - elb->value;
++}
++
++static int __init list_sort_test(void)
++{
++ int i, count = 1, err = -EINVAL;
++ struct debug_el *el;
++ struct list_head *cur, *tmp;
++ LIST_HEAD(head);
++
++ printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
++
++ elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
++ if (!elts) {
++ printk(KERN_ERR "list_sort_test: error: cannot allocate "
++ "memory\n");
++ goto exit;
++ }
++
++ for (i = 0; i < TEST_LIST_LEN; i++) {
++ el = kmalloc(sizeof(*el), GFP_KERNEL);
++ if (!el) {
++ printk(KERN_ERR "list_sort_test: error: cannot "
++ "allocate memory\n");
++ goto exit;
++ }
++ /* force some equivalencies */
++ el->value = random32() % (TEST_LIST_LEN/3);
++ el->serial = i;
++ el->poison1 = TEST_POISON1;
++ el->poison2 = TEST_POISON2;
++ elts[i] = el;
++ list_add_tail(&el->list, &head);
++ }
++
++ list_sort(NULL, &head, cmp);
++
++ for (cur = head.next; cur->next != &head; cur = cur->next) {
++ struct debug_el *el1;
++ int cmp_result;
++
++ if (cur->next->prev != cur) {
++ printk(KERN_ERR "list_sort_test: error: list is "
++ "corrupted\n");
++ goto exit;
++ }
++
++ cmp_result = cmp(NULL, cur, cur->next);
++ if (cmp_result > 0) {
++ printk(KERN_ERR "list_sort_test: error: list is not "
++ "sorted\n");
++ goto exit;
++ }
++
++ el = container_of(cur, struct debug_el, list);
++ el1 = container_of(cur->next, struct debug_el, list);
++ if (cmp_result == 0 && el->serial >= el1->serial) {
++ printk(KERN_ERR "list_sort_test: error: order of "
++ "equivalent elements not preserved\n");
++ goto exit;
++ }
++
++ if (check(el, el1)) {
++ printk(KERN_ERR "list_sort_test: error: element check "
++ "failed\n");
++ goto exit;
++ }
++ count++;
++ }
++
++ if (count != TEST_LIST_LEN) {
++ printk(KERN_ERR "list_sort_test: error: bad list length %d",
++ count);
++ goto exit;
++ }
++
++ err = 0;
++exit:
++ kfree(elts);
++ list_for_each_safe(cur, tmp, &head) {
++ list_del(cur);
++ kfree(container_of(cur, struct debug_el, list));
++ }
++ return err;
++}
++module_init(list_sort_test);
++#endif /* CONFIG_TEST_LIST_SORT */
+diff -uprN linux-2.6.28/lib/Makefile ubifs-v2.6.28/lib/Makefile
+--- linux-2.6.28/lib/Makefile 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/lib/Makefile 2011-06-15 14:22:09.000000000 -0400
+@@ -20,7 +20,7 @@ lib-y += kobject.o kref.o klist.o
+
+ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
+- string_helpers.o
++ string_helpers.o list_sort.o
+
+ ifeq ($(CONFIG_DEBUG_KOBJECT),y)
+ CFLAGS_kobject.o += -DDEBUG
+diff -uprN linux-2.6.28/MAINTAINERS ubifs-v2.6.28/MAINTAINERS
+--- linux-2.6.28/MAINTAINERS 2011-06-15 15:12:26.000000000 -0400
++++ ubifs-v2.6.28/MAINTAINERS 2011-06-15 15:16:03.000000000 -0400
+@@ -4242,9 +4242,9 @@ S: Maintained
+
+ UBI FILE SYSTEM (UBIFS)
+ P: Artem Bityutskiy
+-M: dedekind@infradead.org
++M: dedekind1@gmail.com
+ P: Adrian Hunter
+-M: ext-adrian.hunter@nokia.com
++M: adrian.hunter@nokia.com
+ L: linux-mtd@lists.infradead.org
+ T: git git://git.infradead.org/ubifs-2.6.git
+ W: http://www.linux-mtd.infradead.org/doc/ubifs.html
+@@ -4297,7 +4297,7 @@ S: Maintained
+
+ UNSORTED BLOCK IMAGES (UBI)
+ P: Artem Bityutskiy
+-M: dedekind@infradead.org
++M: dedekind1@gmail.com
+ W: http://www.linux-mtd.infradead.org/
+ L: linux-mtd@lists.infradead.org
+ T: git git://git.infradead.org/ubi-2.6.git
+diff -uprN linux-2.6.28/scripts/unifdef.c ubifs-v2.6.28/scripts/unifdef.c
+--- linux-2.6.28/scripts/unifdef.c 2008-12-24 18:26:37.000000000 -0500
++++ ubifs-v2.6.28/scripts/unifdef.c 2011-06-15 14:22:10.000000000 -0400
+@@ -206,7 +206,7 @@ static void done(void);
+ static void error(const char *);
+ static int findsym(const char *);
+ static void flushline(bool);
+-static Linetype getline(void);
++static Linetype get_line(void);
+ static Linetype ifeval(const char **);
+ static void ignoreoff(void);
+ static void ignoreon(void);
+@@ -512,7 +512,7 @@ process(void)
+
+ for (;;) {
+ linenum++;
+- lineval = getline();
++ lineval = get_line();
+ trans_table[ifstate[depth]][lineval]();
+ debug("process %s -> %s depth %d",
+ linetype_name[lineval],
+@@ -526,7 +526,7 @@ process(void)
+ * help from skipcomment().
+ */
+ static Linetype
+-getline(void)
++get_line(void)
+ {
+ const char *cp;
+ int cursym;