git.maemo.org Git - kernel-power/blob - kernel-power-2.6.28/debian/patches/unionfs-2.5.3.diff

   1 --- kernel-2.6.28.orig/Documentation/filesystems/00-INDEX
   2 +++ kernel-2.6.28/Documentation/filesystems/00-INDEX
   3 @@ -106,6 +106,8 @@
   4         - info and mount options for the UDF filesystem.
   5  ufs.txt
   6         - info on the ufs filesystem.
   7 +unionfs/
   8 +       - info on the unionfs filesystem
   9  vfat.txt
  10         - info on using the VFAT filesystem used in Windows NT and Windows 95
  11  vfs.txt
  12 --- /dev/null
  13 +++ kernel-2.6.28/Documentation/filesystems/unionfs/00-INDEX
  14 @@ -0,0 +1,10 @@
  15 +00-INDEX
  16 +       - this file.
  17 +concepts.txt
  18 +       - A brief introduction of concepts.
  19 +issues.txt
  20 +       - A summary of known issues with unionfs.
  21 +rename.txt
  22 +       - Information regarding rename operations.
  23 +usage.txt
  24 +       - Usage information and examples.
  25 --- /dev/null
  26 +++ kernel-2.6.28/Documentation/filesystems/unionfs/concepts.txt
  27 @@ -0,0 +1,287 @@
  28 +Unionfs 2.x CONCEPTS:
  29 +=====================
  30 +
  31 +This file describes the concepts needed by a namespace unification file
  32 +system.
  33 +
  34 +
  35 +Branch Priority:
  36 +================
  37 +
  38 +Each branch is assigned a unique priority - starting from 0 (highest
  39 +priority).  No two branches can have the same priority.
  40 +
  41 +
  42 +Branch Mode:
  43 +============
  44 +
  45 +Each branch is assigned a mode - read-write or read-only. This allows
  46 +directories on media mounted read-write to be used in a read-only manner.
  47 +
  48 +
  49 +Whiteouts:
  50 +==========
  51 +
  52 +A whiteout removes a file name from the namespace. Whiteouts are needed when
  53 +one attempts to remove a file on a read-only branch.
  54 +
  55 +Suppose we have a two-branch union, where branch 0 is read-write and branch
  56 +1 is read-only. And a file 'foo' on branch 1:
  57 +
  58 +./b0/
  59 +./b1/
  60 +./b1/foo
  61 +
  62 +The unified view would simply be:
  63 +
  64 +./union/
  65 +./union/foo
  66 +
  67 +Since 'foo' is stored on a read-only branch, it cannot be removed. A
  68 +whiteout is used to remove the name 'foo' from the unified namespace. Again,
  69 +since branch 1 is read-only, the whiteout cannot be created there. So, we
  70 +try on a higher priority (lower numerically) branch and create the whiteout
  71 +there.
  72 +
  73 +./b0/
  74 +./b0/.wh.foo
  75 +./b1/
  76 +./b1/foo
  77 +
  78 +Later, when Unionfs traverses branches (due to lookup or readdir), it
  79 +eliminates 'foo' from the namespace (as well as the whiteout itself.)
  80 +
  81 +
  82 +Opaque Directories:
  83 +===================
  84 +
  85 +Assume we have a unionfs mount comprising two branches.  Branch 0 is
  86 +empty; branch 1 has the directory /a and file /a/f.  Let's say we mount a
  87 +union of branch 0 as read-write and branch 1 as read-only.  Now, let's say
  88 +we try to perform the following operation in the union:
  89 +
  90 +       rm -fr a
  91 +
  92 +Because branch 1 is not writable, we cannot physically remove the file /a/f
  93 +or the directory /a.  So instead, we will create a whiteout in branch 0
  94 +named /.wh.a, masking out the name "a" from branch 1.  Next, let's say we
  95 +try to create a directory named "a" as follows:
  96 +
  97 +       mkdir a
  98 +
  99 +Because we have a whiteout for "a" already, Unionfs behaves as if "a"
 100 +doesn't exist, and thus will delete the whiteout and replace it with an
 101 +actual directory named "a".
 102 +
 103 +The problem now is that if you try to "ls" in the union, Unionfs will
 104 +perform its normal directory name unification, for *all* directories named
 105 +"a" in all branches.  This will cause the file /a/f from branch 1 to
 106 +re-appear in the union's namespace, which violates Unix semantics.
 107 +
 108 +To avoid this problem, we have a different form of whiteouts for
 109 +directories, called "opaque directories" (same as BSD Union Mount does).
 110 +Whenever we replace a whiteout with a directory, that directory is marked as
 111 +opaque.  In Unionfs 2.x, it means that we create a file named
 112 +/a/.wh.__dir_opaque in branch 0, after having created directory /a there.
 113 +When unionfs notices that a directory is opaque, it stops all namespace
 114 +operations (including merging readdir contents) at that opaque directory.
 115 +This prevents re-exposing names from masked out directories.
 116 +
 117 +
 118 +Duplicate Elimination:
 119 +======================
 120 +
 121 +It is possible for files on different branches to have the same name.
 122 +Unionfs then has to select which instance of the file to show to the user.
 123 +Given the fact that each branch has a priority associated with it, the
 124 +simplest solution is to take the instance from the highest priority
 125 +(numerically lowest value) and "hide" the others.
 126 +
 127 +
 128 +Unlinking:
 129 +==========
 130 +
 131 +Unlink operation on non-directory instances is optimized to remove the
 132 +maximum possible objects in case multiple underlying branches have the same
 133 +file name.  The unlink operation will first try to delete file instances
 134 +from highest priority branch and then move further to delete from remaining
 135 +branches in order of their decreasing priority.  Consider a case (F..D..F),
 136 +where F is a file and D is a directory of the same name; here, some
 137 +intermediate branch could have an empty directory instance with the same
 138 +name, so this operation also tries to delete this directory instance and
 139 +proceed further to delete from next possible lower priority branch.  The
 140 +unionfs unlink operation will smoothly delete the files with same name from
 141 +all possible underlying branches.  If some error occurs, it creates a whiteout
 142 +in the highest priority branch that will hide the file instances in the rest
 143 +of the branches.  An error could occur either if an unlink operation in any
 144 +of the underlying branches failed or if a branch has no write permission.
 145 +
 146 +This unlinking policy is known as "delete all" and it has the benefit of
 147 +overall reducing the number of inodes used by duplicate files, and further
 148 +reducing the total number of inodes consumed by whiteouts.  The cost is of
 149 +extra processing, but testing shows this extra processing is well worth the
 150 +savings.
 151 +
 152 +
 153 +Copyup:
 154 +=======
 155 +
 156 +When a change is made to the contents of a file's data or meta-data, they
 157 +have to be stored somewhere.  The best way is to create a copy of the
 158 +original file on a branch that is writable, and then redirect the write
 159 +through to this copy.  The copy must be made on a higher priority branch so
 160 +that lookup and readdir return this newer "version" of the file rather than
 161 +the original (see duplicate elimination).
 162 +
 163 +An entire unionfs mount can be read-only or read-write.  If it's read-only,
 164 +then none of the branches will be written to, even if some of the branches
 165 +are physically writeable.  If the unionfs mount is read-write, then the
 166 +leftmost (highest priority) branch must be writeable (for copyup to take
 167 +place); the remaining branches can be any mix of read-write and read-only.
 168 +
 169 +In a writeable mount, unionfs will create new files/dir in the leftmost
 170 +branch.  If one tries to modify a file in a read-only branch/media, unionfs
 171 +will copyup the file to the leftmost branch and modify it there.  If you try
 172 +to modify a file from a writeable branch which is not the leftmost branch,
 173 +then unionfs will modify it in that branch; this is useful if you, say,
 174 +unify different packages (e.g., apache, sendmail, ftpd, etc.) and you want
 175 +changes to specific package files to remain logically in the directory where
 176 +they came from.
 177 +
 178 +Cache Coherency:
 179 +================
 180 +
 181 +Unionfs users often want to be able to modify files and directories directly
 182 +on the lower branches, and have those changes be visible at the Unionfs
 183 +level.  This means that data (e.g., pages) and meta-data (dentries, inodes,
 184 +open files, etc.) have to be synchronized between the upper and lower
 185 +layers.  In other words, the newest changes from a layer below have to be
 186 +propagated to the Unionfs layer above.  If the two layers are not in sync, a
 187 +cache incoherency ensues, which could lead to application failures and even
 188 +oopses.  The Linux kernel, however, has a rather limited set of mechanisms
 189 +to ensure this inter-layer cache coherency---so Unionfs has to do most of
 190 +the hard work on its own.
 191 +
 192 +Maintaining Invariants:
 193 +
 194 +The way Unionfs ensures cache coherency is as follows.  At each entry point
 195 +to a Unionfs file system method, we call a utility function to validate the
 196 +primary objects of this method.  Generally, we call unionfs_file_revalidate
 197 +on open files, and __unionfs_d_revalidate_chain on dentries (which also
 198 +validates inodes).  These utility functions check to see whether the upper
 199 +Unionfs object is in sync with any of the lower objects that it represents.
 200 +The checks we perform include whether the Unionfs superblock has a newer
 201 +generation number, or if any of the lower objects mtime's or ctime's are
 202 +newer.  (Note: generation numbers change when branch-management commands are
 203 +issued, so in a way, maintaining cache coherency is also very important for
 204 +branch-management.)  If indeed we determine that any Unionfs object is no
 205 +longer in sync with its lower counterparts, then we rebuild that object
 206 +similarly to how we do so for branch-management.
 207 +
 208 +While rebuilding Unionfs's objects, we also purge any page mappings and
 209 +truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data).  This is to
 210 +ensure that Unionfs will re-get the newer data from the lower branches.  We
 211 +perform this purging only if the Unionfs operation in question is a reading
 212 +operation; if Unionfs is performing a data writing operation (e.g., ->write,
 213 +->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
 214 +because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
 215 +considered more authoritative anyway, as they are newer and will overwrite
 216 +any lower pages.
 217 +
 218 +Unionfs maintains the following important invariant regarding mtime's,
 219 +ctime's, and atime's: the upper inode object's times are the max() of all of
 220 +the lower ones.  For non-directory objects, there's only one object below,
 221 +so the mapping is simple; for directory objects, there could be multiple
 222 +lower objects and we have to sync up with the newest one of all the lower
 223 +ones.  This invariant is important to maintain, especially for directories
 224 +(besides, we need this to be POSIX compliant).  A union could comprise
 225 +multiple writable branches, each of which could change.  If we don't reflect
 226 +the newest possible mtime/ctime, some applications could fail.  For example,
 227 +NFSv2/v3 exports check for newer directory mtimes on the server to determine
 228 +if the client-side attribute cache should be purged.
 229 +
 230 +To maintain these important invariants, of course, Unionfs carefully
 231 +synchronizes upper and lower times in various places.  For example, if we
 232 +copy-up a file to a top-level branch, the parent directory where the file
 233 +was copied up to will now have a new mtime: so after a successful copy-up,
 234 +we sync up with the new top-level branch's parent directory mtime.
 235 +
 236 +Implementation:
 237 +
 238 +This cache-coherency implementation is efficient because it defers any
 239 +synchronizing between the upper and lower layers until absolutely needed.
 240 +Consider, for example, a common situation where users perform a lot of lower
 241 +changes, such as untarring a whole package.  While these take place,
 242 +typically the user doesn't access the files via Unionfs; only after the
 243 +lower changes are done, does the user try to access the lower files.  With
 244 +our cache-coherency implementation, the entirety of the changes to the lower
 245 +branches will not result in a single CPU cycle spent at the Unionfs level
 246 +until the user invokes a system call that goes through Unionfs.
 247 +
 248 +We have considered two alternate cache-coherency designs.  (1) Using the
 249 +dentry/inode notify functionality to register interest in finding out about
 250 +any lower changes.  This is a somewhat limited and also a heavy-handed
 251 +approach which could result in many notifications to the Unionfs layer upon
 252 +each small change at the lower layer (imagine a file being modified multiple
 253 +times in rapid succession).  (2) Rewriting the VFS to support explicit
 254 +callbacks from lower objects to upper objects.  We began exploring such an
 255 +implementation, but found it to be very complicated--it would have resulted
 256 +in massive VFS/MM changes which are unlikely to be accepted by the LKML
 257 +community.  We therefore believe that our current cache-coherency design and
 258 +implementation represent the best approach at this time.
 259 +
 260 +Limitations:
 261 +
 262 +Our implementation works in that as long as a user process will have caused
 263 +Unionfs to be called, directly or indirectly, even to just do
 264 +->d_revalidate; then we will have purged the current Unionfs data and the
 265 +process will see the new data.  For example, a process that continually
 266 +re-reads the same file's data will see the NEW data as soon as the lower
 267 +file had changed, upon the next read(2) syscall (even if the file is still
 268 +open!)  However, this doesn't work when the process re-reads the open file's
 269 +data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
 270 +it).  Once we respond to ->readpage(s), then the kernel maps the page into
 271 +the process's address space and there doesn't appear to be a way to force
 272 +the kernel to invalidate those pages/mappings, and force the process to
 273 +re-issue ->readpage.  If there's a way to invalidate active mappings and
 274 +force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
 275 +the trick).
 276 +
 277 +Our current Unionfs code has to perform many file-revalidation calls.  It
 278 +would be really nice if the VFS would export an optional file system hook
 279 +->file_revalidate (similarly to dentry->d_revalidate) that will be called
 280 +before each VFS op that has a "struct file" in it.
 281 +
 282 +Certain file systems have micro-second granularity (or better) for inode
 283 +times, and asynchronous actions could cause those times to change with some
 284 +small delay.  In such cases, Unionfs may see a changed inode time that only
 285 +differs by a tiny fraction of a second: such a change may be a false
 286 +positive indication that the lower object has changed, whereas if unionfs
 287 +waits a little longer, that false indication will not be seen.  (These false
 288 +positives are harmless, because they would at most cause unionfs to
 289 +re-validate an object that may need no revalidation, and print a debugging
 290 +message that clutters the console/logs.)  Therefore, to minimize the chances
 291 +of these situations, we delay the detection of changed times by a small
 292 +factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
 293 +seconds, as does NFS).  This means that we will detect the change, only a
 294 +couple of seconds later, if indeed the time change persists in the lower
 295 +file object.  This delayed detection has an added performance benefit: we
 296 +reduce the number of times that unionfs has to revalidate objects, in case
 297 +there's a lot of concurrent activity on both the upper and lower objects,
 298 +for the same file(s).  Lastly, this delayed time attribute detection is
 299 +similar to how NFS clients operate (e.g., acregmin).
 300 +
 301 +Finally, there is no way currently in Linux to prevent lower directories
 302 +from being moved around (i.e., topology changes); there's no way to prevent
 303 +modifications to directory sub-trees of whole file systems which are mounted
 304 +read-write.  It is therefore possible for in-flight operations in unionfs to
 305 +take place, while a lower directory is being moved around.  Therefore, if
 306 +you try to, say, create a new file in a directory through unionfs, while the
 307 +directory is being moved around directly, then the new file may get created
 308 +in the new location where that directory was moved to.  This is somewhat
 309 +similar to NFS behaviour: an NFS client could be creating a new file while
 310 +the NFS server is moving the directory around; the file will get successfully
 311 +created in the new location.  (The one exception in unionfs is that if the
 312 +branch is marked read-only by unionfs, then a copyup will take place.)
 313 +
 314 +For more information, see <http://unionfs.filesystems.org/>.
 315 --- /dev/null
 316 +++ kernel-2.6.28/Documentation/filesystems/unionfs/issues.txt
 317 @@ -0,0 +1,28 @@
 318 +KNOWN Unionfs 2.x ISSUES:
 319 +=========================
 320 +
 321 +1. Unionfs should not use lookup_one_len() on the underlying f/s as it
 322 +   confuses NFSv4.  Currently, unionfs_lookup() passes lookup intents to the
 323 +   lower file-system, this eliminates part of the problem.  The remaining
 324 +   calls to lookup_one_len may need to be changed to pass an intent.  We are
 325 +   currently introducing VFS changes to fs/namei.c's do_path_lookup() to
 326 +   allow proper file lookup and opening in stackable file systems.
 327 +
 328 +2. Lockdep (a debugging feature) isn't aware of stacking, and so it
 329 +   incorrectly complains about locking problems.  The problem boils down to
 330 +   this: Lockdep considers all objects of a certain type to be in the same
 331 +   class, for example, all inodes.  Lockdep doesn't like to see a lock held
 332 +   on two inodes within the same task, and warns that it could lead to a
 333 +   deadlock.  However, stackable file systems do precisely that: they lock
 334 +   an upper object, and then a lower object, in a strict order to avoid
 335 +   locking problems; in addition, Unionfs, as a fan-out file system, may
 336 +   have to lock several lower inodes.  We are currently looking into Lockdep
 337 +   to see how to make it aware of stackable file systems.  For now, we
 338 +   temporarily disable lockdep when calling vfs methods on lower objects,
 339 +   but only for those places where lockdep complained.  While this solution
 340 +   may seem unclean, it is not without precedent: other places in the kernel
 341 +   also do similar temporary disabling, of course after carefully having
 342 +   checked that it is the right thing to do.  Anyway, if you get any warnings
 343 +   from Lockdep, please report them to the Unionfs maintainers.
 344 +
 345 +For more information, see <http://unionfs.filesystems.org/>.
 346 --- /dev/null
 347 +++ kernel-2.6.28/Documentation/filesystems/unionfs/rename.txt
 348 @@ -0,0 +1,31 @@
 349 +Rename is a complex beast. The following table shows which rename(2) operations
 350 +should succeed and which should fail.
 351 +
 352 +o: success
 353 +E: error (either unionfs or vfs)
 354 +X: EXDEV
 355 +
 356 +none = file does not exist
 357 +file = file is a file
 358 +dir  = file is an empty directory
 359 +child= file is a non-empty directory
 360 +wh   = file is a directory containing only whiteouts; this makes it logically
 361 +               empty
 362 +
 363 +                      none    file    dir     child   wh
 364 +file                  o       o       E       E       E
 365 +dir                   o       E       o       E       o
 366 +child                 X       E       X       E       X
 367 +wh                    o       E       o       E       o
 368 +
 369 +
 370 +Renaming directories:
 371 +=====================
 372 +
 373 +Whenever an empty (either physically or logically) directory is being renamed,
 374 +the following sequence of events should take place:
 375 +
 376 +1) Remove whiteouts from both source and destination directory
 377 +2) Rename source to destination
 378 +3) Make destination opaque to prevent anything under it from showing up
 379 +
 380 --- /dev/null
 381 +++ kernel-2.6.28/Documentation/filesystems/unionfs/usage.txt
 382 @@ -0,0 +1,134 @@
 383 +Unionfs is a stackable unification file system, which can appear to merge
 384 +the contents of several directories (branches), while keeping their physical
 385 +content separate.  Unionfs is useful for unified source tree management,
 386 +merged contents of split CD-ROM, merged separate software package
 387 +directories, data grids, and more.  Unionfs allows any mix of read-only and
 388 +read-write branches, as well as insertion and deletion of branches anywhere
 389 +in the fan-out.  To maintain Unix semantics, Unionfs handles elimination of
 390 +duplicates, partial-error conditions, and more.
 391 +
 392 +GENERAL SYNTAX
 393 +==============
 394 +
 395 +# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT
 396 +
 397 +OPTIONS can be any legal combination of:
 398 +
 399 +- ro           # mount file system read-only
 400 +- rw           # mount file system read-write
 401 +- remount      # remount the file system (see Branch Management below)
 402 +- incgen       # increment generation no. (see Cache Consistency below)
 403 +
 404 +BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs="
 405 +option, or (2) a list of individual branch manipulation commands, combined
 406 +with the "remount" option, and is further described in the "Branch
 407 +Management" section below.
 408 +
 409 +The syntax for the "dirs=" mount option is:
 410 +
 411 +       dirs=branch[=ro|=rw][:...]
 412 +
 413 +The "dirs=" option takes a colon-delimited list of directories to compose
 414 +the union, with an optional branch mode for each of those directories.
 415 +Directories that come earlier (specified first, on the left) in the list
 416 +have a higher precedence than those which come later.  Additionally,
 417 +read-only or read-write permissions of the branch can be specified by
 418 +appending =ro or =rw (default) to each directory.  See the Copyup section in
 419 +concepts.txt, for a description of Unionfs's behavior when mixing read-only
 420 +and read-write branches and mounts.
 421 +
 422 +Syntax:
 423 +
 424 +       dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
 425 +
 426 +Example:
 427 +
 428 +       dirs=/writable_branch=rw:/read-only_branch=ro
 429 +
 430 +
 431 +BRANCH MANAGEMENT
 432 +=================
 433 +
 434 +Once you mount your union for the first time, using the "dirs=" option, you
 435 +can then change the union's overall mode or reconfigure the branches, using
 436 +the remount option, as follows.
 437 +
 438 +To downgrade a union from read-write to read-only:
 439 +
 440 +# mount -t unionfs -o remount,ro none MOUNTPOINT
 441 +
 442 +To upgrade a union from read-only to read-write:
 443 +
 444 +# mount -t unionfs -o remount,rw none MOUNTPOINT
 445 +
 446 +To delete a branch /foo, regardless where it is in the current union:
 447 +
 448 +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
 449 +
 450 +To insert (add) a branch /foo before /bar:
 451 +
 452 +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
 453 +
 454 +To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
 455 +
 456 +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
 457 +
 458 +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
 459 +new highest-priority branch), you can use the above syntax, or use a short
 460 +hand version as follows:
 461 +
 462 +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
 463 +
 464 +To append a branch to the very end (new lowest-priority branch):
 465 +
 466 +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
 467 +
 468 +To append a branch to the very end (new lowest-priority branch), in
 469 +read-only mode:
 470 +
 471 +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
 472 +
 473 +Finally, to change the mode of one existing branch, say /foo, from read-only
 474 +to read-write, and change /bar from read-write to read-only:
 475 +
 476 +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
 477 +
 478 +Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because
 479 +then Unionfs won't have any writable place for copyups to take place.
 480 +Moreover, the VFS can get confused when it tries to modify something in a
 481 +file system mounted read-write, but isn't permitted to write to it.
 482 +Instead, you should set the whole union as readonly, as described above.
 483 +If, however, you must set the leftmost branch as readonly, perhaps so you
 484 +can get a snapshot of it at a point in time, then you should insert a new
 485 +writable top-level branch, and mark the one you want as readonly.  This can
 486 +be accomplished as follows, assuming that /foo is your current leftmost
 487 +branch:
 488 +
 489 +# mount -t tmpfs -o size=NNN /new
 490 +# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT
 491 +<do what you want safely in /foo>
 492 +# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT
 493 +<check if there's anything in /new you want to preserve>
 494 +# umount /new
 495 +
 496 +CACHE CONSISTENCY
 497 +=================
 498 +
 499 +If you modify any file on any of the lower branches directly, while there is
 500 +a Unionfs 2.x mounted above any of those branches, you should tell Unionfs
 501 +to purge its caches and re-get the objects.  To do that, you have to
 502 +increment the generation number of the superblock using the following
 503 +command:
 504 +
 505 +# mount -t unionfs -o remount,incgen none MOUNTPOINT
 506 +
 507 +Note that the older way of incrementing the generation number using an
 508 +ioctl, is no longer supported in Unionfs 2.0 and newer.  Ioctls in general
 509 +are not encouraged.  Plus, an ioctl is a per-file concept, whereas the
 510 +generation number is a per-file-system concept.  Worse, such an ioctl
 511 +requires an open file, which then has to be invalidated by the very nature
 512 +of the generation number increase (read: the old generation increase ioctl
 513 +was pretty racy).
 514 +
 515 +
 516 +For more information, see <http://unionfs.filesystems.org/>.
 517 --- kernel-2.6.28.orig/MAINTAINERS
 518 +++ kernel-2.6.28/MAINTAINERS
 519 @@ -4295,6 +4295,14 @@
 520  W:     http://www.kernel.dk
 521  S:     Maintained
 522
 523 +UNIONFS
 524 +P:     Erez Zadok
 525 +M:     ezk@cs.sunysb.edu
 526 +L:     unionfs@filesystems.org
 527 +W:     http://unionfs.filesystems.org
 528 +T:     git git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git
 529 +S:     Maintained
 530 +
 531  UNSORTED BLOCK IMAGES (UBI)
 532  P:     Artem Bityutskiy
 533  M:     dedekind@infradead.org
 534 --- kernel-2.6.28.orig/fs/Kconfig
 535 +++ kernel-2.6.28/fs/Kconfig
 536 @@ -752,6 +752,47 @@
 537
 538  endmenu
 539
 540 +menu "Layered filesystems"
 541 +
 542 +config ECRYPT_FS
 543 +       tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
 544 +       depends on EXPERIMENTAL && KEYS && CRYPTO && NET
 545 +       help
 546 +         Encrypted filesystem that operates on the VFS layer.  See
 547 +         <file:Documentation/filesystems/ecryptfs.txt> to learn more about
 548 +         eCryptfs.  Userspace components are required and can be
 549 +         obtained from <http://ecryptfs.sf.net>.
 550 +
 551 +         To compile this file system support as a module, choose M here: the
 552 +         module will be called ecryptfs.
 553 +
 554 +config UNION_FS
 555 +       tristate "Union file system (EXPERIMENTAL)"
 556 +       depends on EXPERIMENTAL
 557 +       help
 558 +         Unionfs is a stackable unification file system, which appears to
 559 +         merge the contents of several directories (branches), while keeping
 560 +         their physical content separate.
 561 +
 562 +         See <http://unionfs.filesystems.org> for details
 563 +
 564 +config UNION_FS_XATTR
 565 +       bool "Unionfs extended attributes"
 566 +       depends on UNION_FS
 567 +       help
 568 +         Extended attributes are name:value pairs associated with inodes by
 569 +         the kernel or by users (see the attr(5) manual page).
 570 +
 571 +         If unsure, say N.
 572 +
 573 +config UNION_FS_DEBUG
 574 +       bool "Debug Unionfs"
 575 +       depends on UNION_FS
 576 +       help
 577 +         If you say Y here, you can turn on debugging output from Unionfs.
 578 +
 579 +endmenu
 580 +
 581  menu "Miscellaneous filesystems"
 582
 583  config ADFS_FS
 584 @@ -804,18 +845,6 @@
 585           To compile this file system support as a module, choose M here: the
 586           module will be called affs.  If unsure, say N.
 587
 588 -config ECRYPT_FS
 589 -       tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
 590 -       depends on EXPERIMENTAL && KEYS && CRYPTO && NET
 591 -       help
 592 -         Encrypted filesystem that operates on the VFS layer.  See
 593 -         <file:Documentation/filesystems/ecryptfs.txt> to learn more about
 594 -         eCryptfs.  Userspace components are required and can be
 595 -         obtained from <http://ecryptfs.sf.net>.
 596 -
 597 -         To compile this file system support as a module, choose M here: the
 598 -         module will be called ecryptfs.
 599 -
 600  config HFS_FS
 601         tristate "Apple Macintosh file system support (EXPERIMENTAL)"
 602         depends on BLOCK && EXPERIMENTAL
 603 --- kernel-2.6.28.orig/fs/Makefile
 604 +++ kernel-2.6.28/fs/Makefile
 605 @@ -86,6 +86,7 @@
 606  obj-$(CONFIG_HFSPLUS_FS)       += hfsplus/ # Before hfs to find wrapped HFS+
 607  obj-$(CONFIG_HFS_FS)           += hfs/
 608  obj-$(CONFIG_ECRYPT_FS)                += ecryptfs/
 609 +obj-$(CONFIG_UNION_FS)         += unionfs/
 610  obj-$(CONFIG_VXFS_FS)          += freevxfs/
 611  obj-$(CONFIG_NFS_FS)           += nfs/
 612  obj-$(CONFIG_EXPORTFS)         += exportfs/
 613 --- kernel-2.6.28.orig/fs/ecryptfs/dentry.c
 614 +++ kernel-2.6.28/fs/ecryptfs/dentry.c
 615 @@ -62,7 +62,7 @@
 616                 struct inode *lower_inode =
 617                         ecryptfs_inode_to_lower(dentry->d_inode);
 618
 619 -               fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
 620 +               fsstack_copy_attr_all(dentry->d_inode, lower_inode);
 621         }
 622  out:
 623         return rc;
 624 --- kernel-2.6.28.orig/fs/ecryptfs/inode.c
 625 +++ kernel-2.6.28/fs/ecryptfs/inode.c
 626 @@ -589,9 +589,9 @@
 627                         lower_new_dir_dentry->d_inode, lower_new_dentry);
 628         if (rc)
 629                 goto out_lock;
 630 -       fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
 631 +       fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
 632         if (new_dir != old_dir)
 633 -               fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
 634 +               fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 635  out_lock:
 636         unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 637         dput(lower_new_dentry->d_parent);
 638 @@ -913,7 +913,7 @@
 639         rc = notify_change(lower_dentry, ia);
 640         mutex_unlock(&lower_dentry->d_inode->i_mutex);
 641  out:
 642 -       fsstack_copy_attr_all(inode, lower_inode, NULL);
 643 +       fsstack_copy_attr_all(inode, lower_inode);
 644         return rc;
 645  }
 646
 647 --- kernel-2.6.28.orig/fs/ecryptfs/main.c
 648 +++ kernel-2.6.28/fs/ecryptfs/main.c
 649 @@ -193,7 +193,7 @@
 650                 d_add(dentry, inode);
 651         else
 652                 d_instantiate(dentry, inode);
 653 -       fsstack_copy_attr_all(inode, lower_inode, NULL);
 654 +       fsstack_copy_attr_all(inode, lower_inode);
 655         /* This size will be overwritten for real files w/ headers and
 656          * other metadata */
 657         fsstack_copy_inode_size(inode, lower_inode);
 658 --- kernel-2.6.28.orig/fs/namei.c
 659 +++ kernel-2.6.28/fs/namei.c
 660 @@ -379,6 +379,7 @@
 661         else
 662                 fput(nd->intent.open.file);
 663  }
 664 +EXPORT_SYMBOL_GPL(release_open_intent);
 665
 666  static inline struct dentry *
 667  do_revalidate(struct dentry *dentry, struct nameidata *nd)
 668 --- kernel-2.6.28.orig/fs/splice.c
 669 +++ kernel-2.6.28/fs/splice.c
 670 @@ -887,8 +887,8 @@
 671  /*
 672   * Attempt to initiate a splice from pipe to file.
 673   */
 674 -static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 675 -                          loff_t *ppos, size_t len, unsigned int flags)
 676 +long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
 677 +                    loff_t *ppos, size_t len, unsigned int flags)
 678  {
 679         int ret;
 680
 681 @@ -907,13 +907,14 @@
 682
 683         return out->f_op->splice_write(pipe, out, ppos, len, flags);
 684  }
 685 +EXPORT_SYMBOL_GPL(vfs_splice_from);
 686
 687  /*
 688   * Attempt to initiate a splice from a file to a pipe.
 689   */
 690 -static long do_splice_to(struct file *in, loff_t *ppos,
 691 -                        struct pipe_inode_info *pipe, size_t len,
 692 -                        unsigned int flags)
 693 +long vfs_splice_to(struct file *in, loff_t *ppos,
 694 +                  struct pipe_inode_info *pipe, size_t len,
 695 +                  unsigned int flags)
 696  {
 697         int ret;
 698
 699 @@ -929,6 +930,7 @@
 700
 701         return in->f_op->splice_read(in, ppos, pipe, len, flags);
 702  }
 703 +EXPORT_SYMBOL_GPL(vfs_splice_to);
 704
 705  /**
 706   * splice_direct_to_actor - splices data directly between two non-pipes
 707 @@ -998,7 +1000,7 @@
 708                 size_t read_len;
 709                 loff_t pos = sd->pos, prev_pos = pos;
 710
 711 -               ret = do_splice_to(in, &pos, pipe, len, flags);
 712 +               ret = vfs_splice_to(in, &pos, pipe, len, flags);
 713                 if (unlikely(ret <= 0))
 714                         goto out_release;
 715
 716 @@ -1057,7 +1059,7 @@
 717  {
 718         struct file *file = sd->u.file;
 719
 720 -       return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
 721 +       return vfs_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
 722  }
 723
 724  /**
 725 @@ -1131,7 +1133,7 @@
 726                 } else
 727                         off = &out->f_pos;
 728
 729 -               ret = do_splice_from(pipe, out, off, len, flags);
 730 +               ret = vfs_splice_from(pipe, out, off, len, flags);
 731
 732                 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
 733                         ret = -EFAULT;
 734 @@ -1152,7 +1154,7 @@
 735                 } else
 736                         off = &in->f_pos;
 737
 738 -               ret = do_splice_to(in, off, pipe, len, flags);
 739 +               ret = vfs_splice_to(in, off, pipe, len, flags);
 740
 741                 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
 742                         ret = -EFAULT;
 743 --- kernel-2.6.28.orig/fs/stack.c
 744 +++ kernel-2.6.28/fs/stack.c
 745 @@ -1,24 +1,82 @@
 746 +/*
 747 + * Copyright (c) 2006-2009 Erez Zadok
 748 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
 749 + * Copyright (c) 2006-2009 Stony Brook University
 750 + * Copyright (c) 2006-2009 The Research Foundation of SUNY
 751 + *
 752 + * This program is free software; you can redistribute it and/or modify
 753 + * it under the terms of the GNU General Public License version 2 as
 754 + * published by the Free Software Foundation.
 755 + */
 756 +
 757  #include <linux/module.h>
 758  #include <linux/fs.h>
 759  #include <linux/fs_stack.h>
 760
 761 -/* does _NOT_ require i_mutex to be held.
 762 +/*
 763 + * does _NOT_ require i_mutex to be held.
 764   *
 765   * This function cannot be inlined since i_size_{read,write} is rather
 766   * heavy-weight on 32-bit systems
 767   */
 768 -void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
 769 +void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
 770  {
 771 -       i_size_write(dst, i_size_read((struct inode *)src));
 772 -       dst->i_blocks = src->i_blocks;
 773 +       loff_t i_size;
 774 +       blkcnt_t i_blocks;
 775 +
 776 +       /*
 777 +        * i_size_read() includes its own seqlocking and protection from
 778 +        * preemption (see include/linux/fs.h): we need nothing extra for
 779 +        * that here, and prefer to avoid nesting locks than attempt to
 780 +        * keep i_size and i_blocks in synch together.
 781 +        */
 782 +       i_size = i_size_read(src);
 783 +
 784 +       /*
 785 +        * But if CONFIG_LSF (on 32-bit), we ought to make an effort to keep
 786 +        * the two halves of i_blocks in synch despite SMP or PREEMPT - though
 787 +        * stat's generic_fillattr() doesn't bother, and we won't be applying
 788 +        * quotas (where i_blocks does become important) at the upper level.
 789 +        *
 790 +        * We don't actually know what locking is used at the lower level; but
 791 +        * if it's a filesystem that supports quotas, it will be using i_lock
 792 +        * as in inode_add_bytes().  tmpfs uses other locking, and its 32-bit
 793 +        * is (just) able to exceed 2TB i_size with the aid of holes; but its
 794 +        * i_blocks cannot carry into the upper long without almost 2TB swap -
 795 +        * let's ignore that case.
 796 +        */
 797 +       if (sizeof(i_blocks) > sizeof(long))
 798 +               spin_lock(&src->i_lock);
 799 +       i_blocks = src->i_blocks;
 800 +       if (sizeof(i_blocks) > sizeof(long))
 801 +               spin_unlock(&src->i_lock);
 802 +
 803 +       /*
 804 +        * If CONFIG_SMP on 32-bit, it's vital for fsstack_copy_inode_size()
 805 +        * to hold some lock around i_size_write(), otherwise i_size_read()
 806 +        * may spin forever (see include/linux/fs.h).  We don't necessarily
 807 +        * hold i_mutex when this is called, so take i_lock for that case.
 808 +        *
 809 +        * And if CONFIG_LSF (on 32-bit), continue our effort to keep the
 810 +        * two halves of i_blocks in synch despite SMP or PREEMPT: use i_lock
 811 +        * for that case too, and do both at once by combining the tests.
 812 +        *
 813 +        * There is none of this locking overhead in the 64-bit case.
 814 +        */
 815 +       if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
 816 +               spin_lock(&dst->i_lock);
 817 +       i_size_write(dst, i_size);
 818 +       dst->i_blocks = i_blocks;
 819 +       if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
 820 +               spin_unlock(&dst->i_lock);
 821  }
 822  EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 823
 824 -/* copy all attributes; get_nlinks is optional way to override the i_nlink
 825 +/*
 826 + * copy all attributes, including i_nlink, from src to dest by straight field
 827   * copying
 828   */
 829 -void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 830 -                               int (*get_nlinks)(struct inode *))
 831 +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
 832  {
 833         dest->i_mode = src->i_mode;
 834         dest->i_uid = src->i_uid;
 835 @@ -29,14 +87,6 @@
 836         dest->i_ctime = src->i_ctime;
 837         dest->i_blkbits = src->i_blkbits;
 838         dest->i_flags = src->i_flags;
 839 -
 840 -       /*
 841 -        * Update the nlinks AFTER updating the above fields, because the
 842 -        * get_links callback may depend on them.
 843 -        */
 844 -       if (!get_nlinks)
 845 -               dest->i_nlink = src->i_nlink;
 846 -       else
 847 -               dest->i_nlink = (*get_nlinks)(dest);
 848 +       dest->i_nlink = src->i_nlink;
 849  }
 850  EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
 851 --- /dev/null
 852 +++ kernel-2.6.28/fs/unionfs/Makefile
 853 @@ -0,0 +1,17 @@
 854 +UNIONFS_VERSION="2.5.3 (for 2.6.28.10)"
 855 +
 856 +EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
 857 +
 858 +obj-$(CONFIG_UNION_FS) += unionfs.o
 859 +
 860 +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
 861 +       rdstate.o copyup.o dirhelper.o rename.o unlink.o \
 862 +       lookup.o commonfops.o dirfops.o sioq.o mmap.o whiteout.o
 863 +
 864 +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
 865 +
 866 +unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o
 867 +
 868 +ifeq ($(CONFIG_UNION_FS_DEBUG),y)
 869 +EXTRA_CFLAGS += -DDEBUG
 870 +endif
 871 --- /dev/null
 872 +++ kernel-2.6.28/fs/unionfs/commonfops.c
 873 @@ -0,0 +1,879 @@
 874 +/*
 875 + * Copyright (c) 2003-2009 Erez Zadok
 876 + * Copyright (c) 2003-2006 Charles P. Wright
 877 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
 878 + * Copyright (c) 2005-2006 Junjiro Okajima
 879 + * Copyright (c) 2005      Arun M. Krishnakumar
 880 + * Copyright (c) 2004-2006 David P. Quigley
 881 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
 882 + * Copyright (c) 2003      Puja Gupta
 883 + * Copyright (c) 2003      Harikesavan Krishnan
 884 + * Copyright (c) 2003-2009 Stony Brook University
 885 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
 886 + *
 887 + * This program is free software; you can redistribute it and/or modify
 888 + * it under the terms of the GNU General Public License version 2 as
 889 + * published by the Free Software Foundation.
 890 + */
 891 +
 892 +#include "union.h"
 893 +
 894 +/*
 895 + * Copy up an open-but-deleted file to branch bindex under the temporary name
 896 + * '.unionfs<lower inode#, hex><counter, hex>' (idea stolen from NFS's silly
 897 + * rename), then unlink the lower dentry at dbstart.  Returns 0 or an error.
 898 + */
 899 +static int copyup_deleted_file(struct file *file, struct dentry *dentry,
 900 +                              struct dentry *parent, int bstart, int bindex)
 901 +{
 902 +       static unsigned int counter;
 903 +       const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
 904 +       const int countersize = sizeof(counter) * 2;
 905 +       const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
 906 +       char name[nlen + 1];
 907 +       int err;
 908 +       struct dentry *tmp_dentry = NULL;
 909 +       struct dentry *lower_dentry;
 910 +       struct dentry *lower_dir_dentry = NULL;
 911 +
 912 +       lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
 913 +
 914 +       sprintf(name, ".unionfs%*.*lx",
 915 +               i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
 916 +
 917 +       /*
 918 +        * Loop, looking for an unused temp name to copyup to.
 919 +        *
 920 +        * It's somewhat silly that we look for a free temp name in the
 921 +        * source branch (bstart) instead of the dest branch (bindex), where
 922 +        * the final name will be created.  We _will_ catch it (-EEXIST =>
 923 +        * retry) if the name exists in the dest branch, but it'd be nice to
 924 +        * catch it sooner rather than later.
 925 +        */
 926 +retry:
 927 +       tmp_dentry = NULL;
 928 +       do {
 929 +               char *suffix = name + nlen - countersize;
 930 +
 931 +               dput(tmp_dentry);
 932 +               counter++;
 933 +               sprintf(suffix, "%*.*x", countersize, countersize, counter);
 934 +
 935 +               pr_debug("unionfs: trying to rename %s to %s\n",
 936 +                        dentry->d_name.name, name);
 937 +
 938 +               tmp_dentry = lookup_one_len(name, lower_dentry->d_parent,
 939 +                                           nlen);
 940 +               if (IS_ERR(tmp_dentry)) {
 941 +                       err = PTR_ERR(tmp_dentry);
 942 +                       goto out;
 943 +               }
 944 +       } while (tmp_dentry->d_inode != NULL);  /* need negative dentry */
 945 +       dput(tmp_dentry);
 946 +
 947 +       err = copyup_named_file(parent->d_inode, file, name, bstart, bindex,
 948 +                               i_size_read(file->f_path.dentry->d_inode));
 949 +       if (err) {
 950 +               if (unlikely(err == -EEXIST))
 951 +                       goto retry;
 952 +               goto out;
 953 +       }
 954 +
 955 +       /* bring it to the same state as an unlinked file */
 956 +       lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
 957 +       if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
 958 +               atomic_inc(&lower_dentry->d_inode->i_count);
 959 +               unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
 960 +                                           lower_dentry->d_inode);
 961 +       }
 962 +       lower_dir_dentry = lock_parent(lower_dentry);
 963 +       err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
 964 +       unlock_dir(lower_dir_dentry);
 965 +
 966 +out:
 967 +       if (!err)
 968 +               unionfs_check_dentry(dentry);
 969 +       return err;
 970 +}
 971 +
 972 +/*
 973 + * branchput() and fput() each open lower file of @file whose saved branch ID
 974 + * still resolves, then free and NULL lower_files and saved_branch_ids.
 975 + */
 976 +static void cleanup_file(struct file *file)
 977 +{
 978 +       int bindex, bstart, bend;
 979 +       struct file **lower_files;
 980 +       struct file *lower_file;
 981 +       struct super_block *sb = file->f_path.dentry->d_sb;
 982 +
 983 +       lower_files = UNIONFS_F(file)->lower_files;
 984 +       bstart = fbstart(file);
 985 +       bend = fbend(file);
 986 +
 987 +       for (bindex = bstart; bindex <= bend; bindex++) {
 988 +               int i;  /* holds (possibly) updated branch index */
 989 +               int old_bid;
 990 +
 991 +               lower_file = unionfs_lower_file_idx(file, bindex);
 992 +               if (!lower_file)
 993 +                       continue;
 994 +
 995 +               /*
 996 +                * Find new index of matching branch with an open
 997 +                * file, since branches could have been added or
 998 +                * deleted causing the one with open files to shift.
 999 +                */
1000 +               old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
1001 +               i = branch_id_to_idx(sb, old_bid);
1002 +               if (unlikely(i < 0)) {
1003 +                       printk(KERN_ERR "unionfs: no superblock for "
1004 +                              "file %p\n", file);
1005 +                       continue; /* NOTE(review): lower_file is not fput() here */
1006 +               }
1007 +
1008 +               /* decrement count of open files */
1009 +               branchput(sb, i);
1010 +               /*
1011 +                * fput will perform an mntput for us on the correct branch.
1012 +                * Although we're using the file's old branch configuration,
1013 +                * bindex, which is the old index, correctly points to the
1014 +                * right branch in the file's branch list.  In other words,
1015 +                * we're going to mntput the correct branch even if branches
1016 +                * have been added/removed.
1017 +                */
1018 +               fput(lower_file);
1019 +               UNIONFS_F(file)->lower_files[bindex] = NULL;
1020 +               UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
1021 +       }
1022 +
1023 +       UNIONFS_F(file)->lower_files = NULL;
1024 +       kfree(lower_files);
1025 +       kfree(UNIONFS_F(file)->saved_branch_ids);
1026 +       /* set to NULL because caller needs to know if to kfree on error */
1027 +       UNIONFS_F(file)->saved_branch_ids = NULL;
1028 +}
1029 +
1030 +/* open a lower file in every branch that has a lower dentry for this file */
1031 +static int open_all_files(struct file *file)
1032 +{
1033 +       int bindex, bstart, bend, err = 0;
1034 +       struct file *lower_file;
1035 +       struct dentry *lower_dentry;
1036 +       struct dentry *dentry = file->f_path.dentry;
1037 +       struct super_block *sb = dentry->d_sb;
1038 +
1039 +       bstart = dbstart(dentry);
1040 +       bend = dbend(dentry);
1041 +
1042 +       for (bindex = bstart; bindex <= bend; bindex++) {
1043 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1044 +               if (!lower_dentry)
1045 +                       continue;
1046 +
1047 +               dget(lower_dentry);
1048 +               unionfs_mntget(dentry, bindex);
1049 +               branchget(sb, bindex);
1050 +
1051 +               lower_file =
1052 +                       dentry_open(lower_dentry,
1053 +                                   unionfs_lower_mnt_idx(dentry, bindex),
1054 +                                   file->f_flags);
1055 +               if (IS_ERR(lower_file)) {
1056 +                       branchput(sb, bindex);
1057 +                       err = PTR_ERR(lower_file);
1058 +                       goto out;
1059 +               } else {
1060 +                       unionfs_set_lower_file_idx(file, bindex, lower_file);
1061 +               }
1062 +       }
1063 +out:
1064 +       return err;
1065 +}
1066 +
1067 +/* open the dbstart() lower file, or copy it up if writing to a ro branch */
1068 +static int open_highest_file(struct file *file, bool willwrite)
1069 +{
1070 +       int bindex, bstart, bend, err = 0;
1071 +       struct file *lower_file;
1072 +       struct dentry *lower_dentry;
1073 +       struct dentry *dentry = file->f_path.dentry;
1074 +       struct dentry *parent = dget_parent(dentry);
1075 +       struct inode *parent_inode = parent->d_inode;
1076 +       struct super_block *sb = dentry->d_sb;
1077 +
1078 +       bstart = dbstart(dentry);
1079 +       bend = dbend(dentry);
1080 +
1081 +       lower_dentry = unionfs_lower_dentry(dentry);
1082 +       if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
1083 +               for (bindex = bstart - 1; bindex >= 0; bindex--) {
1084 +                       err = copyup_file(parent_inode, file, bstart, bindex,
1085 +                                         i_size_read(dentry->d_inode));
1086 +                       if (!err)
1087 +                               break;
1088 +               }
1089 +               atomic_set(&UNIONFS_F(file)->generation,
1090 +                          atomic_read(&UNIONFS_I(dentry->d_inode)->
1091 +                                      generation));
1092 +               goto out; /* NOTE(review): err is still 0 here if bstart == 0 */
1093 +       }
1094 +
1095 +       dget(lower_dentry);
1096 +       unionfs_mntget(dentry, bstart);
1097 +       lower_file = dentry_open(lower_dentry,
1098 +                                unionfs_lower_mnt_idx(dentry, bstart),
1099 +                                file->f_flags);
1100 +       if (IS_ERR(lower_file)) {
1101 +               err = PTR_ERR(lower_file);
1102 +               goto out;
1103 +       }
1104 +       branchget(sb, bstart);
1105 +       unionfs_set_lower_file(file, lower_file);
1106 +       /* Fix up the position. */
1107 +       lower_file->f_pos = file->f_pos;
1108 +
1109 +       memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
1110 +out:
1111 +       dput(parent);
1112 +       return err;
1113 +}
1114 +
1115 +/* delayed copyup of a write-opened regular file from a ro branch to a lower index */
1116 +static int do_delayed_copyup(struct file *file, struct dentry *parent)
1117 +{
1118 +       int bindex, bstart, bend, err = 0;
1119 +       struct dentry *dentry = file->f_path.dentry;
1120 +       struct inode *parent_inode = parent->d_inode;
1121 +
1122 +       bstart = fbstart(file);
1123 +       bend = fbend(file);
1124 +
1125 +       BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
1126 +
1127 +       unionfs_check_file(file);
1128 +       for (bindex = bstart - 1; bindex >= 0; bindex--) {
1129 +               if (!d_deleted(dentry))
1130 +                       err = copyup_file(parent_inode, file, bstart,
1131 +                                         bindex,
1132 +                                         i_size_read(dentry->d_inode));
1133 +               else
1134 +                       err = copyup_deleted_file(file, dentry, parent,
1135 +                                                 bstart, bindex);
1136 +               /* on success, give the new lower file our open flags and stop */
1137 +               if (!err) {
1138 +                       struct file *lower_file;
1139 +                       lower_file = unionfs_lower_file_idx(file, bindex);
1140 +                       lower_file->f_flags = file->f_flags;
1141 +                       break;
1142 +               }
1143 +       }
1144 +       if (err || (bstart <= fbstart(file)))
1145 +               goto out;
1146 +       bend = fbend(file);
1147 +       for (bindex = bstart; bindex <= bend; bindex++) {
1148 +               if (unionfs_lower_file_idx(file, bindex)) {
1149 +                       branchput(dentry->d_sb, bindex);
1150 +                       fput(unionfs_lower_file_idx(file, bindex));
1151 +                       unionfs_set_lower_file_idx(file, bindex, NULL);
1152 +               }
1153 +       }
1154 +       path_put_lowers(dentry, bstart, bend, false);
1155 +       iput_lowers(dentry->d_inode, bstart, bend, false);
1156 +       /* for reg file, we only open it "once" */
1157 +       fbend(file) = fbstart(file);
1158 +       dbend(dentry) = dbstart(dentry);
1159 +       ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1160 +
1161 +out:
1162 +       unionfs_check_file(file);
1163 +       return err;
1164 +}
1165 +
1166 +/*
1167 + * Helper function for unionfs_file_revalidate/locked: reopens stale lower
1168 + * files and does delayed copyup.  Expects dentry/parent locked, revalidated.
1169 + */
1170 +static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry,
1171 +                                    struct dentry *parent,
1172 +                                    struct super_block *sb, int sbgen,
1173 +                                    int dgen, bool willwrite)
1174 +{
1175 +       int fgen;
1176 +       int bstart, bend, orig_brid;
1177 +       int size;
1178 +       int err = 0;
1179 +
1180 +       fgen = atomic_read(&UNIONFS_F(file)->generation);
1181 +
1182 +       /*
1183 +        * There are two cases we are interested in.  The first is if the
1184 +        * generation is lower than the super-block.  The second is if
1185 +        * someone has copied up this file from underneath us, we also need
1186 +        * to refresh things.  Deleted dentries skip straight to the copyup.
1187 +        */
1188 +       if (d_deleted(dentry) ||
1189 +           (sbgen <= fgen &&
1190 +            dbstart(dentry) == fbstart(file) &&
1191 +            unionfs_lower_file(file)))
1192 +               goto out_may_copyup;
1193 +
1194 +       /* save orig branch ID */
1195 +       orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1196 +
1197 +       /* First we throw out the existing files. */
1198 +       cleanup_file(file);
1199 +
1200 +       /* Now we reopen the file(s) as in unionfs_open. */
1201 +       bstart = fbstart(file) = dbstart(dentry);
1202 +       bend = fbend(file) = dbend(dentry);
1203 +
1204 +       size = sizeof(struct file *) * sbmax(sb);
1205 +       UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1206 +       if (unlikely(!UNIONFS_F(file)->lower_files)) {
1207 +               err = -ENOMEM;
1208 +               goto out;
1209 +       }
1210 +       size = sizeof(int) * sbmax(sb);
1211 +       UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1212 +       if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1213 +               err = -ENOMEM;
1214 +               goto out;
1215 +       }
1216 +
1217 +       if (S_ISDIR(dentry->d_inode->i_mode)) {
1218 +               /* We need to open all the files. */
1219 +               err = open_all_files(file);
1220 +               if (err)
1221 +                       goto out;
1222 +       } else {
1223 +               int new_brid;
1224 +               /* We only open the highest priority branch. */
1225 +               err = open_highest_file(file, willwrite);
1226 +               if (err)
1227 +                       goto out;
1228 +               new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1229 +               if (unlikely(new_brid != orig_brid && sbgen > fgen)) {
1230 +                       /*
1231 +                        * If we re-opened the file on a different branch
1232 +                        * than the original one, and this was due to a new
1233 +                        * branch inserted, then update the mnt counts of
1234 +                        * the old and new branches accordingly.
1235 +                        */
1236 +                       unionfs_mntget(dentry, bstart);
1237 +                       unionfs_mntput(sb->s_root,
1238 +                                      branch_id_to_idx(sb, orig_brid));
1239 +               }
1240 +               /* regular files have only one open lower file */
1241 +               fbend(file) = fbstart(file);
1242 +       }
1243 +       atomic_set(&UNIONFS_F(file)->generation,
1244 +                  atomic_read(&UNIONFS_I(dentry->d_inode)->generation));
1245 +
1246 +out_may_copyup:
1247 +       /* Copyup on the first write to a file on a readonly branch. */
1248 +       if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1249 +           !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1250 +           is_robranch(dentry)) {
1251 +               pr_debug("unionfs: do delay copyup of \"%s\"\n",
1252 +                        dentry->d_name.name);
1253 +               err = do_delayed_copyup(file, parent);
1254 +               /* regular files have only one open lower file */
1255 +               if (!err && !S_ISDIR(dentry->d_inode->i_mode))
1256 +                       fbend(file) = fbstart(file);
1257 +       }
1258 +
1259 +out:
1260 +       if (err) { /* NOTE(review): arrays freed but not NULLed; confirm no later use */
1261 +               kfree(UNIONFS_F(file)->lower_files);
1262 +               kfree(UNIONFS_F(file)->saved_branch_ids);
1263 +       }
1264 +       return err;
1265 +}
1266 +
1267 +/*
1268 + * Revalidate the struct file; returns 0, -ESTALE, or the helper's error
1269 + * @file: file to revalidate
1270 + * @parent: parent dentry (locked by caller)
1271 + * @willwrite: true if caller may cause changes to the file; false otherwise.
1272 + * Caller must lock/unlock dentry's branch configuration and the dentry itself.
1273 + */
1274 +int unionfs_file_revalidate(struct file *file, struct dentry *parent,
1275 +                           bool willwrite)
1276 +{
1277 +       struct super_block *sb;
1278 +       struct dentry *dentry;
1279 +       int sbgen, dgen;
1280 +       int err = 0;
1281 +
1282 +       dentry = file->f_path.dentry;
1283 +       sb = dentry->d_sb;
1284 +       verify_locked(dentry);
1285 +       verify_locked(parent);
1286 +
1287 +       /*
1288 +        * First revalidate the dentry inside struct file,
1289 +        * but not deleted (d_deleted) dentries.
1290 +        */
1291 +       if (!d_deleted(dentry) &&
1292 +           !__unionfs_d_revalidate(dentry, parent, willwrite)) {
1293 +               err = -ESTALE;
1294 +               goto out;
1295 +       }
1296 +
1297 +       sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1298 +       dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1299 +
1300 +       if (unlikely(sbgen > dgen)) { /* XXX: should never happen */
1301 +               pr_debug("unionfs: failed to revalidate dentry (%s)\n",
1302 +                        dentry->d_name.name);
1303 +               err = -ESTALE;
1304 +               goto out;
1305 +       }
1306 +
1307 +       err = __unionfs_file_revalidate(file, dentry, parent, sb,
1308 +                                       sbgen, dgen, willwrite);
1309 +out:
1310 +       return err;
1311 +}
1312 +
1313 +/* unionfs_open helper: open the directory in each branch with a lower dentry */
1314 +static int __open_dir(struct inode *inode, struct file *file)
1315 +{
1316 +       struct dentry *lower_dentry;
1317 +       struct file *lower_file;
1318 +       int bindex, bstart, bend;
1319 +       struct vfsmount *mnt;
1320 +
1321 +       bstart = fbstart(file) = dbstart(file->f_path.dentry);
1322 +       bend = fbend(file) = dbend(file->f_path.dentry);
1323 +
1324 +       for (bindex = bstart; bindex <= bend; bindex++) {
1325 +               lower_dentry =
1326 +                       unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1327 +               if (!lower_dentry)
1328 +                       continue;
1329 +
1330 +               dget(lower_dentry);
1331 +               unionfs_mntget(file->f_path.dentry, bindex);
1332 +               mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex);
1333 +               lower_file = dentry_open(lower_dentry, mnt, file->f_flags);
1334 +               if (IS_ERR(lower_file))
1335 +                       return PTR_ERR(lower_file);
1336 +
1337 +               unionfs_set_lower_file_idx(file, bindex, lower_file);
1338 +
1339 +               /*
1340 +                * The branchget goes after the open, because otherwise
1341 +                * we would miss the reference on release.
1342 +                */
1343 +               branchget(inode->i_sb, bindex);
1344 +       }
1345 +
1346 +       return 0;
1347 +}
1348 +
1349 +/* unionfs_open helper: open (or, for O_TRUNC on a ro branch, copy up) a file */
1350 +static int __open_file(struct inode *inode, struct file *file,
1351 +                      struct dentry *parent)
1352 +{
1353 +       struct dentry *lower_dentry;
1354 +       struct file *lower_file;
1355 +       int lower_flags;
1356 +       int bindex, bstart, bend;
1357 +
1358 +       lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1359 +       lower_flags = file->f_flags;
1360 +
1361 +       bstart = fbstart(file) = dbstart(file->f_path.dentry);
1362 +       bend = fbend(file) = dbend(file->f_path.dentry);
1363 +
1364 +       /*
1365 +        * If the lower file exists on a read-only branch, either copy it
1366 +        * up right now (O_TRUNC) or open it without write flags.
1367 +        */
1368 +       if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1369 +               /*
1370 +                * if the open will change the file, copy it up otherwise
1371 +                * defer it.
1372 +                */
1373 +               if (lower_flags & O_TRUNC) {
1374 +                       int size = 0;
1375 +                       int err = -EROFS;
1376 +
1377 +                       /* copyup the file */
1378 +                       for (bindex = bstart - 1; bindex >= 0; bindex--) {
1379 +                               err = copyup_file(parent->d_inode, file,
1380 +                                                 bstart, bindex, size);
1381 +                               if (!err)
1382 +                                       break;
1383 +                       }
1384 +                       return err;
1385 +               } else {
1386 +                       /*
1387 +                        * turn off writeable flags, to force delayed copyup
1388 +                        * by caller.
1389 +                        */
1390 +                       lower_flags &= ~(OPEN_WRITE_FLAGS);
1391 +               }
1392 +       }
1393 +
1394 +       dget(lower_dentry);
1395 +
1396 +       /*
1397 +        * dentry_open will decrement mnt refcnt if err.
1398 +        * otherwise fput() will do an mntput() for us upon file close.
1399 +        */
1400 +       unionfs_mntget(file->f_path.dentry, bstart);
1401 +       lower_file =
1402 +               dentry_open(lower_dentry,
1403 +                           unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1404 +                           lower_flags);
1405 +       if (IS_ERR(lower_file))
1406 +               return PTR_ERR(lower_file);
1407 +
1408 +       unionfs_set_lower_file(file, lower_file);
1409 +       branchget(inode->i_sb, bstart);
1410 +
1411 +       return 0;
1412 +}
1413 +/* open a unionfs file: allocate its file info and open the lower object(s) */
1414 +int unionfs_open(struct inode *inode, struct file *file)
1415 +{
1416 +       int err = 0;
1417 +       struct file *lower_file = NULL;
1418 +       struct dentry *dentry = file->f_path.dentry;
1419 +       struct dentry *parent;
1420 +       int bindex = 0, bstart = 0, bend = 0;
1421 +       int size;
1422 +       int valid = 0;
1423 +
1424 +       unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT);
1425 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1426 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1427 +
1428 +       /* don't open unhashed/deleted files */
1429 +       if (d_deleted(dentry)) {
1430 +               err = -ENOENT;
1431 +               goto out_nofree; /* nothing allocated yet */
1432 +       }
1433 +
1434 +       /* XXX: should I change 'false' below to the 'willwrite' flag? */
1435 +       valid = __unionfs_d_revalidate(dentry, parent, false);
1436 +       if (unlikely(!valid)) {
1437 +               err = -ESTALE;
1438 +               goto out_nofree;
1439 +       }
1440 +
1441 +       file->private_data =
1442 +               kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1443 +       if (unlikely(!UNIONFS_F(file))) {
1444 +               err = -ENOMEM;
1445 +               goto out_nofree;
1446 +       }
1447 +       fbstart(file) = -1;
1448 +       fbend(file) = -1;
1449 +       atomic_set(&UNIONFS_F(file)->generation,
1450 +                  atomic_read(&UNIONFS_I(inode)->generation));
1451 +
1452 +       size = sizeof(struct file *) * sbmax(inode->i_sb);
1453 +       UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1454 +       if (unlikely(!UNIONFS_F(file)->lower_files)) {
1455 +               err = -ENOMEM;
1456 +               goto out; /* frees whatever part of the file info exists */
1457 +       }
1458 +       size = sizeof(int) * sbmax(inode->i_sb);
1459 +       UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1460 +       if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1461 +               err = -ENOMEM;
1462 +               goto out;
1463 +       }
1464 +
1465 +       bstart = fbstart(file) = dbstart(dentry);
1466 +       bend = fbend(file) = dbend(dentry);
1467 +
1468 +       /*
1469 +        * open all directories and make the unionfs file struct point to
1470 +        * these lower file structs
1471 +        */
1472 +       if (S_ISDIR(inode->i_mode))
1473 +               err = __open_dir(inode, file);  /* open a dir */
1474 +       else
1475 +               err = __open_file(inode, file, parent); /* open a file */
1476 +
1477 +       /* on failure, fput every lower file that did get opened */
1478 +       if (err) {
1479 +               for (bindex = bstart; bindex <= bend; bindex++) {
1480 +                       lower_file = unionfs_lower_file_idx(file, bindex);
1481 +                       if (!lower_file)
1482 +                               continue;
1483 +
1484 +                       branchput(dentry->d_sb, bindex);
1485 +                       /* fput calls dput for lower_dentry */
1486 +                       fput(lower_file);
1487 +               }
1488 +       }
1489 +
1490 +out:
1491 +       if (err) {
1492 +               kfree(UNIONFS_F(file)->lower_files);
1493 +               kfree(UNIONFS_F(file)->saved_branch_ids);
1494 +               kfree(UNIONFS_F(file));
1495 +       }
1496 +out_nofree:
1497 +       if (!err) {
1498 +               unionfs_postcopyup_setmnt(dentry);
1499 +               unionfs_copy_attr_times(inode);
1500 +               unionfs_check_file(file);
1501 +               unionfs_check_inode(inode);
1502 +       }
1503 +       unionfs_unlock_dentry(dentry);
1504 +       unionfs_unlock_parent(dentry, parent);
1505 +       unionfs_read_unlock(inode->i_sb);
1506 +       return err;
1507 +}
1508 +
1509 +/*
1510 + * Release all lower object references and free the file info structure.
1511 + * Returns the revalidation result; the cleanup below runs regardless.
1512 + * No need to grab sb info's rwsem (NOTE(review): confirm, given read lock).
1513 + */
1514 +int unionfs_file_release(struct inode *inode, struct file *file)
1515 +{
1516 +       struct file *lower_file = NULL;
1517 +       struct unionfs_file_info *fileinfo;
1518 +       struct unionfs_inode_info *inodeinfo;
1519 +       struct super_block *sb = inode->i_sb;
1520 +       struct dentry *dentry = file->f_path.dentry;
1521 +       struct dentry *parent;
1522 +       int bindex, bstart, bend;
1523 +       int fgen, err = 0;
1524 +
1525 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1526 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1527 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1528 +
1529 +       /*
1530 +        * We try to revalidate, but the VFS ignores the return values
1531 +        * from file->release, so we must always try to succeed here,
1532 +        * including to do the kfree and dput below.  So if revalidation
1533 +        * failed, all we can do is print some message and keep going.
1534 +        */
1535 +       err = unionfs_file_revalidate(file, parent,
1536 +                                     UNIONFS_F(file)->wrote_to_file);
1537 +       if (!err)
1538 +               unionfs_check_file(file);
1539 +       fileinfo = UNIONFS_F(file);
1540 +       BUG_ON(file->f_path.dentry->d_inode != inode);
1541 +       inodeinfo = UNIONFS_I(inode);
1542 +
1543 +       /* fput all the lower files */
1544 +       fgen = atomic_read(&fileinfo->generation);
1545 +       bstart = fbstart(file);
1546 +       bend = fbend(file);
1547 +
1548 +       for (bindex = bstart; bindex <= bend; bindex++) {
1549 +               lower_file = unionfs_lower_file_idx(file, bindex);
1550 +
1551 +               if (lower_file) {
1552 +                       unionfs_set_lower_file_idx(file, bindex, NULL);
1553 +                       fput(lower_file);
1554 +                       branchput(sb, bindex);
1555 +               }
1556 +
1557 +               /* if the dentry is unhashed/deleted, dput its lower dentry */
1558 +               if (d_deleted(dentry)) {
1559 +                       dput(unionfs_lower_dentry_idx(dentry, bindex));
1560 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1561 +               }
1562 +       }
1563 +
1564 +       kfree(fileinfo->lower_files);
1565 +       kfree(fileinfo->saved_branch_ids);
1566 +
1567 +       if (fileinfo->rdstate) { /* move rdstate to the inode's readdircache */
1568 +               fileinfo->rdstate->access = jiffies;
1569 +               spin_lock(&inodeinfo->rdlock);
1570 +               inodeinfo->rdcount++;
1571 +               list_add_tail(&fileinfo->rdstate->cache,
1572 +                             &inodeinfo->readdircache);
1573 +               mark_inode_dirty(inode);
1574 +               spin_unlock(&inodeinfo->rdlock);
1575 +               fileinfo->rdstate = NULL;
1576 +       }
1577 +       kfree(fileinfo);
1578 +
1579 +       unionfs_unlock_dentry(dentry);
1580 +       unionfs_unlock_parent(dentry, parent);
1581 +       unionfs_read_unlock(sb);
1582 +       return err;
1583 +}
1584 +
1585 +/* pass the ioctl to the lower file; -ENOTTY if it has no ioctl method */
1586 +static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1587 +{
1588 +       struct file *lower_file;
1589 +       int err;
1590 +
1591 +       lower_file = unionfs_lower_file(file);
1592 +
1593 +       err = -ENOTTY;
1594 +       if (!lower_file || !lower_file->f_op)
1595 +               goto out;
1596 +       if (lower_file->f_op->unlocked_ioctl) { /* preferred over ->ioctl */
1597 +               err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1598 +       } else if (lower_file->f_op->ioctl) {
1599 +               lock_kernel(); /* ->ioctl is called under the kernel lock */
1600 +               err = lower_file->f_op->ioctl(
1601 +                       lower_file->f_path.dentry->d_inode,
1602 +                       lower_file, cmd, arg);
1603 +               unlock_kernel();
1604 +       }
1605 +
1606 +out:
1607 +       return err;
1608 +}
1609 +
1610 +/*
1611 + * Copy to user-space (at @arg) an fd_set of the branch indices containing
1612 + * the file in question.  Returns a negative errno, else the dentry's bend
1613 + * as seen after the partial lookup.  Because we use fd_set, the number of
1614 + * branches is limited to FD_SETSIZE (currently 1024 - plenty for most).
1615 + */
1616 +static int unionfs_ioctl_queryfile(struct file *file, struct dentry *parent,
1617 +                                  unsigned int cmd, unsigned long arg)
1618 +{
1619 +       int err = 0;
1620 +       fd_set branchlist;
1621 +       int bstart = 0, bend = 0, bindex = 0;
1622 +       int orig_bstart, orig_bend;
1623 +       struct dentry *dentry, *lower_dentry;
1624 +       struct vfsmount *mnt;
1625 +
1626 +       dentry = file->f_path.dentry;
1627 +       orig_bstart = dbstart(dentry); /* saved so they can be restored */
1628 +       orig_bend = dbend(dentry);
1629 +       err = unionfs_partial_lookup(dentry, parent);
1630 +       if (err)
1631 +               goto out;
1632 +       bstart = dbstart(dentry);
1633 +       bend = dbend(dentry);
1634 +
1635 +       FD_ZERO(&branchlist);
1636 +
1637 +       for (bindex = bstart; bindex <= bend; bindex++) {
1638 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1639 +               if (!lower_dentry)
1640 +                       continue;
1641 +               if (likely(lower_dentry->d_inode))
1642 +                       FD_SET(bindex, &branchlist);
1643 +               /* purge lower objects lying outside the original range */
1644 +               if (bindex < orig_bstart || bindex > orig_bend) {
1645 +                       dput(lower_dentry);
1646 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1647 +                       iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1648 +                       unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1649 +                                                   NULL);
1650 +                       mnt = unionfs_lower_mnt_idx(dentry, bindex);
1651 +                       if (!mnt)
1652 +                               continue;
1653 +                       unionfs_mntput(dentry, bindex);
1654 +                       unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1655 +               }
1656 +       }
1657 +       /* restore original dentry's offsets */
1658 +       dbstart(dentry) = orig_bstart;
1659 +       dbend(dentry) = orig_bend;
1660 +       ibstart(dentry->d_inode) = orig_bstart;
1661 +       ibend(dentry->d_inode) = orig_bend;
1662 +
1663 +       err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1664 +       if (unlikely(err))
1665 +               err = -EFAULT;
1666 +
1667 +out:
1668 +       return err < 0 ? err : bend;
1669 +}
1670 +/* ioctl entry point: handle unionfs-local commands, pass the rest down */
1671 +long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1672 +{
1673 +       long err;
1674 +       struct dentry *dentry = file->f_path.dentry;
1675 +       struct dentry *parent;
1676 +
1677 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1678 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1679 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1680 +
1681 +       err = unionfs_file_revalidate(file, parent, true);
1682 +       if (unlikely(err))
1683 +               goto out;
1684 +
1685 +       /* check if asked for local commands */
1686 +       switch (cmd) {
1687 +       case UNIONFS_IOCTL_INCGEN:
1688 +               /* deprecated: only logs a hint and fails with -ENOSYS */
1689 +               pr_info("unionfs: incgen ioctl deprecated; "
1690 +                       "use \"-o remount,incgen\"\n");
1691 +               err = -ENOSYS;
1692 +               break;
1693 +
1694 +       case UNIONFS_IOCTL_QUERYFILE:
1695 +               /* Return list of branches containing the given file */
1696 +               err = unionfs_ioctl_queryfile(file, parent, cmd, arg);
1697 +               break;
1698 +
1699 +       default:
1700 +               /* pass the ioctl down */
1701 +               err = do_ioctl(file, cmd, arg);
1702 +               break;
1703 +       }
1704 +
1705 +out:
1706 +       unionfs_check_file(file);
1707 +       unionfs_unlock_dentry(dentry);
1708 +       unionfs_unlock_parent(dentry, parent);
1709 +       unionfs_read_unlock(dentry->d_sb);
1710 +       return err;
1711 +}
1712 +/* call ->flush on every lower file that has one; stop at the first error */
1713 +int unionfs_flush(struct file *file, fl_owner_t id)
1714 +{
1715 +       int err = 0;
1716 +       struct file *lower_file = NULL;
1717 +       struct dentry *dentry = file->f_path.dentry;
1718 +       struct dentry *parent;
1719 +       int bindex, bstart, bend;
1720 +
1721 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1722 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1723 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1724 +
1725 +       err = unionfs_file_revalidate(file, parent,
1726 +                                     UNIONFS_F(file)->wrote_to_file);
1727 +       if (unlikely(err))
1728 +               goto out;
1729 +       unionfs_check_file(file);
1730 +
1731 +       bstart = fbstart(file);
1732 +       bend = fbend(file);
1733 +       for (bindex = bstart; bindex <= bend; bindex++) {
1734 +               lower_file = unionfs_lower_file_idx(file, bindex);
1735 +
1736 +               if (lower_file && lower_file->f_op &&
1737 +                   lower_file->f_op->flush) {
1738 +                       err = lower_file->f_op->flush(lower_file, id);
1739 +                       if (err)
1740 +                               goto out; /* remaining branches are skipped */
1741 +               }
1742 +
1743 +       }
1744 +
1745 +out:
1746 +       if (!err)
1747 +               unionfs_check_file(file);
1748 +       unionfs_unlock_dentry(dentry);
1749 +       unionfs_unlock_parent(dentry, parent);
1750 +       unionfs_read_unlock(dentry->d_sb);
1751 +       return err;
1752 +}
1753 --- /dev/null
1754 +++ kernel-2.6.28/fs/unionfs/copyup.c
1755 @@ -0,0 +1,888 @@
1756 +/*
1757 + * Copyright (c) 2003-2009 Erez Zadok
1758 + * Copyright (c) 2003-2006 Charles P. Wright
1759 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1760 + * Copyright (c) 2005-2006 Junjiro Okajima
1761 + * Copyright (c) 2005      Arun M. Krishnakumar
1762 + * Copyright (c) 2004-2006 David P. Quigley
1763 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1764 + * Copyright (c) 2003      Puja Gupta
1765 + * Copyright (c) 2003      Harikesavan Krishnan
1766 + * Copyright (c) 2003-2009 Stony Brook University
1767 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
1768 + *
1769 + * This program is free software; you can redistribute it and/or modify
1770 + * it under the terms of the GNU General Public License version 2 as
1771 + * published by the Free Software Foundation.
1772 + */
1773 +
1774 +#include "union.h"
1775 +
1776 +/*
1777 + * For detailed explanation of copyup see:
1778 + * Documentation/filesystems/unionfs/concepts.txt
1779 + */
1780 +
1781 +#ifdef CONFIG_UNION_FS_XATTR
1782 +/* copy every xattr of old_lower_dentry to new_lower_dentry; 0 or -errno */
1783 +static int copyup_xattrs(struct dentry *old_lower_dentry,
1784 +                        struct dentry *new_lower_dentry)
1785 +{
1786 +       int err = 0;
1787 +       ssize_t list_size = -1;
1788 +       char *name_list = NULL;
1789 +       char *attr_value = NULL;
1790 +       char *name_list_buf = NULL;
1791 +
1792 +       /* query the actual size of the xattr list */
1793 +       list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1794 +       if (list_size <= 0) {
1795 +               err = list_size;
1796 +               goto out;
1797 +       }
1798 +
1799 +       /* allocate space for the actual list */
1800 +       name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1801 +       if (unlikely(!name_list || IS_ERR(name_list))) {
1802 +               err = PTR_ERR(name_list);
1803 +               goto out;
1804 +       }
1805 +
1806 +       name_list_buf = name_list; /* save for kfree at end */
1807 +
1808 +       /* now get the actual xattr list of the source file */
1809 +       list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1810 +       if (list_size <= 0) {
1811 +               err = list_size;
1812 +               goto out;
1813 +       }
1814 +
1815 +       /* allocate space to hold each xattr's value */
1816 +       attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1817 +       if (unlikely(!attr_value || IS_ERR(attr_value))) {
1818 +               err = PTR_ERR(attr_value); /* the pointer that just failed */
1819 +               goto out;
1820 +       }
1821 +
1822 +       /* in a loop, get and set each xattr from src to dst file */
1823 +       while (*name_list) {
1824 +               ssize_t size;
1825 +
1826 +               /* Lock here since vfs_getxattr doesn't lock for us */
1827 +               mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1828 +               size = vfs_getxattr(old_lower_dentry, name_list,
1829 +                                   attr_value, XATTR_SIZE_MAX);
1830 +               mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1831 +               if (size < 0) {
1832 +                       err = size;
1833 +                       goto out;
1834 +               }
1835 +               if (size > XATTR_SIZE_MAX) {
1836 +                       err = -E2BIG;
1837 +                       goto out;
1838 +               }
1839 +               /* Don't lock here since vfs_setxattr does it for us. */
1840 +               err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1841 +                                  size, 0);
1842 +               /*
1843 +                * Selinux depends on "security.*" xattrs, so to maintain
1844 +                * the security of copied-up files, if Selinux is active,
1845 +                * then we must copy these xattrs as well.  So we need to
1846 +                * temporarily get FOWNER privileges.
1847 +                * XXX: move entire copyup code to SIOQ.
1848 +                */
1849 +               if (err == -EPERM && !capable(CAP_FOWNER)) {
1850 +                       cap_raise(current->cap_effective, CAP_FOWNER);
1851 +                       err = vfs_setxattr(new_lower_dentry, name_list,
1852 +                                          attr_value, size, 0);
1853 +                       cap_lower(current->cap_effective, CAP_FOWNER);
1854 +               }
1855 +               if (err < 0)
1856 +                       goto out;
1857 +               name_list += strlen(name_list) + 1; /* next name in list */
1858 +       }
1859 +out:
1860 +       unionfs_xattr_kfree(name_list_buf);
1861 +       unionfs_xattr_kfree(attr_value);
1862 +       /* Ignore if xattr isn't supported */
1863 +       if (err == -ENOTSUPP || err == -EOPNOTSUPP)
1864 +               err = 0;
1865 +       return err;
1866 +}
1867 +#endif /* CONFIG_UNION_FS_XATTR */
1868 +
1869 +/*
1870 + * Copy times, uid/gid and then the mode from the old lower inode onto the
1871 + * new lower dentry (@sb is not used here).
1872 + * Handle file systems which may not support certain options.  For example
1873 + * jffs2 doesn't allow one to chmod a symlink.  So we ignore such harmless
1874 + * errors, rather than propagating them up, which results in copyup errors
1875 + * and errors returned back to users.
1876 + */
1877 +static int copyup_permissions(struct super_block *sb,
1878 +                             struct dentry *old_lower_dentry,
1879 +                             struct dentry *new_lower_dentry)
1880 +{
1881 +       struct inode *i = old_lower_dentry->d_inode;
1882 +       struct iattr newattrs;
1883 +       int err;
1884 +
1885 +       newattrs.ia_atime = i->i_atime;
1886 +       newattrs.ia_mtime = i->i_mtime;
1887 +       newattrs.ia_ctime = i->i_ctime;
1888 +       newattrs.ia_gid = i->i_gid;
1889 +       newattrs.ia_uid = i->i_uid;
1890 +       newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1891 +               ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1892 +               ATTR_GID | ATTR_UID;
1893 +       mutex_lock(&new_lower_dentry->d_inode->i_mutex);
1894 +       err = notify_change(new_lower_dentry, &newattrs);
1895 +       if (err)
1896 +               goto out; /* times/ownership failed: mode is not attempted */
1897 +
1898 +       /* now try to change the mode and ignore EOPNOTSUPP on symlinks */
1899 +       newattrs.ia_mode = i->i_mode;
1900 +       newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1901 +       err = notify_change(new_lower_dentry, &newattrs);
1902 +       if (err == -EOPNOTSUPP &&
1903 +           S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1904 +               printk(KERN_WARNING
1905 +                      "unionfs: changing \"%s\" symlink mode unsupported\n",
1906 +                      new_lower_dentry->d_name.name);
1907 +               err = 0;
1908 +       }
1909 +
1910 +out:
1911 +       mutex_unlock(&new_lower_dentry->d_inode->i_mutex);
1912 +       return err;
1913 +}
1914 +
1915 +/*
1916 + * create the new device/file/directory/symlink in the new branch through
1917 + * run_sioq(), picking the helper by the old inode's type; returns its error
1918 + *
1919 + * if the object being copied up is a regular file, the file is only created,
1920 + * the contents have to be copied up separately
1921 + */
1922 +static int __copyup_ndentry(struct dentry *old_lower_dentry,
1923 +                           struct dentry *new_lower_dentry,
1924 +                           struct dentry *new_lower_parent_dentry,
1925 +                           char *symbuf)
1926 +{
1927 +       int err = 0;
1928 +       umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1929 +       struct sioq_args args;
1930 +
1931 +       if (S_ISDIR(old_mode)) {
1932 +               args.mkdir.parent = new_lower_parent_dentry->d_inode;
1933 +               args.mkdir.dentry = new_lower_dentry;
1934 +               args.mkdir.mode = old_mode;
1935 +
1936 +               run_sioq(__unionfs_mkdir, &args);
1937 +               err = args.err;
1938 +       } else if (S_ISLNK(old_mode)) {
1939 +               args.symlink.parent = new_lower_parent_dentry->d_inode;
1940 +               args.symlink.dentry = new_lower_dentry;
1941 +               args.symlink.symbuf = symbuf; /* link target from the caller */
1942 +
1943 +               run_sioq(__unionfs_symlink, &args);
1944 +               err = args.err;
1945 +       } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1946 +                  S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1947 +               args.mknod.parent = new_lower_parent_dentry->d_inode;
1948 +               args.mknod.dentry = new_lower_dentry;
1949 +               args.mknod.mode = old_mode;
1950 +               args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1951 +
1952 +               run_sioq(__unionfs_mknod, &args);
1953 +               err = args.err;
1954 +       } else if (S_ISREG(old_mode)) {
1955 +               struct nameidata nd;
1956 +               err = init_lower_nd(&nd, LOOKUP_CREATE);
1957 +               if (unlikely(err < 0))
1958 +                       goto out;
1959 +               args.create.nd = &nd;
1960 +               args.create.parent = new_lower_parent_dentry->d_inode;
1961 +               args.create.dentry = new_lower_dentry;
1962 +               args.create.mode = old_mode;
1963 +
1964 +               run_sioq(__unionfs_create, &args);
1965 +               err = args.err;
1966 +               release_lower_nd(&nd, err);
1967 +       } else {
1968 +               printk(KERN_CRIT "unionfs: unknown inode type %d\n",
1969 +                      old_mode);
1970 +               BUG();
1971 +       }
1972 +
1973 +out:
1974 +       return err;
1975 +}
1976 +/* copy up to @len bytes of data from the old lower file to the new one */
1977 +static int __copyup_reg_data(struct dentry *dentry,
1978 +                            struct dentry *new_lower_dentry, int new_bindex,
1979 +                            struct dentry *old_lower_dentry, int old_bindex,
1980 +                            struct file **copyup_file, loff_t len)
1981 +{
1982 +       struct super_block *sb = dentry->d_sb;
1983 +       struct file *input_file;
1984 +       struct file *output_file;
1985 +       struct vfsmount *output_mnt;
1986 +       mm_segment_t old_fs;
1987 +       char *buf = NULL;
1988 +       ssize_t read_bytes, write_bytes;
1989 +       loff_t size;
1990 +       int err = 0;
1991 +
1992 +       /* open old file (takes mnt and branch references on old_bindex) */
1993 +       unionfs_mntget(dentry, old_bindex);
1994 +       branchget(sb, old_bindex);
1995 +       /* dentry_open calls dput and mntput if it returns an error */
1996 +       input_file = dentry_open(old_lower_dentry,
1997 +                                unionfs_lower_mnt_idx(dentry, old_bindex),
1998 +                                O_RDONLY | O_LARGEFILE);
1999 +       if (IS_ERR(input_file)) {
2000 +               dput(old_lower_dentry); /* NOTE(review): confirm ref balance */
2001 +               err = PTR_ERR(input_file);
2002 +               goto out;
2003 +       }
2004 +       if (unlikely(!input_file->f_op || !input_file->f_op->read)) {
2005 +               err = -EINVAL;
2006 +               goto out_close_in;
2007 +       }
2008 +
2009 +       /* open new file */
2010 +       dget(new_lower_dentry);
2011 +       output_mnt = unionfs_mntget(sb->s_root, new_bindex);
2012 +       branchget(sb, new_bindex);
2013 +       output_file = dentry_open(new_lower_dentry, output_mnt,
2014 +                                 O_RDWR | O_LARGEFILE);
2015 +       if (IS_ERR(output_file)) {
2016 +               err = PTR_ERR(output_file);
2017 +               goto out_close_in2;
2018 +       }
2019 +       if (unlikely(!output_file->f_op || !output_file->f_op->write)) {
2020 +               err = -EINVAL;
2021 +               goto out_close_out;
2022 +       }
2023 +
2024 +       /* bounce buffer: the data is copied at most one page at a time */
2025 +       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2026 +       if (unlikely(!buf)) {
2027 +               err = -ENOMEM;
2028 +               goto out_close_out;
2029 +       }
2030 +
2031 +       input_file->f_pos = 0;
2032 +       output_file->f_pos = 0;
2033 +
2034 +       old_fs = get_fs();
2035 +       set_fs(KERNEL_DS); /* buf is a kernel pointer passed as __user */
2036 +
2037 +       size = len;
2038 +       err = 0;
2039 +       do {
2040 +               if (len >= PAGE_SIZE)
2041 +                       size = PAGE_SIZE;
2042 +               else if ((len < PAGE_SIZE) && (len > 0))
2043 +                       size = len;
2044 +
2045 +               len -= PAGE_SIZE;
2046 +
2047 +               read_bytes =
2048 +                       input_file->f_op->read(input_file,
2049 +                                              (char __user *)buf, size,
2050 +                                              &input_file->f_pos);
2051 +               if (read_bytes <= 0) {
2052 +                       err = read_bytes;
2053 +                       break;
2054 +               }
2055 +
2056 +               /* see Documentation/filesystems/unionfs/issues.txt */
2057 +               lockdep_off();
2058 +               write_bytes =
2059 +                       output_file->f_op->write(output_file,
2060 +                                                (char __user *)buf,
2061 +                                                read_bytes,
2062 +                                                &output_file->f_pos);
2063 +               lockdep_on();
2064 +               if ((write_bytes < 0) || (write_bytes < read_bytes)) {
2065 +                       err = write_bytes < 0 ? write_bytes : -EIO; /* short */
2066 +                       break;
2067 +               }
2068 +       } while ((read_bytes > 0) && (len > 0));
2069 +
2070 +       set_fs(old_fs);
2071 +
2072 +       kfree(buf);
2073 +
2074 +       if (!err && output_file->f_op->fsync) /* ->fsync may be absent */
2075 +               err = output_file->f_op->fsync(output_file,
2076 +                                              new_lower_dentry, 0);
2077 +
2078 +       if (err)
2079 +               goto out_close_out;
2080 +
2081 +       if (copyup_file) { /* hand the still-open new file to the caller */
2082 +               *copyup_file = output_file;
2083 +               goto out_close_in; /* keeps output_file and its branch ref */
2084 +       }
2085 +
2086 +out_close_out:
2087 +       fput(output_file);
2088 +
2089 +out_close_in2:
2090 +       branchput(sb, new_bindex);
2091 +
2092 +out_close_in:
2093 +       fput(input_file);
2094 +
2095 +out:
2096 +       branchput(sb, old_bindex);
2097 +
2098 +       return err;
2099 +}
2100 +
2101 +/*
2102 + * Undo copyup state: clear the lower dentry pointer at @new_bindex, restore
2103 + * the old bstart/bend, and dput the new and old lower dentries.
2104 + */
2105 +static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
2106 +                   int old_bstart, int old_bend,
2107 +                   struct dentry *new_lower_dentry, int new_bindex)
2108 +{
2109 +       /* get rid of the lower dentry and all its traces */
2110 +       unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
2111 +       dbstart(dentry) = old_bstart;
2112 +       dbend(dentry) = old_bend;
2113 +
2114 +       dput(new_lower_dentry);
2115 +       dput(old_lower_dentry);
2116 +}
2117 +
2118 +/*
2119 + * Copy up a dentry to a file of specified name.
2120 + *
2121 + * @dir: used to pull the ->i_sb to access other branches
2122 + * @dentry: the non-negative dentry whose lower_inode we should copy
2123 + * @bstart: the branch of the lower_inode to copy from
2124 + * @new_bindex: the branch to create the new file in
2125 + * @name: the name of the file to create
2126 + * @namelen: length of @name
2127 + * @copyup_file: the "struct file" to return (optional)
2128 + * @len: how many bytes to copy-up?
2129 + */
2130 +int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
2131 +                 int new_bindex, const char *name, int namelen,
2132 +                 struct file **copyup_file, loff_t len)
2133 +{
2134 +       struct dentry *new_lower_dentry;
2135 +       struct dentry *old_lower_dentry = NULL;
2136 +       struct super_block *sb;
2137 +       int err = 0;
2138 +       int old_bindex;
2139 +       int old_bstart;
2140 +       int old_bend;
2141 +       struct dentry *new_lower_parent_dentry = NULL;
2142 +       mm_segment_t oldfs;
2143 +       char *symbuf = NULL;
2144 +
2145 +       verify_locked(dentry);
2146 +
2147 +       old_bindex = bstart;
2148 +       old_bstart = dbstart(dentry);
2149 +       old_bend = dbend(dentry);
2150 +
2151 +       BUG_ON(new_bindex < 0);
2152 +       BUG_ON(new_bindex >= old_bindex);
2153 +
2154 +       sb = dir->i_sb;
2155 +
2156 +       err = is_robranch_super(sb, new_bindex);
2157 +       if (err)
2158 +               goto out;
2159 +
2160 +       /* Create the directory structure above this dentry. */
2161 +       new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
2162 +       if (IS_ERR(new_lower_dentry)) {
2163 +               err = PTR_ERR(new_lower_dentry);
2164 +               goto out;
2165 +       }
2166 +
2167 +       old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
2168 +       /* we conditionally dput this old_lower_dentry at end of function */
2169 +       dget(old_lower_dentry);
2170 +
2171 +       /* For symlinks, we must read the link before we lock the directory. */
2172 +       if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
2173 +
2174 +               symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
2175 +               if (unlikely(!symbuf)) {
2176 +                       __clear(dentry, old_lower_dentry,
2177 +                               old_bstart, old_bend,
2178 +                               new_lower_dentry, new_bindex);
2179 +                       err = -ENOMEM;
2180 +                       goto out_free;
2181 +               }
2182 +
2183 +               oldfs = get_fs();
2184 +               set_fs(KERNEL_DS);
2185 +               err = old_lower_dentry->d_inode->i_op->readlink(
2186 +                       old_lower_dentry,
2187 +                       (char __user *)symbuf,
2188 +                       PATH_MAX);
2189 +               set_fs(oldfs);
2190 +               if (err < 0) {
2191 +                       __clear(dentry, old_lower_dentry,
2192 +                               old_bstart, old_bend,
2193 +                               new_lower_dentry, new_bindex);
2194 +                       goto out_free;
2195 +               }
2196 +               symbuf[err] = '\0';
2197 +       }
2198 +
2199 +       /* Now we lock the parent, and create the object in the new branch. */
2200 +       new_lower_parent_dentry = lock_parent(new_lower_dentry);
2201 +
2202 +       /* create the new inode */
2203 +       err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2204 +                              new_lower_parent_dentry, symbuf);
2205 +
2206 +       if (err) {
2207 +               __clear(dentry, old_lower_dentry,
2208 +                       old_bstart, old_bend,
2209 +                       new_lower_dentry, new_bindex);
2210 +               goto out_unlock;
2211 +       }
2212 +
2213 +       /* We actually copyup the file here. */
2214 +       if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2215 +               err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2216 +                                       old_lower_dentry, old_bindex,
2217 +                                       copyup_file, len);
2218 +       if (err)
2219 +               goto out_unlink;
2220 +
2221 +       /* Set permissions. */
2222 +       err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry);
2223 +       if (err)
2224 +               goto out_unlink;
2225 +
2226 +#ifdef CONFIG_UNION_FS_XATTR
2227 +       /* Selinux uses extended attributes for permissions. */
2228 +       err = copyup_xattrs(old_lower_dentry, new_lower_dentry);
2229 +       if (err)
2230 +               goto out_unlink;
2231 +#endif /* CONFIG_UNION_FS_XATTR */
2232 +
2233 +       /* do not allow files getting deleted to be re-interposed */
2234 +       if (!d_deleted(dentry))
2235 +               unionfs_reinterpose(dentry);
2236 +
2237 +       goto out_unlock;
2238 +
2239 +out_unlink:
2240 +       /*
2241 +        * copyup failed, because we possibly ran out of space or
2242 +        * quota, or something else happened so let's unlink; we don't
2243 +        * really care about the return value of vfs_unlink
2244 +        */
2245 +       vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2246 +
2247 +       if (copyup_file) {
2248 +               /* need to close the file */
2249 +
2250 +               fput(*copyup_file);
2251 +               branchput(sb, new_bindex);
2252 +       }
2253 +
2254 +       /*
2255 +        * TODO: should we reset the error to something like -EIO?
2256 +        *
2257 +        * If we don't reset, the user may get some nonsensical errors, but
2258 +        * on the other hand, if we reset to EIO, we guarantee that the user
2259 +        * will get a "confusing" error message.
2260 +        */
2261 +
2262 +out_unlock:
2263 +       unlock_dir(new_lower_parent_dentry);
2264 +
2265 +out_free:
2266 +       /*
2267 +        * If old_lower_dentry was not a file, then we need to dput it.  If
2268 +        * it was a file, then it was already dput indirectly by other
2269 +        * functions we call above which operate on regular files.
2270 +        */
2271 +       if (old_lower_dentry && old_lower_dentry->d_inode &&
2272 +           !S_ISREG(old_lower_dentry->d_inode->i_mode))
2273 +               dput(old_lower_dentry);
2274 +       kfree(symbuf);
2275 +
2276 +       if (err) {
2277 +               /*
2278 +                * if directory creation succeeded, but inode copyup failed,
2279 +                * then purge new dentries.
2280 +                */
2281 +               if (dbstart(dentry) < old_bstart &&
2282 +                   ibstart(dentry->d_inode) > dbstart(dentry))
2283 +                       __clear(dentry, NULL, old_bstart, old_bend,
2284 +                               unionfs_lower_dentry(dentry), dbstart(dentry));
2285 +               goto out;
2286 +       }
2287 +       if (!S_ISDIR(dentry->d_inode->i_mode)) {
2288 +               unionfs_postcopyup_release(dentry);
2289 +               if (!unionfs_lower_inode(dentry->d_inode)) {
2290 +                       /*
2291 +                        * If we got here, then we copied up to an
2292 +                        * unlinked-open file, whose name is .unionfsXXXXX.
2293 +                        */
2294 +                       struct inode *inode = new_lower_dentry->d_inode;
2295 +                       atomic_inc(&inode->i_count);
2296 +                       unionfs_set_lower_inode_idx(dentry->d_inode,
2297 +                                                   ibstart(dentry->d_inode),
2298 +                                                   inode);
2299 +               }
2300 +       }
2301 +       unionfs_postcopyup_setmnt(dentry);
2302 +       /* sync inode times from copied-up inode to our inode */
2303 +       unionfs_copy_attr_times(dentry->d_inode);
2304 +       unionfs_check_inode(dir);
2305 +       unionfs_check_dentry(dentry);
2306 +out:
2307 +       return err;
2308 +}
2309 +
2310 +/*
2311 + * Copy up the file behind 'file' from branch 'bstart' to branch 'new_bindex'
2312 + * under the name "name" (via copyup_dentry).  On success, fbstart(file) and
2313 + * the lower file at new_bindex are updated; returns copyup_dentry()'s result.
2314 + */
2315 +int copyup_named_file(struct inode *dir, struct file *file, char *name,
2316 +                     int bstart, int new_bindex, loff_t len)
2317 +{
2318 +       int err = 0;
2319 +       struct file *output_file = NULL;
2320 +
2321 +       err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2322 +                           name, strlen(name), &output_file, len);
2323 +       if (!err) {
2324 +               fbstart(file) = new_bindex;
2325 +               unionfs_set_lower_file_idx(file, new_bindex, output_file);
2326 +       }
2327 +
2328 +       return err;
2329 +}
2330 +
2331 +/*
2332 + * Copy up 'file' from branch 'bstart' to 'new_bindex' under the dentry's own
2333 + * name; on success, set fbstart(file) and the lower file at new_bindex.
2334 + */
2335 +int copyup_file(struct inode *dir, struct file *file, int bstart,
2336 +               int new_bindex, loff_t len)
2337 +{
2338 +       int err = 0;
2339 +       struct file *output_file = NULL;
2340 +       struct dentry *dentry = file->f_path.dentry;
2341 +
2342 +       err = copyup_dentry(dir, dentry, bstart, new_bindex,
2343 +                           dentry->d_name.name, dentry->d_name.len,
2344 +                           &output_file, len);
2345 +       if (!err) {
2346 +               fbstart(file) = new_bindex;
2347 +               unionfs_set_lower_file_idx(file, new_bindex, output_file);
2348 +       }
2349 +
2350 +       return err;
2351 +}
2352 +
2353 +/* release negative lower dentries/mnts except bindex; recompute bstart/bend */
2354 +static void __cleanup_dentry(struct dentry *dentry, int bindex,
2355 +                            int old_bstart, int old_bend)
2356 +{
2357 +       int loop_start;
2358 +       int loop_end;
2359 +       int new_bstart = -1;
2360 +       int new_bend = -1;
2361 +       int i;
2362 +
2363 +       loop_start = min(old_bstart, bindex);
2364 +       loop_end = max(old_bend, bindex);
2365 +
2366 +       /*
2367 +        * Scan branches min(old_bstart, bindex)..max(old_bend, bindex).  A
2368 +        * negative lower dentry (no d_inode) is dput and its mnt released;
2369 +        * bindex and positive dentries define the new bstart/bend.
2370 +        */
2371 +       for (i = loop_start; i <= loop_end; i++) {
2372 +               if (!unionfs_lower_dentry_idx(dentry, i))
2373 +                       continue;
2374 +
2375 +               if (i == bindex) {
2376 +                       new_bend = i;
2377 +                       if (new_bstart < 0)
2378 +                               new_bstart = i;
2379 +                       continue;
2380 +               }
2381 +
2382 +               if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2383 +                       dput(unionfs_lower_dentry_idx(dentry, i));
2384 +                       unionfs_set_lower_dentry_idx(dentry, i, NULL);
2385 +
2386 +                       unionfs_mntput(dentry, i);
2387 +                       unionfs_set_lower_mnt_idx(dentry, i, NULL);
2388 +               } else {
2389 +                       if (new_bstart < 0)
2390 +                               new_bstart = i;
2391 +                       new_bend = i;
2392 +               }
2393 +       }
2394 +
2395 +       if (new_bstart < 0)
2396 +               new_bstart = bindex;
2397 +       if (new_bend < 0)
2398 +               new_bend = bindex;
2399 +       dbstart(dentry) = new_bstart;
2400 +       dbend(dentry) = new_bend;
2401 +
2402 +}
2403 +
2404 +/* store igrab()'d lower inode at bindex; widen ibstart/ibend to include it */
2405 +static void __set_inode(struct dentry *upper, struct dentry *lower,
2406 +                       int bindex)
2407 +{
2408 +       unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2409 +                                   igrab(lower->d_inode));
2410 +       if (likely(ibstart(upper->d_inode) > bindex))
2411 +               ibstart(upper->d_inode) = bindex;
2412 +       if (likely(ibend(upper->d_inode) < bindex))
2413 +               ibend(upper->d_inode) = bindex;
2414 +
2415 +}
2416 +
2417 +/* store lower dentry at bindex; widen dbstart/dbend to include bindex */
2418 +static void __set_dentry(struct dentry *upper, struct dentry *lower,
2419 +                        int bindex)
2420 +{
2421 +       unionfs_set_lower_dentry_idx(upper, bindex, lower);
2422 +       if (likely(dbstart(upper) > bindex))
2423 +               dbstart(upper) = bindex;
2424 +       if (likely(dbend(upper) < bindex))
2425 +               dbend(upper) = bindex;
2426 +}
2427 +
2428 +/*
2429 + * Replicate the directory chain leading to 'dentry' in branch 'bindex', then
2430 + * look up 'name' there.  Returns the lower dentry or an ERR_PTR on failure.
2431 + */
2432 +struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2433 +                             const char *name, int bindex)
2434 +{
2435 +       int err;
2436 +       struct dentry *child_dentry;
2437 +       struct dentry *parent_dentry;
2438 +       struct dentry *lower_parent_dentry = NULL;
2439 +       struct dentry *lower_dentry = NULL;
2440 +       const char *childname;
2441 +       unsigned int childnamelen;
2442 +       int nr_dentry;
2443 +       int count = 0;
2444 +       int old_bstart;
2445 +       int old_bend;
2446 +       struct dentry **path = NULL;
2447 +       struct super_block *sb;
2448 +
2449 +       verify_locked(dentry);
2450 +
2451 +       err = is_robranch_super(dir->i_sb, bindex);
2452 +       if (err) {
2453 +               lower_dentry = ERR_PTR(err);
2454 +               goto out;
2455 +       }
2456 +
2457 +       old_bstart = dbstart(dentry);
2458 +       old_bend = dbend(dentry);
2459 +
2460 +       lower_dentry = ERR_PTR(-ENOMEM);
2461 +
2462 +       /* There is no sense allocating any less than the minimum. */
2463 +       nr_dentry = 1;
2464 +       path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2465 +       if (unlikely(!path))
2466 +               goto out;
2467 +
2468 +       /* assume the negative dentry of unionfs as the parent dentry */
2469 +       parent_dentry = dentry;
2470 +
2471 +       /*
2472 +        * This loop finds the first parent that exists in the given branch.
2473 +        * We start building the directory structure from there.  At the end
2474 +        * of the loop, the following should hold:
2475 +        *  - child_dentry is the first nonexistent child
2476 +        *  - parent_dentry is the first existent parent
2477 +        *  - path[0] is the deepest child (dentry itself)
2478 +        *  - path[count] is the first child to create
2479 +        */
2480 +       do {
2481 +               child_dentry = parent_dentry;
2482 +
2483 +               /* find the parent directory dentry in unionfs */
2484 +               parent_dentry = dget_parent(child_dentry);
2485 +
2486 +               /* find out the lower_parent_dentry in the given branch */
2487 +               lower_parent_dentry =
2488 +                       unionfs_lower_dentry_idx(parent_dentry, bindex);
2489 +
2490 +               /* grow path table */
2491 +               if (count == nr_dentry) {
2492 +                       void *p;
2493 +
2494 +                       nr_dentry *= 2;
2495 +                       p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2496 +                                    GFP_KERNEL);
2497 +                       if (unlikely(!p)) {
     +                               /*
     +                                * child_dentry (never the original dentry
     +                                * here, since count >= 1) and parent_dentry
     +                                * hold references not yet stored in path[].
     +                                * Drop them, and step count back so the
     +                                * cleanup at 'out' only walks the filled
     +                                * slots instead of reading path[count],
     +                                * which lies past the old allocation.
     +                                */
     +                               dput(parent_dentry);
     +                               dput(child_dentry);
     +                               count--;
2498 +                               lower_dentry = ERR_PTR(-ENOMEM);
2499 +                               goto out;
2500 +                       }
2501 +                       path = p;
2502 +               }
2503 +
2504 +               /* store the child dentry */
2505 +               path[count++] = child_dentry;
2506 +       } while (!lower_parent_dentry);
2507 +       count--;
2508 +
2509 +       sb = dentry->d_sb;
2510 +
2511 +       /*
2512 +        * This code goes between the begin/end labels and basically
2513 +        * emulates a while(child_dentry != dentry), only cleaner and
2514 +        * shorter than what would be a much longer while loop.
2515 +        */
2516 +begin:
2517 +       /* get lower parent dir in the current branch */
2518 +       lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2519 +       dput(parent_dentry);
2520 +
2521 +       /* init the values to lookup */
2522 +       childname = child_dentry->d_name.name;
2523 +       childnamelen = child_dentry->d_name.len;
2524 +
2525 +       if (child_dentry != dentry) {
2526 +               /* lookup child in the underlying file system */
2527 +               lower_dentry = lookup_one_len(childname, lower_parent_dentry,
2528 +                                             childnamelen);
2529 +               if (IS_ERR(lower_dentry))
2530 +                       goto out;
2531 +       } else {
2532 +               /*
2533 +                * Is the name a whiteout of the child name ?  lookup the
2534 +                * whiteout child in the underlying file system
2535 +                */
2536 +               lower_dentry = lookup_one_len(name, lower_parent_dentry,
2537 +                                             strlen(name));
2538 +               if (IS_ERR(lower_dentry))
2539 +                       goto out;
2540 +
2541 +               /* Replace the current dentry (if any) with the new one */
2542 +               dput(unionfs_lower_dentry_idx(dentry, bindex));
2543 +               unionfs_set_lower_dentry_idx(dentry, bindex,
2544 +                                            lower_dentry);
2545 +
2546 +               __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2547 +               goto out;
2548 +       }
2549 +
2550 +       if (lower_dentry->d_inode) {
2551 +               /*
2552 +                * since this already exists we dput to avoid
2553 +                * multiple references on the same dentry
2554 +                */
2555 +               dput(lower_dentry);
2556 +       } else {
2557 +               struct sioq_args args;
2558 +
2559 +               /* it's a negative dentry, create a new dir */
2560 +               lower_parent_dentry = lock_parent(lower_dentry);
2561 +
2562 +               args.mkdir.parent = lower_parent_dentry->d_inode;
2563 +               args.mkdir.dentry = lower_dentry;
2564 +               args.mkdir.mode = child_dentry->d_inode->i_mode;
2565 +
2566 +               run_sioq(__unionfs_mkdir, &args);
2567 +               err = args.err;
2568 +
2569 +               if (!err)
2570 +                       err = copyup_permissions(dir->i_sb, child_dentry,
2571 +                                                lower_dentry);
2572 +               unlock_dir(lower_parent_dentry);
2573 +               if (err) {
2574 +                       dput(lower_dentry);
2575 +                       lower_dentry = ERR_PTR(err);
2576 +                       goto out;
2577 +               }
2578 +
2579 +       }
2580 +
2581 +       __set_inode(child_dentry, lower_dentry, bindex);
2582 +       __set_dentry(child_dentry, lower_dentry, bindex);
2583 +       /*
2584 +        * update times of this dentry, but also the parent, because if
2585 +        * we changed, the parent may have changed too.
2586 +        */
2587 +       fsstack_copy_attr_times(parent_dentry->d_inode,
2588 +                               lower_parent_dentry->d_inode);
2589 +       unionfs_copy_attr_times(child_dentry->d_inode);
2590 +
2591 +       parent_dentry = child_dentry;
2592 +       child_dentry = path[--count];
2593 +       goto begin;
2594 +out:
2595 +       /* drop leftover dentry references taken by the do/while loop above */
2596 +       if (IS_ERR(lower_dentry))
2597 +               while (count)
2598 +                       dput(path[count--]);
2599 +       kfree(path);
2600 +       return lower_dentry;
2601 +}
2602 +
2603 +/*
2604 + * Post-copyup helper: if dentry has no lower mnt at dbstart(dentry), give it
2605 + * and every mnt-less ancestor a ref on the nearest ancestor's mnt there.
2606 + */
2607 +void unionfs_postcopyup_setmnt(struct dentry *dentry)
2608 +{
2609 +       struct dentry *parent, *hasone;
2610 +       int bindex = dbstart(dentry);
2611 +
2612 +       if (unionfs_lower_mnt_idx(dentry, bindex))
2613 +               return;
2614 +       hasone = dentry->d_parent;
2615 +       /* find nearest ancestor with a mnt at bindex; should stop at root */
2616 +       while (!unionfs_lower_mnt_idx(hasone, bindex))
2617 +               hasone = hasone->d_parent;
2618 +       parent = dentry;
2619 +       while (!unionfs_lower_mnt_idx(parent, bindex)) {
2620 +               unionfs_set_lower_mnt_idx(parent, bindex,
2621 +                                         unionfs_mntget(hasone, bindex));
2622 +               parent = parent->d_parent;
2623 +       }
2624 +}
2625 +
2626 +/*
2627 + * Post-copyup helper, non-directories only: path_put_lowers/iput_lowers the
2628 + * branches dbstart+1..dbend, then set dbend, ibstart and ibend to dbstart.
2629 + */
2630 +void unionfs_postcopyup_release(struct dentry *dentry)
2631 +{
2632 +       int bstart, bend;
2633 +
2634 +       BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2635 +       bstart = dbstart(dentry);
2636 +       bend = dbend(dentry);
2637 +
2638 +       path_put_lowers(dentry, bstart + 1, bend, false);
2639 +       iput_lowers(dentry->d_inode, bstart + 1, bend, false);
2640 +
2641 +       dbend(dentry) = bstart;
2642 +       ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bstart;
2643 +}
2644 --- /dev/null
2645 +++ kernel-2.6.28/fs/unionfs/debug.c
2646 @@ -0,0 +1,533 @@
2647 +/*
2648 + * Copyright (c) 2003-2009 Erez Zadok
2649 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2650 + * Copyright (c) 2003-2009 Stony Brook University
2651 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
2652 + *
2653 + * This program is free software; you can redistribute it and/or modify
2654 + * it under the terms of the GNU General Public License version 2 as
2655 + * published by the Free Software Foundation.
2656 + */
2657 +
2658 +#include "union.h"
2659 +
2660 +/*
2661 + * Helper debugging functions for maintainers (and for users to report
2662 + * useful information back to maintainers).
2663 + */
2664 +
2665 +/* log the calling file:function:line once (uses caller's printed_caller) */
2666 +#define PRINT_CALLER(fname, fxn, line)                                 \
2667 +       do {                                                            \
2668 +               if (!printed_caller) {                                  \
2669 +                       pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line)); \
2670 +                       printed_caller = 1;                             \
2671 +               }                                                       \
2672 +       } while (0)
2673 +
2674 +/*
2675 + * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2676 + * the fan-out of various Unionfs objects.  We check that no lower objects
2677 + * exist outside the start/end branch range; that all objects within are
2678 + * non-NULL (with some allowed exceptions); that for every lower file
2679 + * there's a lower dentry+inode; that the start/end ranges match for all
2680 + * corresponding lower objects; that open files/symlinks have only one lower
2681 + * object, but directories can have several; and more.
2682 + */
2683 +void __unionfs_check_inode(const struct inode *inode,
2684 +                          const char *fname, const char *fxn, int line)
2685 +{
2686 +       int bindex;
2687 +       int istart, iend;
2688 +       struct inode *lower_inode;
2689 +       struct super_block *sb;
2690 +       int printed_caller = 0;
2691 +       void *poison_ptr;
2692 +
2693 +       /* read the inode's lower-branch range */
2694 +       BUG_ON(!inode);
2695 +       sb = inode->i_sb;
2696 +       istart = ibstart(inode);
2697 +       iend = ibend(inode);
2698 +       /* don't check inode if no lower branches */
2699 +       if (istart < 0 && iend < 0)
2700 +               return;
2701 +       if (unlikely(istart > iend)) {
2702 +               PRINT_CALLER(fname, fxn, line);
2703 +               pr_debug(" Ci0: inode=%p istart/end=%d:%d\n",
2704 +                        inode, istart, iend);
2705 +       }
2706 +       if (unlikely((istart == -1 && iend != -1) ||
2707 +                    (istart != -1 && iend == -1))) {
2708 +               PRINT_CALLER(fname, fxn, line);
2709 +               pr_debug(" Ci1: inode=%p istart/end=%d:%d\n",
2710 +                        inode, istart, iend);
2711 +       }
2712 +       if (!S_ISDIR(inode->i_mode)) {
2713 +               if (unlikely(iend != istart)) {
2714 +                       PRINT_CALLER(fname, fxn, line);
2715 +                       pr_debug(" Ci2: inode=%p istart=%d iend=%d\n",
2716 +                                inode, istart, iend);
2717 +               }
2718 +       }
2719 +
2720 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2721 +               if (unlikely(!UNIONFS_I(inode))) {
2722 +                       PRINT_CALLER(fname, fxn, line);
2723 +                       pr_debug(" Ci3: no inode_info %p\n", inode);
2724 +                       return;
2725 +               }
2726 +               if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
2727 +                       PRINT_CALLER(fname, fxn, line);
2728 +                       pr_debug(" Ci4: no lower_inodes %p\n", inode);
2729 +                       return;
2730 +               }
2731 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
2732 +               if (lower_inode) {
2733 +                       memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2734 +                       if (unlikely(bindex < istart || bindex > iend)) {
2735 +                               PRINT_CALLER(fname, fxn, line);
2736 +                               pr_debug(" Ci5: inode/linode=%p:%p bindex=%d "
2737 +                                        "istart/end=%d:%d\n", inode,
2738 +                                        lower_inode, bindex, istart, iend);
2739 +                       } else if (unlikely(lower_inode == poison_ptr)) {
2740 +                               /* slot holds the POISON_INUSE pattern: freed inode! */
2741 +                               PRINT_CALLER(fname, fxn, line);
2742 +                               pr_debug(" Ci6: inode/linode=%p:%p bindex=%d "
2743 +                                        "istart/end=%d:%d\n", inode,
2744 +                                        lower_inode, bindex, istart, iend);
2745 +                       }
2746 +                       continue;
2747 +               }
2748 +               /* if we get here, then lower_inode == NULL */
2749 +               if (bindex < istart || bindex > iend)
2750 +                       continue;
2751 +               /*
2752 +                * directories can have NULL lower inodes in b/t start/end,
2753 +                * but NOT if at the start/end range.
2754 +                */
2755 +               if (unlikely(S_ISDIR(inode->i_mode) &&
2756 +                            bindex > istart && bindex < iend))
2757 +                       continue;
2758 +               PRINT_CALLER(fname, fxn, line);
2759 +               pr_debug(" Ci7: inode/linode=%p:%p "
2760 +                        "bindex=%d istart/end=%d:%d\n",
2761 +                        inode, lower_inode, bindex, istart, iend);
2762 +       }
2763 +}
2764 +
2765 +void __unionfs_check_dentry(const struct dentry *dentry,
2766 +                           const char *fname, const char *fxn, int line)
2767 +{
2768 +       int bindex;
2769 +       int dstart, dend, istart, iend;
2770 +       struct dentry *lower_dentry;
2771 +       struct inode *inode, *lower_inode;
2772 +       struct super_block *sb;
2773 +       struct vfsmount *lower_mnt;
2774 +       int printed_caller = 0;
2775 +       void *poison_ptr;
2776 +
2777 +       BUG_ON(!dentry);
2778 +       sb = dentry->d_sb;
2779 +       inode = dentry->d_inode;
2780 +       dstart = dbstart(dentry);
2781 +       dend = dbend(dentry);
2782 +       /* don't check dentry/mnt if no lower branches */
2783 +       if (dstart < 0 && dend < 0)
2784 +               goto check_inode;
2785 +       BUG_ON(dstart > dend);
2786 +
2787 +       if (unlikely((dstart == -1 && dend != -1) ||
2788 +                    (dstart != -1 && dend == -1))) {
2789 +               PRINT_CALLER(fname, fxn, line);
2790 +               pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n",
2791 +                        dentry, dstart, dend);
2792 +       }
2793 +       /*
2794 +        * check for NULL dentries inside the start/end range, or
2795 +        * non-NULL dentries outside the start/end range.
2796 +        */
2797 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2798 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2799 +               if (lower_dentry) {
2800 +                       if (unlikely(bindex < dstart || bindex > dend)) {
2801 +                               PRINT_CALLER(fname, fxn, line);
2802 +                               pr_debug(" CD1: dentry/lower=%p:%p(%p) "
2803 +                                        "bindex=%d dstart/end=%d:%d\n",
2804 +                                        dentry, lower_dentry,
2805 +                                        (lower_dentry ? lower_dentry->d_inode :
2806 +                                         (void *) -1L),
2807 +                                        bindex, dstart, dend);
2808 +                       }
2809 +               } else {        /* lower_dentry == NULL */
2810 +                       if (bindex < dstart || bindex > dend)
2811 +                               continue;
2812 +                       /*
2813 +                        * Directories can have NULL lower inodes in b/t
2814 +                        * start/end, but NOT if at the start/end range.
2815 +                        * Ignore this rule, however, if this is a NULL
2816 +                        * dentry or a deleted dentry.
2817 +                        */
2818 +                       if (unlikely(!d_deleted((struct dentry *) dentry) &&
2819 +                                    inode &&
2820 +                                    !(inode && S_ISDIR(inode->i_mode) &&
2821 +                                      bindex > dstart && bindex < dend))) {
2822 +                               PRINT_CALLER(fname, fxn, line);
2823 +                               pr_debug(" CD2: dentry/lower=%p:%p(%p) "
2824 +                                        "bindex=%d dstart/end=%d:%d\n",
2825 +                                        dentry, lower_dentry,
2826 +                                        (lower_dentry ?
2827 +                                         lower_dentry->d_inode :
2828 +                                         (void *) -1L),
2829 +                                        bindex, dstart, dend);
2830 +                       }
2831 +               }
2832 +       }
2833 +
2834 +       /* check for vfsmounts same as for dentries */
2835 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2836 +               lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2837 +               if (lower_mnt) {
2838 +                       if (unlikely(bindex < dstart || bindex > dend)) {
2839 +                               PRINT_CALLER(fname, fxn, line);
2840 +                               pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d "
2841 +                                        "dstart/end=%d:%d\n", dentry,
2842 +                                        lower_mnt, bindex, dstart, dend);
2843 +                       }
2844 +               } else {        /* lower_mnt == NULL */
2845 +                       if (bindex < dstart || bindex > dend)
2846 +                               continue;
2847 +                       /*
2848 +                        * Directories can have NULL lower inodes in b/t
2849 +                        * start/end, but NOT if at the start/end range.
2850 +                        * Ignore this rule, however, if this is a NULL
2851 +                        * dentry.
2852 +                        */
2853 +                       if (unlikely(inode &&
2854 +                                    !(inode && S_ISDIR(inode->i_mode) &&
2855 +                                      bindex > dstart && bindex < dend))) {
2856 +                               PRINT_CALLER(fname, fxn, line);
2857 +                               pr_debug(" CM1: dentry/lmnt=%p:%p "
2858 +                                        "bindex=%d dstart/end=%d:%d\n",
2859 +                                        dentry, lower_mnt, bindex,
2860 +                                        dstart, dend);
2861 +                       }
2862 +               }
2863 +       }
2864 +
2865 +check_inode:
2866 +       /* for inodes now */
2867 +       if (!inode)
2868 +               return;
2869 +       istart = ibstart(inode);
2870 +       iend = ibend(inode);
2871 +       /* don't check inode if no lower branches */
2872 +       if (istart < 0 && iend < 0)
2873 +               return;
2874 +       BUG_ON(istart > iend);
2875 +       if (unlikely((istart == -1 && iend != -1) ||
2876 +                    (istart != -1 && iend == -1))) {
2877 +               PRINT_CALLER(fname, fxn, line);
2878 +               pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2879 +                        dentry, inode, istart, iend);
2880 +       }
2881 +       if (unlikely(istart != dstart)) {
2882 +               PRINT_CALLER(fname, fxn, line);
2883 +               pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2884 +                        dentry, inode, istart, dstart);
2885 +       }
2886 +       if (unlikely(iend != dend)) {
2887 +               PRINT_CALLER(fname, fxn, line);
2888 +               pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2889 +                        dentry, inode, iend, dend);
2890 +       }
2891 +
2892 +       if (!S_ISDIR(inode->i_mode)) {
2893 +               if (unlikely(dend != dstart)) {
2894 +                       PRINT_CALLER(fname, fxn, line);
2895 +                       pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2896 +                                dentry, inode, dstart, dend);
2897 +               }
2898 +               if (unlikely(iend != istart)) {
2899 +                       PRINT_CALLER(fname, fxn, line);
2900 +                       pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2901 +                                dentry, inode, istart, iend);
2902 +               }
2903 +       }
2904 +
2905 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2906 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
2907 +               if (lower_inode) {
2908 +                       memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2909 +                       if (unlikely(bindex < istart || bindex > iend)) {
2910 +                               PRINT_CALLER(fname, fxn, line);
2911 +                               pr_debug(" CI5: dentry/linode=%p:%p bindex=%d "
2912 +                                        "istart/end=%d:%d\n", dentry,
2913 +                                        lower_inode, bindex, istart, iend);
2914 +                       } else if (unlikely(lower_inode == poison_ptr)) {
2915 +                               /* freed inode! */
2916 +                               PRINT_CALLER(fname, fxn, line);
2917 +                               pr_debug(" CI6: dentry/linode=%p:%p bindex=%d "
2918 +                                        "istart/end=%d:%d\n", dentry,
2919 +                                        lower_inode, bindex, istart, iend);
2920 +                       }
2921 +                       continue;
2922 +               }
2923 +               /* if we get here, then lower_inode == NULL */
2924 +               if (bindex < istart || bindex > iend)
2925 +                       continue;
2926 +               /*
2927 +                * directories can have NULL lower inodes in b/t start/end,
2928 +                * but NOT if at the start/end range.
2929 +                */
2930 +               if (unlikely(S_ISDIR(inode->i_mode) &&
2931 +                            bindex > istart && bindex < iend))
2932 +                       continue;
2933 +               PRINT_CALLER(fname, fxn, line);
2934 +               pr_debug(" CI7: dentry/linode=%p:%p "
2935 +                        "bindex=%d istart/end=%d:%d\n",
2936 +                        dentry, lower_inode, bindex, istart, iend);
2937 +       }
2938 +
2939 +       /*
2940 +        * If it's a directory, then intermediate objects b/t start/end can
2941 +        * be NULL.  But, check that all three are NULL: lower dentry, mnt,
2942 +        * and inode.
2943 +        */
2944 +       if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
2945 +               for (bindex = dstart+1; bindex < dend; bindex++) {
2946 +                       lower_inode = unionfs_lower_inode_idx(inode, bindex);
2947 +                       lower_dentry = unionfs_lower_dentry_idx(dentry,
2948 +                                                               bindex);
2949 +                       lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2950 +                       if (unlikely(!((lower_inode && lower_dentry &&
2951 +                                       lower_mnt) ||
2952 +                                      (!lower_inode &&
2953 +                                       !lower_dentry && !lower_mnt)))) {
2954 +                               PRINT_CALLER(fname, fxn, line);
2955 +                               pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2956 +                                        "bindex=%d dstart/end=%d:%d\n",
2957 +                                        lower_mnt, lower_dentry, lower_inode,
2958 +                                        bindex, dstart, dend);
2959 +                       }
2960 +               }
2961 +       /* check if lower inode is newer than upper one (it shouldn't) */
2962 +       if (unlikely(is_newer_lower(dentry) && !is_negative_lower(dentry))) {
2963 +               PRINT_CALLER(fname, fxn, line);
2964 +               for (bindex = ibstart(inode); bindex <= ibend(inode);
2965 +                    bindex++) {
2966 +                       lower_inode = unionfs_lower_inode_idx(inode, bindex);
2967 +                       if (unlikely(!lower_inode))
2968 +                               continue;
2969 +                       pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2970 +                                "ctime/lctime=%lu.%lu/%lu.%lu\n",
2971 +                                bindex,
2972 +                                inode->i_mtime.tv_sec,
2973 +                                inode->i_mtime.tv_nsec,
2974 +                                lower_inode->i_mtime.tv_sec,
2975 +                                lower_inode->i_mtime.tv_nsec,
2976 +                                inode->i_ctime.tv_sec,
2977 +                                inode->i_ctime.tv_nsec,
2978 +                                lower_inode->i_ctime.tv_sec,
2979 +                                lower_inode->i_ctime.tv_nsec);
2980 +               }
2981 +       }
2982 +}
2983 +
2984 +void __unionfs_check_file(const struct file *file,
2985 +                         const char *fname, const char *fxn, int line)
2986 +{
2987 +       int bindex;
2988 +       int dstart, dend, fstart, fend;
2989 +       struct dentry *dentry;
2990 +       struct file *lower_file;
2991 +       struct inode *inode;
2992 +       struct super_block *sb;
2993 +       int printed_caller = 0;
2994 +
2995 +       BUG_ON(!file);
2996 +       dentry = file->f_path.dentry;
2997 +       sb = dentry->d_sb;
2998 +       dstart = dbstart(dentry);
2999 +       dend = dbend(dentry);
3000 +       BUG_ON(dstart > dend);
3001 +       fstart = fbstart(file);
3002 +       fend = fbend(file);
3003 +       BUG_ON(fstart > fend);
3004 +
3005 +       if (unlikely((fstart == -1 && fend != -1) ||
3006 +                    (fstart != -1 && fend == -1))) {
3007 +               PRINT_CALLER(fname, fxn, line);
3008 +               pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
3009 +                        file, dentry, fstart, fend);
3010 +       }
3011 +       if (unlikely(fstart != dstart)) {
3012 +               PRINT_CALLER(fname, fxn, line);
3013 +               pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
3014 +                        file, dentry, fstart, dstart);
3015 +       }
3016 +       if (unlikely(fend != dend)) {
3017 +               PRINT_CALLER(fname, fxn, line);
3018 +               pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
3019 +                        file, dentry, fend, dend);
3020 +       }
3021 +       inode = dentry->d_inode;
3022 +       if (!S_ISDIR(inode->i_mode)) {
3023 +               if (unlikely(fend != fstart)) {
3024 +                       PRINT_CALLER(fname, fxn, line);
3025 +                       pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
3026 +                                file, inode, fstart, fend);
3027 +               }
3028 +               if (unlikely(dend != dstart)) {
3029 +                       PRINT_CALLER(fname, fxn, line);
3030 +                       pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
3031 +                                file, dentry, dstart, dend);
3032 +               }
3033 +       }
3034 +
3035 +       /*
3036 +        * check for NULL lower files inside the start/end range, or
3037 +        * non-NULL lower files outside the start/end range.
3038 +        */
3039 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
3040 +               lower_file = unionfs_lower_file_idx(file, bindex);
3041 +               if (lower_file) {
3042 +                       if (unlikely(bindex < fstart || bindex > fend)) {
3043 +                               PRINT_CALLER(fname, fxn, line);
3044 +                               pr_debug(" CF5: file/lower=%p:%p bindex=%d "
3045 +                                        "fstart/end=%d:%d\n", file,
3046 +                                        lower_file, bindex, fstart, fend);
3047 +                       }
3048 +               } else {        /* lower_file == NULL */
3049 +                       if (bindex >= fstart && bindex <= fend) {
3050 +                               /*
3051 +                                * directories can have NULL lower files in
3052 +                                * b/t start/end, but NOT if at the
3053 +                                * start/end range.
3054 +                                */
3055 +                               if (unlikely(!(S_ISDIR(inode->i_mode) &&
3056 +                                              bindex > fstart &&
3057 +                                              bindex < fend))) {
3058 +                                       PRINT_CALLER(fname, fxn, line);
3059 +                                       pr_debug(" CF6: file/lower=%p:%p "
3060 +                                                "bindex=%d fstart/end=%d:%d\n",
3061 +                                                file, lower_file, bindex,
3062 +                                                fstart, fend);
3063 +                               }
3064 +                       }
3065 +               }
3066 +       }
3067 +
3068 +       __unionfs_check_dentry(dentry, fname, fxn, line);
3069 +}
3070 +
3071 +/* Debug check: an open-intent file must be on an fs named UNIONFS_NAME. */
3072 +void __unionfs_check_nd(const struct nameidata *nd,
3073 +                       const char *fname, const char *fxn, int line)
3074 +{
3075 +       const struct dentry *d;
3076 +       const char *fstype;
3077 +       int printed_caller = 0;
3078 +
3079 +       if (unlikely(!nd) || !(nd->flags & LOOKUP_OPEN))
3080 +               return;
3081 +       d = nd->intent.open.file->f_path.dentry;
3082 +       if (!d)
3083 +               return;
3084 +       fstype = d->d_sb->s_type->name;
3085 +       if (likely(!strcmp(fstype, UNIONFS_NAME)))
3086 +               return;
3087 +       PRINT_CALLER(fname, fxn, line);
3088 +       pr_debug(" CND1: lower_file of type %s\n", fstype);
3089 +       BUG();
3090 +}
3091 +
3092 +/* dump per-branch vfsmount counts (tracks leaks causing EBUSY on unmount) */
3093 +void __show_branch_counts(const struct super_block *sb,
3094 +                         const char *file, const char *fxn, int line)
3095 +{
3096 +       int bindex;
3097 +
3098 +       pr_debug("BC:");
3099 +       for (bindex = 0; bindex < sbmax(sb); bindex++) {
3100 +               struct vfsmount *mnt = NULL;
3101 +
3102 +               if (likely(sb->s_root))
3103 +                       mnt = UNIONFS_D(sb->s_root)->lower_paths[bindex].mnt;
3104 +               /* -99 is printed when no vfsmount is available */
3105 +               printk(KERN_CONT "%d:",
3106 +                      mnt ? atomic_read(&mnt->mnt_count) : -99);
3107 +       }
3108 +       printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
3109 +}
3110 +
3111 +void __show_inode_times(const struct inode *inode,
3112 +                       const char *file, const char *fxn, int line)
3113 +{
3114 +       const struct timespec *um = &inode->i_mtime, *uc = &inode->i_ctime;
3115 +       int bindex = ibstart(inode);
3116 +
3117 +       /* one line per populated branch: upper vs. lower mtime/ctime */
3118 +       for (; bindex <= ibend(inode); bindex++) {
3119 +               struct inode *lower = unionfs_lower_inode_idx(inode, bindex);
3120 +
3121 +               if (unlikely(!lower))
3122 +                       continue;
3123 +               pr_debug("IT(%lu:%d): %s:%s:%d "
3124 +                        "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3125 +                        inode->i_ino, bindex,
3126 +                        file, fxn, line,
3127 +                        um->tv_sec, um->tv_nsec,
3128 +                        lower->i_mtime.tv_sec, lower->i_mtime.tv_nsec,
3129 +                        uc->tv_sec, uc->tv_nsec,
3130 +                        lower->i_ctime.tv_sec, lower->i_ctime.tv_nsec);
3131 +       }
3132 +}
3133 +
3134 +void __show_dinode_times(const struct dentry *dentry,
3135 +                       const char *file, const char *fxn, int line)
3136 +{
3137 +       const struct inode *upper = dentry->d_inode;
3138 +       int bindex;
3139 +
3140 +       /* dump upper vs. lower mtime/ctime for each populated branch */
3141 +       for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
3142 +               struct inode *lower;
3143 +
3144 +               lower = unionfs_lower_inode_idx(upper, bindex);
3145 +               if (!lower)
3146 +                       continue;
3147 +               pr_debug("DT(%s:%lu:%d): %s:%s:%d "
3148 +                        "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3149 +                        dentry->d_name.name, upper->i_ino, bindex,
3150 +                        file, fxn, line,
3151 +                        upper->i_mtime.tv_sec, upper->i_mtime.tv_nsec,
3152 +                        lower->i_mtime.tv_sec, lower->i_mtime.tv_nsec,
3153 +                        upper->i_ctime.tv_sec, upper->i_ctime.tv_nsec,
3154 +                        lower->i_ctime.tv_sec, lower->i_ctime.tv_nsec);
3155 +       }
3156 +}
3157 +
3158 +void __show_inode_counts(const struct inode *inode,
3159 +                       const char *file, const char *fxn, int line)
3160 +{
3161 +       struct super_block *sb;
3162 +       int bindex;
3163 +
3164 +       if (unlikely(!inode)) {
3165 +               pr_debug("SiC: Null inode\n");
3166 +               return;
3167 +       }
3168 +       sb = inode->i_sb;
3169 +       /* print our i_count next to that of each populated branch */
3170 +       for (bindex = sbstart(sb); bindex <= sbend(sb); bindex++) {
3171 +               struct inode *lower = unionfs_lower_inode_idx(inode, bindex);
3172 +
3173 +               if (unlikely(!lower))
3174 +                       continue;
3175 +               pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
3176 +                        inode->i_ino, bindex, atomic_read(&inode->i_count),
3177 +                        atomic_read(&lower->i_count), file, fxn, line);
3178 +       }
3179 +}
3180 --- /dev/null
3181 +++ kernel-2.6.28/fs/unionfs/dentry.c
3182 @@ -0,0 +1,397 @@
3183 +/*
3184 + * Copyright (c) 2003-2009 Erez Zadok
3185 + * Copyright (c) 2003-2006 Charles P. Wright
3186 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3187 + * Copyright (c) 2005-2006 Junjiro Okajima
3188 + * Copyright (c) 2005      Arun M. Krishnakumar
3189 + * Copyright (c) 2004-2006 David P. Quigley
3190 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3191 + * Copyright (c) 2003      Puja Gupta
3192 + * Copyright (c) 2003      Harikesavan Krishnan
3193 + * Copyright (c) 2003-2009 Stony Brook University
3194 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
3195 + *
3196 + * This program is free software; you can redistribute it and/or modify
3197 + * it under the terms of the GNU General Public License version 2 as
3198 + * published by the Free Software Foundation.
3199 + */
3200 +
3201 +#include "union.h"
3202 +
3203 +bool is_negative_lower(const struct dentry *dentry)
3204 +{
3205 +       int bindex;
3206 +
3207 +       BUG_ON(!dentry);
3208 +       if (dbstart(dentry) < 0)        /* no lower branch range at all */
3209 +               return true;
3210 +       for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
3211 +               struct dentry *ld = unionfs_lower_dentry_idx(dentry, bindex);
3212 +
3213 +               if (!ld || !ld->d_inode)
3214 +                       continue;
3215 +               /* unhashed (i.e., unlinked) lower dentries don't count */
3216 +               if (d_deleted(ld) || (ld->d_flags & DCACHE_NFSFS_RENAMED))
3217 +                       continue;
3218 +               return false;   /* found a positive lower dentry */
3219 +       }
3220 +       return true;
3221 +}
3222 +
3223 +static inline void __dput_lowers(struct dentry *dentry, int start, int end)
3224 +{
3225 +       int i;
3226 +
3227 +       if (start < 0)          /* negative start: do nothing */
3228 +               return;
3229 +       for (i = start; i <= end; i++) {
3230 +               struct dentry *lower = unionfs_lower_dentry_idx(dentry, i);
3231 +
3232 +               if (lower) {
3233 +                       unionfs_set_lower_dentry_idx(dentry, i, NULL);
3234 +                       dput(lower);    /* slot is cleared before the put */
3235 +               }
3236 +       }
3237 +}
3238 +
3239 +/*
3240 + * Purge and invalidate as many data pages of a unionfs inode as possible.
3241 + * This is called when the lower inode has changed, and we want to force
3242 + * processes to re-get the new data.
3243 + */
3244 +static inline void purge_inode_data(struct inode *inode)
3245 +{
3246 +       /* remove all non-private mappings */
3247 +       unmap_mapping_range(inode->i_mapping, 0, 0, 0);
3248 +       /* invalidate as many pages as possible */
3249 +       invalidate_mapping_pages(inode->i_mapping, 0, -1);
3250 +       /*
3251 +        * Don't try to truncate_inode_pages here, because this could lead
3252 +        * to a deadlock between some of address_space ops and dentry
3253 +        * revalidation: the address space op is invoked with a lock on our
3254 +        * own page, and truncate_inode_pages will block on locked pages.
3255 +        */
3256 +}
3257 +
3258 +/*
3259 + * Revalidate a single file/symlink/special dentry.  Assume that info nodes
3260 + * of the @dentry and its @parent are locked.  Assume parent is valid,
3261 + * otherwise return false (and let's hope the VFS will try to re-lookup this
3262 + * dentry).  Returns true if valid, false otherwise.
3263 + */
3264 +bool __unionfs_d_revalidate(struct dentry *dentry, struct dentry *parent,
3265 +                           bool willwrite)
3266 +{
3267 +       bool valid = true;      /* default is valid */
3268 +       struct dentry *lower_dentry;
3269 +       struct dentry *result;
3270 +       int bindex, bstart, bend;
3271 +       int sbgen, dgen, pdgen;
3272 +       int positive = 0;
3273 +       int interpose_flag;
3274 +
3275 +       verify_locked(dentry);
3276 +       verify_locked(parent);
3277 +
3278 +       /* if the dentry is unhashed, do NOT revalidate */
3279 +       if (d_deleted(dentry))
3280 +               goto out;
3281 +
3282 +       dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3283 +
3284 +       if (is_newer_lower(dentry)) {
3285 +               /* root dentry is always valid */
3286 +               if (IS_ROOT(dentry)) {
3287 +                       unionfs_copy_attr_times(dentry->d_inode);
3288 +               } else {
3289 +                       /*
3290 +                        * reset generation number to zero, guaranteed to be
3291 +                        * "old"
3292 +                        */
3293 +                       dgen = 0;
3294 +                       atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3295 +               }
3296 +               if (!willwrite)
3297 +                       purge_inode_data(dentry->d_inode);
3298 +       }
3299 +
3300 +       sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3301 +
3302 +       BUG_ON(dbstart(dentry) == -1);
3303 +       if (dentry->d_inode)
3304 +               positive = 1;
3305 +
3306 +       /* if our dentry is valid, then validate all lower ones */
3307 +       if (sbgen == dgen)
3308 +               goto validate_lowers;
3309 +
3310 +       /* The root entry should always be valid */
3311 +       BUG_ON(IS_ROOT(dentry));
3312 +
3313 +       /* We can't work correctly if our parent isn't valid. */
3314 +       pdgen = atomic_read(&UNIONFS_D(parent)->generation);
3315 +
3316 +       /* Free the pointers for our inodes and this dentry. */
3317 +       path_put_lowers_all(dentry, false);
3318 +
3319 +       interpose_flag = INTERPOSE_REVAL_NEG;
3320 +       if (positive) {
3321 +               interpose_flag = INTERPOSE_REVAL;
3322 +               iput_lowers_all(dentry->d_inode, true);
3323 +       }
3324 +
3325 +       if (realloc_dentry_private_data(dentry) != 0) {
3326 +               valid = false;
3327 +               goto out;
3328 +       }
3329 +
3330 +       result = unionfs_lookup_full(dentry, parent, interpose_flag);
3331 +       if (result) {
3332 +               if (IS_ERR(result)) {
3333 +                       valid = false;
3334 +                       goto out;
3335 +               }
3336 +               /*
3337 +                * current unionfs_lookup_backend() doesn't return
3338 +                * a valid dentry
3339 +                */
3340 +               dput(dentry);
3341 +               dentry = result;
3342 +       }
3343 +
3344 +       if (unlikely(positive && is_negative_lower(dentry))) {
3345 +               /* call make_bad_inode here ? */
3346 +               d_drop(dentry);
3347 +               valid = false;
3348 +               goto out;
3349 +       }
3350 +
3351 +       /*
3352 +        * if we got here then we have revalidated our dentry and all lower
3353 +        * ones, so we can return safely.
3354 +        */
3355 +       if (!valid)             /* lower dentry revalidation failed */
3356 +               goto out;
3357 +
3358 +       /*
3359 +        * If the parent's gen no.  matches the superblock's gen no., then
3360 +        * we can update our dentry's gen no.  If they didn't match, then it
3361 +        * was OK to revalidate this dentry with a stale parent, but we'll
3362 +        * purposely not update our dentry's gen no. (so it can be redone);
3363 +        * and, we'll mark our parent dentry as invalid so it'll force it
3364 +        * (and our dentry) to be revalidated.
3365 +        */
3366 +       if (pdgen == sbgen)
3367 +               atomic_set(&UNIONFS_D(dentry)->generation, sbgen);
3368 +       goto out;
3369 +
3370 +validate_lowers:
3371 +
3372 +       /* The revalidation must occur across all branches */
3373 +       bstart = dbstart(dentry);
3374 +       bend = dbend(dentry);
3375 +       BUG_ON(bstart == -1);
3376 +       for (bindex = bstart; bindex <= bend; bindex++) {
3377 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3378 +               if (!lower_dentry || !lower_dentry->d_op
3379 +                   || !lower_dentry->d_op->d_revalidate)
3380 +                       continue;
3381 +               /*
3382 +                * Don't pass nameidata to lower file system, because we
3383 +                * don't want an arbitrary lower file being opened or
3384 +                * returned to us: it may be useless to us because of the
3385 +                * fanout nature of unionfs (cf. file/directory open-file
3386 +                * invariants).  We will open lower files as and when needed
3387 +                * later on.
3388 +                */
3389 +               if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL))
3390 +                       valid = false;
3391 +       }
3392 +
3393 +       if (!dentry->d_inode ||
3394 +           ibstart(dentry->d_inode) < 0 ||
3395 +           ibend(dentry->d_inode) < 0) {
3396 +               valid = false;
3397 +               goto out;
3398 +       }
3399 +
3400 +       if (valid) {
3401 +               /*
3402 +                * If we get here, and we copy the meta-data from the lower
3403 +                * inode to our inode, then it is vital that we have already
3404 +                * purged all unionfs-level file data.  We do that earlier
3405 +                * in this function (when the lower inode is newer and
3406 +                * !willwrite) by calling purge_inode_data.
3407 +                */
3408 +               unionfs_copy_attr_all(dentry->d_inode,
3409 +                                     unionfs_lower_inode(dentry->d_inode));
3410 +               fsstack_copy_inode_size(dentry->d_inode,
3411 +                                       unionfs_lower_inode(dentry->d_inode));
3412 +       }
3413 +
3414 +out:
3415 +       return valid;
3416 +}
3417 +
3418 +/*
3419 + * Determine if the lower inode objects have changed from below the unionfs
3420 + * inode.  Return true if changed, false otherwise.
3421 + *
3422 + * We check if the mtime or ctime have changed.  However, the inode times
3423 + * can be changed by anyone without much protection, including
3424 + * asynchronously.  This can sometimes cause unionfs to find that the lower
3425 + * file system doesn't change its inode times quickly enough, resulting in a
3426 + * false positive indication (which is harmless, it just makes unionfs do
3427 + * extra work in re-validating the objects).  To minimize the chances of
3428 + * these situations, we still consider such small time changes valid, but we
3429 + * don't print debugging messages unless the time changes are greater than
3430 + * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin)
3431 + * because significant changes are more likely due to users manually
3432 + * touching lower files.
3433 + */
3434 +bool is_newer_lower(const struct dentry *dentry)
3435 +{
3436 +       int bindex;
3437 +       struct inode *inode;
3438 +       struct inode *lower_inode;
3439 +
3440 +       /* ignore if we're called on semi-initialized dentries/inodes */
3441 +       if (!dentry || !UNIONFS_D(dentry))
3442 +               return false;
3443 +       inode = dentry->d_inode;
3444 +       if (!inode || !UNIONFS_I(inode)->lower_inodes ||
3445 +           ibstart(inode) < 0 || ibend(inode) < 0)
3446 +               return false;
3447 +
3448 +       for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3449 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
3450 +               if (!lower_inode)
3451 +                       continue;
3452 +
3453 +               /* check if mtime/ctime have changed */
3454 +               if (unlikely(timespec_compare(&inode->i_mtime,
3455 +                                             &lower_inode->i_mtime) < 0)) {
3456 +                       if ((lower_inode->i_mtime.tv_sec -
3457 +                            inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3458 +                               pr_info("unionfs: new lower inode mtime "
3459 +                                       "(bindex=%d, name=%s)\n", bindex,
3460 +                                       dentry->d_name.name);
3461 +                               show_dinode_times(dentry);
3462 +                       }
3463 +                       return true;
3464 +               }
3465 +               if (unlikely(timespec_compare(&inode->i_ctime,
3466 +                                             &lower_inode->i_ctime) < 0)) {
3467 +                       if ((lower_inode->i_ctime.tv_sec -
3468 +                            inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3469 +                               pr_info("unionfs: new lower inode ctime "
3470 +                                       "(bindex=%d, name=%s)\n", bindex,
3471 +                                       dentry->d_name.name);
3472 +                               show_dinode_times(dentry);
3473 +                       }
3474 +                       return true;
3475 +               }
3476 +       }
3477 +
3478 +       /*
3479 +        * Last check: if this is a positive dentry, but somehow all lower
3480 +        * dentries are negative or unhashed, then this dentry needs to be
3481 +        * revalidated, because someone probably deleted the objects from
3482 +        * the lower branches directly.
3483 +        */
3484 +       if (is_negative_lower(dentry))
3485 +               return true;
3486 +
3487 +       return false;           /* default: lower is not newer */
3488 +}
3489 +
3490 +static int unionfs_d_revalidate(struct dentry *dentry,
3491 +                               struct nameidata *nd_unused)
3492 +{
3493 +       struct dentry *parent;
3494 +       bool ok;
3495 +
3496 +       /* take locks in order: superblock (read), parent, then dentry */
3497 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3498 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3499 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3500 +
3501 +       ok = __unionfs_d_revalidate(dentry, parent, false);
3502 +       if (!ok) {
3503 +               d_drop(dentry);
3504 +       } else {
3505 +               unionfs_postcopyup_setmnt(dentry);
3506 +               unionfs_check_dentry(dentry);
3507 +       }
3508 +
3509 +       unionfs_unlock_dentry(dentry);
3510 +       unionfs_unlock_parent(dentry, parent);
3511 +       unionfs_read_unlock(dentry->d_sb);
3512 +
3513 +       return ok ? 1 : 0;      /* 1 means valid for the VFS */
3514 +}
3515 +
3516 +/*
3517 + * Release the lower objects of @dentry (when it has any) and free its
3518 + * private data, all while holding the superblock read lock.
3519 + */
3520 +static void unionfs_d_release(struct dentry *dentry)
3521 +{
3522 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3523 +
3524 +       /* skip if no lower branches (no private data attached) */
3525 +       if (likely(UNIONFS_D(dentry))) {
3526 +               /* must lock our branch configuration here */
3527 +               unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3528 +               unionfs_check_dentry(dentry);
3529 +               /*
3530 +                * this could be a negative dentry, e.g., due to a (normal)
3531 +                * failed lookup; release the lowers only for a valid start
3532 +                */
3533 +               if (dbstart(dentry) >= 0)
3534 +                       path_put_lowers_all(dentry, true);
3535 +               unionfs_unlock_dentry(dentry);
3536 +       }
3537 +
3538 +       free_dentry_private_data(dentry);
3539 +       unionfs_read_unlock(dentry->d_sb);
3540 +}
3541 +
3542 +/*
3543 + * Called when we're removing the last reference to our dentry.  So we
3544 + * should drop all lower references too.
3545 + */
3546 +static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
3547 +{
3548 +       int rc;
3549 +
3550 +       BUG_ON(!dentry);
3551 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3552 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3553 +       /* NOTE(review): UNIONFS_D() is tested only after locking the dentry */
3554 +       if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
3555 +               goto drop_lower_inodes;
3556 +       path_put_lowers_all(dentry, false);
3557 +
3558 +drop_lower_inodes:
3559 +       rc = atomic_read(&inode->i_count);      /* our inode's refcount */
3560 +       if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
3561 +               /* see Documentation/filesystems/unionfs/issues.txt */
3562 +               lockdep_off();
3563 +               iput(unionfs_lower_inode(inode));
3564 +               lockdep_on();
3565 +               unionfs_set_lower_inode(inode, NULL);
3566 +               /* XXX: may need to set start/end to -1? */
3567 +       }
3568 +
3569 +       iput(inode);
3570 +
3571 +       unionfs_unlock_dentry(dentry);
3572 +       unionfs_read_unlock(dentry->d_sb);
3573 +}
3574 +
3575 +struct dentry_operations unionfs_dops = {
3576 +       .d_revalidate   = unionfs_d_revalidate, /* returns 1 if still valid */
3577 +       .d_release      = unionfs_d_release,    /* frees private data */
3578 +       .d_iput         = unionfs_d_iput,       /* puts lowers, then iput */
3579 +};
3580 --- /dev/null
3581 +++ kernel-2.6.28/fs/unionfs/dirfops.c
3582 @@ -0,0 +1,302 @@
3583 +/*
3584 + * Copyright (c) 2003-2009 Erez Zadok
3585 + * Copyright (c) 2003-2006 Charles P. Wright
3586 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3587 + * Copyright (c) 2005-2006 Junjiro Okajima
3588 + * Copyright (c) 2005      Arun M. Krishnakumar
3589 + * Copyright (c) 2004-2006 David P. Quigley
3590 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3591 + * Copyright (c) 2003      Puja Gupta
3592 + * Copyright (c) 2003      Harikesavan Krishnan
3593 + * Copyright (c) 2003-2009 Stony Brook University
3594 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
3595 + *
3596 + * This program is free software; you can redistribute it and/or modify
3597 + * it under the terms of the GNU General Public License version 2 as
3598 + * published by the Free Software Foundation.
3599 + */
3600 +
3601 +#include "union.h"
3602 +
3603 +/* Sanity check: rdstate's offset and cookie must be below their limits. */
3604 +static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3605 +{
3606 +       BUG_ON(rdstate->offset >= DIREOF);
3607 +       BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3608 +}
3609 +
3610 +struct unionfs_getdents_callback {
3611 +       struct unionfs_dir_state *rdstate;
3612 +       void *dirent;           /* passed through to ->filldir */
3613 +       int entries_written;    /* entries newly recorded by unionfs_filldir */
3614 +       int filldir_called;     /* number of unionfs_filldir invocations */
3615 +       int filldir_error;      /* last error seen in unionfs_filldir */
3616 +       filldir_t filldir;      /* callback invoked by unionfs_filldir */
3617 +       struct super_block *sb;
3618 +};
3619 +
3620 +/* based on generic filldir in fs/readdir.c */
3621 +static int unionfs_filldir(void *dirent, const char *oname, int namelen,
3622 +                          loff_t offset, u64 ino, unsigned int d_type)
3623 +{
3624 +       struct unionfs_getdents_callback *buf = dirent;
3625 +       struct filldir_node *found = NULL;
3626 +       int err = 0;
3627 +       int is_whiteout;
3628 +       char *name = (char *) oname;
3629 +
3630 +       buf->filldir_called++;
3631 +
3632 +       is_whiteout = is_whiteout_name(&name, &namelen);
3633 +
3634 +       found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3635 +
3636 +       if (found) {
3637 +               /*
3638 +                * If we had non-whiteout entry in dir cache, then mark it
3639 +                * as a whiteout but leave it in the dir cache.
3640 +                */
3641 +               if (is_whiteout && !found->whiteout)
3642 +                       found->whiteout = is_whiteout;
3643 +               goto out;
3644 +       }
3645 +
3646 +       /* if 'name' isn't a whiteout, filldir it. */
3647 +       if (!is_whiteout) {
3648 +               off_t pos = rdstate2offset(buf->rdstate);
3649 +               u64 unionfs_ino = ino;
3650 +
3651 +               err = buf->filldir(buf->dirent, name, namelen, pos,
3652 +                                  unionfs_ino, d_type);
3653 +               buf->rdstate->offset++;
3654 +               verify_rdstate_offset(buf->rdstate);
3655 +       }
3656 +       /*
3657 +        * If we did fill it, stuff it in our hash, otherwise return an
3658 +        * error.
3659 +        */
3660 +       if (err) {
3661 +               buf->filldir_error = err;
3662 +               goto out;
3663 +       }
3664 +       buf->entries_written++;
3665 +       err = add_filldir_node(buf->rdstate, name, namelen,
3666 +                              buf->rdstate->bindex, is_whiteout);
3667 +       if (err)
3668 +               buf->filldir_error = err;
3669 +
3670 +out:
3671 +       return err;
3672 +}
3673 +
3674 +static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3675 +{
3676 +       int err = 0;
3677 +       struct file *lower_file = NULL;
3678 +       struct dentry *dentry = file->f_path.dentry;
3679 +       struct dentry *parent;
3680 +       struct inode *inode = NULL;
3681 +       /* zeroed: filldir_error is tested below even if no branch is read */
3682 +       struct unionfs_getdents_callback buf = { .filldir_error = 0 };
3683 +       struct unionfs_dir_state *uds;
3684 +       int bend;
3685 +       loff_t offset;
3686 +
3687 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3688 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3689 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3690 +
3691 +       err = unionfs_file_revalidate(file, parent, false);
3692 +       if (unlikely(err))
3693 +               goto out;
3694 +
3695 +       inode = dentry->d_inode;
3696 +       uds = UNIONFS_F(file)->rdstate;
3697 +       if (!uds) {
3698 +               if (file->f_pos == DIREOF) {
3699 +                       goto out;
3700 +               } else if (file->f_pos > 0) {
3701 +                       uds = find_rdstate(inode, file->f_pos);
3702 +                       if (unlikely(!uds)) {
3703 +                               err = -ESTALE;
3704 +                               goto out;
3705 +                       }
3706 +                       UNIONFS_F(file)->rdstate = uds;
3707 +               } else {
3708 +                       init_rdstate(file);
3709 +                       uds = UNIONFS_F(file)->rdstate;
3710 +               }
3711 +       }
3712 +       bend = fbend(file);
3713 +
3714 +       while (uds->bindex <= bend) {
3715 +               lower_file = unionfs_lower_file_idx(file, uds->bindex);
3716 +               if (!lower_file) {
3717 +                       uds->bindex++;
3718 +                       uds->dirpos = 0;
3719 +                       continue;
3720 +               }
3721 +
3722 +               /* prepare callback buffer */
3723 +               buf.filldir_called = 0;
3724 +               buf.filldir_error = 0;
3725 +               buf.entries_written = 0;
3726 +               buf.dirent = dirent;
3727 +               buf.filldir = filldir;
3728 +               buf.rdstate = uds;
3729 +               buf.sb = inode->i_sb;
3730 +
3731 +               /* Read starting from where we last left off. */
3732 +               offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3733 +               if (offset < 0) {
3734 +                       err = offset;
3735 +                       goto out;
3736 +               }
3737 +               err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3738 +
3739 +               /* Save the position for when we continue. */
3740 +               offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3741 +               if (offset < 0) {
3742 +                       err = offset;
3743 +                       goto out;
3744 +               }
3745 +               uds->dirpos = offset;
3746 +
3747 +               /* Copy the atime. */
3748 +               fsstack_copy_attr_atime(inode,
3749 +                                       lower_file->f_path.dentry->d_inode);
3750 +
3751 +               if (err < 0)
3752 +                       goto out;
3753 +
3754 +               if (buf.filldir_error)
3755 +                       break;
3756 +
3757 +               if (!buf.entries_written) {
3758 +                       uds->bindex++;
3759 +                       uds->dirpos = 0;
3760 +               }
3761 +       }
3762 +
3763 +       if (!buf.filldir_error && uds->bindex >= bend) {
3764 +               /* Save the number of hash entries for next time. */
3765 +               UNIONFS_I(inode)->hashsize = uds->hashentries;
3766 +               free_rdstate(uds);
3767 +               UNIONFS_F(file)->rdstate = NULL;
3768 +               file->f_pos = DIREOF;
3769 +       } else {
3770 +               file->f_pos = rdstate2offset(uds);
3771 +       }
3772 +
3773 +out:
3774 +       if (!err)
3775 +               unionfs_check_file(file);
3776 +       unionfs_unlock_dentry(dentry);
3777 +       unionfs_unlock_parent(dentry, parent);
3778 +       unionfs_read_unlock(dentry->d_sb);
3779 +       return err;
3780 +}
3781 +
3782 +/*
3783 + * This is not meant to be a generic repositioning function.  If you do
3784 + * things that aren't supported, then we return EINVAL.
3785 + *
3786 + * What is allowed:
3787 + *  (1) seeking to the same position that you are currently at
3788 + *     This really has no effect, but returns where you are.
3789 + *  (2) seeking to the beginning of the file
3790 + *     This throws out all state, and lets you begin again.
3791 + */
3792 +static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3793 +{
3794 +       struct unionfs_dir_state *rdstate;
3795 +       struct dentry *dentry = file->f_path.dentry;
3796 +       struct dentry *parent;
3797 +       loff_t err;
3798 +
3799 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3800 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3801 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3802 +
3803 +       err = unionfs_file_revalidate(file, parent, false);
3804 +       if (unlikely(err))
3805 +               goto out;
3806 +
3807 +       rdstate = UNIONFS_F(file)->rdstate;
3808 +
3809 +       /*
3810 +        * we let users seek to their current position, but not anywhere
3811 +        * else.
3812 +        */
3813 +       if (!offset) {
3814 +               switch (origin) {
3815 +               case SEEK_SET:
3816 +                       if (rdstate) {
3817 +                               free_rdstate(rdstate);
3818 +                               UNIONFS_F(file)->rdstate = NULL;
3819 +                       }
3820 +                       init_rdstate(file);
3821 +                       err = 0;
3822 +                       break;
3823 +               case SEEK_CUR:
3824 +                       err = file->f_pos;
3825 +                       break;
3826 +               case SEEK_END:
3827 +               default:
3828 +                       /* Unsupported, because we would break everything.  */
3829 +                       err = -EINVAL;
3830 +                       break;
3831 +               }
3832 +       } else {
3833 +               switch (origin) {
3834 +               case SEEK_SET:
3835 +                       if (rdstate) {
3836 +                               if (offset == rdstate2offset(rdstate))
3837 +                                       err = offset;
3838 +                               else if (file->f_pos == DIREOF)
3839 +                                       err = DIREOF;
3840 +                               else
3841 +                                       err = -EINVAL;
3842 +                       } else {
3843 +                               struct inode *inode;
3844 +                               inode = dentry->d_inode;
3845 +                               rdstate = find_rdstate(inode, offset);
3846 +                               if (rdstate) {
3847 +                                       UNIONFS_F(file)->rdstate = rdstate;
3848 +                                       err = rdstate->offset;
3849 +                               } else {
3850 +                                       err = -EINVAL;
3851 +                               }
3852 +                       }
3853 +                       break;
3854 +               default:
3855 +                       /* SEEK_CUR, SEEK_END, or unknown: unsupported. */
3856 +                       err = -EINVAL;
3857 +                       break;
3858 +               }
3859 +       }
3860 +
3861 +out:
3862 +       if (!err)
3863 +               unionfs_check_file(file);
3864 +       unionfs_unlock_dentry(dentry);
3865 +       unionfs_unlock_parent(dentry, parent);
3866 +       unionfs_read_unlock(dentry->d_sb);
3867 +       return err;
3868 +}
3869 +
3870 +/*
3871 + * Trimmed directory operations: we shouldn't pass everything down since
3872 + * we don't want to operate on partial directories.
3873 + */
3874 +struct file_operations unionfs_dir_fops = {
3875 +       .llseek         = unionfs_dir_llseek,
3876 +       .read           = generic_read_dir,
3877 +       .readdir        = unionfs_readdir,
3878 +       .unlocked_ioctl = unionfs_ioctl,
3879 +       .open           = unionfs_open,
3880 +       .release        = unionfs_file_release,
3881 +       .flush          = unionfs_flush,
3882 +       .fsync          = unionfs_fsync,
3883 +       .fasync         = unionfs_fasync,
3884 +};
3885 --- /dev/null
3886 +++ kernel-2.6.28/fs/unionfs/dirhelper.c
3887 @@ -0,0 +1,158 @@
3888 +/*
3889 + * Copyright (c) 2003-2009 Erez Zadok
3890 + * Copyright (c) 2003-2006 Charles P. Wright
3891 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3892 + * Copyright (c) 2005-2006 Junjiro Okajima
3893 + * Copyright (c) 2005      Arun M. Krishnakumar
3894 + * Copyright (c) 2004-2006 David P. Quigley
3895 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3896 + * Copyright (c) 2003      Puja Gupta
3897 + * Copyright (c) 2003      Harikesavan Krishnan
3898 + * Copyright (c) 2003-2009 Stony Brook University
3899 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
3900 + *
3901 + * This program is free software; you can redistribute it and/or modify
3902 + * it under the terms of the GNU General Public License version 2 as
3903 + * published by the Free Software Foundation.
3904 + */
3905 +
3906 +#include "union.h"
3907 +
3908 +#define RD_NONE 0
3909 +#define RD_CHECK_EMPTY 1
3910 +/* The callback structure for check_empty. */
3911 +struct unionfs_rdutil_callback {
3912 +       int err;
3913 +       int filldir_called;
3914 +       struct unionfs_dir_state *rdstate;
3915 +       int mode;
3916 +};
3917 +
3918 +/* This filldir function makes sure only whiteouts exist within a directory. */
3919 +static int readdir_util_callback(void *dirent, const char *oname, int namelen,
3920 +                                loff_t offset, u64 ino, unsigned int d_type)
3921 +{
3922 +       int err = 0;
3923 +       struct unionfs_rdutil_callback *buf = dirent;
3924 +       int is_whiteout;
3925 +       struct filldir_node *found;
3926 +       char *name = (char *) oname;
3927 +
3928 +       buf->filldir_called = 1;
3929 +
3930 +       if (name[0] == '.' && (namelen == 1 ||
3931 +                              (name[1] == '.' && namelen == 2)))
3932 +               goto out;
3933 +
3934 +       is_whiteout = is_whiteout_name(&name, &namelen);
3935 +
3936 +       found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3937 +       /* If it was found in the table there was a previous whiteout. */
3938 +       if (found)
3939 +               goto out;
3940 +
3941 +       /*
3942 +        * if it wasn't found and isn't a whiteout, the directory isn't
3943 +        * empty.
3944 +        */
3945 +       err = -ENOTEMPTY;
3946 +       if ((buf->mode == RD_CHECK_EMPTY) && !is_whiteout)
3947 +               goto out;
3948 +
3949 +       err = add_filldir_node(buf->rdstate, name, namelen,
3950 +                              buf->rdstate->bindex, is_whiteout);
3951 +
3952 +out:
3953 +       buf->err = err;
3954 +       return err;
3955 +}
3956 +
3957 +/* Is a directory logically empty? */
3958 +int check_empty(struct dentry *dentry, struct dentry *parent,
3959 +               struct unionfs_dir_state **namelist)
3960 +{
3961 +       int err = 0;
3962 +       struct dentry *lower_dentry = NULL;
3963 +       struct vfsmount *mnt;
3964 +       struct super_block *sb;
3965 +       struct file *lower_file;
3966 +       struct unionfs_rdutil_callback *buf = NULL;
3967 +       int bindex, bstart, bend, bopaque;
3968 +
3969 +       sb = dentry->d_sb;
3970 +
3971 +
3972 +       BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3973 +
3974 +       err = unionfs_partial_lookup(dentry, parent);
3975 +       if (err)
3976 +               goto out;
3977 +
3978 +       bstart = dbstart(dentry);
3979 +       bend = dbend(dentry);
3980 +       bopaque = dbopaque(dentry);
3981 +       if (0 <= bopaque && bopaque < bend)
3982 +               bend = bopaque;
3983 +
3984 +       buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
3985 +       if (unlikely(!buf)) {
3986 +               err = -ENOMEM;
3987 +               goto out;
3988 +       }
3989 +       buf->err = 0;
3990 +       buf->mode = RD_CHECK_EMPTY;
3991 +       buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
3992 +       if (unlikely(!buf->rdstate)) {
3993 +               err = -ENOMEM;
3994 +               goto out;
3995 +       }
3996 +
3997 +       /* Read each lower directory using readdir_util_callback as filldir. */
3998 +       for (bindex = bstart; bindex <= bend; bindex++) {
3999 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4000 +               if (!lower_dentry)
4001 +                       continue;
4002 +               if (!lower_dentry->d_inode)
4003 +                       continue;
4004 +               if (!S_ISDIR(lower_dentry->d_inode->i_mode))
4005 +                       continue;
4006 +
4007 +               dget(lower_dentry);
4008 +               mnt = unionfs_mntget(dentry, bindex);
4009 +               branchget(sb, bindex);
4010 +               lower_file = dentry_open(lower_dentry, mnt, O_RDONLY);
4011 +               if (IS_ERR(lower_file)) {
4012 +                       err = PTR_ERR(lower_file);
4013 +                       branchput(sb, bindex);
4014 +                       goto out;
4015 +               }
4016 +
4017 +               do {
4018 +                       buf->filldir_called = 0;
4019 +                       buf->rdstate->bindex = bindex;
4020 +                       err = vfs_readdir(lower_file,
4021 +                                         readdir_util_callback, buf);
4022 +                       if (buf->err)
4023 +                               err = buf->err;
4024 +               } while ((err >= 0) && buf->filldir_called);
4025 +
4026 +               /* fput calls dput for lower_dentry */
4027 +               fput(lower_file);
4028 +               branchput(sb, bindex);
4029 +
4030 +               if (err < 0)
4031 +                       goto out;
4032 +       }
4033 +
4034 +out:
4035 +       if (buf) {
4036 +               if (namelist && !err)
4037 +                       *namelist = buf->rdstate;
4038 +               else if (buf->rdstate)
4039 +                       free_rdstate(buf->rdstate);
4040 +               kfree(buf);
4041 +       }
4042 +
4043 +
4044 +       return err;
4045 +}
4046 --- /dev/null
4047 +++ kernel-2.6.28/fs/unionfs/fanout.h
4048 @@ -0,0 +1,407 @@
4049 +/*
4050 + * Copyright (c) 2003-2009 Erez Zadok
4051 + * Copyright (c) 2003-2006 Charles P. Wright
4052 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4053 + * Copyright (c) 2005      Arun M. Krishnakumar
4054 + * Copyright (c) 2004-2006 David P. Quigley
4055 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4056 + * Copyright (c) 2003      Puja Gupta
4057 + * Copyright (c) 2003      Harikesavan Krishnan
4058 + * Copyright (c) 2003-2009 Stony Brook University
4059 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
4060 + *
4061 + * This program is free software; you can redistribute it and/or modify
4062 + * it under the terms of the GNU General Public License version 2 as
4063 + * published by the Free Software Foundation.
4064 + */
4065 +
4066 +#ifndef _FANOUT_H_
4067 +#define _FANOUT_H_
4068 +
4069 +/*
4070 + * Inode to private data
4071 + *
4072 + * Since we use containers and the struct inode is _inside_ the
4073 + * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
4074 + * inode pointer), return a valid non-NULL pointer.
4075 + */
4076 +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4077 +{
4078 +       return container_of(inode, struct unionfs_inode_info, vfs_inode);
4079 +}
4080 +
4081 +#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4082 +#define ibend(ino) (UNIONFS_I(ino)->bend)
4083 +
4084 +/* Dentry to private data */
4085 +#define UNIONFS_D(dent) ((struct unionfs_dentry_info *)(dent)->d_fsdata)
4086 +#define dbstart(dent) (UNIONFS_D(dent)->bstart)
4087 +#define dbend(dent) (UNIONFS_D(dent)->bend)
4088 +#define dbopaque(dent) (UNIONFS_D(dent)->bopaque)
4089 +
4090 +/* Superblock to private data */
4091 +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4092 +#define sbstart(sb) 0
4093 +#define sbend(sb) (UNIONFS_SB(sb)->bend)
4094 +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4095 +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4096 +
4097 +/* File to private Data */
4098 +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4099 +#define fbstart(file) (UNIONFS_F(file)->bstart)
4100 +#define fbend(file) (UNIONFS_F(file)->bend)
4101 +
4102 +/* macros to manipulate branch IDs stored in our superblock */
4103 +static inline int branch_id(struct super_block *sb, int index)
4104 +{
4105 +       BUG_ON(!sb || index < 0);
4106 +       return UNIONFS_SB(sb)->data[index].branch_id;
4107 +}
4108 +
4109 +static inline void set_branch_id(struct super_block *sb, int index, int val)
4110 +{
4111 +       BUG_ON(!sb || index < 0);
4112 +       UNIONFS_SB(sb)->data[index].branch_id = val;
4113 +}
4114 +
4115 +static inline void new_branch_id(struct super_block *sb, int index)
4116 +{
4117 +       BUG_ON(!sb || index < 0);
4118 +       set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4119 +}
4120 +
4121 +/*
4122 + * Find new index of matching branch with an existing superblock of a known
4123 + * (possibly old) id.  This is needed because branches could have been
4124 + * added/deleted causing the branches of any open files to shift.
4125 + *
4126 + * @sb: the new superblock which may have new/different branch IDs
4127 + * @id: the old/existing id we're looking for
4128 + * Returns index of newly found branch (0 or greater), -1 otherwise.
4129 + */
4130 +static inline int branch_id_to_idx(struct super_block *sb, int id)
4131 +{
4132 +       int i;
4133 +       for (i = 0; i < sbmax(sb); i++) {
4134 +               if (branch_id(sb, i) == id)
4135 +                       return i;
4136 +       }
4137 +       /* in the non-ODF code, this should really never happen */
4138 +       printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4139 +       return -1;
4140 +}
4141 +
4142 +/* File to lower file. */
4143 +static inline struct file *unionfs_lower_file(const struct file *f)
4144 +{
4145 +       BUG_ON(!f);
4146 +       return UNIONFS_F(f)->lower_files[fbstart(f)];
4147 +}
4148 +
4149 +static inline struct file *unionfs_lower_file_idx(const struct file *f,
4150 +                                                 int index)
4151 +{
4152 +       BUG_ON(!f || index < 0);
4153 +       return UNIONFS_F(f)->lower_files[index];
4154 +}
4155 +
4156 +static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4157 +                                             struct file *val)
4158 +{
4159 +       BUG_ON(!f || index < 0);
4160 +       UNIONFS_F(f)->lower_files[index] = val;
4161 +       /* save branch ID (may be redundant?) */
4162 +       UNIONFS_F(f)->saved_branch_ids[index] =
4163 +               branch_id((f)->f_path.dentry->d_sb, index);
4164 +}
4165 +
4166 +static inline void unionfs_set_lower_file(struct file *f, struct file *val)
4167 +{
4168 +       BUG_ON(!f);
4169 +       unionfs_set_lower_file_idx((f), fbstart(f), (val));
4170 +}
4171 +
4172 +/* Inode to lower inode. */
4173 +static inline struct inode *unionfs_lower_inode(const struct inode *i)
4174 +{
4175 +       BUG_ON(!i);
4176 +       return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4177 +}
4178 +
4179 +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4180 +                                                   int index)
4181 +{
4182 +       BUG_ON(!i || index < 0);
4183 +       return UNIONFS_I(i)->lower_inodes[index];
4184 +}
4185 +
4186 +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4187 +                                              struct inode *val)
4188 +{
4189 +       BUG_ON(!i || index < 0);
4190 +       UNIONFS_I(i)->lower_inodes[index] = val;
4191 +}
4192 +
4193 +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4194 +{
4195 +       BUG_ON(!i);
4196 +       UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4197 +}
4198 +
4199 +/* Superblock to lower superblock. */
4200 +static inline struct super_block *unionfs_lower_super(
4201 +                                       const struct super_block *sb)
4202 +{
4203 +       BUG_ON(!sb);
4204 +       return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4205 +}
4206 +
4207 +static inline struct super_block *unionfs_lower_super_idx(
4208 +                                       const struct super_block *sb,
4209 +                                       int index)
4210 +{
4211 +       BUG_ON(!sb || index < 0);
4212 +       return UNIONFS_SB(sb)->data[index].sb;
4213 +}
4214 +
4215 +static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4216 +                                              int index,
4217 +                                              struct super_block *val)
4218 +{
4219 +       BUG_ON(!sb || index < 0);
4220 +       UNIONFS_SB(sb)->data[index].sb = val;
4221 +}
4222 +
4223 +static inline void unionfs_set_lower_super(struct super_block *sb,
4224 +                                          struct super_block *val)
4225 +{
4226 +       BUG_ON(!sb);
4227 +       UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4228 +}
4229 +
4230 +/* Branch count macros. */
4231 +static inline int branch_count(const struct super_block *sb, int index)
4232 +{
4233 +       BUG_ON(!sb || index < 0);
4234 +       return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4235 +}
4236 +
4237 +static inline void set_branch_count(struct super_block *sb, int index, int val)
4238 +{
4239 +       BUG_ON(!sb || index < 0);
4240 +       atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4241 +}
4242 +
4243 +static inline void branchget(struct super_block *sb, int index)
4244 +{
4245 +       BUG_ON(!sb || index < 0);
4246 +       atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4247 +}
4248 +
4249 +static inline void branchput(struct super_block *sb, int index)
4250 +{
4251 +       BUG_ON(!sb || index < 0);
4252 +       atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4253 +}
4254 +
4255 +/* Dentry macros */
4256 +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4257 +                                               struct dentry *val)
4258 +{
4259 +       BUG_ON(!dent || index < 0);
4260 +       UNIONFS_D(dent)->lower_paths[index].dentry = val;
4261 +}
4262 +
4263 +static inline struct dentry *unionfs_lower_dentry_idx(
4264 +                               const struct dentry *dent,
4265 +                               int index)
4266 +{
4267 +       BUG_ON(!dent || index < 0);
4268 +       return UNIONFS_D(dent)->lower_paths[index].dentry;
4269 +}
4270 +
4271 +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
4272 +{
4273 +       BUG_ON(!dent);
4274 +       return unionfs_lower_dentry_idx(dent, dbstart(dent));
4275 +}
4276 +
4277 +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4278 +                                            struct vfsmount *mnt)
4279 +{
4280 +       BUG_ON(!dent || index < 0);
4281 +       UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4282 +}
4283 +
4284 +static inline struct vfsmount *unionfs_lower_mnt_idx(
4285 +                                       const struct dentry *dent,
4286 +                                       int index)
4287 +{
4288 +       BUG_ON(!dent || index < 0);
4289 +       return UNIONFS_D(dent)->lower_paths[index].mnt;
4290 +}
4291 +
4292 +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
4293 +{
4294 +       BUG_ON(!dent);
4295 +       return unionfs_lower_mnt_idx(dent, dbstart(dent));
4296 +}
4297 +
4298 +/* Macros for locking a dentry. */
4299 +enum unionfs_dentry_lock_class {
4300 +       UNIONFS_DMUTEX_NORMAL,
4301 +       UNIONFS_DMUTEX_ROOT,
4302 +       UNIONFS_DMUTEX_PARENT,
4303 +       UNIONFS_DMUTEX_CHILD,
4304 +       UNIONFS_DMUTEX_WHITEOUT,
4305 +       UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
4306 +       UNIONFS_DMUTEX_REVAL_CHILD,   /* for file/dentry revalidate */
4307 +};
4308 +
4309 +static inline void unionfs_lock_dentry(struct dentry *d,
4310 +                                      unsigned int subclass)
4311 +{
4312 +       BUG_ON(!d);
4313 +       mutex_lock_nested(&UNIONFS_D(d)->lock, subclass);
4314 +}
4315 +
4316 +static inline void unionfs_unlock_dentry(struct dentry *d)
4317 +{
4318 +       BUG_ON(!d);
4319 +       mutex_unlock(&UNIONFS_D(d)->lock);
4320 +}
4321 +
4322 +static inline struct dentry *unionfs_lock_parent(struct dentry *d,
4323 +                                                unsigned int subclass)
4324 +{
4325 +       struct dentry *p;
4326 +
4327 +       BUG_ON(!d);
4328 +       p = dget_parent(d);
4329 +       if (p != d)
4330 +               mutex_lock_nested(&UNIONFS_D(p)->lock, subclass);
4331 +       return p;
4332 +}
4333 +
4334 +static inline void unionfs_unlock_parent(struct dentry *d, struct dentry *p)
4335 +{
4336 +       BUG_ON(!d);
4337 +       BUG_ON(!p);
4338 +       if (p != d) {
4339 +               BUG_ON(!mutex_is_locked(&UNIONFS_D(p)->lock));
4340 +               mutex_unlock(&UNIONFS_D(p)->lock);
4341 +       }
4342 +       dput(p);
4343 +}
4344 +
4345 +static inline void verify_locked(struct dentry *d)
4346 +{
4347 +       BUG_ON(!d);
4348 +       BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4349 +}
4350 +
4351 +/* macros to put lower objects */
4352 +
4353 +/*
4354 + * iput lower inodes of an unionfs inode, from bstart to bend.  If
4355 + * @free_lower is true, then also kfree the memory used to hold the lower
4356 + * object pointers.
4357 + */
4358 +static inline void iput_lowers(struct inode *inode,
4359 +                              int bstart, int bend, bool free_lower)
4360 +{
4361 +       struct inode *lower_inode;
4362 +       int bindex;
4363 +
4364 +       BUG_ON(!inode);
4365 +       BUG_ON(!UNIONFS_I(inode));
4366 +       BUG_ON(bstart < 0);
4367 +
4368 +       for (bindex = bstart; bindex <= bend; bindex++) {
4369 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
4370 +               if (lower_inode) {
4371 +                       unionfs_set_lower_inode_idx(inode, bindex, NULL);
4372 +                       /* see Documentation/filesystems/unionfs/issues.txt */
4373 +                       lockdep_off();
4374 +                       iput(lower_inode);
4375 +                       lockdep_on();
4376 +               }
4377 +       }
4378 +
4379 +       if (free_lower) {
4380 +               kfree(UNIONFS_I(inode)->lower_inodes);
4381 +               UNIONFS_I(inode)->lower_inodes = NULL;
4382 +       }
4383 +}
4384 +
4385 +/* iput all lower inodes, and reset start/end branch indices to -1 */
4386 +static inline void iput_lowers_all(struct inode *inode, bool free_lower)
4387 +{
4388 +       int bstart, bend;
4389 +
4390 +       BUG_ON(!inode);
4391 +       BUG_ON(!UNIONFS_I(inode));
4392 +       bstart = ibstart(inode);
4393 +       bend = ibend(inode);
4394 +       BUG_ON(bstart < 0);
4395 +
4396 +       iput_lowers(inode, bstart, bend, free_lower);
4397 +       ibstart(inode) = ibend(inode) = -1;
4398 +}
4399 +
4400 +/*
4401 + * dput/mntput all lower dentries and vfsmounts of an unionfs dentry, from
4402 + * bstart to bend.  If @free_lower is true, then also kfree the memory used
4403 + * to hold the lower object pointers.
4404 + *
4405 + * XXX: implement using path_put VFS macros
4406 + */
4407 +static inline void path_put_lowers(struct dentry *dentry,
4408 +                                  int bstart, int bend, bool free_lower)
4409 +{
4410 +       struct dentry *lower_dentry;
4411 +       struct vfsmount *lower_mnt;
4412 +       int bindex;
4413 +
4414 +       BUG_ON(!dentry);
4415 +       BUG_ON(!UNIONFS_D(dentry));
4416 +       BUG_ON(bstart < 0);
4417 +
4418 +       for (bindex = bstart; bindex <= bend; bindex++) {
4419 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4420 +               if (lower_dentry) {
4421 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
4422 +                       dput(lower_dentry);
4423 +               }
4424 +               lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
4425 +               if (lower_mnt) {
4426 +                       unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
4427 +                       mntput(lower_mnt);
4428 +               }
4429 +       }
4430 +
4431 +       if (free_lower) {
4432 +               kfree(UNIONFS_D(dentry)->lower_paths);
4433 +               UNIONFS_D(dentry)->lower_paths = NULL;
4434 +       }
4435 +}
4436 +
4437 +/*
4438 + * dput/mntput all lower dentries and vfsmounts, and reset start/end branch
4439 + * indices to -1.
4440 + */
4441 +static inline void path_put_lowers_all(struct dentry *dentry, bool free_lower)
4442 +{
4443 +       int bstart, bend;
4444 +
4445 +       BUG_ON(!dentry);
4446 +       BUG_ON(!UNIONFS_D(dentry));
4447 +       bstart = dbstart(dentry);
4448 +       bend = dbend(dentry);
4449 +       BUG_ON(bstart < 0);
4450 +
4451 +       path_put_lowers(dentry, bstart, bend, free_lower);
4452 +       dbstart(dentry) = dbend(dentry) = -1;
4453 +}
4454 +
4455 +#endif /* not _FANOUT_H_ */
4456 --- /dev/null
4457 +++ kernel-2.6.28/fs/unionfs/file.c
4458 @@ -0,0 +1,364 @@
4459 +/*
4460 + * Copyright (c) 2003-2009 Erez Zadok
4461 + * Copyright (c) 2003-2006 Charles P. Wright
4462 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4463 + * Copyright (c) 2005-2006 Junjiro Okajima
4464 + * Copyright (c) 2005      Arun M. Krishnakumar
4465 + * Copyright (c) 2004-2006 David P. Quigley
4466 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4467 + * Copyright (c) 2003      Puja Gupta
4468 + * Copyright (c) 2003      Harikesavan Krishnan
4469 + * Copyright (c) 2003-2009 Stony Brook University
4470 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
4471 + *
4472 + * This program is free software; you can redistribute it and/or modify
4473 + * it under the terms of the GNU General Public License version 2 as
4474 + * published by the Free Software Foundation.
4475 + */
4476 +
4477 +#include "union.h"
4478 +
4479 +/* Read through to the lower file; copy the lower atime up on success. */
4480 +static ssize_t unionfs_read(struct file *file, char __user *buf,
4481 +                           size_t count, loff_t *ppos)
4482 +{
4483 +       int ret;
4484 +       struct file *lower;
4485 +       struct dentry *dentry = file->f_path.dentry, *parent;
4486 +
4487 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4488 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4489 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4490 +
4491 +       ret = unionfs_file_revalidate(file, parent, false);
4492 +       if (unlikely(ret))
4493 +               goto out;
4494 +
4495 +       lower = unionfs_lower_file(file);
4496 +       ret = vfs_read(lower, buf, count, ppos);
4497 +       if (ret < 0)
4498 +               goto out;
4499 +       /* the lower read worked, so pick up the lower inode's atime */
4500 +       fsstack_copy_attr_atime(dentry->d_inode,
4501 +                               lower->f_path.dentry->d_inode);
4502 +       unionfs_check_file(file);
4503 +
4504 +out:
4505 +       unionfs_unlock_dentry(dentry);
4506 +       unionfs_unlock_parent(dentry, parent);
4507 +       unionfs_read_unlock(dentry->d_sb);
4508 +       return ret;
4509 +}
4510 +
4511 +/* Write through to the lower file; copy size and times up on success. */
4512 +static ssize_t unionfs_write(struct file *file, const char __user *buf,
4513 +                            size_t count, loff_t *ppos)
4514 +{
4515 +       int ret = 0;
4516 +       struct file *lower;
4517 +       struct dentry *dentry = file->f_path.dentry, *parent;
4518 +
4519 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4520 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4521 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4522 +
4523 +       ret = unionfs_file_revalidate(file, parent, true);
4524 +       if (unlikely(ret))
4525 +               goto out;
4526 +
4527 +       lower = unionfs_lower_file(file);
4528 +       ret = vfs_write(lower, buf, count, ppos);
4529 +       if (ret < 0)
4530 +               goto out;
4531 +       /* the lower write worked: refresh our inode's size and times */
4532 +       fsstack_copy_inode_size(dentry->d_inode,
4533 +                               lower->f_path.dentry->d_inode);
4534 +       fsstack_copy_attr_times(dentry->d_inode,
4535 +                               lower->f_path.dentry->d_inode);
4536 +       UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */
4537 +       unionfs_check_file(file);
4538 +
4539 +out:
4540 +       unionfs_unlock_dentry(dentry);
4541 +       unionfs_unlock_parent(dentry, parent);
4542 +       unionfs_read_unlock(dentry->d_sb);
4543 +       return ret;
4544 +}
4545 +
4546 +static int unionfs_file_readdir(struct file *file, void *dirent,
4547 +                               filldir_t filldir)
4548 +{
4549 +       return -ENOTDIR;        /* arguments are ignored: always refuses */
4550 +}
4551 +
4552 +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4553 +{
4554 +       int err = 0;
4555 +       bool willwrite;
4556 +       struct file *lower_file;
4557 +       struct dentry *dentry = file->f_path.dentry;
4558 +       struct dentry *parent;
4559 +       struct vm_operations_struct *saved_vm_ops = NULL;
4560 +       /* Map via generic_file_mmap(); the lower vm_ops are saved once. */
4561 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4562 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4563 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4564 +       /* willwrite: vm_flags already has both VM_SHARED and VM_WRITE */
4565 +       /* This might be deferred to mmap's writepage */
4566 +       willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4567 +       err = unionfs_file_revalidate(file, parent, willwrite);
4568 +       if (unlikely(err))
4569 +               goto out;
4570 +       unionfs_check_file(file);
4571 +
4572 +       /*
4573 +        * File systems which do not implement ->writepage may use
4574 +        * generic_file_readonly_mmap as their ->mmap op.  If you call
4575 +        * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4576 +        * But we cannot call the lower ->mmap op, so we can't tell that
4577 +        * writeable mappings won't work.  Therefore, our only choice is to
4578 +        * check if the lower file system supports the ->writepage, and if
4579 +        * not, return EINVAL (the same error that
4580 +        * generic_file_readonly_mmap returns in that case).
4581 +        */
4582 +       lower_file = unionfs_lower_file(file);
4583 +       if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4584 +               err = -EINVAL;
4585 +               printk(KERN_ERR "unionfs: branch %d file system does not "
4586 +                      "support writeable mmap\n", fbstart(file));
4587 +               goto out;
4588 +       }
4589 +
4590 +       /*
4591 +        * find and save lower vm_ops.
4592 +        *
4593 +        * XXX: the VFS should have a cleaner way of finding the lower vm_ops
4594 +        */
4595 +       if (!UNIONFS_F(file)->lower_vm_ops) {
4596 +               err = lower_file->f_op->mmap(lower_file, vma);
4597 +               if (err) {
4598 +                       printk(KERN_ERR "unionfs: lower mmap failed %d\n", err);
4599 +                       goto out;
4600 +               }
4601 +               saved_vm_ops = vma->vm_ops; /* presumably set by lower ->mmap */
4602 +               err = do_munmap(current->mm, vma->vm_start,
4603 +                               vma->vm_end - vma->vm_start);
4604 +               if (err) {
4605 +                       printk(KERN_ERR "unionfs: do_munmap failed %d\n", err);
4606 +                       goto out;
4607 +               }
4608 +       }
4609 +       /* dummy aops are in place only across generic_file_mmap() */
4610 +       file->f_mapping->a_ops = &unionfs_dummy_aops;
4611 +       err = generic_file_mmap(file, vma);
4612 +       file->f_mapping->a_ops = &unionfs_aops;
4613 +       if (err) {
4614 +               printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err);
4615 +               goto out;
4616 +       }
4617 +       vma->vm_ops = &unionfs_vm_ops;
4618 +       if (!UNIONFS_F(file)->lower_vm_ops)
4619 +               UNIONFS_F(file)->lower_vm_ops = saved_vm_ops;
4620 +
4621 +out:
4622 +       if (!err) {
4623 +               /* copyup could cause parent dir times to change */
4624 +               unionfs_copy_attr_times(parent->d_inode);
4625 +               unionfs_check_file(file);
4626 +       }
4627 +       unionfs_unlock_dentry(dentry);
4628 +       unionfs_unlock_parent(dentry, parent);
4629 +       unionfs_read_unlock(dentry->d_sb);
4630 +       return err;
4631 +}
4632 +
4633 +int unionfs_fsync(struct file *file, struct dentry *dentry, int datasync)
4634 +{
4635 +       int bindex, bstart, bend;
4636 +       struct file *lower_file;
4637 +       struct dentry *lower_dentry;
4638 +       struct dentry *parent;
4639 +       struct inode *lower_inode, *inode;
4640 +       int err = -EINVAL; /* NOTE(review): reassigned below before any use */
4641 +       /* fsync each lower file from fbstart to fbend; stop at first error */
4642 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4643 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4644 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4645 +
4646 +       err = unionfs_file_revalidate(file, parent, true);
4647 +       if (unlikely(err))
4648 +               goto out;
4649 +       unionfs_check_file(file);
4650 +
4651 +       bstart = fbstart(file);
4652 +       bend = fbend(file);
4653 +       if (bstart < 0 || bend < 0)
4654 +               goto out;
4655 +       /* NOTE(review): the early exits just above and below return 0 */
4656 +       inode = dentry->d_inode;
4657 +       if (unlikely(!inode)) {
4658 +               printk(KERN_ERR
4659 +                      "unionfs: null lower inode in unionfs_fsync\n");
4660 +               goto out;
4661 +       }
4662 +       for (bindex = bstart; bindex <= bend; bindex++) {
4663 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
4664 +               if (!lower_inode || !lower_inode->i_fop->fsync)
4665 +                       continue;       /* no lower inode or no ->fsync */
4666 +               lower_file = unionfs_lower_file_idx(file, bindex);
4667 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4668 +               mutex_lock(&lower_inode->i_mutex);
4669 +               err = lower_inode->i_fop->fsync(lower_file,
4670 +                                               lower_dentry,
4671 +                                               datasync);
4672 +               if (!err && bindex == bstart)   /* times: bstart branch only */
4673 +                       fsstack_copy_attr_times(inode, lower_inode);
4674 +               mutex_unlock(&lower_inode->i_mutex);
4675 +               if (err)
4676 +                       goto out;
4677 +       }
4678 +
4679 +out:
4680 +       if (!err)
4681 +               unionfs_check_file(file);
4682 +       unionfs_unlock_dentry(dentry);
4683 +       unionfs_unlock_parent(dentry, parent);
4684 +       unionfs_read_unlock(dentry->d_sb);
4685 +       return err;
4686 +}
4687 +
4688 +int unionfs_fasync(int fd, struct file *file, int flag)
4689 +{
4690 +       int bindex, bstart, bend;
4691 +       struct file *lower_file;
4692 +       struct dentry *dentry = file->f_path.dentry;
4693 +       struct dentry *parent;
4694 +       struct inode *lower_inode, *inode;
4695 +       int err = 0;
4696 +       /* forward fasync to each lower file from fbstart to fbend */
4697 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4698 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4699 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4700 +
4701 +       err = unionfs_file_revalidate(file, parent, true);
4702 +       if (unlikely(err))
4703 +               goto out;
4704 +       unionfs_check_file(file);
4705 +
4706 +       bstart = fbstart(file);
4707 +       bend = fbend(file);
4708 +       if (bstart < 0 || bend < 0)
4709 +               goto out;
4710 +       /* NOTE(review): the early exits just above and below return 0 */
4711 +       inode = dentry->d_inode;
4712 +       if (unlikely(!inode)) {
4713 +               printk(KERN_ERR
4714 +                      "unionfs: null lower inode in unionfs_fasync\n");
4715 +               goto out;
4716 +       }
4717 +       for (bindex = bstart; bindex <= bend; bindex++) {
4718 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
4719 +               if (!lower_inode || !lower_inode->i_fop->fasync)
4720 +                       continue;       /* no lower inode or no ->fasync */
4721 +               lower_file = unionfs_lower_file_idx(file, bindex);
4722 +               mutex_lock(&lower_inode->i_mutex);
4723 +               err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4724 +               if (!err && bindex == bstart)   /* times: bstart branch only */
4725 +                       fsstack_copy_attr_times(inode, lower_inode);
4726 +               mutex_unlock(&lower_inode->i_mutex);
4727 +               if (err)        /* first failing branch ends the loop */
4728 +                       goto out;
4729 +       }
4730 +
4731 +out:
4732 +       if (!err)
4733 +               unionfs_check_file(file);
4734 +       unionfs_unlock_dentry(dentry);
4735 +       unionfs_unlock_parent(dentry, parent);
4736 +       unionfs_read_unlock(dentry->d_sb);
4737 +       return err;
4738 +}
4739 +
4740 +/* Splice from the lower file into @pipe; copy lower atime up on success. */
4741 +static ssize_t unionfs_splice_read(struct file *file, loff_t *ppos,
4742 +                                  struct pipe_inode_info *pipe, size_t len,
4743 +                                  unsigned int flags)
4744 +{
4745 +       ssize_t ret;
4746 +       struct file *lower;
4747 +       struct dentry *dentry = file->f_path.dentry, *parent;
4748 +
4749 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4750 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4751 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4752 +
4753 +       ret = unionfs_file_revalidate(file, parent, false);
4754 +       if (unlikely(ret))
4755 +               goto out;
4756 +
4757 +       lower = unionfs_lower_file(file);
4758 +       ret = vfs_splice_to(lower, ppos, pipe, len, flags);
4759 +       if (ret < 0)
4760 +               goto out;
4761 +       /* the lower splice-read worked: pick up the lower inode's atime */
4762 +       fsstack_copy_attr_atime(dentry->d_inode,
4763 +                               lower->f_path.dentry->d_inode);
4764 +       unionfs_check_file(file);
4765 +
4766 +out:
4767 +       unionfs_unlock_dentry(dentry);
4768 +       unionfs_unlock_parent(dentry, parent);
4769 +       unionfs_read_unlock(dentry->d_sb);
4770 +       return ret;
4771 +}
4772 +
4773 +/* Splice from @pipe into the lower file; copy size/times up on success. */
4774 +static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe,
4775 +                                   struct file *file, loff_t *ppos,
4776 +                                   size_t len, unsigned int flags)
4777 +{
4778 +       ssize_t ret = 0;
4779 +       struct file *lower;
4780 +       struct dentry *dentry = file->f_path.dentry, *parent;
4781 +
4782 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4783 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4784 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4785 +
4786 +       ret = unionfs_file_revalidate(file, parent, true);
4787 +       if (unlikely(ret))
4788 +               goto out;
4789 +
4790 +       lower = unionfs_lower_file(file);
4791 +       ret = vfs_splice_from(pipe, lower, ppos, len, flags);
4792 +       if (ret < 0)
4793 +               goto out;
4794 +       /* the lower write worked: refresh our inode's size and times */
4795 +       fsstack_copy_inode_size(dentry->d_inode,
4796 +                               lower->f_path.dentry->d_inode);
4797 +       fsstack_copy_attr_times(dentry->d_inode,
4798 +                               lower->f_path.dentry->d_inode);
4799 +       unionfs_check_file(file);
4800 +
4801 +out:
4802 +       unionfs_unlock_dentry(dentry);
4803 +       unionfs_unlock_parent(dentry, parent);
4804 +       unionfs_read_unlock(dentry->d_sb);
4805 +       return ret;
4806 +}
4807 +
4808 +struct file_operations unionfs_main_fops = {   /* file_operations table */
4809 +       .llseek         = generic_file_llseek,
4810 +       .read           = unionfs_read,
4811 +       .write          = unionfs_write,
4812 +       .readdir        = unionfs_file_readdir, /* always -ENOTDIR */
4813 +       .unlocked_ioctl = unionfs_ioctl,        /* not defined in file.c */
4814 +       .mmap           = unionfs_mmap,
4815 +       .open           = unionfs_open,         /* not defined in file.c */
4816 +       .flush          = unionfs_flush,        /* not defined in file.c */
4817 +       .release        = unionfs_file_release, /* not defined in file.c */
4818 +       .fsync          = unionfs_fsync,        /* non-static in file.c */
4819 +       .fasync         = unionfs_fasync,       /* non-static in file.c */
4820 +       .splice_read    = unionfs_splice_read,
4821 +       .splice_write   = unionfs_splice_write,
4822 +};
4823 --- /dev/null
4824 +++ kernel-2.6.28/fs/unionfs/inode.c
4825 @@ -0,0 +1,1055 @@
4826 +/*
4827 + * Copyright (c) 2003-2009 Erez Zadok
4828 + * Copyright (c) 2003-2006 Charles P. Wright
4829 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4830 + * Copyright (c) 2005-2006 Junjiro Okajima
4831 + * Copyright (c) 2005      Arun M. Krishnakumar
4832 + * Copyright (c) 2004-2006 David P. Quigley
4833 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4834 + * Copyright (c) 2003      Puja Gupta
4835 + * Copyright (c) 2003      Harikesavan Krishnan
4836 + * Copyright (c) 2003-2009 Stony Brook University
4837 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
4838 + *
4839 + * This program is free software; you can redistribute it and/or modify
4840 + * it under the terms of the GNU General Public License version 2 as
4841 + * published by the Free Software Foundation.
4842 + */
4843 +
4844 +#include "union.h"
4845 +
4846 +/*
4847 + * Find a writeable branch to create new object in.  Checks all writeable
4848 + * branches of the parent inode, from istart to iend order; if none are
4849 + * suitable, also tries branch 0 (which may require a copyup).
4850 + *
4851 + * Return a lower_dentry we can use to create object in, or ERR_PTR.
4852 + */
4853 +static struct dentry *find_writeable_branch(struct inode *parent,
4854 +                                           struct dentry *dentry)
4855 +{
4856 +       int err = -EINVAL;      /* returned unchanged when istart < 0 */
4857 +       int bindex, istart, iend;
4858 +       struct dentry *lower_dentry = NULL;
4859 +
4860 +       istart = ibstart(parent);
4861 +       iend = ibend(parent);
4862 +       if (istart < 0)
4863 +               goto out;
4864 +
4865 +begin:
4866 +       for (bindex = istart; bindex <= iend; bindex++) {
4867 +               /* skip non-writeable branches */
4868 +               err = is_robranch_super(dentry->d_sb, bindex);
4869 +               if (err) {
4870 +                       err = -EROFS;
4871 +                       continue;
4872 +               }
4873 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4874 +               if (!lower_dentry)
4875 +                       continue;       /* err is 0 at this point */
4876 +               /*
4877 +                * check for whiteouts in writeable branch, and remove them
4878 +                * if necessary.
4879 +                */
4880 +               err = check_unlink_whiteout(dentry, lower_dentry, bindex);
4881 +               if (err > 0)    /* ignore if whiteout found and removed */
4882 +                       err = 0;
4883 +               if (err)
4884 +                       continue;
4885 +               /* if get here, we can write to the branch */
4886 +               break;
4887 +       }
4888 +       /*
4889 +        * If istart wasn't already branch 0, and we got any error, then try
4890 +        * branch 0 (which may require copyup)
4891 +        */
4892 +       if (err && istart > 0) {
4893 +               istart = iend = 0;      /* second pass covers branch 0 only */
4894 +               goto begin;
4895 +       }
4896 +
4897 +       /*
4898 +        * If we tried even branch 0, and still got an error, abort.  But if
4899 +        * the error was an EROFS, then we should try to copyup.
4900 +        */
4901 +       if (err && err != -EROFS)
4902 +               goto out;
4903 +
4904 +       /*
4905 +        * If we get here, then check if copyup needed.  If lower_dentry is
4906 +        * NULL, create the entire dentry directory structure in branch 0.
4907 +        */
4908 +       if (!lower_dentry) {
4909 +               bindex = 0;
4910 +               lower_dentry = create_parents(parent, dentry,
4911 +                                             dentry->d_name.name, bindex);
4912 +               if (IS_ERR(lower_dentry)) {
4913 +                       err = PTR_ERR(lower_dentry);
4914 +                       goto out;
4915 +               }
4916 +       }
4917 +       err = 0;                /* all's well */
4918 +out:
4919 +       if (err)        /* any non-zero err becomes the ERR_PTR result */
4920 +               return ERR_PTR(err);
4921 +       return lower_dentry;
4922 +}
4923 +
4924 +/* Create @dentry in @dir via vfs_create() on a writeable lower branch. */
4925 +static int unionfs_create(struct inode *dir, struct dentry *dentry,
4926 +                         int mode, struct nameidata *nd_unused)
4927 +{
4928 +       int err = 0;
4929 +       struct dentry *lower_dentry = NULL;
4930 +       struct dentry *lower_parent_dentry = NULL;
4931 +       struct dentry *parent;
4932 +       struct nameidata lower_nd;
4933 +
4934 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4935 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4936 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4937 +
4938 +       if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
4939 +               err = -ESTALE;  /* same as what real_lookup does */
4940 +               goto out;
4941 +       }
4942 +
4943 +       lower_dentry = find_writeable_branch(dir, dentry);
4944 +       if (IS_ERR(lower_dentry)) {
4945 +               err = PTR_ERR(lower_dentry);
4946 +               goto out;
4947 +       }
4948 +
4949 +       lower_parent_dentry = lock_parent(lower_dentry);
4950 +       if (IS_ERR(lower_parent_dentry)) {
4951 +               err = PTR_ERR(lower_parent_dentry);
4952 +               goto out;
4953 +       }
4954 +
4955 +       /* lock_parent() succeeded: every exit below must reach unlock_dir() */
4956 +       err = init_lower_nd(&lower_nd, LOOKUP_CREATE);
4957 +       if (unlikely(err < 0))
4958 +               goto out_unlock;
4959 +       err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode,
4960 +                        &lower_nd);
4961 +       release_lower_nd(&lower_nd, err);
4962 +
4963 +       if (!err) {
4964 +               err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
4965 +               if (!err) {
4966 +                       unionfs_copy_attr_times(dir);
4967 +                       fsstack_copy_inode_size(dir,
4968 +                                               lower_parent_dentry->d_inode);
4969 +                       /* update no. of links on parent directory */
4970 +                       dir->i_nlink = unionfs_get_nlinks(dir);
4971 +               }
4972 +       }
4973 +out_unlock:
4974 +       unlock_dir(lower_parent_dentry);
4975 +
4976 +out:
4977 +       if (!err) {
4978 +               unionfs_postcopyup_setmnt(dentry);
4979 +               unionfs_check_inode(dir);
4980 +               unionfs_check_dentry(dentry);
4981 +       }
4982 +       unionfs_unlock_dentry(dentry);
4983 +       unionfs_unlock_parent(dentry, parent);
4984 +       unionfs_read_unlock(dentry->d_sb);
4985 +       return err;     /* 0, -ESTALE, or the failing helper's error */
4986 +}
4987 +
4988 +/*
4989 + * unionfs_lookup is the only special function which takes a dentry, yet we
4990 + * do NOT want to call __unionfs_d_revalidate_chain because by definition,
4991 + * we don't have a valid dentry here yet.
4992 + */
4993 +static struct dentry *unionfs_lookup(struct inode *dir,
4994 +                                    struct dentry *dentry,
4995 +                                    struct nameidata *nd_unused)
4996 +{
4997 +       struct dentry *ret, *parent;
4998 +       int err = 0;
4999 +
5000 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5001 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5002 +
5003 +       /*
5004 +        * As long as we lock/dget the parent, then can skip validating the
5005 +        * parent now; we may have to rebuild this dentry on the next
5006 +        * ->d_revalidate, however.
5007 +        */
5008 +
5009 +       /* allocate dentry private data.  We free it in ->d_release */
5010 +       err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
5011 +       if (unlikely(err)) {
5012 +               ret = ERR_PTR(err);
5013 +               goto out;       /* skips the dentry unlock further down */
5014 +       }
5015 +
5016 +       ret = unionfs_lookup_full(dentry, parent, INTERPOSE_LOOKUP);
5017 +       /* a non-NULL, non-error ret replaces the dentry we were handed */
5018 +       if (!IS_ERR(ret)) {
5019 +               if (ret)
5020 +                       dentry = ret;
5021 +               /* lookup_full can return multiple positive dentries */
5022 +               if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
5023 +                       BUG_ON(dbstart(dentry) < 0);
5024 +                       unionfs_postcopyup_release(dentry);
5025 +               }
5026 +               unionfs_copy_attr_times(dentry->d_inode);
5027 +       }
5028 +       /* NOTE(review): d_inode may be NULL in the call above; confirm OK */
5029 +       unionfs_check_inode(dir);
5030 +       if (!IS_ERR(ret))
5031 +               unionfs_check_dentry(dentry);
5032 +       unionfs_check_dentry(parent);
5033 +       unionfs_unlock_dentry(dentry); /* locked in new_dentry_private data */
5034 +
5035 +out:
5036 +       unionfs_unlock_parent(dentry, parent);
5037 +       unionfs_read_unlock(dentry->d_sb);
5038 +
5039 +       return ret;
5040 +}
5041 +
5042 +static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
5043 +                       struct dentry *new_dentry)
5044 +{
5045 +       int err = 0;
5046 +       struct dentry *lower_old_dentry = NULL;
5047 +       struct dentry *lower_new_dentry = NULL;
5048 +       struct dentry *lower_dir_dentry = NULL;
5049 +       struct dentry *old_parent, *new_parent;
5050 +       char *name = NULL;      /* never assigned: kfree(name) sees NULL */
5051 +       bool valid;
5052 +       /* Hard-link old_dentry as new_dentry, copying up when required. */
5053 +       unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5054 +       old_parent = dget_parent(old_dentry);
5055 +       new_parent = dget_parent(new_dentry);
5056 +       unionfs_double_lock_parents(old_parent, new_parent);
5057 +       unionfs_double_lock_dentry(old_dentry, new_dentry);
5058 +
5059 +       valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
5060 +       if (unlikely(!valid)) {
5061 +               err = -ESTALE;
5062 +               goto out;
5063 +       }
5064 +       if (new_dentry->d_inode) {      /* revalidate only if positive */
5065 +               valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
5066 +               if (unlikely(!valid)) {
5067 +                       err = -ESTALE;
5068 +                       goto out;
5069 +               }
5070 +       }
5071 +
5072 +       lower_new_dentry = unionfs_lower_dentry(new_dentry);
5073 +
5074 +       /* check for a whiteout in new dentry branch, and delete it */
5075 +       err = check_unlink_whiteout(new_dentry, lower_new_dentry,
5076 +                                   dbstart(new_dentry));
5077 +       if (err > 0) {         /* whiteout found and removed successfully */
5078 +               lower_dir_dentry = dget_parent(lower_new_dentry);
5079 +               fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5080 +               dput(lower_dir_dentry);
5081 +               dir->i_nlink = unionfs_get_nlinks(dir);
5082 +               err = 0;
5083 +       }
5084 +       if (err)
5085 +               goto out;
5086 +
5087 +       /* check if parent hierarchy is needed, then link in same branch */
5088 +       if (dbstart(old_dentry) != dbstart(new_dentry)) {
5089 +               lower_new_dentry = create_parents(dir, new_dentry,
5090 +                                                 new_dentry->d_name.name,
5091 +                                                 dbstart(old_dentry));
5092 +               err = PTR_ERR(lower_new_dentry);
5093 +               if (IS_COPYUP_ERR(err))
5094 +                       goto docopyup;
5095 +               if (!lower_new_dentry || IS_ERR(lower_new_dentry))
5096 +                       goto out;
5097 +       }
5098 +       lower_new_dentry = unionfs_lower_dentry(new_dentry);
5099 +       lower_old_dentry = unionfs_lower_dentry(old_dentry);
5100 +
5101 +       BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
5102 +       lower_dir_dentry = lock_parent(lower_new_dentry);
5103 +       err = is_robranch(old_dentry);
5104 +       if (!err) {
5105 +               /* see Documentation/filesystems/unionfs/issues.txt */
5106 +               lockdep_off();
5107 +               err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
5108 +                              lower_new_dentry);
5109 +               lockdep_on();
5110 +       }
5111 +       unlock_dir(lower_dir_dentry);
5112 +
5113 +docopyup:
5114 +       if (IS_COPYUP_ERR(err)) {
5115 +               int old_bstart = dbstart(old_dentry);
5116 +               int bindex;
5117 +               /* try each branch below old_bstart, highest index first */
5118 +               for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
5119 +                       err = copyup_dentry(old_parent->d_inode,
5120 +                                           old_dentry, old_bstart,
5121 +                                           bindex, old_dentry->d_name.name,
5122 +                                           old_dentry->d_name.len, NULL,
5123 +                                           i_size_read(old_dentry->d_inode));
5124 +                       if (err)
5125 +                               continue;
5126 +                       lower_new_dentry =
5127 +                               create_parents(dir, new_dentry,
5128 +                                              new_dentry->d_name.name,
5129 +                                              bindex);
5130 +                       lower_old_dentry = unionfs_lower_dentry(old_dentry);
5131 +                       lower_dir_dentry = lock_parent(lower_new_dentry);
5132 +                       /* see Documentation/filesystems/unionfs/issues.txt */
5133 +                       lockdep_off();
5134 +                       /* NOTE(review): create_parents() result is unchecked */
5135 +                       err = vfs_link(lower_old_dentry,
5136 +                                      lower_dir_dentry->d_inode,
5137 +                                      lower_new_dentry);
5138 +                       lockdep_on();
5139 +                       unlock_dir(lower_dir_dentry);
5140 +                       goto check_link;
5141 +               }
5142 +               goto out;
5143 +       }
5144 +
5145 +check_link:
5146 +       if (err || !lower_new_dentry->d_inode)
5147 +               goto out;
5148 +
5149 +       /* It's a hard link, so use the same inode */
5150 +       new_dentry->d_inode = igrab(old_dentry->d_inode);
5151 +       d_add(new_dentry, new_dentry->d_inode);
5152 +       unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5153 +       fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5154 +
5155 +       /* propagate number of hard-links */
5156 +       old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5157 +       /* new dentry's ctime may have changed due to hard-link counts */
5158 +       unionfs_copy_attr_times(new_dentry->d_inode);
5159 +
5160 +out:
5161 +       if (!new_dentry->d_inode)       /* still negative: unhash it */
5162 +               d_drop(new_dentry);
5163 +
5164 +       kfree(name);
5165 +       if (!err)
5166 +               unionfs_postcopyup_setmnt(new_dentry);
5167 +
5168 +       unionfs_check_inode(dir);
5169 +       unionfs_check_dentry(new_dentry);
5170 +       unionfs_check_dentry(old_dentry);
5171 +
5172 +       unionfs_double_unlock_dentry(old_dentry, new_dentry);
5173 +       unionfs_double_unlock_parents(old_parent, new_parent);
5174 +       dput(new_parent);
5175 +       dput(old_parent);
5176 +       unionfs_read_unlock(old_dentry->d_sb);
5177 +
5178 +       return err;
5179 +}
5180 +
5181 +/*
5182 + * Create symlink @dentry (in directory @dir) pointing at @symname.
5183 + *
5184 + * The link is made with vfs_symlink() on the lower dentry chosen by
5185 + * find_writeable_branch(), and @dentry is then interposed on top of it.
5186 + * On success @dir's times, size and link count are refreshed.
5187 + *
5188 + * Returns 0 on success, -ESTALE if @dentry fails revalidation, or the
5189 + * error reported by the failing helper.
5190 + */
5191 +static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
5192 +                          const char *symname)
5193 +{
5194 +       int err = 0;
5195 +       struct dentry *lower_dentry = NULL;
5196 +       struct dentry *lower_parent_dentry = NULL;
5197 +       struct dentry *parent;
5198 +       int valid = 0;
5199 +
5200 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5201 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5202 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5203 +
5204 +       valid = __unionfs_d_revalidate(dentry, parent, false);
5205 +       if (unlikely(!valid)) {
5206 +               err = -ESTALE;
5207 +               goto out;
5208 +       }
5209 +
5210 +       lower_dentry = find_writeable_branch(dir, dentry);
5211 +       if (IS_ERR(lower_dentry)) {
5212 +               err = PTR_ERR(lower_dentry);
5213 +               goto out;
5214 +       }
5215 +
5216 +       lower_parent_dentry = lock_parent(lower_dentry);
5217 +       if (IS_ERR(lower_parent_dentry)) {
5218 +               err = PTR_ERR(lower_parent_dentry);
5219 +               goto out;
5220 +       }
5221 +
5222 +       err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname);
5223 +       if (!err) {
5224 +               err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5225 +               if (!err) {
5226 +                       unionfs_copy_attr_times(dir);
5227 +                       fsstack_copy_inode_size(dir,
5228 +                                               lower_parent_dentry->d_inode);
5229 +                       /* update no. of links on parent directory */
5230 +                       dir->i_nlink = unionfs_get_nlinks(dir);
5231 +               }
5232 +       }
5233 +
5234 +       /* pairs with the successful lock_parent() above */
5235 +       unlock_dir(lower_parent_dentry);
5236 +
5237 +out:
5238 +       /* post-copyup fixup and sanity checks run only on success */
5239 +       if (!err) {
5240 +               unionfs_postcopyup_setmnt(dentry);
5241 +               unionfs_check_inode(dir);
5242 +               unionfs_check_dentry(dentry);
5243 +       }
5244 +       /* release in the reverse order of acquisition */
5245 +       unionfs_unlock_dentry(dentry);
5246 +       unionfs_unlock_parent(dentry, parent);
5247 +       unionfs_read_unlock(dentry->d_sb);
5248 +       return err;
5249 +}
5250 +
5251 +static int unionfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
5252 +{
5253 +       int err = 0;
5254 +       struct dentry *lower_dentry = NULL;
5255 +       struct dentry *lower_parent_dentry = NULL;
5256 +       struct dentry *parent;
5257 +       int bindex = 0, bstart;
5258 +       char *name = NULL;      /* never assigned; kfree(name) below gets NULL */
5259 +       int valid;
5260 +
5261 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5262 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5263 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5264 +
5265 +       valid = __unionfs_d_revalidate(dentry, parent, false);
5266 +       if (unlikely(!valid)) {
5267 +               err = -ESTALE;  /* same as what real_lookup does */
5268 +               goto out;
5269 +       }
5270 +
5271 +       bstart = dbstart(dentry);
5272 +
5273 +       lower_dentry = unionfs_lower_dentry(dentry);
5274 +
5275 +       /* check for a whiteout in new dentry branch, and delete it */
5276 +       err = check_unlink_whiteout(dentry, lower_dentry, bstart);
5277 +       if (err > 0)           /* whiteout found and removed successfully */
5278 +               err = 0;
5279 +       if (err) {
5280 +               /* exit if the error returned was NOT -EROFS */
5281 +               if (!IS_COPYUP_ERR(err))
5282 +                       goto out;
5283 +               bstart--;
5284 +       }
5285 +
5286 +       /* mkdir in a non-readonly branch, scanning from bstart down to 0 */
5287 +       for (bindex = bstart; bindex >= 0; bindex--) {
5288 +               int i;
5289 +               int bend = dbend(dentry);
5290 +
5291 +               if (is_robranch_super(dentry->d_sb, bindex))
5292 +                       continue;
5293 +
5294 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5295 +               if (!lower_dentry) {
5296 +                       lower_dentry = create_parents(dir, dentry,
5297 +                                                     dentry->d_name.name,
5298 +                                                     bindex);
5299 +                       if (!lower_dentry || IS_ERR(lower_dentry)) {
5300 +                               printk(KERN_ERR "unionfs: lower dentry "
5301 +                                      " NULL for bindex = %d\n", bindex);
5302 +                               continue;
5303 +                       }
5304 +               }
5305 +
5306 +               lower_parent_dentry = lock_parent(lower_dentry);
5307 +
5308 +               if (IS_ERR(lower_parent_dentry)) {
5309 +                       err = PTR_ERR(lower_parent_dentry);
5310 +                       goto out;
5311 +               }
5312 +
5313 +               err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5314 +                               mode);
5315 +
5316 +               unlock_dir(lower_parent_dentry);
5317 +
5318 +               /* did the mkdir succeed? */
5319 +               if (err)
5320 +                       break;
5321 +
5322 +               for (i = bindex + 1; i <= bend; i++) {
5323 +                       /* XXX: use path_put_lowers? */
5324 +                       if (unionfs_lower_dentry_idx(dentry, i)) {
5325 +                               dput(unionfs_lower_dentry_idx(dentry, i));
5326 +                               unionfs_set_lower_dentry_idx(dentry, i, NULL);
5327 +                       }
5328 +               }
5329 +               dbend(dentry) = bindex; /* lowers past bindex were just dropped */
5330 +
5331 +               /*
5332 +                * Only INTERPOSE_LOOKUP can return a value other than 0 on
5333 +                * err.
5334 +                */
5335 +               err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5336 +               if (err)        /* don't let make_dir_opaque() mask it */
5337 +                       goto out;
5338 +
5339 +               unionfs_copy_attr_times(dir);
5340 +               fsstack_copy_inode_size(dir,
5341 +                                       lower_parent_dentry->d_inode);
5342 +               /* update number of links on parent directory */
5343 +               dir->i_nlink = unionfs_get_nlinks(dir);
5344 +
5345 +               err = make_dir_opaque(dentry, dbstart(dentry));
5346 +               if (err) {
5347 +                       printk(KERN_ERR "unionfs: mkdir: error creating "
5348 +                              ".wh.__dir_opaque: %d\n", err);
5349 +                       goto out;
5350 +               }
5351 +
5352 +               /* we are done! */
5353 +               break;
5354 +       }
5355 +
5356 +out:
5357 +       if (!dentry->d_inode)   /* no inode was attached: unhash the dentry */
5358 +               d_drop(dentry);
5359 +
5360 +       kfree(name);
5361 +
5362 +       if (!err) {
5363 +               unionfs_copy_attr_times(dentry->d_inode);
5364 +               unionfs_postcopyup_setmnt(dentry);
5365 +       }
5366 +       unionfs_check_inode(dir);
5367 +       unionfs_check_dentry(dentry);
5368 +       unionfs_unlock_dentry(dentry);
5369 +       unionfs_unlock_parent(dentry, parent);
5370 +       unionfs_read_unlock(dentry->d_sb);
5371 +
5372 +       return err;
5373 +}
5374 +
5375 +static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
5376 +                        dev_t dev)
5377 +{
5378 +       int err = 0;
5379 +       struct dentry *lower_dentry = NULL;
5380 +       struct dentry *wh_dentry = NULL;        /* never reassigned in here */
5381 +       struct dentry *lower_parent_dentry = NULL;
5382 +       struct dentry *parent;
5383 +       char *name = NULL;                      /* never reassigned in here */
5384 +       int valid = 0;
5385 +
5386 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5387 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5388 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5389 +
5390 +       valid = __unionfs_d_revalidate(dentry, parent, false);
5391 +       if (unlikely(!valid)) {
5392 +               err = -ESTALE;
5393 +               goto out;
5394 +       }
5395 +
5396 +       /*
5397 +        * A positive dentry that failed revalidation would be a bug; note
5398 +        * that !valid already bailed out above, so this cannot fire here.
5399 +        */
5400 +       BUG_ON(!valid && dentry->d_inode);
5401 +
5402 +       lower_dentry = find_writeable_branch(dir, dentry);
5403 +       if (IS_ERR(lower_dentry)) {
5404 +               err = PTR_ERR(lower_dentry);
5405 +               goto out;
5406 +       }
5407 +
5408 +       lower_parent_dentry = lock_parent(lower_dentry);
5409 +       if (IS_ERR(lower_parent_dentry)) {
5410 +               err = PTR_ERR(lower_parent_dentry);
5411 +               goto out;
5412 +       }
5413 +
5414 +       err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
5415 +       if (!err) {
5416 +               err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5417 +               if (!err) {
5418 +                       unionfs_copy_attr_times(dir);
5419 +                       fsstack_copy_inode_size(dir,
5420 +                                               lower_parent_dentry->d_inode);
5421 +                       /* update no. of links on parent directory */
5422 +                       dir->i_nlink = unionfs_get_nlinks(dir);
5423 +               }
5424 +       }
5425 +
5426 +       unlock_dir(lower_parent_dentry);
5427 +
5428 +out:
5429 +       dput(wh_dentry);
5430 +       kfree(name);
5431 +
5432 +       if (!err) {     /* the check helpers only run on success */
5433 +               unionfs_postcopyup_setmnt(dentry);
5434 +               unionfs_check_inode(dir);
5435 +               unionfs_check_dentry(dentry);
5436 +       }
5437 +       unionfs_unlock_dentry(dentry);
5438 +       unionfs_unlock_parent(dentry, parent);
5439 +       unionfs_read_unlock(dentry->d_sb);
5440 +       return err;
5441 +}
5442 +
5443 +/* readlink via the lower inode; sb, dentry, and parent must be locked */
5444 +static int __unionfs_readlink(struct dentry *dentry, char __user *buf,
5445 +                             int bufsiz)
5446 +{
5447 +       int err;
5448 +       struct dentry *lower_dentry;
5449 +
5450 +       lower_dentry = unionfs_lower_dentry(dentry);
5451 +
5452 +       if (!lower_dentry->d_inode->i_op ||
5453 +           !lower_dentry->d_inode->i_op->readlink) {
5454 +               err = -EINVAL;  /* lower inode has no ->readlink method */
5455 +               goto out;
5456 +       }
5457 +
5458 +       err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5459 +                                                   buf, bufsiz);
5460 +       if (err >= 0)   /* not an error: refresh our atime from the lower inode */
5461 +               fsstack_copy_attr_atime(dentry->d_inode,
5462 +                                       lower_dentry->d_inode);
5463 +
5464 +out:
5465 +       return err;     /* the lower ->readlink result, or -EINVAL */
5466 +}
5467 +
5468 +static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5469 +                           int bufsiz)
5470 +{
5471 +       int err;
5472 +       struct dentry *parent;
5473 +
5474 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5475 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5476 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5477 +
5478 +       if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5479 +               err = -ESTALE;  /* dentry failed revalidation */
5480 +               goto out;
5481 +       }
5482 +
5483 +       err = __unionfs_readlink(dentry, buf, bufsiz); /* locks held above */
5484 +
5485 +out:
5486 +       unionfs_check_dentry(dentry);
5487 +       unionfs_unlock_dentry(dentry);
5488 +       unionfs_unlock_parent(dentry, parent);
5489 +       unionfs_read_unlock(dentry->d_sb);
5490 +
5491 +       return err;
5492 +}
5493 +
5494 +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5495 +{
5496 +       char *buf;
5497 +       int len = PAGE_SIZE, err;
5498 +       mm_segment_t old_fs;
5499 +       struct dentry *parent;
5500 +
5501 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5502 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5503 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5504 +
5505 +       /* This is freed by the put_link method assuming a successful call. */
5506 +       buf = kmalloc(len, GFP_KERNEL);
5507 +       if (unlikely(!buf)) {
5508 +               err = -ENOMEM;
5509 +               goto out;
5510 +       }
5511 +
5512 +       /* read the symlink (leaving room for the NUL), then we follow it */
5513 +       old_fs = get_fs();
5514 +       set_fs(KERNEL_DS);      /* buf is a kernel buffer, not a user pointer */
5515 +       err = __unionfs_readlink(dentry, buf, len - 1);
5516 +       set_fs(old_fs);
5517 +       if (err < 0) {
5518 +               kfree(buf);
5519 +               buf = NULL;
5520 +               goto out;
5521 +       }
5522 +       buf[err] = 0;           /* err <= len - 1, so this stays in bounds */
5523 +       nd_set_link(nd, buf);
5524 +       err = 0;
5525 +
5526 +out:
5527 +       if (err >= 0) {
5528 +               unionfs_check_nd(nd);
5529 +               unionfs_check_dentry(dentry);
5530 +       }
5531 +
5532 +       unionfs_unlock_dentry(dentry);
5533 +       unionfs_unlock_parent(dentry, parent);
5534 +       unionfs_read_unlock(dentry->d_sb);
5535 +
5536 +       return ERR_PTR(err);    /* ERR_PTR(0) is NULL on success */
5537 +}
5538 +
5539 +/* frees the link buffer set by follow_link; this @nd *IS* still used */
5540 +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5541 +                            void *cookie)
5542 +{
5543 +       struct dentry *parent;
5544 +
5545 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5546 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5547 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5548 +
5549 +       if (unlikely(!__unionfs_d_revalidate(dentry, parent, false)))
5550 +               printk(KERN_ERR
5551 +                      "unionfs: put_link failed to revalidate dentry\n");
5552 +
5553 +       unionfs_check_dentry(dentry);
5554 +       unionfs_check_nd(nd);
5555 +       kfree(nd_get_link(nd)); /* freed even if revalidation failed above */
5556 +       unionfs_unlock_dentry(dentry);
5557 +       unionfs_unlock_parent(dentry, parent);
5558 +       unionfs_read_unlock(dentry->d_sb);
5559 +}
5560 +
5561 +/*
5562 + * This is a variant of fs/namei.c:permission() or inode_permission() which
5563 + * skips over EROFS tests (because we perform copyup on EROFS).
5564 + */
5565 +static int __inode_permission(struct inode *inode, int mask)
5566 +{
5567 +       int retval;
5568 +
5569 +       /* nobody gets write access to an immutable file */
5570 +       if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
5571 +               return -EACCES;
5572 +
5573 +       /* Ordinary permission routines do not understand MAY_APPEND. */
5574 +       if (inode->i_op && inode->i_op->permission) {
5575 +               retval = inode->i_op->permission(inode, mask);
5576 +               if (!retval) {
5577 +                       /*
5578 +                        * Exec permission on a regular file is denied if none
5579 +                        * of the execute bits are set.
5580 +                        *
5581 +                        * This check should be done by the ->permission()
5582 +                        * method.
5583 +                        */
5584 +                       if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
5585 +                           !(inode->i_mode & S_IXUGO))
5586 +                               return -EACCES;
5587 +               }
5588 +       } else {        /* inode has no ->permission method of its own */
5589 +               retval = generic_permission(inode, mask, NULL);
5590 +       }
5591 +       if (retval)
5592 +               return retval;
5593 +
5594 +       return security_inode_permission(inode, /* security hook decides last */
5595 +                       mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
5596 +}
5597 +
5598 +/*
5599 + * Don't grab the superblock read-lock in unionfs_permission, which prevents
5600 + * a deadlock with the branch-management "add branch" code (which grabbed
5601 + * the write lock).  It is safe to not grab the read lock here, because even
5602 + * with branch management taking place, there is no chance that
5603 + * unionfs_permission, or anything it calls, will use stale branch
5604 + * information.
5605 + */
5606 +static int unionfs_permission(struct inode *inode, int mask)
5607 +{
5608 +       struct inode *lower_inode = NULL;
5609 +       int err = 0;
5610 +       int bindex, bstart, bend;
5611 +       const int is_file = !S_ISDIR(inode->i_mode);
5612 +       const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
5613 +       struct inode *inode_grabbed = igrab(inode);     /* iput() at out */
5614 +       struct dentry *dentry = d_find_alias(inode);    /* may be NULL */
5615 +
5616 +       if (dentry)
5617 +               unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5618 +
5619 +       if (!UNIONFS_I(inode)->lower_inodes) {
5620 +               if (is_file)    /* dirs can be unlinked but chdir'ed to */
5621 +                       err = -ESTALE;  /* force revalidate */
5622 +               goto out;
5623 +       }
5624 +       bstart = ibstart(inode);
5625 +       bend = ibend(inode);
5626 +       if (unlikely(bstart < 0 || bend < 0)) {
5627 +               /*
5628 +                * With branch-management, we can get a stale inode here.
5629 +                * If so, we return ESTALE back to link_path_walk, which
5630 +                * would discard the dcache entry and re-lookup the
5631 +                * dentry+inode.  This should be equivalent to issuing
5632 +                * __unionfs_d_revalidate_chain on nd.dentry here.
5633 +                */
5634 +               if (is_file)    /* dirs can be unlinked but chdir'ed to */
5635 +                       err = -ESTALE;  /* force revalidate */
5636 +               goto out;
5637 +       }
5638 +
5639 +       for (bindex = bstart; bindex <= bend; bindex++) {
5640 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
5641 +               if (!lower_inode)
5642 +                       continue;
5643 +
5644 +               /*
5645 +                * check the condition for D-F-D underlying files/directories,
5646 +                * we don't have to check for files, if we are checking for
5647 +                * directories.
5648 +                */
5649 +               if (!is_file && !S_ISDIR(lower_inode->i_mode))
5650 +                       continue;
5651 +
5652 +               /*
5653 +                * We check basic permissions, but we ignore any conditions
5654 +                * such as readonly file systems or branches marked as
5655 +                * readonly, because those conditions should lead to a
5656 +                * copyup taking place later on.  However, if user never had
5657 +                * access to the file, then no copyup could ever take place.
5658 +                */
5659 +               err = __inode_permission(lower_inode, mask);
5660 +               if (err && err != -EACCES && err != -EPERM && bindex > 0) {
5661 +                       umode_t mode = lower_inode->i_mode;
5662 +                       if ((is_robranch_super(inode->i_sb, bindex) ||
5663 +                            __is_rdonly(lower_inode)) &&
5664 +                           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5665 +                               err = 0;
5666 +                       if (IS_COPYUP_ERR(err))
5667 +                               err = 0;
5668 +               }
5669 +
5670 +               /*
5671 +                * NFS HACK: NFSv2/3 return EACCES on readonly-exported,
5672 +                * locally readonly-mounted file systems, instead of EROFS
5673 +                * like other file systems do.  So we have no choice here
5674 +                * but to intercept this and ignore it for NFS branches
5675 +                * marked readonly.  Specifically, we avoid using NFS's own
5676 +                * "broken" ->permission method, and rely on
5677 +                * generic_permission() to do basic checking for us.
5678 +                */
5679 +               if (err && err == -EACCES &&
5680 +                   is_robranch_super(inode->i_sb, bindex) &&
5681 +                   lower_inode->i_sb->s_magic == NFS_SUPER_MAGIC)
5682 +                       err = generic_permission(lower_inode, mask, NULL);
5683 +
5684 +               /*
5685 +                * The permissions are an intersection of the overall directory
5686 +                * permissions, so we fail if one fails.
5687 +                */
5688 +               if (err)
5689 +                       goto out;
5690 +
5691 +               /* only the leftmost file matters. */
5692 +               if (is_file || write_mask) {
5693 +                       if (is_file && write_mask) {
5694 +                               err = get_write_access(lower_inode);
5695 +                               if (!err)
5696 +                                       put_write_access(lower_inode);
5697 +                       }
5698 +                       break;
5699 +               }
5700 +       }
5701 +       /* sync times which may have changed (asynchronously) below */
5702 +       unionfs_copy_attr_times(inode);
5703 +
5704 +out:
5705 +       unionfs_check_inode(inode);
5706 +       if (dentry) {
5707 +               unionfs_unlock_dentry(dentry);
5708 +               dput(dentry);
5709 +       }
5710 +       iput(inode_grabbed);
5711 +       return err;
5712 +}
5713 +
5714 +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5715 +{
5716 +       int err = 0;
5717 +       struct dentry *lower_dentry;
5718 +       struct dentry *parent;
5719 +       struct inode *inode;
5720 +       struct inode *lower_inode;
5721 +       int bstart, bend, bindex;
5722 +       loff_t size;
5723 +
5724 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5725 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5726 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5727 +
5728 +       if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5729 +               err = -ESTALE;
5730 +               goto out;
5731 +       }
5732 +
5733 +       bstart = dbstart(dentry);
5734 +       bend = dbend(dentry);   /* assigned but not read again in here */
5735 +       inode = dentry->d_inode;
5736 +
5737 +       /*
5738 +        * mode change is for clearing setuid/setgid. Allow lower filesystem
5739 +        * to reinterpret it in its own way.
5740 +        */
5741 +       if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
5742 +               ia->ia_valid &= ~ATTR_MODE;
5743 +
5744 +       lower_dentry = unionfs_lower_dentry(dentry);
5745 +       if (!lower_dentry) { /* should never happen after above revalidate */
5746 +               err = -EINVAL;
5747 +               goto out;
5748 +       }
5749 +       lower_inode = unionfs_lower_inode(inode);
5750 +
5751 +       /* check if user has permission to change lower inode */
5752 +       err = inode_change_ok(lower_inode, ia);
5753 +       if (err)
5754 +               goto out;
5755 +
5756 +       /* copyup if the file is on a read only branch */
5757 +       if (is_robranch_super(dentry->d_sb, bstart)
5758 +           || __is_rdonly(lower_inode)) {
5759 +               /* check if we have a branch to copy up to */
5760 +               if (bstart <= 0) {
5761 +                       err = -EACCES;  /* no branch to the left of bstart */
5762 +                       goto out;
5763 +               }
5764 +
5765 +               if (ia->ia_valid & ATTR_SIZE)
5766 +                       size = ia->ia_size;
5767 +               else
5768 +                       size = i_size_read(inode);
5769 +               /* copyup to next available branch */
5770 +               for (bindex = bstart - 1; bindex >= 0; bindex--) {
5771 +                       err = copyup_dentry(parent->d_inode,
5772 +                                           dentry, bstart, bindex,
5773 +                                           dentry->d_name.name,
5774 +                                           dentry->d_name.len,
5775 +                                           NULL, size);
5776 +                       if (!err)
5777 +                               break;
5778 +               }
5779 +               if (err)        /* every candidate branch failed */
5780 +                       goto out;
5781 +               /* get updated lower_dentry/inode after copyup */
5782 +               lower_dentry = unionfs_lower_dentry(dentry);
5783 +               lower_inode = unionfs_lower_inode(inode);
5784 +       }
5785 +
5786 +       /*
5787 +        * If shrinking, first truncate upper level to cancel writing dirty
5788 +        * pages beyond the new eof; and also if its maxbytes is more
5789 +        * limiting (fail with -EFBIG before making any change to the lower
5790 +        * level).  There is no need to vmtruncate the upper level
5791 +        * afterwards in the other cases: we fsstack_copy_inode_size from
5792 +        * the lower level.
5793 +        */
5794 +       if (ia->ia_valid & ATTR_SIZE) {
5795 +               size = i_size_read(inode);
5796 +               if (ia->ia_size < size || (ia->ia_size > size &&
5797 +                   inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) {
5798 +                       err = vmtruncate(inode, ia->ia_size);
5799 +                       if (err)
5800 +                               goto out;
5801 +               }
5802 +       }
5803 +
5804 +       /* notify the (possibly copied-up) lower inode */
5805 +       /*
5806 +        * Note: we use lower_dentry->d_inode, because lower_inode may be
5807 +        * unlinked (no inode->i_sb and i_ino==0).  This happens if someone
5808 +        * tries to open(), unlink(), then ftruncate() a file.
5809 +        */
5810 +       mutex_lock(&lower_dentry->d_inode->i_mutex);
5811 +       err = notify_change(lower_dentry, ia);
5812 +       mutex_unlock(&lower_dentry->d_inode->i_mutex);
5813 +       if (err)
5814 +               goto out;
5815 +
5816 +       /* get attributes from the first lower inode */
5817 +       if (ibstart(inode) >= 0)
5818 +               unionfs_copy_attr_all(inode, lower_inode);
5819 +       /*
5820 +        * unionfs_copy_attr_all will copy the lower times to our inode if
5821 +        * the lower ones are newer (useful for cache coherency).  However,
5822 +        * ->setattr is the only place in which we may have to copy the
5823 +        * lower inode times absolutely, to support utimes(2).
5824 +        */
5825 +       if (ia->ia_valid & ATTR_MTIME_SET)
5826 +               inode->i_mtime = lower_inode->i_mtime;
5827 +       if (ia->ia_valid & ATTR_CTIME)
5828 +               inode->i_ctime = lower_inode->i_ctime;
5829 +       if (ia->ia_valid & ATTR_ATIME_SET)
5830 +               inode->i_atime = lower_inode->i_atime;
5831 +       fsstack_copy_inode_size(inode, lower_inode);
5832 +
5833 +out:
5834 +       if (!err)
5835 +               unionfs_check_dentry(dentry);
5836 +       unionfs_unlock_dentry(dentry);
5837 +       unionfs_unlock_parent(dentry, parent);
5838 +       unionfs_read_unlock(dentry->d_sb);
5839 +
5840 +       return err;
5841 +}
5842 +
5843 +struct inode_operations unionfs_symlink_iops = {
5844 +       .readlink       = unionfs_readlink,
5845 +       .permission     = unionfs_permission,
5846 +       .follow_link    = unionfs_follow_link,  /* kmalloc()s the link buffer */
5847 +       .setattr        = unionfs_setattr,
5848 +       .put_link       = unionfs_put_link,     /* kfree()s that buffer */
5849 +};
5850 +
5851 +struct inode_operations unionfs_dir_iops = {
5852 +       .create         = unionfs_create,
5853 +       .lookup         = unionfs_lookup,
5854 +       .link           = unionfs_link,
5855 +       .unlink         = unionfs_unlink,
5856 +       .symlink        = unionfs_symlink,
5857 +       .mkdir          = unionfs_mkdir,        /* also calls make_dir_opaque() */
5858 +       .rmdir          = unionfs_rmdir,
5859 +       .mknod          = unionfs_mknod,
5860 +       .rename         = unionfs_rename,
5861 +       .permission     = unionfs_permission,
5862 +       .setattr        = unionfs_setattr,
5863 +#ifdef CONFIG_UNION_FS_XATTR
5864 +       .setxattr       = unionfs_setxattr,
5865 +       .getxattr       = unionfs_getxattr,
5866 +       .removexattr    = unionfs_removexattr,
5867 +       .listxattr      = unionfs_listxattr,
5868 +#endif /* CONFIG_UNION_FS_XATTR */
5869 +};
5870 +
5871 +struct inode_operations unionfs_main_iops = {
5872 +       .permission     = unionfs_permission,   /* same handler as dir/symlink */
5873 +       .setattr        = unionfs_setattr,      /* same handler as dir/symlink */
5874 +#ifdef CONFIG_UNION_FS_XATTR
5875 +       .setxattr       = unionfs_setxattr,
5876 +       .getxattr       = unionfs_getxattr,
5877 +       .removexattr    = unionfs_removexattr,
5878 +       .listxattr      = unionfs_listxattr,
5879 +#endif /* CONFIG_UNION_FS_XATTR */
5880 +};
5881 --- /dev/null
5882 +++ kernel-2.6.28/fs/unionfs/lookup.c
5883 @@ -0,0 +1,569 @@
5884 +/*
5885 + * Copyright (c) 2003-2009 Erez Zadok
5886 + * Copyright (c) 2003-2006 Charles P. Wright
5887 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5888 + * Copyright (c) 2005-2006 Junjiro Okajima
5889 + * Copyright (c) 2005      Arun M. Krishnakumar
5890 + * Copyright (c) 2004-2006 David P. Quigley
5891 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5892 + * Copyright (c) 2003      Puja Gupta
5893 + * Copyright (c) 2003      Harikesavan Krishnan
5894 + * Copyright (c) 2003-2009 Stony Brook University
5895 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
5896 + *
5897 + * This program is free software; you can redistribute it and/or modify
5898 + * it under the terms of the GNU General Public License version 2 as
5899 + * published by the Free Software Foundation.
5900 + */
5901 +
5902 +#include "union.h"
5903 +
5904 +/*
5905 + * Lookup one path component @name relative to a <base,mnt> path pair.
5906 + * Behaves nearly the same as lookup_one_len (i.e., return negative dentry
5907 + * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and
5908 + * other lower mounts properly.  If @new_mnt is non-null, will fill in the
5909 + * new mnt there.  Caller is responsible to dput/mntput/path_put returned
5910 + * @dentry and @new_mnt.
5911 + */
5912 +struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt,
5913 +                           const char *name, struct vfsmount **new_mnt)
5914 +{
5915 +       struct dentry *dentry = NULL;
5916 +       struct nameidata lower_nd;
5917 +       int err;
5918 +
5919 +       /* we use flags=0 to get basic lookup */
5920 +       err = vfs_path_lookup(base, mnt, name, 0, &lower_nd);
5921 +
5922 +       switch (err) {
5923 +       case 0: /* no error */
5924 +               dentry = lower_nd.path.dentry;
5925 +               if (new_mnt)
5926 +                       *new_mnt = lower_nd.path.mnt; /* rc already inc'ed */
5927 +               break;
5928 +       case -ENOENT:
5929 +                /*
5930 +                 * We don't consider ENOENT an error, and we want to return
5931 +                 * a negative dentry (a la lookup_one_len).  As we know
5932 +                 * there was no inode for this name before (-ENOENT), then
5933 +                 * it's safe to call lookup_one_len (which doesn't take a
5934 +                 * vfsmount).  NOTE(review): lower_nd is from a failed lookup.
5935 +                 */
5936 +               dentry = lookup_one_len(name, base, strlen(name));
5937 +               if (new_mnt)
5938 +                       *new_mnt = mntget(lower_nd.path.mnt);
5939 +               break;
5940 +       default: /* all other real errors */
5941 +               dentry = ERR_PTR(err);
5942 +               break;
5943 +       }
5944 +
5945 +       return dentry;
5946 +}
5947 +
5948 +/*
5949 + * This is a utility function that fills in a unionfs dentry.
5950 + * Caller must lock this dentry with unionfs_lock_dentry.
5951 + *
5952 + * Returns: 0 (ok), or -ERRNO if an error occurred.
5953 + * XXX: get rid of _partial_lookup and make callers call _lookup_full directly
5954 + */
5955 +int unionfs_partial_lookup(struct dentry *dentry, struct dentry *parent)
5956 +{
5957 +       struct dentry *tmp;
5958 +       int err = -ENOSYS;      /* stays -ENOSYS if lookup returns @dentry itself */
5959 +
5960 +       tmp = unionfs_lookup_full(dentry, parent, INTERPOSE_PARTIAL);
5961 +
5962 +       if (!tmp) {     /* NULL result is treated as success */
5963 +               err = 0;
5964 +               goto out;
5965 +       }
5966 +       if (IS_ERR(tmp)) {
5967 +               err = PTR_ERR(tmp);
5968 +               goto out;
5969 +       }
5970 +       /* XXX: need to change the interface */
5971 +       BUG_ON(tmp != dentry);
5972 +out:
5973 +       return err;
5974 +}
5975 +
5976 +/* Slab cache of struct unionfs_dentry_info, the per-dentry private data. */
5977 +static struct kmem_cache *unionfs_dentry_cachep;
5978 +int unionfs_init_dentry_cache(void)
5979 +{
5980 +       unionfs_dentry_cachep =
5981 +               kmem_cache_create("unionfs_dentry",
5982 +                                 sizeof(struct unionfs_dentry_info),
5983 +                                 0, SLAB_RECLAIM_ACCOUNT, NULL);
5984 +
5985 +       return (unionfs_dentry_cachep ? 0 : -ENOMEM); /* NULL: create failed */
5986 +}
5987 +
5988 +void unionfs_destroy_dentry_cache(void)
5989 +{
5990 +       if (unionfs_dentry_cachep)      /* NULL if the cache was never created */
5991 +               kmem_cache_destroy(unionfs_dentry_cachep);
5992 +}
5993 +
5994 +void free_dentry_private_data(struct dentry *dentry)
5995 +{
5996 +       if (!dentry || !dentry->d_fsdata)
5997 +               return;         /* nothing attached, nothing to free */
5998 +       kfree(UNIONFS_D(dentry)->lower_paths);  /* the lower path array first */
5999 +       UNIONFS_D(dentry)->lower_paths = NULL;
6000 +       kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6001 +       dentry->d_fsdata = NULL;        /* so a repeated call is a no-op */
6002 +}
6003 +
6004 +static inline int __realloc_dentry_private_data(struct dentry *dentry)
6005 +{
6006 +       struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6007 +       void *p;
6008 +       int size;
6009 +
6010 +       BUG_ON(!info);
6011 +
6012 +       size = sizeof(struct path) * sbmax(dentry->d_sb);
6013 +       p = krealloc(info->lower_paths, size, GFP_ATOMIC);
6014 +       if (unlikely(!p))
6015 +               return -ENOMEM; /* info->lower_paths is left untouched */
6016 +
6017 +       info->lower_paths = p;
6018 +
6019 +       info->bstart = -1;      /* branch indices are reset to -1 */
6020 +       info->bend = -1;
6021 +       info->bopaque = -1;
6022 +       info->bcount = sbmax(dentry->d_sb);
6023 +       atomic_set(&info->generation,
6024 +                       atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6025 +
6026 +       memset(info->lower_paths, 0, size);     /* old entries are discarded */
6027 +
6028 +       return 0;
6029 +}
6030 +
6031 +/* UNIONFS_D(dentry)->lock must be locked; on -ENOMEM private data is freed */
6032 +int realloc_dentry_private_data(struct dentry *dentry)
6033 +{
6034 +       if (!__realloc_dentry_private_data(dentry))
6035 +               return 0;
6036 +
6037 +       /* free_dentry_private_data() kfree()s lower_paths; don't do it twice */
6038 +       free_dentry_private_data(dentry);
6039 +       return -ENOMEM;
6040 +}
6041 +
6042 +/* allocate new dentry private data; returns 0 with info->lock held */
6043 +int new_dentry_private_data(struct dentry *dentry, int subclass)
6044 +{
6045 +       struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6046 +
6047 +       BUG_ON(info); /* dentry must not have private data yet */
6048 +
6049 +       info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6050 +       if (unlikely(!info))
6051 +               return -ENOMEM;
6052 +
6053 +       mutex_init(&info->lock);
6054 +       mutex_lock_nested(&info->lock, subclass);
6055 +
6056 +       info->lower_paths = NULL; /* krealloc below then allocates fresh */
6057 +
6058 +       dentry->d_fsdata = info;
6059 +
6060 +       if (!__realloc_dentry_private_data(dentry))
6061 +               return 0;
6062 +
6063 +       mutex_unlock(&info->lock); /* failure: unlock before freeing info */
6064 +       free_dentry_private_data(dentry);
6065 +       return -ENOMEM;
6066 +}
6067 +
6068 +/*
6069 + * scan the lower dentries from dbstart to dbend: dput and clear negative
6070 + * ones, and set dbstart to the first branch with a positive lower dentry
6071 + */
6072 +void update_bstart(struct dentry *dentry)
6073 +{
6074 +       int bindex;
6075 +       int bstart = dbstart(dentry);
6076 +       int bend = dbend(dentry);
6077 +       struct dentry *lower_dentry;
6078 +
6079 +       for (bindex = bstart; bindex <= bend; bindex++) {
6080 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6081 +               if (!lower_dentry)
6082 +                       continue;
6083 +               if (lower_dentry->d_inode) {
6084 +                       dbstart(dentry) = bindex;
6085 +                       break;
6086 +               }
6087 +               dput(lower_dentry); /* negative: drop it and clear the slot */
6088 +               unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6089 +       }
6090 +}
6091 +
6092 +
6093 +/*
6094 + * Initialize a nameidata structure (the intent part) we can pass to a lower
6095 + * file system.  Returns 0 on success or -error (only -ENOMEM possible).
6096 + * Inside that nd structure, this function may also return an allocated
6097 + * struct file (for open intents).  The caller, when done with this nd, must
6098 + * kfree the intent file (using release_lower_nd).
6099 + *
6100 + * XXX: this code, and the callers of this code, should be redone using
6101 + * vfs_path_lookup() when (1) the nameidata structure is refactored into a
6102 + * separate intent-structure, and (2) open_namei() is broken into a VFS-only
6103 + * function and a method that other file systems can call.
6104 + */
6105 +int init_lower_nd(struct nameidata *nd, unsigned int flags)
6106 +{
6107 +       int err = 0;
6108 +#ifdef ALLOC_LOWER_ND_FILE
6109 +       /*
6110 +        * XXX: one day we may need to have the lower return an open file
6111 +        * for us.  It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may
6112 +        * very well be needed for nfs4.
6113 +        */
6114 +       struct file *file;
6115 +#endif /* ALLOC_LOWER_ND_FILE */
6116 +
6117 +       memset(nd, 0, sizeof(struct nameidata));
6118 +       if (!flags) /* no intent requested: nd stays all-zero */
6119 +               return err;
6120 +
6121 +       switch (flags) { /* exact match only; other values hit BUG() */
6122 +       case LOOKUP_CREATE:
6123 +               nd->intent.open.flags |= O_CREAT;
6124 +               /* fall through: shared code for create/open cases */
6125 +       case LOOKUP_OPEN:
6126 +               nd->flags = flags;
6127 +               nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE);
6128 +#ifdef ALLOC_LOWER_ND_FILE
6129 +               file = kzalloc(sizeof(struct file), GFP_KERNEL);
6130 +               if (unlikely(!file)) {
6131 +                       err = -ENOMEM;
6132 +                       break; /* exit switch statement and thus return */
6133 +               }
6134 +               nd->intent.open.file = file;
6135 +#endif /* ALLOC_LOWER_ND_FILE */
6136 +               break;
6137 +       default:
6138 +               /*
6139 +                * We should never get here, for now.
6140 +                * We can add new cases here later on.
6141 +                */
6142 +               pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags);
6143 +               BUG();
6144 +               break;
6145 +       }
6146 +
6147 +       return err;
6148 +}
6149 +
6150 +void release_lower_nd(struct nameidata *nd, int err)
6151 +{
6152 +       if (!nd->intent.open.file) /* no intent file: nothing to release */
6153 +               return;
6154 +       else if (!err) /* release the open intent only when err is 0 */
6155 +               release_open_intent(nd);
6156 +#ifdef ALLOC_LOWER_ND_FILE
6157 +       kfree(nd->intent.open.file); /* NOTE(review): also runs when !err */
6158 +#endif /* ALLOC_LOWER_ND_FILE */
6159 +}
6160 +
6161 +/*
6162 + * Main (and complex) driver function for Unionfs's lookup
6163 + *
6164 + * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
6165 + * PTR if d_splice returned a different dentry.
6166 + *
6167 + * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
6168 + * inode info must be locked.  If lookupmode is INTERPOSE_LOOKUP (i.e., a
6169 + * newly looked-up dentry), then unionfs_lookup_full will return a locked
6170 + * dentry's info, which the caller must unlock.
6171 + */
6172 +struct dentry *unionfs_lookup_full(struct dentry *dentry,
6173 +                                  struct dentry *parent, int lookupmode)
6174 +{
6175 +       int err = 0;
6176 +       struct dentry *lower_dentry = NULL;
6177 +       struct vfsmount *lower_mnt;
6178 +       struct vfsmount *lower_dir_mnt;
6179 +       struct dentry *wh_lower_dentry = NULL;
6180 +       struct dentry *lower_dir_dentry = NULL;
6181 +       struct dentry *d_interposed = NULL;
6182 +       int bindex, bstart, bend, bopaque;
6183 +       int opaque, num_positive = 0;
6184 +       const char *name;
6185 +       int namelen;
6186 +       int pos_start, pos_end;
6187 +
6188 +       /*
6189 +        * We should already have a lock on this dentry in the case of a
6190 +        * partial lookup, or a revalidation.  Otherwise it is returned from
6191 +        * new_dentry_private_data already locked.
6192 +        */
6193 +       verify_locked(dentry);
6194 +       verify_locked(parent);
6195 +
6196 +       /* must initialize dentry operations */
6197 +       dentry->d_op = &unionfs_dops;
6198 +
6199 +       /* We never partial lookup the root directory. */
6200 +       if (IS_ROOT(dentry))
6201 +               goto out;
6202 +
6203 +       name = dentry->d_name.name;
6204 +       namelen = dentry->d_name.len;
6205 +
6206 +       /* No dentries should get created for possible whiteout names. */
6207 +       if (!is_validname(name)) {
6208 +               err = -EPERM;
6209 +               goto out_free;
6210 +       }
6211 +
6212 +       /* Now start the actual lookup procedure. */
6213 +       bstart = dbstart(parent);
6214 +       bend = dbend(parent);
6215 +       bopaque = dbopaque(parent);
6216 +       BUG_ON(bstart < 0);
6217 +
6218 +       /* adjust bend to bopaque if needed */
6219 +       if ((bopaque >= 0) && (bopaque < bend))
6220 +               bend = bopaque;
6221 +
6222 +       /* lookup all possible dentries */
6223 +       for (bindex = bstart; bindex <= bend; bindex++) {
6224 +
6225 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6226 +               lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
6227 +
6228 +               /* skip branches that already have a lower dentry */
6229 +               if (lower_dentry) {
6230 +                       if (dbstart(dentry) < 0)
6231 +                               dbstart(dentry) = bindex;
6232 +                       if (bindex > dbend(dentry))
6233 +                               dbend(dentry) = bindex;
6234 +                       if (lower_dentry->d_inode)
6235 +                               num_positive++;
6236 +                       continue;
6237 +               }
6238 +
6239 +               lower_dir_dentry =
6240 +                       unionfs_lower_dentry_idx(parent, bindex);
6241 +               /* if the lower dentry's parent does not exist, skip this */
6242 +               if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
6243 +                       continue;
6244 +
6245 +               /* also skip it if the parent isn't a directory. */
6246 +               if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6247 +                       continue; /* XXX: should be BUG_ON */
6248 +
6249 +               /* check for whiteouts: stop lookup if found */
6250 +               wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry);
6251 +               if (IS_ERR(wh_lower_dentry)) {
6252 +                       err = PTR_ERR(wh_lower_dentry);
6253 +                       goto out_free;
6254 +               }
6255 +               if (wh_lower_dentry->d_inode) {
6256 +                       dbend(dentry) = dbopaque(dentry) = bindex;
6257 +                       if (dbstart(dentry) < 0)
6258 +                               dbstart(dentry) = bindex;
6259 +                       dput(wh_lower_dentry);
6260 +                       break;
6261 +               }
6262 +               dput(wh_lower_dentry);
6263 +
6264 +               /* Now do regular lookup; lookup @name */
6265 +               lower_dir_mnt = unionfs_lower_mnt_idx(parent, bindex);
6266 +               lower_mnt = NULL; /* XXX: needed? */
6267 +
6268 +               lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt,
6269 +                                           name, &lower_mnt);
6270 +
6271 +               if (IS_ERR(lower_dentry)) {
6272 +                       err = PTR_ERR(lower_dentry);
6273 +                       goto out_free;
6274 +               }
6275 +               unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6276 +               if (!lower_mnt)
6277 +                       lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6278 +                                                  bindex);
6279 +               unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
6280 +
6281 +               /* adjust dbstart/end */
6282 +               if (dbstart(dentry) < 0)
6283 +                       dbstart(dentry) = bindex;
6284 +               if (bindex > dbend(dentry))
6285 +                       dbend(dentry) = bindex;
6286 +               /*
6287 +                * We always store the lower dentries above, and update
6288 +                * dbstart/dbend, even if the whole unionfs dentry is
6289 +                * negative (i.e., no lower inodes).
6290 +                */
6291 +               if (!lower_dentry->d_inode)
6292 +                       continue;
6293 +               num_positive++;
6294 +
6295 +               /*
6296 +                * check if we just found an opaque directory, if so, stop
6297 +                * lookups here.
6298 +                */
6299 +               if (!S_ISDIR(lower_dentry->d_inode->i_mode))
6300 +                       continue;
6301 +               opaque = is_opaque_dir(dentry, bindex);
6302 +               if (opaque < 0) {
6303 +                       err = opaque;
6304 +                       goto out_free;
6305 +               } else if (opaque) {
6306 +                       dbend(dentry) = dbopaque(dentry) = bindex;
6307 +                       break;
6308 +               }
6309 +               dbend(dentry) = bindex;
6310 +
6311 +               /* update parent directory's atime with the bindex */
6312 +               fsstack_copy_attr_atime(parent->d_inode,
6313 +                                       lower_dir_dentry->d_inode);
6314 +       }
6315 +
6316 +       /* sanity checks, then decide whether to process a negative dentry */
6317 +       BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
6318 +       BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
6319 +
6320 +       if (num_positive > 0)
6321 +               goto out_positive;
6322 +
6323 +       /*** handle NEGATIVE dentries ***/
6324 +
6325 +       /*
6326 +        * If negative, keep only first lower negative dentry, to save on
6327 +        * memory.
6328 +        */
6329 +       if (dbstart(dentry) < dbend(dentry)) {
6330 +               path_put_lowers(dentry, dbstart(dentry) + 1,
6331 +                               dbend(dentry), false);
6332 +               dbend(dentry) = dbstart(dentry);
6333 +       }
6334 +       if (lookupmode == INTERPOSE_PARTIAL)
6335 +               goto out;
6336 +       if (lookupmode == INTERPOSE_LOOKUP) {
6337 +               /*
6338 +                * If all we found was a whiteout in the first available
6339 +                * branch, then create a negative dentry for a possibly new
6340 +                * file to be created.
6341 +                */
6342 +               if (dbopaque(dentry) < 0)
6343 +                       goto out;
6344 +               /* XXX: need to get mnt here */
6345 +               bindex = dbstart(dentry);
6346 +               if (unionfs_lower_dentry_idx(dentry, bindex))
6347 +                       goto out;
6348 +               lower_dir_dentry =
6349 +                       unionfs_lower_dentry_idx(parent, bindex);
6350 +               if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
6351 +                       goto out;
6352 +               if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6353 +                       goto out; /* XXX: should be BUG_ON */
6354 +               /* XXX: do we need to cross bind mounts here? */
6355 +               lower_dentry = lookup_one_len(name, lower_dir_dentry, namelen);
6356 +               if (IS_ERR(lower_dentry)) {
6357 +                       err = PTR_ERR(lower_dentry);
6358 +                       goto out;
6359 +               }
6360 +               /* XXX: need to mntget/mntput as needed too! */
6361 +               unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6362 +               /* XXX: wrong mnt for crossing bind mounts! */
6363 +               lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex);
6364 +               unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
6365 +
6366 +               goto out;
6367 +       }
6368 +
6369 +       /* if we're revalidating a positive dentry, don't make it negative */
6370 +       if (lookupmode != INTERPOSE_REVAL)
6371 +               d_add(dentry, NULL);
6372 +
6373 +       goto out;
6374 +
6375 +out_positive:
6376 +       /*** handle POSITIVE dentries ***/
6377 +
6378 +       /*
6379 +        * This unionfs dentry is positive (at least one lower inode
6380 +        * exists), so scan entire dentry from beginning to end, and remove
6381 +        * any negative lower dentries, if any.  Then, update dbstart/dbend
6382 +        * to reflect the start/end of positive dentries.
6383 +        */
6384 +       pos_start = pos_end = -1;
6385 +       for (bindex = bstart; bindex <= bend; bindex++) {
6386 +               lower_dentry = unionfs_lower_dentry_idx(dentry,
6387 +                                                       bindex);
6388 +               if (lower_dentry && lower_dentry->d_inode) {
6389 +                       if (pos_start < 0)
6390 +                               pos_start = bindex;
6391 +                       if (bindex > pos_end)
6392 +                               pos_end = bindex;
6393 +                       continue;
6394 +               }
6395 +               path_put_lowers(dentry, bindex, bindex, false);
6396 +       }
6397 +       if (pos_start >= 0)
6398 +               dbstart(dentry) = pos_start;
6399 +       if (pos_end >= 0)
6400 +               dbend(dentry) = pos_end;
6401 +
6402 +       /* Partial lookups need to re-interpose, or throw away older negs. */
6403 +       if (lookupmode == INTERPOSE_PARTIAL) {
6404 +               if (dentry->d_inode) {
6405 +                       unionfs_reinterpose(dentry);
6406 +                       goto out;
6407 +               }
6408 +
6409 +               /*
6410 +                * This dentry had no inode but now has positive lowers, so
6411 +                * treat it as a negative revalidation.
6412 +                */
6413 +               lookupmode = INTERPOSE_REVAL_NEG;
6414 +               update_bstart(dentry);
6415 +       }
6416 +
6417 +       /*
6418 +        * Interpose can return a dentry if d_splice returned a different
6419 +        * dentry.
6420 +        */
6421 +       d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6422 +       if (IS_ERR(d_interposed))
6423 +               err = PTR_ERR(d_interposed);
6424 +       else if (d_interposed)
6425 +               dentry = d_interposed;
6426 +
6427 +       if (!err)
6428 +               goto out;
6429 +       d_drop(dentry);
6430 +
6431 +out_free:
6432 +       /* should dput/mntput all the underlying dentries on error condition */
6433 +       if (dbstart(dentry) >= 0)
6434 +               path_put_lowers_all(dentry, false);
6435 +       /* free lower_paths unconditionally */
6436 +       kfree(UNIONFS_D(dentry)->lower_paths);
6437 +       UNIONFS_D(dentry)->lower_paths = NULL;
6438 +
6439 +out:
6440 +       if (dentry && UNIONFS_D(dentry)) {
6441 +               BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
6442 +               BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
6443 +       }
6444 +       if (d_interposed && UNIONFS_D(d_interposed)) {
6445 +               BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0);
6446 +               BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0);
6447 +       }
6448 +
6449 +       if (!err && d_interposed)
6450 +               return d_interposed;
6451 +       return ERR_PTR(err);
6452 +}
6453 --- /dev/null
6454 +++ kernel-2.6.28/fs/unionfs/main.c
6455 @@ -0,0 +1,758 @@
6456 +/*
6457 + * Copyright (c) 2003-2009 Erez Zadok
6458 + * Copyright (c) 2003-2006 Charles P. Wright
6459 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6460 + * Copyright (c) 2005-2006 Junjiro Okajima
6461 + * Copyright (c) 2005      Arun M. Krishnakumar
6462 + * Copyright (c) 2004-2006 David P. Quigley
6463 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6464 + * Copyright (c) 2003      Puja Gupta
6465 + * Copyright (c) 2003      Harikesavan Krishnan
6466 + * Copyright (c) 2003-2009 Stony Brook University
6467 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
6468 + *
6469 + * This program is free software; you can redistribute it and/or modify
6470 + * it under the terms of the GNU General Public License version 2 as
6471 + * published by the Free Software Foundation.
6472 + */
6473 +
6474 +#include "union.h"
6475 +#include <linux/module.h>
6476 +#include <linux/moduleparam.h>
6477 +
6478 +static void unionfs_fill_inode(struct dentry *dentry,
6479 +                              struct inode *inode)
6480 +{
6481 +       struct inode *lower_inode;
6482 +       struct dentry *lower_dentry;
6483 +       int bindex, bstart, bend;
6484 +
6485 +       bstart = dbstart(dentry);
6486 +       bend = dbend(dentry);
6487 +
6488 +       for (bindex = bstart; bindex <= bend; bindex++) {
6489 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6490 +               if (!lower_dentry) {
6491 +                       unionfs_set_lower_inode_idx(inode, bindex, NULL);
6492 +                       continue;
6493 +               }
6494 +
6495 +               /* Skip negative lower dentries; igrab the positive ones. */
6496 +               if (!lower_dentry->d_inode)
6497 +                       continue;
6498 +
6499 +               unionfs_set_lower_inode_idx(inode, bindex,
6500 +                                           igrab(lower_dentry->d_inode));
6501 +       }
6502 +
6503 +       ibstart(inode) = dbstart(dentry);
6504 +       ibend(inode) = dbend(dentry);
6505 +
6506 +       /* Use attributes from the first branch. */
6507 +       lower_inode = unionfs_lower_inode(inode);
6508 +
6509 +       /* Use different set of inode ops for symlinks & directories */
6510 +       if (S_ISLNK(lower_inode->i_mode))
6511 +               inode->i_op = &unionfs_symlink_iops;
6512 +       else if (S_ISDIR(lower_inode->i_mode))
6513 +               inode->i_op = &unionfs_dir_iops;
6514 +
6515 +       /* Use different set of file ops for directories */
6516 +       if (S_ISDIR(lower_inode->i_mode))
6517 +               inode->i_fop = &unionfs_dir_fops;
6518 +
6519 +       /* properly initialize special inodes */
6520 +       if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6521 +           S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6522 +               init_special_inode(inode, lower_inode->i_mode,
6523 +                                  lower_inode->i_rdev);
6524 +
6525 +       /* all well, copy inode attributes */
6526 +       unionfs_copy_attr_all(inode, lower_inode);
6527 +       fsstack_copy_inode_size(inode, lower_inode);
6528 +}
6529 +
6530 +/*
6531 + * Connect a unionfs dentry/inode with several lower ones.  This is
6532 + * the classic stackable file system "vnode interposition" action.
6533 + * Returns ERR_PTR on error, else NULL or the dentry from d_splice_alias.
6534 + * @sb: unionfs's super_block
6535 + */
6536 +struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6537 +                                int flag)
6538 +{
6539 +       int err = 0;
6540 +       struct inode *inode;
6541 +       int need_fill_inode = 1;
6542 +       struct dentry *spliced = NULL;
6543 +
6544 +       verify_locked(dentry);
6545 +
6546 +       /*
6547 +        * We allocate our new inode below by calling unionfs_iget,
6548 +        * which will initialize some of the new inode's fields
6549 +        */
6550 +
6551 +       /*
6552 +        * On revalidate we've already got our own inode and just need
6553 +        * to fix it up.
6554 +        */
6555 +       if (flag == INTERPOSE_REVAL) {
6556 +               inode = dentry->d_inode;
6557 +               UNIONFS_I(inode)->bstart = -1;
6558 +               UNIONFS_I(inode)->bend = -1;
6559 +               atomic_set(&UNIONFS_I(inode)->generation,
6560 +                          atomic_read(&UNIONFS_SB(sb)->generation));
6561 +
6562 +               UNIONFS_I(inode)->lower_inodes =
6563 +                       kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6564 +               if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
6565 +                       err = -ENOMEM;
6566 +                       goto out;
6567 +               }
6568 +       } else {
6569 +               /* get unique inode number for unionfs */
6570 +               inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6571 +               if (IS_ERR(inode)) {
6572 +                       err = PTR_ERR(inode);
6573 +                       goto out;
6574 +               }
6575 +               if (atomic_read(&inode->i_count) > 1) /* don't fill it here */
6576 +                       goto skip;
6577 +       }
6578 +
6579 +       need_fill_inode = 0;
6580 +       unionfs_fill_inode(dentry, inode);
6581 +
6582 +skip:
6583 +       /* only (our) lookup wants to do a d_add */
6584 +       switch (flag) {
6585 +       case INTERPOSE_DEFAULT:
6586 +               /* for operations which create new inodes */
6587 +               d_add(dentry, inode);
6588 +               break;
6589 +       case INTERPOSE_REVAL_NEG:
6590 +               d_instantiate(dentry, inode);
6591 +               break;
6592 +       case INTERPOSE_LOOKUP:
6593 +               spliced = d_splice_alias(inode, dentry);
6594 +               if (spliced && spliced != dentry) {
6595 +                       /*
6596 +                        * d_splice can return a dentry if it was
6597 +                        * disconnected and had to be moved.  We must ensure
6598 +                        * that the private data of the new dentry is
6599 +                        * correct and that the inode info was filled
6600 +                        * properly.  Finally we must return this new
6601 +                        * dentry.
6602 +                        */
6603 +                       spliced->d_op = &unionfs_dops;
6604 +                       spliced->d_fsdata = dentry->d_fsdata;
6605 +                       dentry->d_fsdata = NULL;
6606 +                       dentry = spliced;
6607 +                       if (need_fill_inode) {
6608 +                               need_fill_inode = 0;
6609 +                               unionfs_fill_inode(dentry, inode);
6610 +                       }
6611 +                       goto out_spliced;
6612 +               } else if (!spliced) {
6613 +                       if (need_fill_inode) {
6614 +                               need_fill_inode = 0;
6615 +                               unionfs_fill_inode(dentry, inode);
6616 +                               goto out_spliced;
6617 +                       }
6618 +               }
6619 +               break;
6620 +       case INTERPOSE_REVAL:
6621 +               /* Do nothing. */
6622 +               break;
6623 +       default:
6624 +               printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n");
6625 +               BUG();
6626 +       }
6627 +       goto out;
6628 +
6629 +out_spliced:
6630 +       if (!err)
6631 +               return spliced;
6632 +out:
6633 +       return ERR_PTR(err); /* err is 0 on the success paths reaching here */
6634 +}
6635 +
6636 +/* like interpose above, but for an already existing dentry */
6637 +void unionfs_reinterpose(struct dentry *dentry)
6638 +{
6639 +       struct dentry *lower_dentry;
6640 +       struct inode *inode;
6641 +       int bindex, bstart, bend;
6642 +
6643 +       verify_locked(dentry);
6644 +
6645 +       /* The inode is already attached to the dentry; reuse it */
6646 +       inode = dentry->d_inode;
6647 +
6648 +       bstart = dbstart(dentry);
6649 +       bend = dbend(dentry);
6650 +       for (bindex = bstart; bindex <= bend; bindex++) {
6651 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6652 +               if (!lower_dentry)
6653 +                       continue;
6654 +
6655 +               if (!lower_dentry->d_inode)
6656 +                       continue;
6657 +               if (unionfs_lower_inode_idx(inode, bindex)) /* already set */
6658 +                       continue;
6659 +               unionfs_set_lower_inode_idx(inode, bindex,
6660 +                                           igrab(lower_dentry->d_inode));
6661 +       }
6662 +       ibstart(inode) = dbstart(dentry);
6663 +       ibend(inode) = dbend(dentry);
6664 +}
6665 +
6666 +/*
6667 + * make sure the branch we just looked up (nd) makes sense; returns 0 if:
6668 + *
6669 + * 1) we're not trying to stack unionfs on top of unionfs (else -EINVAL)
6670 + * 2) it exists (else -ENOENT)
6671 + * 3) is a directory (else -ENOTDIR)
6672 + */
6673 +int check_branch(struct nameidata *nd)
6674 +{
6675 +       /* XXX: remove in ODF code -- stacking unions allowed there */
6676 +       if (!strcmp(nd->path.dentry->d_sb->s_type->name, UNIONFS_NAME))
6677 +               return -EINVAL;
6678 +       if (!nd->path.dentry->d_inode)
6679 +               return -ENOENT;
6680 +       if (!S_ISDIR(nd->path.dentry->d_inode->i_mode))
6681 +               return -ENOTDIR;
6682 +       return 0;
6683 +}
6684 +
6685 +/* returns 1 if either dentry is the other or one of its ancestors */
6686 +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6687 +{
6688 +       struct dentry *dent = NULL;
6689 +
6690 +       dent = dent1;
6691 +       while ((dent != dent2) && (dent->d_parent != dent)) /* walk up */
6692 +               dent = dent->d_parent;
6693 +
6694 +       if (dent == dent2)
6695 +               return 1;
6696 +
6697 +       dent = dent2; /* now the same walk in the other direction */
6698 +       while ((dent != dent1) && (dent->d_parent != dent))
6699 +               dent = dent->d_parent;
6700 +
6701 +       return (dent == dent1);
6702 +}
6703 +
6704 +/*
6705 + * Parse a "ro" or "rw" branch mode, defaulting to "rw" if @name is NULL.
6706 + * Fill the mode bits in @perms.  On an unknown string, return -EINVAL
6707 + * and leave @perms untouched.  Otherwise return 0.
6708 + */
6709 +int parse_branch_mode(const char *name, int *perms)
6710 +{
6711 +       if (!name || !strcmp(name, "rw")) {
6712 +               *perms = MAY_READ | MAY_WRITE;
6713 +               return 0;
6714 +       }
6715 +       if (!strcmp(name, "ro")) {
6716 +               *perms = MAY_READ;
6717 +               return 0;
6718 +       }
6719 +       return -EINVAL;
6720 +}
6721 +
6722 +/*
6723 + * parse the dirs= mount argument
6724 + *
6725 + * We don't need to lock the superblock private data's rwsem, as we get
6726 + * called only by unionfs_read_super - it is still a long time before anyone
6727 + * can even get a reference to us.
6728 + */
6729 +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6730 +                            *lower_root_info, char *options)
6731 +{
6732 +       struct nameidata nd;
6733 +       char *name;
6734 +       int err = 0;
6735 +       int branches = 1;
6736 +       int bindex = 0;
6737 +       int i = 0;
6738 +       int j = 0;
6739 +       struct dentry *dent1;
6740 +       struct dentry *dent2;
6741 +
6742 +       if (options[0] == '\0') {
6743 +               printk(KERN_ERR "unionfs: no branches specified\n");
6744 +               err = -EINVAL;
6745 +               goto out;
6746 +       }
6747 +
6748 +       /*
6749 +        * Each colon means we have a separator, this is really just a rough
6750 +        * guess, since strsep will handle empty fields for us.
6751 +        */
6752 +       for (i = 0; options[i]; i++)
6753 +               if (options[i] == ':')
6754 +                       branches++;
6755 +
6756 +       /* allocate space for underlying pointers to lower dentry */
6757 +       UNIONFS_SB(sb)->data =
6758 +               kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6759 +       if (unlikely(!UNIONFS_SB(sb)->data)) {
6760 +               err = -ENOMEM;
6761 +               goto out;
6762 +       }
6763 +
6764 +       lower_root_info->lower_paths =
6765 +               kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6766 +       if (unlikely(!lower_root_info->lower_paths)) {
6767 +               err = -ENOMEM;
6768 +               goto out;
6769 +       }
6770 +
6771 +       /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6772 +       branches = 0;
6773 +       while ((name = strsep(&options, ":")) != NULL) {
6774 +               int perms;
6775 +               char *mode = strchr(name, '=');
6776 +
6777 +               if (!name)
6778 +                       continue;
6779 +               if (!*name) {   /* bad use of ':' (extra colons) */
6780 +                       err = -EINVAL;
6781 +                       goto out;
6782 +               }
6783 +
6784 +               branches++;
6785 +
6786 +               /* strip off '=' if any */
6787 +               if (mode)
6788 +                       *mode++ = '\0';
6789 +
6790 +               err = parse_branch_mode(mode, &perms);
6791 +               if (err) {
6792 +                       printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
6793 +                              "branch %d\n", mode, bindex);
6794 +                       goto out;
6795 +               }
6796 +               /* ensure that leftmost branch is writeable */
6797 +               if (!bindex && !(perms & MAY_WRITE)) {
6798 +                       printk(KERN_ERR "unionfs: leftmost branch cannot be "
6799 +                              "read-only (use \"-o ro\" to create a "
6800 +                              "read-only union)\n");
6801 +                       err = -EINVAL;
6802 +                       goto out;
6803 +               }
6804 +
6805 +               err = path_lookup(name, LOOKUP_FOLLOW, &nd);
6806 +               if (err) {
6807 +                       printk(KERN_ERR "unionfs: error accessing "
6808 +                              "lower directory '%s' (error %d)\n",
6809 +                              name, err);
6810 +                       goto out;
6811 +               }
6812 +
6813 +               err = check_branch(&nd);
6814 +               if (err) {
6815 +                       printk(KERN_ERR "unionfs: lower directory "
6816 +                              "'%s' is not a valid branch\n", name);
6817 +                       path_put(&nd.path);
6818 +                       goto out;
6819 +               }
6820 +
6821 +               lower_root_info->lower_paths[bindex].dentry = nd.path.dentry;
6822 +               lower_root_info->lower_paths[bindex].mnt = nd.path.mnt;
6823 +
6824 +               set_branchperms(sb, bindex, perms);
6825 +               set_branch_count(sb, bindex, 0);
6826 +               new_branch_id(sb, bindex);
6827 +
6828 +               if (lower_root_info->bstart < 0)
6829 +                       lower_root_info->bstart = bindex;
6830 +               lower_root_info->bend = bindex;
6831 +               bindex++;
6832 +       }
6833 +
6834 +       if (branches == 0) {
6835 +               printk(KERN_ERR "unionfs: no branches specified\n");
6836 +               err = -EINVAL;
6837 +               goto out;
6838 +       }
6839 +
6840 +       BUG_ON(branches != (lower_root_info->bend + 1));
6841 +
6842 +       /*
6843 +        * Ensure that no overlaps exist in the branches.
6844 +        *
6845 +        * This test is required because the Linux kernel has no support
6846 +        * currently for ensuring coherency between stackable layers and
6847 +        * branches.  If we were to allow overlapping branches, it would be
6848 +        * possible, for example, to delete a file via one branch, which
6849 +        * would not be reflected in another branch.  Such incoherency could
6850 +        * lead to inconsistencies and even kernel oopses.  Rather than
6851 +        * implement hacks to work around some of these cache-coherency
6852 +        * problems, we prevent branch overlapping, for now.  A complete
6853 +        * solution will involve proper kernel/VFS support for cache
6854 +        * coherency, at which time we could safely remove this
6855 +        * branch-overlapping test.
6856 +        */
6857 +       for (i = 0; i < branches; i++) {
6858 +               dent1 = lower_root_info->lower_paths[i].dentry;
6859 +               for (j = i + 1; j < branches; j++) {
6860 +                       dent2 = lower_root_info->lower_paths[j].dentry;
6861 +                       if (is_branch_overlap(dent1, dent2)) {
6862 +                               printk(KERN_ERR "unionfs: branches %d and "
6863 +                                      "%d overlap\n", i, j);
6864 +                               err = -EINVAL;
6865 +                               goto out;
6866 +                       }
6867 +               }
6868 +       }
6869 +
6870 +out:
6871 +       if (err) {
6872 +               for (i = 0; i < branches; i++)
6873 +                       path_put(&lower_root_info->lower_paths[i]);
6874 +
6875 +               kfree(lower_root_info->lower_paths);
6876 +               kfree(UNIONFS_SB(sb)->data);
6877 +
6878 +               /*
6879 +                * MUST clear the pointers to prevent potential double free if
6880 +                * the caller dies later on
6881 +                */
6882 +               lower_root_info->lower_paths = NULL;
6883 +               UNIONFS_SB(sb)->data = NULL;
6884 +       }
6885 +       return err;
6886 +}
6887 +
6888 +/*
6889 + * Parse mount options.  See the manual page for usage instructions.
6890 + *
6891 + * Returns a newly allocated unionfs_dentry_info describing the lower
6892 + * branches on success, or an ERR_PTR()-encoded negative errno on failure.
6893 + */
6894 +static struct unionfs_dentry_info *unionfs_parse_options(
6895 +                                        struct super_block *sb,
6896 +                                        char *options)
6897 +{
6898 +       struct unionfs_dentry_info *lower_root_info;
6899 +       char *optname;
6900 +       int err = 0;
6901 +       int bindex;
6902 +       int dirsfound = 0;      /* number of "dirs" options seen so far */
6903 +
6904 +       /* allocate private data area */
6905 +       err = -ENOMEM;
6906 +       lower_root_info =
6907 +               kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
6908 +       if (unlikely(!lower_root_info))
6909 +               goto out_error;
6910 +       lower_root_info->bstart = -1;
6911 +       lower_root_info->bend = -1;
6912 +       lower_root_info->bopaque = -1;
6913 +
6914 +       while ((optname = strsep(&options, ",")) != NULL) {
6915 +               char *optarg;
6916 +
6917 +               if (!*optname)  /* empty option, e.g. a stray comma */
6918 +                       continue;
6919 +
6920 +               optarg = strchr(optname, '=');
6921 +               if (optarg)
6922 +                       *optarg++ = '\0';
6923 +
6924 +               /*
6925 +                * All of our options take an argument now. Insert ones that
6926 +                * don't, above this check.
6927 +                */
6928 +               if (!optarg) {
6929 +                       printk(KERN_ERR "unionfs: %s requires an argument\n",
6930 +                              optname);
6931 +                       err = -EINVAL;
6932 +                       goto out_error;
6933 +               }
6934 +
6935 +               if (!strcmp("dirs", optname)) {
6936 +                       if (++dirsfound > 1) {
6937 +                               printk(KERN_ERR
6938 +                                      "unionfs: multiple dirs specified\n");
6939 +                               err = -EINVAL;
6940 +                               goto out_error;
6941 +                       }
6942 +                       err = parse_dirs_option(sb, lower_root_info, optarg);
6943 +                       if (err)
6944 +                               goto out_error;
6945 +                       continue;
6946 +               }
6947 +
6948 +               err = -EINVAL;
6949 +               printk(KERN_ERR
6950 +                      "unionfs: unrecognized option '%s'\n", optname);
6951 +               goto out_error;
6952 +       }
6953 +       if (dirsfound != 1) {
6954 +               printk(KERN_ERR "unionfs: dirs option required\n");
6955 +               err = -EINVAL;
6956 +               goto out_error;
6957 +       }
6958 +       goto out;
6959 +
6960 +out_error:
6961 +       /* lower_root_info is NULL here when its own allocation failed */
6962 +       if (lower_root_info && lower_root_info->lower_paths) {
6963 +               for (bindex = lower_root_info->bstart;
6964 +                    bindex >= 0 && bindex <= lower_root_info->bend;
6965 +                    bindex++)
6966 +                       path_put(&lower_root_info->lower_paths[bindex]);
6967 +               kfree(lower_root_info->lower_paths);
6968 +       }
6969 +       kfree(lower_root_info);
6970 +
6971 +       kfree(UNIONFS_SB(sb)->data);
6972 +       UNIONFS_SB(sb)->data = NULL;
6973 +
6974 +       lower_root_info = ERR_PTR(err);
6975 +out:
6976 +       return lower_root_info;
6977 +}
6978 +
6979 +/*
6980 + * our custom d_alloc_root work-alike
6981 + *
6982 + * we can't use d_alloc_root if we want to use our own interpose function
6983 + * unchanged, so we simply call our own "fake" d_alloc_root
6984 + */
6985 +static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
6986 +{
6987 +       /* the root dentry is always named "/" */
6988 +       static const struct qstr root_name = { .name = "/", .len = 1 };
6989 +       struct dentry *root;
6990 +
6991 +       if (!sb)
6992 +               return NULL;
6993 +
6994 +       /* allocated without a parent; it becomes its own parent below */
6995 +       root = d_alloc(NULL, &root_name);
6996 +       if (unlikely(!root))
6997 +               return NULL;
6998 +
6999 +       root->d_op = &unionfs_dops;
7000 +       root->d_sb = sb;
7001 +       root->d_parent = root;
7002 +       return root;
7003 +}
7004 +
7005 +/*
7006 + * Fill in a new unionfs superblock from the mount data; returns 0 or -errno.
7007 + * No need to lock unionfs_sb_info's rwsem: nobody else can reference sb yet.
7008 + */
7009 +static int unionfs_read_super(struct super_block *sb, void *raw_data,
7010 +                             int silent)
7011 +{
7012 +       int err = 0;
7013 +       struct unionfs_dentry_info *lower_root_info = NULL;
7014 +       int bindex, bstart, bend;
7015 +
7016 +       if (!raw_data) {
7017 +               printk(KERN_ERR
7018 +                      "unionfs: read_super: missing data argument\n");
7019 +               err = -EINVAL;
7020 +               goto out;
7021 +       }
7022 +
7023 +       /* Allocate superblock private data */
7024 +       sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
7025 +       if (unlikely(!UNIONFS_SB(sb))) {
7026 +               printk(KERN_CRIT "unionfs: read_super: out of memory\n");
7027 +               err = -ENOMEM;
7028 +               goto out;
7029 +       }
7030 +
7031 +       UNIONFS_SB(sb)->bend = -1;
7032 +       atomic_set(&UNIONFS_SB(sb)->generation, 1);
7033 +       init_rwsem(&UNIONFS_SB(sb)->rwsem);
7034 +       UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7035 +
7036 +       lower_root_info = unionfs_parse_options(sb, raw_data);
7037 +       if (IS_ERR(lower_root_info)) {
7038 +               printk(KERN_ERR
7039 +                      "unionfs: read_super: error while parsing options "
7040 +                      "(err = %ld)\n", PTR_ERR(lower_root_info));
7041 +               err = PTR_ERR(lower_root_info);
7042 +               lower_root_info = NULL;
7043 +               goto out_free;
7044 +       }
7045 +       if (lower_root_info->bstart == -1) {
7046 +               err = -ENOENT;
7047 +               goto out_free;
7048 +       }
7049 +
7050 +       /* set the lower superblock field of upper superblock */
7051 +       bstart = lower_root_info->bstart;
7052 +       BUG_ON(bstart != 0);
7053 +       sbend(sb) = bend = lower_root_info->bend;
7054 +       for (bindex = bstart; bindex <= bend; bindex++) {
7055 +               struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7056 +               atomic_inc(&d->d_sb->s_active); /* undone at out_dput */
7057 +               unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7058 +       }
7059 +
7060 +       /* max Bytes is the maximum bytes from highest priority branch */
7061 +       sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7062 +
7063 +       /*
7064 +        * Our c/m/atime granularity is 1 ns because we may stack on file
7065 +        * systems whose granularity is as good.  This is important for our
7066 +        * time-based cache coherency.
7067 +        */
7068 +       sb->s_time_gran = 1;
7069 +
7070 +       sb->s_op = &unionfs_sops;
7071 +
7072 +       /* See comment next to the definition of unionfs_d_alloc_root */
7073 +       sb->s_root = unionfs_d_alloc_root(sb);
7074 +       if (unlikely(!sb->s_root)) {
7075 +               err = -ENOMEM;
7076 +               goto out_dput;
7077 +       }
7078 +
7079 +       /* link the upper and lower dentries */
7080 +       sb->s_root->d_fsdata = NULL;
7081 +       err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT);
7082 +       if (unlikely(err))
7083 +               goto out_freedpd;
7084 +
7085 +       /* Set the lower dentries for s_root */
7086 +       for (bindex = bstart; bindex <= bend; bindex++) {
7087 +               struct dentry *d;
7088 +               struct vfsmount *m;
7089 +
7090 +               d = lower_root_info->lower_paths[bindex].dentry;
7091 +               m = lower_root_info->lower_paths[bindex].mnt;
7092 +
7093 +               unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7094 +               unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7095 +       }
7096 +       dbstart(sb->s_root) = bstart;
7097 +       dbend(sb->s_root) = bend;
7098 +
7099 +       /* Set the generation number to one, since this is for the mount. */
7100 +       atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7101 +
7102 +       /*
7103 +        * Call interpose to create the upper level inode.  Only
7104 +        * INTERPOSE_LOOKUP can return a value other than 0 on err.
7105 +        */
7106 +       err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
7107 +       unionfs_unlock_dentry(sb->s_root);
7108 +       if (!err)
7109 +               goto out;
7110 +       /* else fall through */
7111 +
7112 +out_freedpd:
7113 +       if (UNIONFS_D(sb->s_root)) {
7114 +               kfree(UNIONFS_D(sb->s_root)->lower_paths);
7115 +               free_dentry_private_data(sb->s_root);
7116 +       }
7117 +       dput(sb->s_root);
7118 +       sb->s_root = NULL;      /* don't leave a dangling root behind */
7119 +out_dput:
7120 +       if (lower_root_info && !IS_ERR(lower_root_info)) {
7121 +               for (bindex = lower_root_info->bstart;
7122 +                    bindex <= lower_root_info->bend; bindex++) {
7123 +                       struct dentry *d;
7124 +                       d = lower_root_info->lower_paths[bindex].dentry;
7125 +                       /* drop refs we took earlier */
7126 +                       atomic_dec(&d->d_sb->s_active);
7127 +                       path_put(&lower_root_info->lower_paths[bindex]);
7128 +               }
7129 +               kfree(lower_root_info->lower_paths);
7130 +               kfree(lower_root_info);
7131 +               lower_root_info = NULL;
7132 +       }
7133 +
7134 +out_free:
7135 +       kfree(UNIONFS_SB(sb)->data);
7136 +       kfree(UNIONFS_SB(sb));
7137 +       sb->s_fs_info = NULL;
7138 +       /* success also lands here, with lower_root_info still set */
7139 +out:
7140 +       if (lower_root_info && !IS_ERR(lower_root_info)) {
7141 +               kfree(lower_root_info->lower_paths);
7142 +               kfree(lower_root_info);
7143 +       }
7144 +       return err;
7145 +}
7146 +
7147 +static int unionfs_get_sb(struct file_system_type *fs_type,
7148 +                         int flags, const char *dev_name,
7149 +                         void *raw_data, struct vfsmount *mnt)
7150 +{
7151 +       int err;        /* result of get_sb_nodev() */
7152 +       err = get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
7153 +       if (!err)       /* on success, keep a private copy of dev_name */
7154 +               UNIONFS_SB(mnt->mnt_sb)->dev_name =
7155 +                       kstrdup(dev_name, GFP_KERNEL);
7156 +       return err;     /* NOTE(review): kstrdup() result is not checked */
7157 +}
7158 +
7159 +static struct file_system_type unionfs_fs_type = {
7160 +       .owner          = THIS_MODULE,
7161 +       .name           = UNIONFS_NAME, /* registered by init_unionfs_fs() */
7162 +       .get_sb         = unionfs_get_sb,
7163 +       .kill_sb        = generic_shutdown_super,
7164 +       .fs_flags       = FS_REVAL_DOT,
7165 +};
7166 +
7167 +static int __init init_unionfs_fs(void)
7168 +{
7169 +       int err;        /* 0, or the error of the first step that failed */
7170 +
7171 +       pr_info("Registering unionfs " UNIONFS_VERSION "\n");
7172 +       /* set up the caches and sioq first, then register the filesystem */
7173 +       err = unionfs_init_filldir_cache();
7174 +       if (unlikely(err))
7175 +               goto out;
7176 +       err = unionfs_init_inode_cache();
7177 +       if (unlikely(err))
7178 +               goto out;
7179 +       err = unionfs_init_dentry_cache();
7180 +       if (unlikely(err))
7181 +               goto out;
7182 +       err = init_sioq();
7183 +       if (unlikely(err))
7184 +               goto out;
7185 +       err = register_filesystem(&unionfs_fs_type);
7186 +out:
7187 +       if (unlikely(err)) {    /* any failure: run the whole teardown */
7188 +               stop_sioq();    /* NOTE(review): runs even if init_sioq() never did */
7189 +               unionfs_destroy_filldir_cache();
7190 +               unionfs_destroy_inode_cache();
7191 +               unionfs_destroy_dentry_cache();
7192 +       }
7193 +       return err;
7194 +}
7195 +
7196 +static void __exit exit_unionfs_fs(void)
7197 +{
7198 +       stop_sioq();    /* same teardown calls as init's error path */
7199 +       unionfs_destroy_filldir_cache();
7200 +       unionfs_destroy_inode_cache();
7201 +       unionfs_destroy_dentry_cache();
7202 +       unregister_filesystem(&unionfs_fs_type); /* NOTE(review): done last */
7203 +       pr_info("Completed unionfs module unload\n");
7204 +}
7205 +
7206 +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7207 +             " (http://www.fsl.cs.sunysb.edu)");
7208 +MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7209 +                  " (http://unionfs.filesystems.org)");
7210 +MODULE_LICENSE("GPL");
7211 +
7212 +module_init(init_unionfs_fs);
7213 +module_exit(exit_unionfs_fs);
7214 --- /dev/null
7215 +++ kernel-2.6.28/fs/unionfs/mmap.c
7216 @@ -0,0 +1,89 @@
7217 +/*
7218 + * Copyright (c) 2003-2009 Erez Zadok
7219 + * Copyright (c) 2003-2006 Charles P. Wright
7220 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7221 + * Copyright (c) 2005-2006 Junjiro Okajima
7222 + * Copyright (c) 2006      Shaya Potter
7223 + * Copyright (c) 2005      Arun M. Krishnakumar
7224 + * Copyright (c) 2004-2006 David P. Quigley
7225 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7226 + * Copyright (c) 2003      Puja Gupta
7227 + * Copyright (c) 2003      Harikesavan Krishnan
7228 + * Copyright (c) 2003-2009 Stony Brook University
7229 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
7230 + *
7231 + * This program is free software; you can redistribute it and/or modify
7232 + * it under the terms of the GNU General Public License version 2 as
7233 + * published by the Free Software Foundation.
7234 + */
7235 +
7236 +#include "union.h"
7237 +
7238 +
7239 +/*
7240 + * XXX: we need a dummy readpage handler because generic_file_mmap (which we
7241 + * use in unionfs_mmap) checks for the existence of
7242 + * mapping->a_ops->readpage, else it returns -ENOEXEC.  The VFS will need to
7243 + * be fixed to allow a file system to define vm_ops->fault without any
7244 + * address_space_ops whatsoever.
7245 + *
7246 + * Otherwise, we don't want to use our readpage method at all.
7247 + */
7248 +static int unionfs_readpage(struct file *file, struct page *page)
7249 +{
7250 +       BUG();          /* placeholder only: must never actually be called */
7251 +       return -EINVAL; /* presumably only reached if BUG() returns */
7252 +}
7253 +
7254 +static int unionfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
7255 +{
7256 +       int err;
7257 +       struct file *file, *lower_file;
7258 +       struct vm_operations_struct *lower_vm_ops;
7259 +       struct vm_area_struct lower_vma;        /* on-stack copy of *vma */
7260 +
7261 +       BUG_ON(!vma);
7262 +       memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
7263 +       file = lower_vma.vm_file;       /* still the unionfs file here */
7264 +       lower_vm_ops = UNIONFS_F(file)->lower_vm_ops;
7265 +       BUG_ON(!lower_vm_ops);
7266 +
7267 +       lower_file = unionfs_lower_file(file);
7268 +       BUG_ON(!lower_file);
7269 +       /*
7270 +        * XXX: vm_ops->fault may be called in parallel.  Because we have to
7271 +        * resort to temporarily changing the vma->vm_file to point to the
7272 +        * lower file, a concurrent invocation of unionfs_fault could see a
7273 +        * different value.  In this workaround, we keep a different copy of
7274 +        * the vma structure in our stack, so we never expose a different
7275 +        * value of the vma->vm_file passed to us, even temporarily.  A
7276 +        * better fix would be to change the calling semantics of ->fault to
7277 +        * take an explicit file pointer.
7278 +        */
7279 +       lower_vma.vm_file = lower_file;
7280 +       err = lower_vm_ops->fault(&lower_vma, vmf); /* delegate the fault */
7281 +       return err;     /* lower ->fault result, passed through unchanged */
7282 +}
7283 +
7284 +/*
7285 + * XXX: the default address_space_ops for unionfs is empty.  We cannot set
7286 + * our inode->i_mapping->a_ops to NULL because too many code paths expect
7287 + * the a_ops vector to be non-NULL.
7288 + */
7289 +struct address_space_operations unionfs_aops = {
7290 +       /* empty on purpose */
7291 +};
7292 +
7293 +/*
7294 + * XXX: we need a second, dummy address_space_ops vector, to be used
7295 + * temporarily during unionfs_mmap, because the latter calls
7296 + * generic_file_mmap, which checks if ->readpage exists, else returns
7297 + * -ENOEXEC.
7298 + */
7299 +struct address_space_operations unionfs_dummy_aops = {
7300 +       .readpage       = unionfs_readpage,     /* BUG()s if ever called */
7301 +};
7302 +/* Fault handling: unionfs_fault passes each fault to the lower file. */
7303 +struct vm_operations_struct unionfs_vm_ops = {
7304 +       .fault          = unionfs_fault,
7305 +};
7306 --- /dev/null
7307 +++ kernel-2.6.28/fs/unionfs/rdstate.c
7308 @@ -0,0 +1,285 @@
7309 +/*
7310 + * Copyright (c) 2003-2009 Erez Zadok
7311 + * Copyright (c) 2003-2006 Charles P. Wright
7312 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7313 + * Copyright (c) 2005-2006 Junjiro Okajima
7314 + * Copyright (c) 2005      Arun M. Krishnakumar
7315 + * Copyright (c) 2004-2006 David P. Quigley
7316 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7317 + * Copyright (c) 2003      Puja Gupta
7318 + * Copyright (c) 2003      Harikesavan Krishnan
7319 + * Copyright (c) 2003-2009 Stony Brook University
7320 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
7321 + *
7322 + * This program is free software; you can redistribute it and/or modify
7323 + * it under the terms of the GNU General Public License version 2 as
7324 + * published by the Free Software Foundation.
7325 + */
7326 +
7327 +#include "union.h"
7328 +
7329 +/* This file contains the routines for maintaining readdir state. */
7330 +
7331 +/*
7332 + * There are two structures here: rdstate, which is a hash table, and
7333 + * filldir_node, which is the type of the entries stored in that table.
7334 + */
7335 +
7336 +/*
7337 + * This is a struct kmem_cache for filldir nodes, because we allocate a lot
7338 + * of them and they shouldn't waste memory.  If the node has a small name
7339 + * (as defined by the dentry structure), then we use an inline name to
7340 + * preserve kmalloc space.
7341 + */
7342 +static struct kmem_cache *unionfs_filldir_cachep;
7343 +/* Create the filldir_node slab cache; returns 0, or -ENOMEM on failure. */
7344 +int unionfs_init_filldir_cache(void)
7345 +{
7346 +       unionfs_filldir_cachep =
7347 +               kmem_cache_create("unionfs_filldir",
7348 +                                 sizeof(struct filldir_node), 0,
7349 +                                 SLAB_RECLAIM_ACCOUNT, NULL);
7350 +
7351 +       return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7352 +}
7353 +
7354 +void unionfs_destroy_filldir_cache(void)
7355 +{
7356 +       if (unionfs_filldir_cachep)     /* skip if the pointer is NULL */
7357 +               kmem_cache_destroy(unionfs_filldir_cachep);
7358 +}
7359 +
7360 +/*
7361 + * This is a tuning parameter that tells us roughly how big to make the
7362 + * hash table in directory entries per page.  This isn't perfect, but
7363 + * at least we get a hash table size that shouldn't be too overloaded.
7364 + * The following averages are based on my home directory.
7365 + * 14.44693    Overall
7366 + * 12.29       Single Page Directories
7367 + * 117.93      Multi-page directories
7368 + */
7369 +#define DENTPAGE 4096          /* directory size counted as one page */
7370 +#define DENTPERONEPAGE 12      /* buckets added for a one-page directory */
7371 +#define DENTPERPAGE 118        /* buckets added per page otherwise */
7372 +#define MINHASHSIZE 1          /* starting value, so the result is >= 1 */
7373 +static int guesstimate_hash_size(struct inode *inode)
7374 +{
7375 +       struct inode *lower_inode;
7376 +       int bindex;
7377 +       int hashsize = MINHASHSIZE;
7378 +
7379 +       if (UNIONFS_I(inode)->hashsize > 0)     /* reuse a stored size */
7380 +               return UNIONFS_I(inode)->hashsize;
7381 +       /* otherwise add up an estimate over every lower directory inode */
7382 +       for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7383 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
7384 +               if (!lower_inode)
7385 +                       continue;
7386 +
7387 +               if (i_size_read(lower_inode) == DENTPAGE)
7388 +                       hashsize += DENTPERONEPAGE;
7389 +               else    /* integer division: sizes below DENTPAGE add 0 */
7390 +                       hashsize += (i_size_read(lower_inode) / DENTPAGE) *
7391 +                               DENTPERPAGE;
7392 +       }
7393 +
7394 +       return hashsize;
7395 +}
7396 +
7397 +int init_rdstate(struct file *file)
7398 +{
7399 +       BUG_ON(sizeof(loff_t) !=
7400 +              (sizeof(unsigned int) + sizeof(unsigned int)));
7401 +       BUG_ON(UNIONFS_F(file)->rdstate != NULL); /* must not have one yet */
7402 +       /* attach a fresh readdir state, starting at branch fbstart(file) */
7403 +       UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7404 +                                                fbstart(file));
7405 +       /* alloc_rdstate() returns NULL when its kmalloc() fails */
7406 +       return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7407 +}
7408 +
7409 +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7410 +{
7411 +       struct unionfs_dir_state *rdstate = NULL;
7412 +       struct list_head *pos;
7413 +       /* search the inode's readdircache for a state whose offset is fpos */
7414 +       spin_lock(&UNIONFS_I(inode)->rdlock);
7415 +       list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7416 +               struct unionfs_dir_state *r =
7417 +                       list_entry(pos, struct unionfs_dir_state, cache);
7418 +               if (fpos == rdstate2offset(r)) {
7419 +                       UNIONFS_I(inode)->rdcount--;
7420 +                       list_del(&r->cache);    /* take it off the cache */
7421 +                       rdstate = r;
7422 +                       break;
7423 +               }
7424 +       }
7425 +       spin_unlock(&UNIONFS_I(inode)->rdlock);
7426 +       return rdstate; /* NULL when no cached state matched fpos */
7427 +}
7428 +
7429 +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7430 +{
7431 +       int i = 0;
7432 +       int hashsize;
7433 +       unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7434 +       struct unionfs_dir_state *rdstate;
7435 +       /* size the allocation: header plus one list head per guessed bucket */
7436 +       hashsize = guesstimate_hash_size(inode);
7437 +       mallocsize += hashsize * sizeof(struct list_head);
7438 +       mallocsize = __roundup_pow_of_two(mallocsize);
7439 +
7440 +       /* This should give us about 500 entries anyway. */
7441 +       if (mallocsize > PAGE_SIZE)
7442 +               mallocsize = PAGE_SIZE;
7443 +       /* recompute the bucket count from what the final size can hold */
7444 +       hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7445 +               sizeof(struct list_head);
7446 +
7447 +       rdstate = kmalloc(mallocsize, GFP_KERNEL);
7448 +       if (unlikely(!rdstate))
7449 +               return NULL;    /* allocation failed */
7450 +       /* take the inode's next readdir cookie, wrapping back to 1 */
7451 +       spin_lock(&UNIONFS_I(inode)->rdlock);
7452 +       if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7453 +               UNIONFS_I(inode)->cookie = 1;
7454 +       else
7455 +               UNIONFS_I(inode)->cookie++;
7456 +
7457 +       rdstate->cookie = UNIONFS_I(inode)->cookie;
7458 +       spin_unlock(&UNIONFS_I(inode)->rdlock);
7459 +       rdstate->offset = 1;
7460 +       rdstate->access = jiffies;
7461 +       rdstate->bindex = bindex;
7462 +       rdstate->dirpos = 0;
7463 +       rdstate->hashentries = 0;
7464 +       rdstate->size = hashsize;       /* number of buckets in list[] */
7465 +       for (i = 0; i < rdstate->size; i++)
7466 +               INIT_LIST_HEAD(&rdstate->list[i]);
7467 +
7468 +       return rdstate;
7469 +}
7470 +
7471 +static void free_filldir_node(struct filldir_node *node)
7472 +{
7473 +       if (node->namelen >= DNAME_INLINE_LEN_MIN) /* name was kmalloc'ed */
7474 +               kfree(node->name);
7475 +       kmem_cache_free(unionfs_filldir_cachep, node);
7476 +}
7477 +
7478 +void free_rdstate(struct unionfs_dir_state *state)
7479 +{
7480 +       struct filldir_node *tmp;
7481 +       int i;
7482 +       /* empty every hash bucket, then free the state itself */
7483 +       for (i = 0; i < state->size; i++) {
7484 +               struct list_head *head = &(state->list[i]);
7485 +               struct list_head *pos, *n;
7486 +
7487 +               /* traverse the list and deallocate space */
7488 +               list_for_each_safe(pos, n, head) {
7489 +                       tmp = list_entry(pos, struct filldir_node, file_list);
7490 +                       list_del(&tmp->file_list);
7491 +                       free_filldir_node(tmp);
7492 +               }
7493 +       }
7494 +
7495 +       kfree(state);   /* the list heads are part of this allocation */
7496 +}
7497 +
7498 +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7499 +                                      const char *name, int namelen,
7500 +                                      int is_whiteout)
7501 +{
7502 +       int index;
7503 +       unsigned int hash;
7504 +       struct list_head *head;
7505 +       struct list_head *pos;
7506 +       struct filldir_node *cursor = NULL;
7507 +       int found = 0;
7508 +
7509 +       BUG_ON(namelen <= 0);
7510 +       /* hash the name to pick the bucket to scan */
7511 +       hash = full_name_hash(name, namelen);
7512 +       index = hash % rdstate->size;
7513 +
7514 +       head = &(rdstate->list[index]);
7515 +       list_for_each(pos, head) {
7516 +               cursor = list_entry(pos, struct filldir_node, file_list);
7517 +
7518 +               if (cursor->namelen == namelen && cursor->hash == hash &&
7519 +                   !strncmp(cursor->name, name, namelen)) {
7520 +                       /*
7521 +                        * a duplicate exists, and hence no need to create
7522 +                        * entry to the list
7523 +                        */
7524 +                       found = 1;
7525 +
7526 +                       /*
7527 +                        * if a duplicate is found in this branch, and is
7528 +                        * not due to the caller looking for an entry to
7529 +                        * whiteout, then the file system may be corrupted.
7530 +                        */
7531 +                       if (unlikely(!is_whiteout &&
7532 +                                    cursor->bindex == rdstate->bindex))
7533 +                               printk(KERN_ERR "unionfs: filldir: possible "
7534 +                                      "I/O error: a file is duplicated "
7535 +                                      "in the same branch %d: %s\n",
7536 +                                      rdstate->bindex, cursor->name);
7537 +                       break;
7538 +               }
7539 +       }
7540 +       /* cursor may point at a non-matching node after a full scan */
7541 +       if (!found)
7542 +               cursor = NULL;
7543 +
7544 +       return cursor;  /* the matching node, or NULL if there is none */
7545 +}
7546 +
7547 +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7548 +                    int namelen, int bindex, int whiteout)
7549 +{
7550 +       struct filldir_node *new;
7551 +       unsigned int hash;
7552 +       int index;
7553 +       int err = 0;
7554 +       struct list_head *head;
7555 +       /* Hash @name into @rdstate as a new node; returns 0 or -ENOMEM. */
7556 +       BUG_ON(namelen <= 0);
7557 +
7558 +       hash = full_name_hash(name, namelen);
7559 +       index = hash % rdstate->size;
7560 +       head = &(rdstate->list[index]);
7561 +
7562 +       new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7563 +       if (unlikely(!new)) {
7564 +               err = -ENOMEM;
7565 +               goto out;
7566 +       }
7567 +
7568 +       INIT_LIST_HEAD(&new->file_list);
7569 +       new->namelen = namelen;
7570 +       new->hash = hash;
7571 +       new->bindex = bindex;
7572 +       new->whiteout = whiteout;
7573 +       /* short names live inline in the node, longer ones are kmalloc'ed */
7574 +       if (namelen < DNAME_INLINE_LEN_MIN) {
7575 +               new->name = new->iname;
7576 +       } else {
7577 +               new->name = kmalloc(namelen + 1, GFP_KERNEL);
7578 +               if (unlikely(!new->name)) {
7579 +                       kmem_cache_free(unionfs_filldir_cachep, new);
7580 +                       err = -ENOMEM;  /* report the failure to the caller */
7581 +                       goto out;
7582 +               }
7583 +       }
7584 +
7585 +       memcpy(new->name, name, namelen);
7586 +       new->name[namelen] = '\0';
7587 +
7588 +       rdstate->hashentries++;
7589 +
7590 +       list_add(&(new->file_list), head);
7591 +out:
7592 +       return err;
7593 +}
7594 --- /dev/null
7595 +++ kernel-2.6.28/fs/unionfs/rename.c
7596 @@ -0,0 +1,520 @@
7597 +/*
7598 + * Copyright (c) 2003-2009 Erez Zadok
7599 + * Copyright (c) 2003-2006 Charles P. Wright
7600 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7601 + * Copyright (c) 2005-2006 Junjiro Okajima
7602 + * Copyright (c) 2005      Arun M. Krishnakumar
7603 + * Copyright (c) 2004-2006 David P. Quigley
7604 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7605 + * Copyright (c) 2003      Puja Gupta
7606 + * Copyright (c) 2003      Harikesavan Krishnan
7607 + * Copyright (c) 2003-2009 Stony Brook University
7608 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
7609 + *
7610 + * This program is free software; you can redistribute it and/or modify
7611 + * it under the terms of the GNU General Public License version 2 as
7612 + * published by the Free Software Foundation.
7613 + */
7614 +
7615 +#include "union.h"
7616 +
7617 +/*
7618 + * This is a helper function for rename, used when rename ends up with hosed
7619 + * over dentries and we need to revert.
7620 + */
7621 +static int unionfs_refresh_lower_dentry(struct dentry *dentry,
7622 +                                       struct dentry *parent, int bindex)
7623 +{
7624 +       struct dentry *lower_dentry;
7625 +       struct dentry *lower_parent;
7626 +       int err = 0;
7627 +
7628 +       verify_locked(dentry);
7629 +
7630 +       lower_parent = unionfs_lower_dentry_idx(parent, bindex);
7631 +
7632 +       BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
7633 +       /* look the name up again under the lower parent of this branch */
7634 +       lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent,
7635 +                                     dentry->d_name.len);
7636 +       if (IS_ERR(lower_dentry)) {
7637 +               err = PTR_ERR(lower_dentry);
7638 +               goto out;       /* dentry is left untouched on failure */
7639 +       }
7640 +       /* drop the references held on the old lower dentry and inode */
7641 +       dput(unionfs_lower_dentry_idx(dentry, bindex));
7642 +       iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
7643 +       unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
7644 +       /* negative lookup: record nothing; otherwise install the new pair */
7645 +       if (!lower_dentry->d_inode) {
7646 +               dput(lower_dentry);
7647 +               unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
7648 +       } else {
7649 +               unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
7650 +               unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
7651 +                                           igrab(lower_dentry->d_inode));
7652 +       }
7653 +
7654 +out:
7655 +       return err;
7656 +}
7657 +
7658 +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7659 +                           struct dentry *old_parent,
7660 +                           struct inode *new_dir, struct dentry *new_dentry,
7661 +                           struct dentry *new_parent,
7662 +                           int bindex)
7663 +{
7664 +       int err = 0;
7665 +       struct dentry *lower_old_dentry;
7666 +       struct dentry *lower_new_dentry;
7667 +       struct dentry *lower_old_dir_dentry;
7668 +       struct dentry *lower_new_dir_dentry;
7669 +       struct dentry *trap;
7670 +       /* everything below works on the lower objects of branch bindex */
7671 +       lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7672 +       lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7673 +       /* no lower dentry here yet: obtain one from create_parents() */
7674 +       if (!lower_new_dentry) {
7675 +               lower_new_dentry =
7676 +                       create_parents(new_parent->d_inode,
7677 +                                      new_dentry, new_dentry->d_name.name,
7678 +                                      bindex);
7679 +               if (IS_ERR(lower_new_dentry)) {
7680 +                       err = PTR_ERR(lower_new_dentry);
7681 +                       if (IS_COPYUP_ERR(err))
7682 +                               goto out;
7683 +                       printk(KERN_ERR "unionfs: error creating directory "
7684 +                              "tree for rename, bindex=%d err=%d\n",
7685 +                              bindex, err);
7686 +                       goto out;
7687 +               }
7688 +       }
7689 +
7690 +       /* check for and remove whiteout, if any */
7691 +       err = check_unlink_whiteout(new_dentry, lower_new_dentry, bindex);
7692 +       if (err > 0) /* ignore if whiteout found and successfully removed */
7693 +               err = 0;
7694 +       if (err)
7695 +               goto out;
7696 +
7697 +       /* check if old_dentry's branch is writable */
7698 +       err = is_robranch_super(old_dentry->d_sb, bindex);
7699 +       if (err)
7700 +               goto out;
7701 +       /* pin both lower dentries and their parents across the rename */
7702 +       dget(lower_old_dentry);
7703 +       dget(lower_new_dentry);
7704 +       lower_old_dir_dentry = dget_parent(lower_old_dentry);
7705 +       lower_new_dir_dentry = dget_parent(lower_new_dentry);
7706 +
7707 +       /* see Documentation/filesystems/unionfs/issues.txt */
7708 +       lockdep_off();
7709 +       trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7710 +       /* source should not be ancestor of target */
7711 +       if (trap == lower_old_dentry) {
7712 +               err = -EINVAL;
7713 +               goto out_err_unlock;
7714 +       }
7715 +       /* target should not be ancestor of source */
7716 +       if (trap == lower_new_dentry) {
7717 +               err = -ENOTEMPTY;
7718 +               goto out_err_unlock;
7719 +       }
7720 +       err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
7721 +                        lower_new_dir_dentry->d_inode, lower_new_dentry);
7722 +out_err_unlock:
7723 +       if (!err) {
7724 +               /* update parent dir times */
7725 +               fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode);
7726 +               fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode);
7727 +       }
7728 +       unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7729 +       lockdep_on();
7730 +
7731 +       dput(lower_old_dir_dentry);
7732 +       dput(lower_new_dir_dentry);
7733 +       dput(lower_old_dentry);
7734 +       dput(lower_new_dentry);
7735 +
7736 +out:
7737 +       if (!err) {
7738 +               /* Fix up new_dentry's branch range so it includes bindex. */
7739 +               if (bindex < dbstart(new_dentry))
7740 +                       dbstart(new_dentry) = bindex;
7741 +               else if (bindex > dbend(new_dentry))
7742 +                       dbend(new_dentry) = bindex;
7743 +       }
7744 +
7745 +       return err;
7746 +}
7747 +
7748 +/*
7749 + * Main rename code.  This is sufficiently complex, that it's documented in
7750 + * Documentation/filesystems/unionfs/rename.txt.  This routine calls
7751 + * __unionfs_rename() above to perform some of the work.
7752 + */
7753 +static int do_unionfs_rename(struct inode *old_dir,
7754 +                            struct dentry *old_dentry,
7755 +                            struct dentry *old_parent,
7756 +                            struct inode *new_dir,
7757 +                            struct dentry *new_dentry,
7758 +                            struct dentry *new_parent)
7759 +{
7760 +       int err = 0;
7761 +       int bindex;
7762 +       int old_bstart, old_bend;
7763 +       int new_bstart, new_bend;
7764 +       int do_copyup = -1;     /* -1: none; else branch to start copyup at */
7765 +       int local_err = 0;
7766 +       int eio = 0;
7767 +       int revert = 0;         /* 1 once the first lower rename succeeded */
7768 +
7769 +       old_bstart = dbstart(old_dentry);
7770 +       old_bend = dbend(old_dentry);
7771 +
7772 +       new_bstart = dbstart(new_dentry);
7773 +       new_bend = dbend(new_dentry);
7774 +
7775 +       /* Rename source to destination within the source's start branch. */
7776 +       err = __unionfs_rename(old_dir, old_dentry, old_parent,
7777 +                              new_dir, new_dentry, new_parent,
7778 +                              old_bstart);
7779 +       if (err) {
7780 +               if (!IS_COPYUP_ERR(err))
7781 +                       goto out;
7782 +               do_copyup = old_bstart - 1;
7783 +       } else {
7784 +               revert = 1;
7785 +       }
7786 +
7787 +       /*
7788 +        * Unlink all instances of destination that exist to the left of
7789 +        * bstart of source. On error, revert back, goto out.
7790 +        */
7791 +       for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
7792 +               struct dentry *unlink_dentry;
7793 +               struct dentry *unlink_dir_dentry;
7794 +
7795 +               BUG_ON(bindex < 0);
7796 +               unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7797 +               if (!unlink_dentry)
7798 +                       continue;
7799 +
7800 +               unlink_dir_dentry = lock_parent(unlink_dentry);
7801 +               err = is_robranch_super(old_dir->i_sb, bindex);
7802 +               if (!err)
7803 +                       err = vfs_unlink(unlink_dir_dentry->d_inode,
7804 +                                        unlink_dentry);
7805 +
7806 +               fsstack_copy_attr_times(new_parent->d_inode,
7807 +                                       unlink_dir_dentry->d_inode);
7808 +               /* propagate number of hard-links */
7809 +               new_parent->d_inode->i_nlink =
7810 +                       unionfs_get_nlinks(new_parent->d_inode);
7811 +
7812 +               unlock_dir(unlink_dir_dentry);
7813 +               if (!err) {
7814 +                       if (bindex != new_bstart) {
7815 +                               dput(unlink_dentry);
7816 +                               unionfs_set_lower_dentry_idx(new_dentry,
7817 +                                                            bindex, NULL);
7818 +                       }
7819 +               } else if (IS_COPYUP_ERR(err)) {
7820 +                       do_copyup = bindex - 1;
7821 +               } else if (revert) {
7822 +                       goto revert;
7823 +               }
7824 +       }
7825 +
7826 +       if (do_copyup != -1) {
7827 +               for (bindex = do_copyup; bindex >= 0; bindex--) {
7828 +                       /*
7829 +                        * copyup the file into some left directory, so that
7830 +                        * you can rename it
7831 +                        */
7832 +                       err = copyup_dentry(old_parent->d_inode,
7833 +                                           old_dentry, old_bstart, bindex,
7834 +                                           old_dentry->d_name.name,
7835 +                                           old_dentry->d_name.len, NULL,
7836 +                                           i_size_read(old_dentry->d_inode));
7837 +                       /* if copyup failed, try next branch to the left */
7838 +                       if (err)
7839 +                               continue;
7840 +                       /*
7841 +                        * create whiteout before calling __unionfs_rename
7842 +                        * because the latter will change the old_dentry's
7843 +                        * lower name and parent dir, resulting in the
7844 +                        * whiteout getting created in the wrong dir.
7845 +                        */
7846 +                       err = create_whiteout(old_dentry, bindex);
7847 +                       if (err) {
7848 +                               printk(KERN_ERR "unionfs: can't create a "
7849 +                                      "whiteout for %s in rename (err=%d)\n",
7850 +                                      old_dentry->d_name.name, err);
7851 +                               continue;
7852 +                       }
7853 +                       err = __unionfs_rename(old_dir, old_dentry, old_parent,
7854 +                                              new_dir, new_dentry, new_parent,
7855 +                                              bindex);
7856 +                       break;
7857 +               }
7858 +       }
7859 +
7860 +       /* a renamed directory is marked opaque in its current start branch */
7861 +       if (S_ISDIR(old_dentry->d_inode->i_mode)) {
7862 +               err = make_dir_opaque(old_dentry, dbstart(old_dentry));
7863 +               if (err)
7864 +                       goto revert;
7865 +       }
7866 +
7867 +       /*
7868 +        * Create whiteout for source, only if:
7869 +        * (1) There is more than one underlying instance of source, and
7870 +        * (2) no copyup was attempted (the copyup loop makes its own whiteout).
7871 +        */
7872 +       if ((old_bstart != old_bend) && (do_copyup == -1)) {
7873 +               err = create_whiteout(old_dentry, old_bstart);
7874 +               if (err) {
7875 +                       /* can't fix anything now, so we exit with -EIO */
7876 +                       printk(KERN_ERR "unionfs: can't create a whiteout for "
7877 +                              "%s in rename!\n", old_dentry->d_name.name);
7878 +                       err = -EIO;
7879 +               }
7880 +       }
7881 +
7882 +out:
7883 +       return err;
7884 +
7885 +revert:
7886 +       /* Do revert here.  NOTE(review): also reached when revert == 0? */
7887 +       local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
7888 +                                                old_bstart);
7889 +       if (local_err) {
7890 +               printk(KERN_ERR "unionfs: revert failed in rename: "
7891 +                      "the new refresh failed\n");
7892 +               eio = -EIO;
7893 +       }
7894 +
7895 +       local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
7896 +                                                old_bstart);
7897 +       if (local_err) {
7898 +               printk(KERN_ERR "unionfs: revert failed in rename: "
7899 +                      "the old refresh failed\n");
7900 +               eio = -EIO;
7901 +               goto revert_out;
7902 +       }
7903 +       /* NOTE(review): these checks use bindex, the refreshes use old_bstart */
7904 +       if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
7905 +           !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
7906 +               printk(KERN_ERR "unionfs: revert failed in rename: "
7907 +                      "the object disappeared from under us!\n");
7908 +               eio = -EIO;
7909 +               goto revert_out;
7910 +       }
7911 +
7912 +       if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
7913 +           unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
7914 +               printk(KERN_ERR "unionfs: revert failed in rename: "
7915 +                      "the object was created underneath us!\n");
7916 +               eio = -EIO;
7917 +               goto revert_out;
7918 +       }
7919 +
7920 +       local_err = __unionfs_rename(new_dir, new_dentry, new_parent,
7921 +                                    old_dir, old_dentry, old_parent,
7922 +                                    old_bstart);
7923 +
7924 +       /* If we can't fix it, then we cop-out with -EIO. */
7925 +       if (local_err) {
7926 +               printk(KERN_ERR "unionfs: revert failed in rename!\n");
7927 +               eio = -EIO;
7928 +       }
7929 +
7930 +       local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
7931 +                                                bindex);
7932 +       if (local_err)
7933 +               eio = -EIO;
7934 +       local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
7935 +                                                bindex);
7936 +       if (local_err)
7937 +               eio = -EIO;
7938 +
7939 +revert_out:
7940 +       if (eio)
7941 +               err = eio;
7942 +       return err;
7943 +}
7944 +
7945 +/*
7946 + * We can't copyup a directory, because it may involve huge numbers of
7947 + * children, etc.  Doing that in the kernel would be bad, so instead we
7948 + * return EXDEV to the user-space utility that caused this, and let the
7949 + * user-space recurse and ask us to copy up each file separately.
7950 + */
7951 +static int may_rename_dir(struct dentry *dentry, struct dentry *parent)
7952 +{
7953 +       int err, bstart;
7954 +
7955 +       err = check_empty(dentry, parent, NULL);
7956 +       if (err == -ENOTEMPTY) {
7957 +               if (is_robranch(dentry))
7958 +                       return -EXDEV;
7959 +       } else if (err) {
7960 +               return err;
7961 +       }
7962 +       /* only one branch, or opaque at bstart: allow the rename */
7963 +       bstart = dbstart(dentry);
7964 +       if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
7965 +               return 0;
7966 +       /* re-check with bstart temporarily skipped; non-empty gives -EXDEV */
7967 +       dbstart(dentry) = bstart + 1;
7968 +       err = check_empty(dentry, parent, NULL);
7969 +       dbstart(dentry) = bstart;
7970 +       if (err == -ENOTEMPTY)
7971 +               err = -EXDEV;
7972 +       return err;
7973 +}
7974 +
7975 +/*
7976 + * The locking rules in unionfs_rename are complex.  We could use a simpler
7977 + * superblock-level name-space lock for renames and copy-ups.
7978 + */
7979 +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7980 +                  struct inode *new_dir, struct dentry *new_dentry)
7981 +{
7982 +       int err = 0;
7983 +       struct dentry *wh_dentry;
7984 +       struct dentry *old_parent, *new_parent;
7985 +       int valid = true;
7986 +
7987 +       unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
7988 +       old_parent = dget_parent(old_dentry);
7989 +       new_parent = dget_parent(new_dentry);
7990 +       /* un/lock parent dentries only if they differ from old/new_dentry */
7991 +       if (old_parent != old_dentry &&
7992 +           old_parent != new_dentry)
7993 +               unionfs_lock_dentry(old_parent, UNIONFS_DMUTEX_REVAL_PARENT);
7994 +       if (new_parent != old_dentry &&
7995 +           new_parent != new_dentry &&
7996 +           new_parent != old_parent)
7997 +               unionfs_lock_dentry(new_parent, UNIONFS_DMUTEX_REVAL_CHILD);
7998 +       unionfs_double_lock_dentry(old_dentry, new_dentry);
7999 +       /* a dentry that fails revalidation aborts the rename with -ESTALE */
8000 +       valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
8001 +       if (!valid) {
8002 +               err = -ESTALE;
8003 +               goto out;
8004 +       }
8005 +       if (!d_deleted(new_dentry) && new_dentry->d_inode) {
8006 +               valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
8007 +               if (!valid) {
8008 +                       err = -ESTALE;
8009 +                       goto out;
8010 +               }
8011 +       }
8012 +       /* non-directories: partial lookup; directories: may_rename_dir() */
8013 +       if (!S_ISDIR(old_dentry->d_inode->i_mode))
8014 +               err = unionfs_partial_lookup(old_dentry, old_parent);
8015 +       else
8016 +               err = may_rename_dir(old_dentry, old_parent);
8017 +
8018 +       if (err)
8019 +               goto out;
8020 +
8021 +       err = unionfs_partial_lookup(new_dentry, new_parent);
8022 +       if (err)
8023 +               goto out;
8024 +
8025 +       /*
8026 +        * if new_dentry is already lower because of whiteout,
8027 +        * simply override it even if the whited-out dir is not empty.
8028 +        */
8029 +       wh_dentry = find_first_whiteout(new_dentry);
8030 +       if (!IS_ERR(wh_dentry)) {
8031 +               dput(wh_dentry);
8032 +       } else if (new_dentry->d_inode) {
8033 +               if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8034 +                   S_ISDIR(new_dentry->d_inode->i_mode)) {
8035 +                       err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8036 +                               -ENOTDIR : -EISDIR;
8037 +                       goto out;
8038 +               }
8039 +
8040 +               if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8041 +                       struct unionfs_dir_state *namelist = NULL;
8042 +                       /* check if this unionfs directory is empty or not */
8043 +                       err = check_empty(new_dentry, new_parent, &namelist);
8044 +                       if (err)
8045 +                               goto out;
8046 +
8047 +                       if (!is_robranch(new_dentry))
8048 +                               err = delete_whiteouts(new_dentry,
8049 +                                                      dbstart(new_dentry),
8050 +                                                      namelist);
8051 +
8052 +                       free_rdstate(namelist);
8053 +
8054 +                       if (err)
8055 +                               goto out;
8056 +               }
8057 +       }
8058 +
8059 +       err = do_unionfs_rename(old_dir, old_dentry, old_parent,
8060 +                               new_dir, new_dentry, new_parent);
8061 +       if (err)
8062 +               goto out;
8063 +
8064 +       /*
8065 +        * force re-lookup since the dir on ro branch is not renamed, and
8066 +        * lower dentries still indicate the un-renamed ones.
8067 +        */
8068 +       if (S_ISDIR(old_dentry->d_inode->i_mode))
8069 +               atomic_dec(&UNIONFS_D(old_dentry)->generation);
8070 +       else
8071 +               unionfs_postcopyup_release(old_dentry);
8072 +       if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) {
8073 +               unionfs_postcopyup_release(new_dentry);
8074 +               unionfs_postcopyup_setmnt(new_dentry);
8075 +               if (!unionfs_lower_inode(new_dentry->d_inode)) {
8076 +                       /*
8077 +                        * If we get here, it means that no copyup was
8078 +                        * needed, and that a file by the old name already
8079 +                        * existed on the destination branch; that file got
8080 +                        * renamed earlier in this function, so all we need
8081 +                        * to do here is set the lower inode.
8082 +                        */
8083 +                       struct inode *inode;
8084 +                       inode = unionfs_lower_inode(old_dentry->d_inode);
8085 +                       igrab(inode);
8086 +                       unionfs_set_lower_inode_idx(new_dentry->d_inode,
8087 +                                                   dbstart(new_dentry),
8088 +                                                   inode);
8089 +               }
8090 +       }
8091 +       /* if all of this renaming succeeded, update our times */
8092 +       unionfs_copy_attr_times(old_dentry->d_inode);
8093 +       unionfs_copy_attr_times(new_dentry->d_inode);
8094 +       unionfs_check_inode(old_dir);
8095 +       unionfs_check_inode(new_dir);
8096 +       unionfs_check_dentry(old_dentry);
8097 +       unionfs_check_dentry(new_dentry);
8098 +
8099 +out:
8100 +       if (err)                /* clear the new_dentry stuff created */
8101 +               d_drop(new_dentry);
8102 +       /* release locks and references in reverse order of acquisition */
8103 +       unionfs_double_unlock_dentry(old_dentry, new_dentry);
8104 +       if (new_parent != old_dentry &&
8105 +           new_parent != new_dentry &&
8106 +           new_parent != old_parent)
8107 +               unionfs_unlock_dentry(new_parent);
8108 +       if (old_parent != old_dentry &&
8109 +           old_parent != new_dentry)
8110 +               unionfs_unlock_dentry(old_parent);
8111 +       dput(new_parent);
8112 +       dput(old_parent);
8113 +       unionfs_read_unlock(old_dentry->d_sb);
8114 +
8115 +       return err;
8116 +}
8117 --- /dev/null
8118 +++ kernel-2.6.28/fs/unionfs/sioq.c
8119 @@ -0,0 +1,101 @@
8120 +/*
8121 + * Copyright (c) 2006-2009 Erez Zadok
8122 + * Copyright (c) 2006      Charles P. Wright
8123 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8124 + * Copyright (c) 2006      Junjiro Okajima
8125 + * Copyright (c) 2006      David P. Quigley
8126 + * Copyright (c) 2006-2009 Stony Brook University
8127 + * Copyright (c) 2006-2009 The Research Foundation of SUNY
8128 + *
8129 + * This program is free software; you can redistribute it and/or modify
8130 + * it under the terms of the GNU General Public License version 2 as
8131 + * published by the Free Software Foundation.
8132 + */
8133 +
8134 +#include "union.h"
8135 +
8136 +/*
8137 + * Super-user IO work Queue - sometimes we need to perform actions which
8138 + * would fail due to the unix permissions on the parent directory (e.g.,
8139 + * rmdir a directory which appears empty, but in reality contains
8140 + * whiteouts).
8141 + */
8142 +
8143 +static struct workqueue_struct *superio_workqueue;
8144 +
8145 +int __init init_sioq(void)
8146 +{
8147 +       /*
8148 +        * Create the sioq workqueue; returns 0, or -ENOMEM on failure.
8149 +        * create_workqueue() yields NULL (never an ERR_PTR) when it fails.
8150 +        */
8151 +       superio_workqueue = create_workqueue("unionfs_siod");
8152 +       if (superio_workqueue)
8153 +               return 0;
8154 +
8155 +       printk(KERN_ERR "unionfs: create_workqueue failed %d\n", -ENOMEM);
8156 +       return -ENOMEM;
8157 +}
8158 +
8159 +void stop_sioq(void)
8160 +{
8161 +       if (superio_workqueue)  /* NULL when init_sioq() failed */
8162 +               destroy_workqueue(superio_workqueue);
8163 +}
8164 +
8165 +void run_sioq(work_func_t func, struct sioq_args *args)
8166 +{
8167 +       INIT_WORK(&args->work, func);
8168 +       /* queue the work, then sleep until args->comp is completed */
8169 +       init_completion(&args->comp);
8170 +       while (!queue_work(superio_workqueue, &args->work)) {
8171 +               /* TODO: do accounting if needed */
8172 +               schedule();
8173 +       }
8174 +       wait_for_completion(&args->comp);
8175 +}
8176 +
8177 +void __unionfs_create(struct work_struct *work)
8178 +{
8179 +       struct sioq_args *sioq = container_of(work, struct sioq_args, work);
8180 +       struct create_args *req = &sioq->create;
8181 +
8182 +       sioq->err = vfs_create(req->parent, req->dentry, req->mode, req->nd);
8183 +       complete(&sioq->comp);  /* wakes the waiter in run_sioq() */
8184 +}
8185 +
8186 +void __unionfs_mkdir(struct work_struct *work)
8187 +{
8188 +       struct sioq_args *sioq = container_of(work, struct sioq_args, work);
8189 +       struct mkdir_args *req = &sioq->mkdir;
8190 +
8191 +       sioq->err = vfs_mkdir(req->parent, req->dentry, req->mode);
8192 +       complete(&sioq->comp);  /* wakes the waiter in run_sioq() */
8193 +}
8194 +
8195 +void __unionfs_mknod(struct work_struct *work)
8196 +{
8197 +       struct sioq_args *sioq = container_of(work, struct sioq_args, work);
8198 +       struct mknod_args *req = &sioq->mknod;
8199 +
8200 +       sioq->err = vfs_mknod(req->parent, req->dentry, req->mode, req->dev);
8201 +       complete(&sioq->comp);  /* wakes the waiter in run_sioq() */
8202 +}
8203 +
8204 +void __unionfs_symlink(struct work_struct *work)
8205 +{
8206 +       struct sioq_args *sioq = container_of(work, struct sioq_args, work);
8207 +       struct symlink_args *req = &sioq->symlink;
8208 +
8209 +       sioq->err = vfs_symlink(req->parent, req->dentry, req->symbuf);
8210 +       complete(&sioq->comp);  /* wakes the waiter in run_sioq() */
8211 +}
8212 +
8213 +void __unionfs_unlink(struct work_struct *work)
8214 +{
8215 +       struct sioq_args *sioq = container_of(work, struct sioq_args, work);
8216 +       struct unlink_args *req = &sioq->unlink;
8217 +
8218 +       sioq->err = vfs_unlink(req->parent, req->dentry);
8219 +       complete(&sioq->comp);  /* wakes the waiter in run_sioq() */
8220 +}
8221 --- /dev/null
8222 +++ kernel-2.6.28/fs/unionfs/sioq.h
8223 @@ -0,0 +1,91 @@
8224 +/*
8225 + * Copyright (c) 2006-2009 Erez Zadok
8226 + * Copyright (c) 2006      Charles P. Wright
8227 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8228 + * Copyright (c) 2006      Junjiro Okajima
8229 + * Copyright (c) 2006      David P. Quigley
8230 + * Copyright (c) 2006-2009 Stony Brook University
8231 + * Copyright (c) 2006-2009 The Research Foundation of SUNY
8232 + *
8233 + * This program is free software; you can redistribute it and/or modify
8234 + * it under the terms of the GNU General Public License version 2 as
8235 + * published by the Free Software Foundation.
8236 + */
8237 +
8238 +#ifndef _SIOQ_H
8239 +#define _SIOQ_H
8240 +
8241 +struct deletewh_args {         /* presumably for __delete_whiteouts() */
8242 +       struct unionfs_dir_state *namelist;
8243 +       struct dentry *dentry;
8244 +       int bindex;
8245 +};
8246 +
8247 +struct is_opaque_args {        /* presumably for __is_opaque_dir() */
8248 +       struct dentry *dentry;
8249 +};
8250 +
8251 +struct create_args {           /* vfs_create() arguments */
8252 +       struct inode *parent;
8253 +       struct dentry *dentry;
8254 +       umode_t mode;
8255 +       struct nameidata *nd;
8256 +};
8257 +
8258 +struct mkdir_args {            /* vfs_mkdir() arguments */
8259 +       struct inode *parent;
8260 +       struct dentry *dentry;
8261 +       umode_t mode;
8262 +};
8263 +
8264 +struct mknod_args {            /* vfs_mknod() arguments */
8265 +       struct inode *parent;
8266 +       struct dentry *dentry;
8267 +       umode_t mode;
8268 +       dev_t dev;
8269 +};
8270 +
8271 +struct symlink_args {          /* vfs_symlink() arguments */
8272 +       struct inode *parent;
8273 +       struct dentry *dentry;
8274 +       char *symbuf;
8275 +};
8276 +
8277 +struct unlink_args {           /* vfs_unlink() arguments */
8278 +       struct inode *parent;
8279 +       struct dentry *dentry;
8280 +};
8281 +
8282 +/* one queued request: sync state, result, and per-operation arguments */
8283 +struct sioq_args {
8284 +       struct completion comp; /* completed by the work handler */
8285 +       struct work_struct work;        /* set up and queued by run_sioq() */
8286 +       int err;                /* return value of the VFS call */
8287 +       void *ret;              /* not touched in sioq.c */
8288 +
8289 +       union {
8290 +               struct deletewh_args deletewh;
8291 +               struct is_opaque_args is_opaque;
8292 +               struct create_args create;
8293 +               struct mkdir_args mkdir;
8294 +               struct mknod_args mknod;
8295 +               struct symlink_args symlink;
8296 +               struct unlink_args unlink;
8297 +       };
8298 +};
8299 +
8300 +/* Extern declarations for SIOQ functions */
8301 +extern int __init init_sioq(void);
8302 +extern void stop_sioq(void);
8303 +extern void run_sioq(work_func_t func, struct sioq_args *args);
8304 +
8305 +/* Extern declarations for our privilege escalation helpers */
8306 +extern void __unionfs_create(struct work_struct *work);
8307 +extern void __unionfs_mkdir(struct work_struct *work);
8308 +extern void __unionfs_mknod(struct work_struct *work);
8309 +extern void __unionfs_symlink(struct work_struct *work);
8310 +extern void __unionfs_unlink(struct work_struct *work);
8311 +extern void __delete_whiteouts(struct work_struct *work);
8312 +extern void __is_opaque_dir(struct work_struct *work);
8313 +
8314 +#endif /* not _SIOQ_H */
8315 --- /dev/null
8316 +++ kernel-2.6.28/fs/unionfs/subr.c
8317 @@ -0,0 +1,95 @@
8318 +/*
8319 + * Copyright (c) 2003-2009 Erez Zadok
8320 + * Copyright (c) 2003-2006 Charles P. Wright
8321 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8322 + * Copyright (c) 2005-2006 Junjiro Okajima
8323 + * Copyright (c) 2005      Arun M. Krishnakumar
8324 + * Copyright (c) 2004-2006 David P. Quigley
8325 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8326 + * Copyright (c) 2003      Puja Gupta
8327 + * Copyright (c) 2003      Harikesavan Krishnan
8328 + * Copyright (c) 2003-2009 Stony Brook University
8329 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
8330 + *
8331 + * This program is free software; you can redistribute it and/or modify
8332 + * it under the terms of the GNU General Public License version 2 as
8333 + * published by the Free Software Foundation.
8334 + */
8335 +
8336 +#include "union.h"
8337 +
8338 +/*
8339 + * returns the link count to report for @inode, based on the inode type
8340 + */
8341 +int unionfs_get_nlinks(const struct inode *inode)
8342 +{
8343 +       /* an unlinked inode has no links left; skip the work below */
8344 +       if (!inode->i_nlink)
8345 +               return 0;
8346 +
8347 +       /*
8348 +        * Directories always report 1.  The only place that could care
8349 +        * about links is readdir, and there's d_type there so even that
8350 +        * doesn't matter.
8351 +        */
8352 +       if (S_ISDIR(inode->i_mode))
8353 +               return 1;
8354 +
8355 +       return unionfs_lower_inode(inode)->i_nlink;
8356 +}
8357 +
8358 +/* raise upper's a/m/ctime to the newest value found among its lower inodes */
8359 +void unionfs_copy_attr_times(struct inode *upper)
8360 +{
8361 +       int bindex;
8362 +       struct inode *lower;
8363 +
8364 +       if (!upper)     /* unionfs_rename() may pass a NULL d_inode */
8365 +               return;
8366 +       if (ibstart(upper) < 0) {
8367 +#ifdef CONFIG_UNION_FS_DEBUG
8368 +               WARN_ON(ibstart(upper) < 0);
8369 +#endif /* CONFIG_UNION_FS_DEBUG */
8370 +               return;
8371 +       }
8372 +       for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
8373 +               lower = unionfs_lower_inode_idx(upper, bindex);
8374 +               if (!lower)
8375 +                       continue; /* not all lower dir objects may exist */
8376 +               if (unlikely(timespec_compare(&upper->i_mtime,
8377 +                                             &lower->i_mtime) < 0))
8378 +                       upper->i_mtime = lower->i_mtime;
8379 +               if (unlikely(timespec_compare(&upper->i_ctime,
8380 +                                             &lower->i_ctime) < 0))
8381 +                       upper->i_ctime = lower->i_ctime;
8382 +               if (unlikely(timespec_compare(&upper->i_atime,
8383 +                                             &lower->i_atime) < 0))
8384 +                       upper->i_atime = lower->i_atime;
8385 +       }
8386 +}
8387 +
8388 +/*
8389 + * Unionfs (fan-out) counterpart of fsstack_copy_attr_all.  Plain attributes
8390 + * come from @src; the a/m/ctimes are raised to the newest values among
8391 + * @dest's lower inodes (which matters for directories), and the link count
8392 + * is computed by unionfs_get_nlinks().
8393 + */
8394 +void unionfs_copy_attr_all(struct inode *dest,
8395 +                          const struct inode *src)
8396 +{
8397 +       /* attributes taken verbatim from the source inode */
8398 +       dest->i_mode = src->i_mode;
8399 +       dest->i_uid = src->i_uid;
8400 +       dest->i_gid = src->i_gid;
8401 +       dest->i_rdev = src->i_rdev;
8402 +       dest->i_blkbits = src->i_blkbits;
8403 +       dest->i_flags = src->i_flags;
8404 +
8405 +       unionfs_copy_attr_times(dest);
8406 +
8407 +       /*
8408 +        * Update the link count LAST: unionfs_get_nlinks() looks at
8409 +        * i_mode and i_nlink of @dest.
8410 +        */
8411 +       dest->i_nlink = unionfs_get_nlinks(dest);
8412 +}
8413 --- /dev/null
8414 +++ kernel-2.6.28/fs/unionfs/super.c
8415 @@ -0,0 +1,1047 @@
8416 +/*
8417 + * Copyright (c) 2003-2009 Erez Zadok
8418 + * Copyright (c) 2003-2006 Charles P. Wright
8419 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8420 + * Copyright (c) 2005-2006 Junjiro Okajima
8421 + * Copyright (c) 2005      Arun M. Krishnakumar
8422 + * Copyright (c) 2004-2006 David P. Quigley
8423 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8424 + * Copyright (c) 2003      Puja Gupta
8425 + * Copyright (c) 2003      Harikesavan Krishnan
8426 + * Copyright (c) 2003-2009 Stony Brook University
8427 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
8428 + *
8429 + * This program is free software; you can redistribute it and/or modify
8430 + * it under the terms of the GNU General Public License version 2 as
8431 + * published by the Free Software Foundation.
8432 + */
8433 +
8434 +#include "union.h"
8435 +
8436 +/*
8437 + * The inode cache is used with alloc_inode for both our inode info and the
8438 + * vfs inode.
8439 + */
8440 +static struct kmem_cache *unionfs_inode_cachep;
8441 +
8442 +struct inode *unionfs_iget(struct super_block *sb, unsigned long ino)
8443 +{
8444 +       int size;       /* bytes needed for the lower_inodes array */
8445 +       struct unionfs_inode_info *info;
8446 +       struct inode *inode;
8447 +
8448 +       inode = iget_locked(sb, ino);   /* find cached inode or get a new one */
8449 +       if (!inode)
8450 +               return ERR_PTR(-ENOMEM);
8451 +       if (!(inode->i_state & I_NEW))
8452 +               return inode;   /* not new: skip the setup below */
8453 +
8454 +       info = UNIONFS_I(inode);
8455 +       memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
8456 +       info->bstart = -1;      /* presumably "no lower branches yet" - confirm */
8457 +       info->bend = -1;
8458 +       atomic_set(&info->generation,
8459 +                  atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8460 +       spin_lock_init(&info->rdlock);
8461 +       info->rdcount = 1;
8462 +       info->hashsize = -1;
8463 +       INIT_LIST_HEAD(&info->readdircache);
8464 +
8465 +       size = sbmax(inode->i_sb) * sizeof(struct inode *);
8466 +       info->lower_inodes = kzalloc(size, GFP_KERNEL);
8467 +       if (unlikely(!info->lower_inodes)) {
8468 +               printk(KERN_CRIT "unionfs: no kernel memory when allocating "
8469 +                      "lower-pointer array!\n");
8470 +               iget_failed(inode);     /* discard the half-built inode */
8471 +               return ERR_PTR(-ENOMEM);
8472 +       }
8473 +
8474 +       inode->i_version++;
8475 +       inode->i_op = &unionfs_main_iops;
8476 +       inode->i_fop = &unionfs_main_fops;
8477 +
8478 +       inode->i_mapping->a_ops = &unionfs_aops;
8479 +
8480 +       /*
8481 +        * Reset times so unionfs_copy_attr_all can keep our time invariants
8482 +        * right (upper inode time being the max of all lower ones).
8483 +        */
8484 +       inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0;
8485 +       inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0;
8486 +       inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0;
8487 +       unlock_new_inode(inode);        /* setup complete */
8488 +       return inode;
8489 +}
8490 +
8491 +/*
8492 + * We define delete_inode because there are two VFS paths that may
8493 + * destroy an inode: one of them calls clear_inode before doing everything
8494 + * else that's needed, and the other is fine.  This way we zero the inode
8495 + * size, truncate its pages, and then clear our own inode, which will do an
8496 + * iput on our and the lower inode.
8497 + *
8498 + * No need to lock sb info's rwsem.
8499 + */
8500 +static void unionfs_delete_inode(struct inode *inode)
8501 +{
8502 +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
8503 +       spin_lock(&inode->i_lock);      /* taken only on 32-bit SMP builds */
8504 +#endif
8505 +       i_size_write(inode, 0); /* every f/s seems to do that */
8506 +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
8507 +       spin_unlock(&inode->i_lock);
8508 +#endif
8509 +
8510 +       if (inode->i_data.nrpages)      /* skip the call when no pages cached */
8511 +               truncate_inode_pages(&inode->i_data, 0);
8512 +
8513 +       clear_inode(inode);
8514 +}
8515 +
8516 +/*
8517 + * Final actions when unmounting a file system: warn about leaked branch
8518 + * references, drop the lower superblocks' s_active counts, free sb info.
8519 + * No need to lock rwsem.
8520 + */
8521 +static void unionfs_put_super(struct super_block *sb)
8522 +{
8523 +       int bindex, bstart, bend;
8524 +       struct unionfs_sb_info *spd;
8525 +       int leaks = 0;
8526 +
8527 +       spd = UNIONFS_SB(sb);
8528 +       if (!spd)
8529 +               return; /* no sb info: nothing to release */
8530 +
8531 +       bstart = sbstart(sb);
8532 +       bend = sbend(sb);
8533 +
8534 +       /* Make sure we have no leaks of branchget/branchput. */
8535 +       for (bindex = bstart; bindex <= bend; bindex++)
8536 +               if (unlikely(branch_count(sb, bindex) != 0)) {
8537 +                       printk(KERN_CRIT
8538 +                              "unionfs: branch %d has %d references left!\n",
8539 +                              bindex, branch_count(sb, bindex));
8540 +                       leaks = 1;      /* keep scanning: report every branch */
8541 +               }
8542 +       WARN_ON(leaks != 0);    /* leaks are only reported; teardown continues */
8543 +
8544 +       /* drop the s_active count held on each lower superblock */
8545 +       for (bindex = bstart; bindex <= bend; bindex++) {
8546 +               struct super_block *s;
8547 +               s = unionfs_lower_super_idx(sb, bindex);
8548 +               unionfs_set_lower_super_idx(sb, bindex, NULL);
8549 +               atomic_dec(&s->s_active);
8550 +       }
8551 +
8552 +       kfree(spd->dev_name);
8553 +       kfree(spd->data);
8554 +       kfree(spd);
8555 +       sb->s_fs_info = NULL;
8556 +}
8557 +
8558 +/*
8559 + * People use this to answer "How big of a file can I write?", so we report
8560 + * the highest priority branch's statfs as the union's.  Returns -ESTALE if
8561 + * @dentry fails revalidation, otherwise the result of vfs_statfs().
8562 + */
8563 +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
8564 +{
8565 +       int err = 0;
8566 +       struct super_block *sb;
8567 +       struct dentry *lower_dentry;
8568 +       struct dentry *parent;
8569 +       bool valid;
8570 +
8571 +       sb = dentry->d_sb;
8572 +
8573 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
8574 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
8575 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
8576 +
8577 +       valid = __unionfs_d_revalidate(dentry, parent, false);
8578 +       if (unlikely(!valid)) {
8579 +               err = -ESTALE;
8580 +               goto out;
8581 +       }
8582 +       unionfs_check_dentry(dentry);
8583 +
8584 +       lower_dentry = unionfs_lower_dentry(sb->s_root); /* root, not @dentry */
8585 +       err = vfs_statfs(lower_dentry, buf); /* buf is patched below regardless */
8586 +
8587 +       /* set return buf to our f/s to avoid confusing user-level utils */
8588 +       buf->f_type = UNIONFS_SUPER_MAGIC;
8589 +       /*
8590 +        * Our maximum file name length is shorter by a few bytes because
8591 +        * every file name could potentially be whited-out.
8592 +        *
8593 +        * XXX: this restriction goes away with ODF.
8594 +        */
8595 +       unionfs_set_max_namelen(&buf->f_namelen);
8596 +
8597 +       /*
8598 +        * Reset two fields (f_fsid, f_spare) to avoid confusing user-land.
8599 +        * XXX: is this still necessary?
8600 +        */
8601 +       memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
8602 +       memset(&buf->f_spare, 0, sizeof(buf->f_spare));
8603 +
8604 +out:
8605 +       unionfs_check_dentry(dentry);
8606 +       unionfs_unlock_dentry(dentry);
8607 +       unionfs_unlock_parent(dentry, parent);
8608 +       unionfs_read_unlock(sb);
8609 +       return err;
8610 +}
8611 +
8612 +/* remount "mode=BRANCH=MODE": change the mode of an existing branch */
8613 +static noinline_for_stack int do_remount_mode_option(
8614 +                                       char *optarg,
8615 +                                       int cur_branches,
8616 +                                       struct unionfs_data *new_data,
8617 +                                       struct path *new_lower_paths)
8618 +{
8619 +       int err = -EINVAL;
8620 +       int perms, idx;
8621 +       char *modename = strchr(optarg, '=');
8622 +       struct nameidata nd;
8623 +
8624 +       /* optarg is still BRANCH=MODE here; it is split at '=' below */
8625 +       if (!*optarg) {
8626 +               printk(KERN_ERR
8627 +                      "unionfs: no branch specified for mode change\n");
8628 +               goto out;
8629 +       }
8630 +       if (!modename) {
8631 +               printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
8632 +                      optarg);
8633 +               goto out;
8634 +       }
8635 +       *modename++ = '\0';     /* optarg: branch path; modename: mode text */
8636 +       err = parse_branch_mode(modename, &perms);
8637 +       if (err) {
8638 +               printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
8639 +                      modename, optarg);
8640 +               goto out;
8641 +       }
8642 +
8643 +       /*
8644 +        * Find matching branch index.  For now, this assumes that nothing
8645 +        * has been mounted on top of this Unionfs stack.  Once we have /odf
8646 +        * and cache-coherency resolved, we'll address the branch-path
8647 +        * uniqueness.
8648 +        */
8649 +       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
8650 +       if (err) {
8651 +               printk(KERN_ERR "unionfs: error accessing "
8652 +                      "lower directory \"%s\" (error %d)\n",
8653 +                      optarg, err);
8654 +               goto out;
8655 +       }
8656 +       for (idx = 0; idx < cur_branches; idx++)
8657 +               if (nd.path.mnt == new_lower_paths[idx].mnt &&
8658 +                   nd.path.dentry == new_lower_paths[idx].dentry)
8659 +                       break;
8660 +       path_put(&nd.path);     /* no longer needed */
8661 +       if (idx == cur_branches) {
8662 +               err = -ENOENT;  /* err may have been reset above */
8663 +               printk(KERN_ERR "unionfs: branch \"%s\" "
8664 +                      "not found\n", optarg);
8665 +               goto out;
8666 +       }
8667 +       /* check/change mode for existing branch */
8668 +       /* we don't warn if perms==branchperms */
8669 +       new_data[idx].branchperms = perms;
8670 +       err = 0;
8671 +out:
8672 +       return err;
8673 +}
8674 +
8675 +/* remount "del=BRANCH": drop a branch from new_data/new_lower_paths */
8676 +static noinline_for_stack int do_remount_del_option(
8677 +                                       char *optarg, int cur_branches,
8678 +                                       struct unionfs_data *new_data,
8679 +                                       struct path *new_lower_paths)
8680 +{
8681 +       int err = -EINVAL;
8682 +       int idx;
8683 +       struct nameidata nd;
8684 +
8685 +       /* optarg contains the branch name to delete */
8686 +
8687 +       /*
8688 +        * Find matching branch index.  For now, this assumes that nothing
8689 +        * has been mounted on top of this Unionfs stack.  Once we have /odf
8690 +        * and cache-coherency resolved, we'll address the branch-path
8691 +        * uniqueness.
8692 +        */
8693 +       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
8694 +       if (err) {
8695 +               printk(KERN_ERR "unionfs: error accessing "
8696 +                      "lower directory \"%s\" (error %d)\n",
8697 +                      optarg, err);
8698 +               goto out;
8699 +       }
8700 +       for (idx = 0; idx < cur_branches; idx++)
8701 +               if (nd.path.mnt == new_lower_paths[idx].mnt &&
8702 +                   nd.path.dentry == new_lower_paths[idx].dentry)
8703 +                       break;
8704 +       path_put(&nd.path);     /* no longer needed */
8705 +       if (idx == cur_branches) {
8706 +               printk(KERN_ERR "unionfs: branch \"%s\" "
8707 +                      "not found\n", optarg);
8708 +               err = -ENOENT;
8709 +               goto out;
8710 +       }
8711 +       /* check if there are any open files on the branch to be deleted */
8712 +       if (atomic_read(&new_data[idx].open_files) > 0) {
8713 +               err = -EBUSY;   /* refused without a printk */
8714 +               goto out;
8715 +       }
8716 +
8717 +       /*
8718 +        * Now we have to delete the branch.  First, release any handles it
8719 +        * has.  Then, move the remaining array indexes past "idx" in
8720 +        * new_data and new_lower_paths one to the left.  The caller adjusts
8721 +        * its own branch count once we return success.
8722 +        */
8723 +       path_put(&new_lower_paths[idx]);
8724 +
8725 +       if (idx < cur_branches - 1) {
8726 +               /* if idx==cur_branches-1, we delete last branch: easy */
8727 +               memmove(&new_data[idx], &new_data[idx+1],
8728 +                       (cur_branches - 1 - idx) *
8729 +                       sizeof(struct unionfs_data));
8730 +               memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
8731 +                       (cur_branches - 1 - idx) * sizeof(struct path));
8732 +       }
8733 +
8734 +       err = 0;
8735 +out:
8736 +       return err;
8737 +}
8738 +
8739 +/* remount "add=...": insert a new branch; see the optarg forms below */
8740 +static noinline_for_stack int do_remount_add_option(
8741 +                                       char *optarg, int cur_branches,
8742 +                                       struct unionfs_data *new_data,
8743 +                                       struct path *new_lower_paths,
8744 +                                       int *high_branch_id)
8745 +{
8746 +       int err = -EINVAL;
8747 +       int perms;
8748 +       int idx = 0;            /* default: insert at beginning */
8749 +       char *new_branch , *modename = NULL;
8750 +       struct nameidata nd;
8751 +
8752 +       /*
8753 +        * optarg can be of several forms:
8754 +        *
8755 +        * /bar:/foo            insert /foo before /bar
8756 +        * /bar:/foo=ro         insert /foo in ro mode before /bar
8757 +        * /foo                 insert /foo in the beginning (prepend)
8758 +        * :/foo                insert /foo at the end (append)
8759 +        */
8760 +       if (*optarg == ':') {   /* append? */
8761 +               new_branch = optarg + 1; /* skip ':' */
8762 +               idx = cur_branches;
8763 +               goto found_insertion_point;
8764 +       }
8765 +       new_branch = strchr(optarg, ':');
8766 +       if (!new_branch) {      /* prepend? */
8767 +               new_branch = optarg;
8768 +               goto found_insertion_point;
8769 +       }
8770 +       *new_branch++ = '\0';   /* holds path+mode of new branch */
8771 +
8772 +       /*
8773 +        * Find matching branch index.  For now, this assumes that nothing
8774 +        * has been mounted on top of this Unionfs stack.  Once we have /odf
8775 +        * and cache-coherency resolved, we'll address the branch-path
8776 +        * uniqueness.
8777 +        */
8778 +       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
8779 +       if (err) {
8780 +               printk(KERN_ERR "unionfs: error accessing "
8781 +                      "lower directory \"%s\" (error %d)\n",
8782 +                      optarg, err);
8783 +               goto out;
8784 +       }
8785 +       for (idx = 0; idx < cur_branches; idx++)
8786 +               if (nd.path.mnt == new_lower_paths[idx].mnt &&
8787 +                   nd.path.dentry == new_lower_paths[idx].dentry)
8788 +                       break;
8789 +       path_put(&nd.path);     /* no longer needed */
8790 +       if (idx == cur_branches) {
8791 +               printk(KERN_ERR "unionfs: branch \"%s\" "
8792 +                      "not found\n", optarg);
8793 +               err = -ENOENT;
8794 +               goto out;
8795 +       }
8796 +
8797 +       /*
8798 +        * At this point idx will hold the index where the new branch should
8799 +        * be inserted before.
8800 +        */
8801 +found_insertion_point:
8802 +       /* find the mode for the new branch */
8803 +       if (new_branch)
8804 +               modename = strchr(new_branch, '=');
8805 +       if (modename)
8806 +               *modename++ = '\0';
8807 +       if (!new_branch || !*new_branch) {
8808 +               printk(KERN_ERR "unionfs: null new branch\n");
8809 +               err = -EINVAL;
8810 +               goto out;
8811 +       }
8812 +       err = parse_branch_mode(modename, &perms);
8813 +       if (err) {
8814 +               printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
8815 +                      "branch \"%s\"\n", modename, new_branch);
8816 +               goto out;
8817 +       }
8818 +       err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
8819 +       if (err) {
8820 +               printk(KERN_ERR "unionfs: error accessing "
8821 +                      "lower directory \"%s\" (error %d)\n",
8822 +                      new_branch, err);
8823 +               goto out;
8824 +       }
8825 +       /*
8826 +        * It's probably safe to check_branch the new branch to insert.  Note:
8827 +        * we don't allow inserting branches which are unionfs's by
8828 +        * themselves (check_branch returns EINVAL in that case).  This is
8829 +        * because this code base doesn't support stacking unionfs: the ODF
8830 +        * code base supports that correctly.
8831 +        */
8832 +       err = check_branch(&nd);
8833 +       if (err) {
8834 +               printk(KERN_ERR "unionfs: lower directory "
8835 +                      "\"%s\" is not a valid branch\n", new_branch);
8836 +               path_put(&nd.path);
8837 +               goto out;
8838 +       }
8839 +
8840 +       /*
8841 +        * Now we have to insert the new branch.  But first, move the bits
8842 +        * to make space for it, if needed (the arrays must have room for
8843 +        * cur_branches + 1 entries).  The caller adjusts its branch count.
8844 +        * We don't release nd here; it's kept until umount/remount.
8845 +        */
8846 +       if (idx < cur_branches) {
8847 +               /* if idx==cur_branches, we append: easy */
8848 +               memmove(&new_data[idx+1], &new_data[idx],
8849 +                       (cur_branches - idx) * sizeof(struct unionfs_data));
8850 +               memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
8851 +                       (cur_branches - idx) * sizeof(struct path));
8852 +       }
8853 +       new_lower_paths[idx].dentry = nd.path.dentry;
8854 +       new_lower_paths[idx].mnt = nd.path.mnt;
8855 +
8856 +       new_data[idx].sb = nd.path.dentry->d_sb;
8857 +       atomic_set(&new_data[idx].open_files, 0);
8858 +       new_data[idx].branchperms = perms;
8859 +       new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
8860 +
8861 +       err = 0;
8862 +out:
8863 +       return err;
8864 +}
8865 +
8866 +
8867 +/*
8868 + * Support branch management options on remount.
8869 + *
8870 + * See Documentation/filesystems/unionfs/ for details.
8871 + *
8872 + * @flags: numeric mount options
8873 + * @options: mount options string
8874 + *
8875 + * This function can rearrange a mounted union dynamically, adding and
8876 + * removing branches, including changing branch modes.  Clearly this has to
8877 + * be done safely and atomically.  Luckily, the VFS already calls this
8878 + * function with lock_super(sb) and lock_kernel() held, preventing
8879 + * concurrent mixing of new mounts, remounts, and unmounts.  Moreover,
8880 + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
8881 + * to purge dentries/inodes from our superblock, and also called
8882 + * fsync_super(sb) to purge any dirty pages.  So we're good.
8883 + *
8884 + * XXX: however, our remount code may also need to invalidate mapped pages
8885 + * so as to force them to be re-gotten from the (newly reconfigured) lower
8886 + * branches.  This has to wait for proper mmap and cache coherency support
8887 + * in the VFS.
8888 + *
8889 + */
8890 +static int unionfs_remount_fs(struct super_block *sb, int *flags,
8891 +                             char *options)
8892 +{
8893 +       int err = 0;
8894 +       int i;
8895 +       char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
8896 +       char *optname;
8897 +       int cur_branches = 0;   /* no. of current branches */
8898 +       int new_branches = 0;   /* no. of branches actually left in the end */
8899 +       int add_branches;       /* est. no. of branches to add */
8900 +       int del_branches;       /* est. no. of branches to del */
8901 +       int max_branches;       /* max possible no. of branches */
8902 +       struct unionfs_data *new_data = NULL, *tmp_data = NULL;
8903 +       struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
8904 +       struct inode **new_lower_inodes = NULL;
8905 +       int new_high_branch_id; /* new high branch ID */
8906 +       int size;               /* memory allocation size, temp var */
8907 +       int old_ibstart, old_ibend;
8908 +
8909 +       unionfs_write_lock(sb);
8910 +
8911 +       /*
8912 +        * The VFS will take care of "ro" and "rw" flags, and we can safely
8913 +        * ignore MS_SILENT, but anything else left over is an error.  So we
8914 +        * need to check if any other flags may have been passed (none are
8915 +        * allowed/supported as of now).
8916 +        */
8917 +       if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
8918 +               printk(KERN_ERR
8919 +                      "unionfs: remount flags 0x%x unsupported\n", *flags);
8920 +               err = -EINVAL;
8921 +               goto out_error;
8922 +       }
8923 +
8924 +       /*
8925 +        * If 'options' is NULL, it's probably because the user just changed
8926 +        * the union to a "ro" or "rw" and the VFS took care of it.  So
8927 +        * nothing to do and we're done.
8928 +        */
8929 +       if (!options || options[0] == '\0')
8930 +               goto out_error;
8931 +
8932 +       /*
8933 +        * Find out how many branches we will have in the end, counting
8934 +        * "add" and "del" commands.  Copy the "options" string because
8935 +        * strsep modifies the string and we need it later.
8936 +        */
8937 +       tmp_to_free = kstrdup(options, GFP_KERNEL);
8938 +       optionstmp = tmp_to_free;
8939 +       if (unlikely(!optionstmp)) {
8940 +               err = -ENOMEM;
8941 +               goto out_free;
8942 +       }
8943 +       cur_branches = sbmax(sb); /* current no. branches */
8944 +       new_branches = sbmax(sb);
8945 +       del_branches = 0;
8946 +       add_branches = 0;
8947 +       new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
8948 +       while ((optname = strsep(&optionstmp, ",")) != NULL) {
8949 +               char *optarg;
8950 +
8951 +               if (!optname || !*optname)
8952 +                       continue;
8953 +
8954 +               optarg = strchr(optname, '=');
8955 +               if (optarg)
8956 +                       *optarg++ = '\0';
8957 +
8958 +               if (!strcmp("add", optname))
8959 +                       add_branches++;
8960 +               else if (!strcmp("del", optname))
8961 +                       del_branches++;
8962 +       }
8963 +       kfree(tmp_to_free);
8964 +       /* after all changes, will we have at least one branch left? */
8965 +       if ((new_branches + add_branches - del_branches) < 1) {
8966 +               printk(KERN_ERR
8967 +                      "unionfs: no branches left after remount\n");
8968 +               err = -EINVAL;
8969 +               goto out_free;
8970 +       }
8971 +
8972 +       /*
8973 +        * Since we haven't actually parsed all the add/del options, nor
8974 +        * have we checked them for errors, we don't know for sure how many
8975 +        * branches we will have after all changes have taken place.  In
8976 +        * fact, the total number of branches left could be less than what
8977 +        * we have now.  So we need to allocate space for a temporary
8978 +        * placeholder that is at least as large as the maximum number of
8979 +        * branches we *could* have, which is the current number plus all
8980 +        * the additions.  Once we're done with these temp placeholders, we
8981 +        * may have to re-allocate the final size, copy over from the temp,
8982 +        * and then free the temps (done near the end of this function).
8983 +        */
8984 +       max_branches = cur_branches + add_branches;
8985 +       /* allocate space for new pointers to lower dentry */
8986 +       tmp_data = kcalloc(max_branches,
8987 +                          sizeof(struct unionfs_data), GFP_KERNEL);
8988 +       if (unlikely(!tmp_data)) {
8989 +               err = -ENOMEM;
8990 +               goto out_free;
8991 +       }
8992 +       /* allocate space for new pointers to lower paths */
8993 +       tmp_lower_paths = kcalloc(max_branches,
8994 +                                 sizeof(struct path), GFP_KERNEL);
8995 +       if (unlikely(!tmp_lower_paths)) {
8996 +               err = -ENOMEM;
8997 +               goto out_free;
8998 +       }
8999 +       /* copy current info into new placeholders, incrementing refcnts */
9000 +       memcpy(tmp_data, UNIONFS_SB(sb)->data,
9001 +              cur_branches * sizeof(struct unionfs_data));
9002 +       memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9003 +              cur_branches * sizeof(struct path));
9004 +       for (i = 0; i < cur_branches; i++)
9005 +               path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9006 +
9007 +       /*******************************************************************
9008 +        * For each branch command, do path_lookup on the requested branch,
9009 +        * and apply the change to a temp branch list.  To handle errors, we
9010 +        * already dup'ed the old arrays (above), and increased the refcnts
9011 +        * on various f/s objects.  So now we can do all the path_lookups
9012 +        * and branch-management commands on the new arrays.  If this fails
9013 +        * midway, we free the tmp arrays and *put all objects.  If we succeed,
9014 +        * then we free old arrays and *put its objects, and then replace
9015 +        * the arrays with the new tmp list (we may have to re-allocate the
9016 +        * memory because the temp lists could have been larger than what we
9017 +        * actually needed).
9018 +        *******************************************************************/
9019 +
9020 +       while ((optname = strsep(&options, ",")) != NULL) {
9021 +               char *optarg;
9022 +
9023 +               if (!optname || !*optname)
9024 +                       continue;
9025 +               /*
9026 +                * At this stage optname holds a comma-delimited option, but
9027 +                * without the commas.  Next, we need to break the string on
9028 +                * the '=' symbol to separate CMD=ARG, where ARG itself can
9029 +                * be KEY=VAL.  For example, in mode=/foo=rw, CMD is "mode",
9030 +                * KEY is "/foo", and VAL is "rw".
9031 +                */
9032 +               optarg = strchr(optname, '=');
9033 +               if (optarg)
9034 +                       *optarg++ = '\0';
9035 +               /* incgen remount option (instead of old ioctl) */
9036 +               if (!strcmp("incgen", optname)) {
9037 +                       err = 0;
9038 +                       goto out_no_change;
9039 +               }
9040 +
9041 +               /*
9042 +                * All of our options take an argument now.  (Insert ones
9043 +                * that don't above this check.)  So at this stage optname
9044 +                * contains the CMD part and optarg contains the ARG part.
9045 +                */
9046 +               if (!optarg || !*optarg) {
9047 +                       printk(KERN_ERR "unionfs: all remount options require "
9048 +                              "an argument (%s)\n", optname);
9049 +                       err = -EINVAL;
9050 +                       goto out_release;
9051 +               }
9052 +
9053 +               if (!strcmp("add", optname)) {
9054 +                       err = do_remount_add_option(optarg, new_branches,
9055 +                                                   tmp_data,
9056 +                                                   tmp_lower_paths,
9057 +                                                   &new_high_branch_id);
9058 +                       if (err)
9059 +                               goto out_release;
9060 +                       new_branches++;
9061 +                       if (new_branches > UNIONFS_MAX_BRANCHES) {
9062 +                               printk(KERN_ERR "unionfs: command exceeds "
9063 +                                      "%d branches\n", UNIONFS_MAX_BRANCHES);
9064 +                               err = -E2BIG;
9065 +                               goto out_release;
9066 +                       }
9067 +                       continue;
9068 +               }
9069 +               if (!strcmp("del", optname)) {
9070 +                       err = do_remount_del_option(optarg, new_branches,
9071 +                                                   tmp_data,
9072 +                                                   tmp_lower_paths);
9073 +                       if (err)
9074 +                               goto out_release;
9075 +                       new_branches--;
9076 +                       continue;
9077 +               }
9078 +               if (!strcmp("mode", optname)) {
9079 +                       err = do_remount_mode_option(optarg, new_branches,
9080 +                                                    tmp_data,
9081 +                                                    tmp_lower_paths);
9082 +                       if (err)
9083 +                               goto out_release;
9084 +                       continue;
9085 +               }
9086 +
9087 +               /*
9088 +                * When you use "mount -o remount,ro", mount(8) will
9089 +                * reportedly pass the original dirs= string from
9090 +                * /proc/mounts.  So for now, we have to ignore dirs= and
9091 +                * not consider it an error, unless we want to allow users
9092 +                * to pass dirs= in remount.  Note that to allow the VFS to
9093 +                * actually process the ro/rw remount options, we have to
9094 +                * return 0 from this function.
9095 +                */
9096 +               if (!strcmp("dirs", optname)) {
9097 +                       printk(KERN_WARNING
9098 +                              "unionfs: remount ignoring option \"%s\"\n",
9099 +                              optname);
9100 +                       continue;
9101 +               }
9102 +
9103 +               err = -EINVAL;
9104 +               printk(KERN_ERR
9105 +                      "unionfs: unrecognized option \"%s\"\n", optname);
9106 +               goto out_release;
9107 +       }
9108 +
9109 +out_no_change:
9110 +
9111 +       /******************************************************************
9112 +        * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9113 +        * see if we need to allocate a small-sized new vector, copy the
9114 +        * vectors to their correct place, release the refcnt of the older
9115 +        * ones, and return.  Also handle invalidating any pages that will
9116 +        * have to be re-read.
9117 +        *******************************************************************/
9118 +
9119 +       if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9120 +               printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
9121 +                      "(use \"remount,ro\" to create a read-only union)\n");
9122 +               err = -EINVAL;
9123 +               goto out_release;
9124 +       }
9125 +
9126 +       /* (re)allocate space for new pointers to lower dentry */
9127 +       size = new_branches * sizeof(struct unionfs_data);
9128 +       new_data = krealloc(tmp_data, size, GFP_KERNEL);
9129 +       if (unlikely(!new_data)) {
9130 +               err = -ENOMEM;
9131 +               goto out_release;
9132 +       }
9133 +
9134 +       /* allocate space for new pointers to lower paths */
9135 +       size = new_branches * sizeof(struct path);
9136 +       new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9137 +       if (unlikely(!new_lower_paths)) {
9138 +               err = -ENOMEM;
9139 +               goto out_release;
9140 +       }
9141 +
9142 +       /* allocate space for new pointers to lower inodes */
9143 +       new_lower_inodes = kcalloc(new_branches,
9144 +                                  sizeof(struct inode *), GFP_KERNEL);
9145 +       if (unlikely(!new_lower_inodes)) {
9146 +               err = -ENOMEM;
9147 +               goto out_release;
9148 +       }
9149 +
9150 +       /*
9151 +        * OK, just before we actually put the new set of branches in place,
9152 +        * we need to ensure that our own f/s has no dirty objects left.
9153 +        * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9154 +        * fsync_super(sb), taking care of dentries, inodes, and dirty
9155 +        * pages.  So all that's left is for us to invalidate any leftover
9156 +        * (non-dirty) pages to ensure that they will be re-read from the
9157 +        * new lower branches (and to support mmap).
9158 +        */
9159 +
9160 +       /*
9161 +        * Once we finish the remounting successfully, our superblock
9162 +        * generation number will have increased.  This will be detected by
9163 +        * our dentry-revalidation code upon subsequent f/s operations
9164 +        * through unionfs.  The revalidation code will rebuild the union of
9165 +        * lower inodes for a given unionfs inode and invalidate any pages
9166 +        * of such "stale" inodes (by calling our purge_inode_data
9167 +        * function).  This revalidation will happen lazily and
9168 +        * incrementally, as users perform operations on cached inodes.  We
9169 +        * would like to encourage this revalidation to happen sooner if
9170 +        * possible, so we like to try to invalidate as many other pages in
9171 +        * our superblock as we can.  We used to call drop_pagecache_sb() or
9172 +        * a variant thereof, but either method was racy (drop_caches alone
9173 +        * is known to be racy).  So now we let the revalidation happen on a
9174 +        * per file basis in ->d_revalidate.
9175 +        */
9176 +
9177 +       /* grab new lower super references; release old ones */
9178 +       for (i = 0; i < new_branches; i++)
9179 +               atomic_inc(&new_data[i].sb->s_active);
9180 +       for (i = 0; i < sbmax(sb); i++)
9181 +               atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active);
9182 +
9183 +       /* copy new vectors into their correct place */
9184 +       tmp_data = UNIONFS_SB(sb)->data;
9185 +       UNIONFS_SB(sb)->data = new_data;
9186 +       new_data = NULL;        /* so don't free good pointers below */
9187 +       tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9188 +       UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9189 +       new_lower_paths = NULL; /* so don't free good pointers below */
9190 +
9191 +       /* update our unionfs_sb_info and root dentry index of last branch */
9192 +       i = sbmax(sb);          /* save no. of branches to release at end */
9193 +       sbend(sb) = new_branches - 1;
9194 +       dbend(sb->s_root) = new_branches - 1;
9195 +       old_ibstart = ibstart(sb->s_root->d_inode);
9196 +       old_ibend = ibend(sb->s_root->d_inode);
9197 +       ibend(sb->s_root->d_inode) = new_branches - 1;
9198 +       UNIONFS_D(sb->s_root)->bcount = new_branches;
9199 +       new_branches = i; /* no. of branches to release below */
9200 +
9201 +       /*
9202 +        * Update lower inodes: 3 steps
9203 +        * 1. grab ref on all new lower inodes
9204 +        */
9205 +       for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
9206 +               struct dentry *lower_dentry =
9207 +                       unionfs_lower_dentry_idx(sb->s_root, i);
9208 +               igrab(lower_dentry->d_inode);
9209 +               new_lower_inodes[i] = lower_dentry->d_inode;
9210 +       }
9211 +       /* 2. release reference on all older lower inodes */
9212 +       iput_lowers(sb->s_root->d_inode, old_ibstart, old_ibend, true);
9213 +       /* 3. update root dentry's inode to new lower_inodes array */
9214 +       UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9215 +       new_lower_inodes = NULL;
9216 +
9217 +       /* maxbytes may have changed */
9218 +       sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9219 +       /* update high branch ID */
9220 +       sbhbid(sb) = new_high_branch_id;
9221 +
9222 +       /* update our sb->generation for revalidating objects */
9223 +       i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9224 +       atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9225 +       atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9226 +       if (!(*flags & MS_SILENT))
9227 +               pr_info("unionfs: %s: new generation number %d\n",
9228 +                       UNIONFS_SB(sb)->dev_name, i);
9229 +       /* finally, update the root dentry's times */
9230 +       unionfs_copy_attr_times(sb->s_root->d_inode);
9231 +       err = 0;                /* reset to success */
9232 +
9233 +       /*
9234 +        * The code above falls through to the next label, and releases the
9235 +        * refcnts of the older ones (stored in tmp_*): if we fell through
9236 +        * here, it means success.  However, if we jump directly to this
9237 +        * label from any error above, then an error occurred after we
9238 +        * grabbed various refcnts, and so we have to release the
9239 +        * temporarily constructed structures.
9240 +        */
9241 +out_release:
9242 +       /* no need to cleanup/release anything in tmp_data */
9243 +       if (tmp_lower_paths)
9244 +               for (i = 0; i < new_branches; i++)
9245 +                       path_put(&tmp_lower_paths[i]);
9246 +out_free:
9247 +       kfree(tmp_lower_paths);
9248 +       kfree(tmp_data);
9249 +       kfree(new_lower_paths);
9250 +       kfree(new_data);
9251 +       kfree(new_lower_inodes);
9252 +out_error:
9253 +       unionfs_check_dentry(sb->s_root);
9254 +       unionfs_write_unlock(sb);
9255 +       return err;
9256 +}
9257 +
9258 +/*
9259 + * ->clear_inode: called by iput() once the inode's reference count has
9260 + * dropped to zero and the inode is not hashed anywhere.  Frees every
9261 + * cached readdir state, drops our reference on each lower inode and
9262 + * releases the lower_inodes array.
9263 + *
9264 + * No need to lock sb info's rwsem.
9265 + */
9266 +static void unionfs_clear_inode(struct inode *inode)
9267 +{
9268 +       struct unionfs_inode_info *info = UNIONFS_I(inode);
9269 +       struct unionfs_dir_state *state, *next;
9270 +       struct inode *lower;
9271 +       int idx, first, last;
9272 +
9273 +       list_for_each_entry_safe(state, next, &info->readdircache, cache) {
9274 +               list_del(&state->cache);
9275 +               free_rdstate(state);
9276 +       }
9277 +
9278 +       /*
9279 +        * Drop the reference held on each lower inode (per the original
9280 +        * note, taken by our read_inode when the inode was created); a
9281 +        * negative start index means there is nothing to walk.
9282 +        */
9283 +       first = ibstart(inode);
9284 +       last = ibend(inode);
9285 +       for (idx = first; first >= 0 && idx <= last; idx++) {
9286 +               lower = unionfs_lower_inode_idx(inode, idx);
9287 +               if (!lower)
9288 +                       continue;
9289 +
9290 +               /* detach the slot before dropping the reference */
9291 +               unionfs_set_lower_inode_idx(inode, idx, NULL);
9292 +               /* see Documentation/filesystems/unionfs/issues.txt */
9293 +               lockdep_off();
9294 +               iput(lower);
9295 +               lockdep_on();
9296 +       }
9297 +
9298 +       kfree(info->lower_inodes);
9299 +       info->lower_inodes = NULL;
9300 +}
9301 +
9302 +/* ->alloc_inode: returns the embedded VFS inode, or NULL if out of memory */
9303 +static struct inode *unionfs_alloc_inode(struct super_block *sb)
9304 +{
9305 +       struct unionfs_inode_info *info =
9306 +               kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9307 +
9308 +       if (unlikely(!info))
9309 +               return NULL;
9310 +
9311 +       /* zero only the private fields that precede vfs_inode */
9312 +       memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9313 +       info->vfs_inode.i_version = 1;
9314 +       return &info->vfs_inode;
9315 +}
9316 +
9317 +/* ->destroy_inode: give UNIONFS_I(inode) back to the inode slab cache */
9318 +static void unionfs_destroy_inode(struct inode *inode)
9319 +{
9320 +       kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9321 +}
9322 +
9323 +/* unionfs inode cache constructor: one-time init of the embedded inode */
9324 +static void init_once(void *obj)
9325 +{
9326 +       struct unionfs_inode_info *info = obj;
9327 +       inode_init_once(&info->vfs_inode);
9328 +}
9329 +
9330 +/* Create the unionfs inode slab cache.  Returns 0, or -ENOMEM on failure. */
9331 +int unionfs_init_inode_cache(void)
9332 +{
9333 +       unionfs_inode_cachep =
9334 +               kmem_cache_create("unionfs_inode_cache",
9335 +                                 sizeof(struct unionfs_inode_info), 0,
9336 +                                 SLAB_RECLAIM_ACCOUNT, init_once);
9337 +       if (unlikely(!unionfs_inode_cachep))
9338 +               return -ENOMEM;
9339 +       return 0;
9340 +}
9341 +
9342 +/* Destroy the unionfs inode slab cache, if the cache pointer is set. */
9343 +void unionfs_destroy_inode_cache(void)
9344 +{
9345 +       if (!unionfs_inode_cachep)
9346 +               return;
9347 +       kmem_cache_destroy(unionfs_inode_cachep);
9348 +}
9349 +
9350 +/*
9351 + * Called when we have a dirty inode; all we do here is throw out the
9352 + * parts of our readdir cache that are older than RDCACHE_JIFFIES.
9353 + * @sync is unused.  Always returns 0.
9354 + * No need to grab sb info's rwsem.
9355 + */
9356 +static int unionfs_write_inode(struct inode *inode, int sync)
9357 +{
9358 +       struct list_head *pos, *n;
9359 +       struct unionfs_dir_state *rdstate;
9360 +
9361 +       spin_lock(&UNIONFS_I(inode)->rdlock);
9362 +       list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9363 +               rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9364 +               /* LRU order: stop at first unexpired entry (wrap-safe test) */
9365 +               if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9366 +                       break;
9367 +               UNIONFS_I(inode)->rdcount--;
9368 +               list_del(&rdstate->cache);
9369 +               free_rdstate(rdstate);
9370 +       }
9371 +       spin_unlock(&UNIONFS_I(inode)->rdlock);
9372 +
9373 +       return 0;
9374 +}
9375 +
9376 +/*
9377 + * ->umount_begin: pass the call on to every lower superblock that
9378 + * implements it.  Used only in nfs, to kill any pending RPC tasks,
9379 + * so that subsequent code can actually succeed and won't leave tasks
9380 + * that need handling.
9381 + */
9382 +static void unionfs_umount_begin(struct super_block *sb)
9383 +{
9384 +       int idx, last;
9385 +
9386 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9387 +
9388 +       last = sbend(sb);
9389 +       for (idx = sbstart(sb); idx <= last; idx++) {
9390 +               struct super_block *lower = unionfs_lower_super_idx(sb, idx);
9391 +
9392 +               if (!lower || !lower->s_op || !lower->s_op->umount_begin)
9393 +                       continue;
9394 +               lower->s_op->umount_begin(lower);
9395 +       }
9396 +
9397 +       unionfs_read_unlock(sb);
9398 +}
9399 +
9400 +/*
9401 + * ->show_options: print ",dirs=" followed by one "<path>=<rw|ro>" entry
9402 + * per branch, separated by ':'.  Returns 0, -ENOMEM or d_path()'s error.
9403 + */
9404 +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9405 +{
9406 +       struct super_block *sb = mnt->mnt_sb;
9407 +       char *page, *name;
9408 +       int idx, last;
9409 +       int ret = 0;
9410 +
9411 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9412 +       unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD);
9413 +
9414 +       page = (char *) __get_free_page(GFP_KERNEL);
9415 +       if (unlikely(!page)) {
9416 +               ret = -ENOMEM;
9417 +               goto out;
9418 +       }
9419 +
9420 +       last = sbend(sb);
9421 +
9422 +       seq_printf(m, ",dirs=");
9423 +       for (idx = sbstart(sb); idx <= last; idx++) {
9424 +               struct path lower;
9425 +
9426 +               lower.dentry = unionfs_lower_dentry_idx(sb->s_root, idx);
9427 +               lower.mnt = unionfs_lower_mnt_idx(sb->s_root, idx);
9428 +               name = d_path(&lower, page, PAGE_SIZE);
9429 +               if (IS_ERR(name)) {
9430 +                       ret = PTR_ERR(name);
9431 +                       goto out;
9432 +               }
9433 +
9434 +               if (branchperms(sb, idx) & MAY_WRITE)
9435 +                       seq_printf(m, "%s=%s", name, "rw");
9436 +               else
9437 +                       seq_printf(m, "%s=%s", name, "ro");
9438 +               if (idx != last)
9439 +                       seq_printf(m, ":");
9440 +       }
9441 +
9442 +out:
9443 +       /* page is NULL here when the allocation above failed */
9444 +       free_page((unsigned long) page);
9445 +       unionfs_unlock_dentry(sb->s_root);
9446 +       unionfs_read_unlock(sb);
9447 +
9448 +       return ret;
9449 +}
9450 +
9451 +struct super_operations unionfs_sops = {
9452 +       .alloc_inode    = unionfs_alloc_inode,  /* per-inode hooks first */
9453 +       .destroy_inode  = unionfs_destroy_inode,
9454 +       .write_inode    = unionfs_write_inode,
9455 +       .delete_inode   = unionfs_delete_inode,
9456 +       .clear_inode    = unionfs_clear_inode,
9457 +       .put_super      = unionfs_put_super,    /* superblock-wide hooks */
9458 +       .statfs         = unionfs_statfs,
9459 +       .remount_fs     = unionfs_remount_fs,
9460 +       .umount_begin   = unionfs_umount_begin,
9461 +       .show_options   = unionfs_show_options,
9462 +};
9463 --- /dev/null
9464 +++ kernel-2.6.28/fs/unionfs/union.h
9465 @@ -0,0 +1,659 @@
9466 +/*
9467 + * Copyright (c) 2003-2009 Erez Zadok
9468 + * Copyright (c) 2003-2006 Charles P. Wright
9469 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9470 + * Copyright (c) 2005      Arun M. Krishnakumar
9471 + * Copyright (c) 2004-2006 David P. Quigley
9472 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9473 + * Copyright (c) 2003      Puja Gupta
9474 + * Copyright (c) 2003      Harikesavan Krishnan
9475 + * Copyright (c) 2003-2009 Stony Brook University
9476 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
9477 + *
9478 + * This program is free software; you can redistribute it and/or modify
9479 + * it under the terms of the GNU General Public License version 2 as
9480 + * published by the Free Software Foundation.
9481 + */
9482 +
9483 +#ifndef _UNION_H_
9484 +#define _UNION_H_
9485 +
9486 +#include <linux/dcache.h>
9487 +#include <linux/file.h>
9488 +#include <linux/list.h>
9489 +#include <linux/fs.h>
9490 +#include <linux/mm.h>
9491 +#include <linux/module.h>
9492 +#include <linux/mount.h>
9493 +#include <linux/namei.h>
9494 +#include <linux/page-flags.h>
9495 +#include <linux/pagemap.h>
9496 +#include <linux/poll.h>
9497 +#include <linux/security.h>
9498 +#include <linux/seq_file.h>
9499 +#include <linux/slab.h>
9500 +#include <linux/spinlock.h>
9501 +#include <linux/smp_lock.h>
9502 +#include <linux/statfs.h>
9503 +#include <linux/string.h>
9504 +#include <linux/vmalloc.h>
9505 +#include <linux/writeback.h>
9506 +#include <linux/buffer_head.h>
9507 +#include <linux/xattr.h>
9508 +#include <linux/fs_stack.h>
9509 +#include <linux/magic.h>
9510 +#include <linux/log2.h>
9511 +#include <linux/poison.h>
9512 +#include <linux/mman.h>
9513 +#include <linux/backing-dev.h>
9514 +#include <linux/splice.h>
9515 +
9516 +#include <asm/system.h>
9517 +
9518 +#include <linux/union_fs.h>
9519 +
9520 +/* the file system name */
9521 +#define UNIONFS_NAME "unionfs"
9522 +
9523 +/* unionfs root inode number */
9524 +#define UNIONFS_ROOT_INO     1
9525 +
9526 +/* number of times we try to get a unique temporary file name */
9527 +#define GET_TMPNAM_MAX_RETRY   5
9528 +
9529 +/* maximum number of branches we support, to avoid memory blowup */
9530 +#define UNIONFS_MAX_BRANCHES   128
9531 +
9532 +/* minimum time (seconds) required for time-based cache-coherency */
9533 +#define UNIONFS_MIN_CC_TIME    3
9534 +
9535 +/* Operations vectors defined in specific files. */
9536 +extern struct file_operations unionfs_main_fops;
9537 +extern struct file_operations unionfs_dir_fops;
9538 +extern struct inode_operations unionfs_main_iops;
9539 +extern struct inode_operations unionfs_dir_iops;
9540 +extern struct inode_operations unionfs_symlink_iops;
9541 +extern struct super_operations unionfs_sops;
9542 +extern struct dentry_operations unionfs_dops;
9543 +extern struct address_space_operations unionfs_aops, unionfs_dummy_aops;
9544 +extern struct vm_operations_struct unionfs_vm_ops;
9545 +
9546 +/* How long should an entry be allowed to persist */
9547 +#define RDCACHE_JIFFIES        (5*HZ)
9548 +
9549 +/* compatibility with Real-Time patches */
9550 +#ifdef CONFIG_PREEMPT_RT
9551 +# define unionfs_rw_semaphore  compat_rw_semaphore
9552 +#else /* not CONFIG_PREEMPT_RT */
9553 +# define unionfs_rw_semaphore  rw_semaphore
9554 +#endif /* not CONFIG_PREEMPT_RT */
9555 +
9556 +/* unionfs file private data in memory */
9557 +struct unionfs_file_info {
9558 +       int bstart;     /* NOTE(review): presumably first branch index - confirm */
9559 +       int bend;       /* NOTE(review): presumably last branch index - confirm */
9560 +       atomic_t generation;
9561 +
9562 +       struct unionfs_dir_state *rdstate;
9563 +       struct file **lower_files;
9564 +       int *saved_branch_ids; /* IDs of branches when file was opened */
9565 +       struct vm_operations_struct *lower_vm_ops;
9566 +       bool wrote_to_file;     /* for delayed copyup */
9567 +};
9568 +
9569 +/* unionfs inode data in memory */
9570 +struct unionfs_inode_info {
9571 +       int bstart;     /* presumably what ibstart() reads; clear_inode skips lower iputs if < 0 */
9572 +       int bend;
9573 +       atomic_t generation;    /* root inode: set to the sb generation by remount */
9574 +       /* Stuff for readdir over NFS. */
9575 +       spinlock_t rdlock;      /* held by ->write_inode while pruning readdircache */
9576 +       struct list_head readdircache;  /* unionfs_dir_state entries, linked via ->cache */
9577 +       int rdcount;    /* decremented for each entry ->write_inode prunes */
9578 +       int hashsize;
9579 +       int cookie;
9580 +
9581 +       /* The lower inodes */
9582 +       struct inode **lower_inodes;    /* array; kfree'd by ->clear_inode */
9583 +
9584 +       struct inode vfs_inode; /* ->alloc_inode zeroes every field before this one */
9585 +};
9586 +
9587 +/* unionfs dentry data in memory */
9588 +struct unionfs_dentry_info {
9589 +       /*
9590 +        * This mutex is used to lock the dentry as soon as we get into a
9591 +        * unionfs function from the VFS.  Our lock ordering is that children
9592 +        * go before their parents.
9593 +        */
9594 +       struct mutex lock;
9595 +       int bstart;
9596 +       int bend;       /* NOTE(review): presumably what dbend() accesses - confirm */
9597 +       int bcount;     /* root dentry: remount stores the new branch count here */
9598 +       int bopaque;
9599 +       atomic_t generation;    /* root dentry: set to the sb generation by remount */
9600 +       struct path *lower_paths;       /* array of lower paths; root's is replaced by remount */
9601 +};
9602 +
9603 +/* Per-branch data; unionfs_sb_info->data points to an array of these. */
9604 +struct unionfs_data {
9605 +       struct super_block *sb; /* lower super_block */
9606 +       atomic_t open_files;    /* number of open files on branch */
9607 +       int branchperms;        /* MAY_WRITE set means the branch is read-write */
9608 +       int branch_id;          /* unique branch ID at re/mount time */
9609 +};
9610 +
9611 +/* unionfs super-block data in memory */
9612 +struct unionfs_sb_info {
9613 +       int bend;       /* presumably what sbend() accesses; remount stores (branches - 1) */
9614 +
9615 +       atomic_t generation;    /* incremented by remount when new branches are installed */
9616 +
9617 +       /*
9618 +        * This rwsem is used to make sure that a branch management
9619 +        * operation...
9620 +        *   1) will not begin before all currently in-flight operations
9621 +        *      complete.
9622 +        *   2) any new operations do not execute until the currently
9623 +        *      running branch management operation completes.
9624 +        *
9625 +        * The write_lock_owner records the PID of the task which grabbed
9626 +        * the rw_sem for writing.  If the same task also tries to grab the
9627 +        * read lock, we allow it.  This prevents a self-deadlock when
9628 +        * branch-management is used on a pivot_root'ed union, because we
9629 +        * have to ->lookup paths which belong to the same union.
9630 +        */
9631 +       struct unionfs_rw_semaphore rwsem;
9632 +       pid_t write_lock_owner; /* PID of rw_sem owner (write lock); 0 when unowned */
9633 +       int high_branch_id;     /* last unique branch ID given */
9634 +       char *dev_name;         /* to identify different unions in log messages */
9635 +       struct unionfs_data *data;      /* per-branch array; replaced by remount */
9636 +};
9637 +
9638 +/*
9639 + * One entry in the linked list that readdir builds on the left branch, so
9640 + * that it can be compared with entries on the right branch.
9641 + */
9642 +struct filldir_node {
9643 +       struct list_head file_list;     /* list for directory entries */
9644 +       char *name;             /* name entry (not 0 terminated, see namelen) */
9645 +       int hash;               /* name hash */
9646 +       int namelen;            /* name len since name is not 0 terminated */
9647 +
9648 +       /*
9649 +        * we can check for duplicate whiteouts and files in the same branch
9650 +        * in order to return -EIO.
9651 +        */
9652 +       int bindex;
9653 +
9654 +       /* is this a whiteout entry? */
9655 +       int whiteout;
9656 +
9657 +       /* Inline name, so we don't need to separately kmalloc small ones */
9658 +       char iname[DNAME_INLINE_LEN_MIN];
9659 +};
9660 +
9661 +/* Directory hash table. */
9662 +struct unionfs_dir_state {
9663 +       unsigned int cookie;    /* the cookie, based off of rdversion */
9664 +       unsigned int offset;    /* The entry we have returned. */
9665 +       int bindex;
9666 +       loff_t dirpos;          /* offset within the lower level directory */
9667 +       int size;               /* How big is the hash table? */
9668 +       int hashentries;        /* How many entries have been inserted? */
9669 +       unsigned long access;   /* in jiffies; pruned once older than RDCACHE_JIFFIES */
9670 +
9671 +       /* This cache list is used when the inode keeps us around. */
9672 +       struct list_head cache; /* links into unionfs_inode_info->readdircache */
9673 +       struct list_head list[0];       /* NOTE(review): presumably `size` hash buckets - confirm */
9674 +};
9675 +
9676 +/* externs needed for fanout.h or sioq.h */
9677 +extern int unionfs_get_nlinks(const struct inode *inode);
9678 +extern void unionfs_copy_attr_times(struct inode *upper);
9679 +extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src);
9680 +
9681 +/* include miscellaneous macros */
9682 +#include "fanout.h"
9683 +#include "sioq.h"
9684 +
9685 +/* externs for cache creation/deletion routines */
9686 +extern void unionfs_destroy_filldir_cache(void);
9687 +extern int unionfs_init_filldir_cache(void);
9688 +extern int unionfs_init_inode_cache(void);
9689 +extern void unionfs_destroy_inode_cache(void);
9690 +extern int unionfs_init_dentry_cache(void);
9691 +extern void unionfs_destroy_dentry_cache(void);
9692 +
9693 +/* Initialize and free readdir-specific  state. */
9694 +extern int init_rdstate(struct file *file);
9695 +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
9696 +                                              int bindex);
9697 +extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
9698 +                                             loff_t fpos);
9699 +extern void free_rdstate(struct unionfs_dir_state *state);
9700 +extern int add_filldir_node(struct unionfs_dir_state *rdstate,
9701 +                           const char *name, int namelen, int bindex,
9702 +                           int whiteout);
9703 +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
9704 +                                             const char *name, int namelen,
9705 +                                             int is_whiteout);
9706 +
9707 +extern struct dentry **alloc_new_dentries(int objs);
9708 +extern struct unionfs_data *alloc_new_data(int objs);
9709 +
9710 +/* We can only use 32-bits of offset for rdstate --- blech! */
9711 +#define DIREOF (0xfffff)
9712 +#define RDOFFBITS 20           /* This is the number of bits in DIREOF. */
9713 +#define MAXRDCOOKIE (0xfff)
9714 +/* Turn an rdstate into an offset. */
9715 +static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
9716 +{
9717 +       /* cookie lands in the bits above RDOFFBITS, offset in the bits below */
9718 +       unsigned int hi = (buf->cookie & MAXRDCOOKIE) << RDOFFBITS;
9719 +       unsigned int lo = buf->offset & DIREOF;
9720 +
9721 +       return hi | lo;
9722 +}
9723 +
9724 +/* Lock subclasses that unionfs_read_lock() hands to down_read_nested(). */
9725 +enum unionfs_super_lock_class {
9726 +       UNIONFS_SMUTEX_NORMAL,
9727 +       UNIONFS_SMUTEX_PARENT,  /* when locking on behalf of file */
9728 +       UNIONFS_SMUTEX_CHILD,   /* when locking on behalf of dentry */
9729 +};
9730 +static inline void unionfs_read_lock(struct super_block *sb, int subclass)
9731 +{
9732 +       pid_t owner = UNIONFS_SB(sb)->write_lock_owner;
9733 +
9734 +       if (!owner || owner != current->pid) /* not the write-lock holder */
9735 +               down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass);
9736 +}
9737 +static inline void unionfs_read_unlock(struct super_block *sb)
9738 +{
9739 +       pid_t owner = UNIONFS_SB(sb)->write_lock_owner;
9740 +
9741 +       if (!owner || owner != current->pid) /* mirrors unionfs_read_lock */
9742 +               up_read(&UNIONFS_SB(sb)->rwsem);
9743 +}
9744 +static inline void unionfs_write_lock(struct super_block *sb)
9745 +{
9746 +       down_write(&UNIONFS_SB(sb)->rwsem);
9747 +       UNIONFS_SB(sb)->write_lock_owner = current->pid; /* read_lock/unlock now no-ops for us */
9748 +}
9749 +static inline void unionfs_write_unlock(struct super_block *sb)
9750 +{
9751 +       UNIONFS_SB(sb)->write_lock_owner = 0; /* before up_write(): next writer sets its own pid */
9752 +       up_write(&UNIONFS_SB(sb)->rwsem);
9753 +}
9754 +
9755 +static inline void unionfs_double_lock_dentry(struct dentry *d1,
9756 +                                             struct dentry *d2)
9757 +{
9758 +       struct dentry *first, *second;
9759 +
9760 +       BUG_ON(d1 == d2);
9761 +       /* lock in address order: lower address first, as the PARENT class */
9762 +       first = (d1 < d2) ? d1 : d2;
9763 +       second = (d1 < d2) ? d2 : d1;
9764 +       unionfs_lock_dentry(first, UNIONFS_DMUTEX_PARENT);
9765 +       unionfs_lock_dentry(second, UNIONFS_DMUTEX_CHILD);
9766 +}
9767 +
9768 +static inline void unionfs_double_unlock_dentry(struct dentry *d1,
9769 +                                               struct dentry *d2)
9770 +{
9771 +       struct dentry *first, *second;
9772 +
9773 +       BUG_ON(d1 == d2);
9774 +       /* lower address is released first, i.e. same order as locking */
9775 +       first = (d1 < d2) ? d1 : d2;
9776 +       second = (d1 < d2) ? d2 : d1;
9777 +       unionfs_unlock_dentry(first);
9778 +       unionfs_unlock_dentry(second);
9779 +}
9780 +
9781 +static inline void unionfs_double_lock_parents(struct dentry *p1,
9782 +                                              struct dentry *p2)
9783 +{
9784 +       struct dentry *first = (p1 < p2) ? p1 : p2;
9785 +       struct dentry *second = (p1 < p2) ? p2 : p1;
9786 +
9787 +       /*
9788 +        * The lower-addressed dentry is locked first; if both arguments
9789 +        * are the same dentry it is locked exactly once.
9790 +        */
9791 +       unionfs_lock_dentry(first, UNIONFS_DMUTEX_REVAL_PARENT);
9792 +       if (first == second)
9793 +               return;
9794 +       unionfs_lock_dentry(second, UNIONFS_DMUTEX_REVAL_CHILD);
9795 +}
9796 +
9797 +static inline void unionfs_double_unlock_parents(struct dentry *p1,
9798 +                                                struct dentry *p2)
9799 +{
9800 +       struct dentry *first = (p1 < p2) ? p1 : p2;
9801 +       struct dentry *second = (p1 < p2) ? p2 : p1;
9802 +
9803 +       /*
9804 +        * Release the lower-addressed dentry first (the same order in
9805 +        * which unionfs_double_lock_parents takes them); a dentry passed
9806 +        * as both arguments is released exactly once.
9807 +        */
9808 +       unionfs_unlock_dentry(first);
9809 +       if (first != second)
9810 +               unionfs_unlock_dentry(second);
9811 +}
9812 +
9813 +extern int new_dentry_private_data(struct dentry *dentry, int subclass);
9814 +extern int realloc_dentry_private_data(struct dentry *dentry);
9815 +extern void free_dentry_private_data(struct dentry *dentry);
9816 +extern void update_bstart(struct dentry *dentry);
9817 +extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
9818 +extern void release_lower_nd(struct nameidata *nd, int err);
9819 +
9820 +/*
9821 + * EXTERNALS:
9822 + */
9823 +
9824 +/* replicates the directory structure up to given dentry in given branch */
9825 +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
9826 +                                    const char *name, int bindex);
9827 +
9828 +/* partial lookup */
9829 +extern int unionfs_partial_lookup(struct dentry *dentry,
9830 +                                 struct dentry *parent);
9831 +extern struct dentry *unionfs_lookup_full(struct dentry *dentry,
9832 +                                         struct dentry *parent,
9833 +                                         int lookupmode);
9834 +
9835 +/* copies a file from dbstart to newbindex branch */
9836 +extern int copyup_file(struct inode *dir, struct file *file, int bstart,
9837 +                      int newbindex, loff_t size);
9838 +extern int copyup_named_file(struct inode *dir, struct file *file,
9839 +                            char *name, int bstart, int new_bindex,
9840 +                            loff_t len);
9841 +/* copies a dentry from dbstart to newbindex branch */
9842 +extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
9843 +                        int bstart, int new_bindex, const char *name,
9844 +                        int namelen, struct file **copyup_file, loff_t len);
9845 +/* helper functions for post-copyup actions */
9846 +extern void unionfs_postcopyup_setmnt(struct dentry *dentry);
9847 +extern void unionfs_postcopyup_release(struct dentry *dentry);
9848 +
9849 +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
9850 +extern int check_empty(struct dentry *dentry, struct dentry *parent,
9851 +                      struct unionfs_dir_state **namelist);
9852 +/* whiteout and opaque directory helpers */
9853 +extern char *alloc_whname(const char *name, int len);
9854 +extern bool is_whiteout_name(char **namep, int *namelenp);
9855 +extern bool is_validname(const char *name);
9856 +extern struct dentry *lookup_whiteout(const char *name,
9857 +                                     struct dentry *lower_parent);
9858 +extern struct dentry *find_first_whiteout(struct dentry *dentry);
9859 +extern int unlink_whiteout(struct dentry *wh_dentry);
9860 +extern int check_unlink_whiteout(struct dentry *dentry,
9861 +                                struct dentry *lower_dentry, int bindex);
9862 +extern int create_whiteout(struct dentry *dentry, int start);
9863 +extern int delete_whiteouts(struct dentry *dentry, int bindex,
9864 +                           struct unionfs_dir_state *namelist);
9865 +extern int is_opaque_dir(struct dentry *dentry, int bindex);
9866 +extern int make_dir_opaque(struct dentry *dir, int bindex);
9867 +extern void unionfs_set_max_namelen(long *namelen);
9868 +
9869 +extern void unionfs_reinterpose(struct dentry *this_dentry);
9870 +extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
9871 +
9872 +/* Locking functions. */
9873 +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
9874 +extern int unionfs_getlk(struct file *file, struct file_lock *fl);
9875 +
9876 +/* Common file operations. */
9877 +extern int unionfs_file_revalidate(struct file *file, struct dentry *parent,
9878 +                                  bool willwrite);
9879 +extern int unionfs_open(struct inode *inode, struct file *file);
9880 +extern int unionfs_file_release(struct inode *inode, struct file *file);
9881 +extern int unionfs_flush(struct file *file, fl_owner_t id);
9882 +extern long unionfs_ioctl(struct file *file, unsigned int cmd,
9883 +                         unsigned long arg);
9884 +extern int unionfs_fsync(struct file *file, struct dentry *dentry,
9885 +                        int datasync);
9886 +extern int unionfs_fasync(int fd, struct file *file, int flag);
9887 +
9888 +/* Inode operations */
9889 +extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino);
9890 +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9891 +                         struct inode *new_dir, struct dentry *new_dentry);
9892 +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
9893 +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
9894 +
9895 +extern bool __unionfs_d_revalidate(struct dentry *dentry,
9896 +                                  struct dentry *parent, bool willwrite);
9897 +extern bool is_negative_lower(const struct dentry *dentry);
9898 +extern bool is_newer_lower(const struct dentry *dentry);
9899 +extern void purge_sb_data(struct super_block *sb);
9900 +
9901 +/* The values for unionfs_interpose's flag. */
9902 +#define INTERPOSE_DEFAULT      0
9903 +#define INTERPOSE_LOOKUP       1
9904 +#define INTERPOSE_REVAL                2
9905 +#define INTERPOSE_REVAL_NEG    3
9906 +#define INTERPOSE_PARTIAL      4
9907 +
9908 +extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
9909 +                                       struct super_block *sb, int flag);
9910 +
9911 +#ifdef CONFIG_UNION_FS_XATTR
9912 +/* Extended attribute functions. */
9913 +extern void *unionfs_xattr_alloc(size_t size, size_t limit);
9914 +static inline void unionfs_xattr_kfree(const void *p)
9915 +{
9916 +       kfree(p);
9917 +}
9918 +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
9919 +                               void *value, size_t size);
9920 +extern int unionfs_removexattr(struct dentry *dentry, const char *name);
9921 +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
9922 +                                size_t size);
9923 +extern int unionfs_setxattr(struct dentry *dentry, const char *name,
9924 +                           const void *value, size_t size, int flags);
9925 +#endif /* CONFIG_UNION_FS_XATTR */
9926 +
9927 +/* The root directory is unhashed, but isn't deleted. */
9928 +static inline int d_deleted(struct dentry *d)
9929 +{
9930 +       return d_unhashed(d) && (d != d->d_sb->s_root);
9931 +}
9932 +
9933 +/* unionfs_permission, check if we should bypass error to facilitate copyup */
9934 +#define IS_COPYUP_ERR(err) ((err) == -EROFS)
9935 +
9936 +/* unionfs_open, check if we need to copyup the file */
9937 +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
9938 +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
9939 +
9940 +static inline int branchperms(const struct super_block *sb, int index)
9941 +{
9942 +       BUG_ON(index < 0);
9943 +       return UNIONFS_SB(sb)->data[index].branchperms;
9944 +}
9945 +
9946 +static inline int set_branchperms(struct super_block *sb, int index, int perms)
9947 +{
9948 +       BUG_ON(index < 0);
9949 +       UNIONFS_SB(sb)->data[index].branchperms = perms;
9950 +       return perms;
9951 +}
9952 +
9953 +/* check if readonly lower inode, but possibly unlinked (no inode->i_sb) */
9954 +static inline int __is_rdonly(const struct inode *inode)
9955 +{
9956 +       /* if unlinked, can't be readonly (?) */
9957 +       if (!inode->i_sb)
9958 +               return 0;
9959 +       return IS_RDONLY(inode);
9960 +
9961 +}
9962 +/* Return -EROFS if branch @index of @sb lacks MAY_WRITE perms, else 0. */
9963 +static inline int is_robranch_super(const struct super_block *sb, int index)
9964 +{
9965 +       int ret;
9966 +
9967 +       ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
9968 +       return ret;
9969 +}
9970 +
9971 +/* Is this file on a read-only branch? */
9972 +static inline int is_robranch_idx(const struct dentry *dentry, int index)
9973 +{
9974 +       struct super_block *lower_sb;
9975 +
9976 +       BUG_ON(index < 0);
9977 +
9978 +       if (!(branchperms(dentry->d_sb, index) & MAY_WRITE))
9979 +               return -EROFS;
9980 +
9981 +       lower_sb = unionfs_lower_super_idx(dentry->d_sb, index);
9982 +       BUG_ON(lower_sb == NULL);
9983 +       /*
9984 +        * test sb flags directly, not IS_RDONLY(lower_inode) because the
9985 +        * lower_dentry could be a negative.
9986 +        */
9987 +       if (lower_sb->s_flags & MS_RDONLY)
9988 +               return -EROFS;
9989 +
9990 +       return 0;
9991 +}
9992 +
9993 +static inline int is_robranch(const struct dentry *dentry)
9994 +{
9995 +       int index;
9996 +
9997 +       index = UNIONFS_D(dentry)->bstart;
9998 +       BUG_ON(index < 0);
9999 +
10000 +       return is_robranch_idx(dentry, index);
10001 +}
10002 +
10003 +/*
10004 + * EXTERNALS:
10005 + */
10006 +extern int check_branch(struct nameidata *nd);
10007 +extern int parse_branch_mode(const char *name, int *perms);
10008 +
10009 +/* locking helpers */
10010 +static inline struct dentry *lock_parent(struct dentry *dentry)
10011 +{
10012 +       struct dentry *dir = dget_parent(dentry);
10013 +       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
10014 +       return dir;
10015 +}
10016 +static inline struct dentry *lock_parent_wh(struct dentry *dentry)
10017 +{
10018 +       struct dentry *dir = dget_parent(dentry);
10019 +
10020 +       mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT);
10021 +       return dir;
10022 +}
10023 +
10024 +static inline void unlock_dir(struct dentry *dir)
10025 +{
10026 +       mutex_unlock(&dir->d_inode->i_mutex);
10027 +       dput(dir);
10028 +}
10029 +
10030 +static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10031 +                                             int bindex)
10032 +{
10033 +       struct vfsmount *mnt;
10034 +
10035 +       BUG_ON(!dentry || bindex < 0);
10036 +
10037 +       mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
10038 +#ifdef CONFIG_UNION_FS_DEBUG
10039 +       if (!mnt)
10040 +               pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
10041 +                        mnt, bindex);
10042 +#endif /* CONFIG_UNION_FS_DEBUG */
10043 +
10044 +       return mnt;
10045 +}
10046 +
10047 +static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10048 +{
10049 +       struct vfsmount *mnt;
10050 +
10051 +       if (!dentry && bindex < 0)
10052 +               return;
10053 +       BUG_ON(!dentry || bindex < 0);
10054 +
10055 +       mnt = unionfs_lower_mnt_idx(dentry, bindex);
10056 +#ifdef CONFIG_UNION_FS_DEBUG
10057 +       /*
10058 +        * Directories can have NULL lower objects in between start/end, but
10059 +        * NOT if at the start/end range.  We cannot verify that this dentry
10060 +        * is a type=DIR, because it may already be a negative dentry.  But
10061 +        * if dbstart is greater than dbend, we know that this couldn't have
10062 +        * been a regular file: it had to have been a directory.
10063 +        */
10064 +       if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10065 +               pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
10066 +#endif /* CONFIG_UNION_FS_DEBUG */
10067 +       mntput(mnt);
10068 +}
10069 +
10070 +#ifdef CONFIG_UNION_FS_DEBUG
10071 +
10072 +/* useful for tracking code reachability */
10073 +#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
10074 +
10075 +#define unionfs_check_inode(i) __unionfs_check_inode((i),      \
10076 +       __FILE__, __func__, __LINE__)
10077 +#define unionfs_check_dentry(d)        __unionfs_check_dentry((d),     \
10078 +       __FILE__, __func__, __LINE__)
10079 +#define unionfs_check_file(f)  __unionfs_check_file((f),       \
10080 +       __FILE__, __func__, __LINE__)
10081 +#define unionfs_check_nd(n)    __unionfs_check_nd((n),         \
10082 +       __FILE__, __func__, __LINE__)
10083 +#define show_branch_counts(sb) __show_branch_counts((sb),      \
10084 +       __FILE__, __func__, __LINE__)
10085 +#define show_inode_times(i)    __show_inode_times((i),         \
10086 +       __FILE__, __func__, __LINE__)
10087 +#define show_dinode_times(d)   __show_dinode_times((d),        \
10088 +       __FILE__, __func__, __LINE__)
10089 +#define show_inode_counts(i)   __show_inode_counts((i),        \
10090 +       __FILE__, __func__, __LINE__)
10091 +
10092 +extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10093 +                                 const char *fxn, int line);
10094 +extern void __unionfs_check_dentry(const struct dentry *dentry,
10095 +                                  const char *fname, const char *fxn,
10096 +                                  int line);
10097 +extern void __unionfs_check_file(const struct file *file,
10098 +                                const char *fname, const char *fxn, int line);
10099 +extern void __unionfs_check_nd(const struct nameidata *nd,
10100 +                              const char *fname, const char *fxn, int line);
10101 +extern void __show_branch_counts(const struct super_block *sb,
10102 +                                const char *file, const char *fxn, int line);
10103 +extern void __show_inode_times(const struct inode *inode,
10104 +                              const char *file, const char *fxn, int line);
10105 +extern void __show_dinode_times(const struct dentry *dentry,
10106 +                               const char *file, const char *fxn, int line);
10107 +extern void __show_inode_counts(const struct inode *inode,
10108 +                               const char *file, const char *fxn, int line);
10109 +
10110 +#else /* not CONFIG_UNION_FS_DEBUG */
10111 +
10112 +/* we leave useful hooks for these check functions throughout the code */
10113 +#define unionfs_check_inode(i)         do { } while (0)
10114 +#define unionfs_check_dentry(d)                do { } while (0)
10115 +#define unionfs_check_file(f)          do { } while (0)
10116 +#define unionfs_check_nd(n)            do { } while (0)
10117 +#define show_branch_counts(sb)         do { } while (0)
10118 +#define show_inode_times(i)            do { } while (0)
10119 +#define show_dinode_times(d)           do { } while (0)
10120 +#define show_inode_counts(i)           do { } while (0)
10121 +
10122 +#endif /* not CONFIG_UNION_FS_DEBUG */
10123 +
10124 +#endif /* not _UNION_H_ */
10125 --- /dev/null
10126 +++ kernel-2.6.28/fs/unionfs/unlink.c
10127 @@ -0,0 +1,282 @@
10128 +/*
10129 + * Copyright (c) 2003-2009 Erez Zadok
10130 + * Copyright (c) 2003-2006 Charles P. Wright
10131 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10132 + * Copyright (c) 2005-2006 Junjiro Okajima
10133 + * Copyright (c) 2005      Arun M. Krishnakumar
10134 + * Copyright (c) 2004-2006 David P. Quigley
10135 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10136 + * Copyright (c) 2003      Puja Gupta
10137 + * Copyright (c) 2003      Harikesavan Krishnan
10138 + * Copyright (c) 2003-2009 Stony Brook University
10139 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
10140 + *
10141 + * This program is free software; you can redistribute it and/or modify
10142 + * it under the terms of the GNU General Public License version 2 as
10143 + * published by the Free Software Foundation.
10144 + */
10145 +
10146 +#include "union.h"
10147 +
10148 +/*
10149 + * Helper function for Unionfs's unlink operation.
10150 + *
10151 + * The main goal of this function is to optimize the unlinking of non-dir
10152 + * objects in unionfs by deleting all possible lower inode objects from the
10153 + * underlying branches having same dentry name as the non-dir dentry on
10154 + * which this unlink operation is called.  This way we delete as many lower
10155 + * inodes as possible, and save space.  Whiteouts need to be created in
10156 + * branch0 only if unlinking fails on any of the lower branch other than
10157 + * branch0, or if a lower branch is marked read-only.
10158 + *
10159 + * Also, while unlinking a file, if we encounter any dir type entry in any
10160 + * intermediate branch, then we remove the directory by calling vfs_rmdir.
10161 + * The following special cases are also handled:
10162 + *
10163 + * (1) If an error occurs in branch0 during vfs_unlink, then we return
10164 + *     appropriate error.
10165 + *
10166 + * (2) If we get an error during unlink in any of other lower branch other
10167 + *     than branch0, then we create a whiteout in branch0.
10168 + *
10169 + * (3) If a whiteout already exists in any intermediate branch, we delete
10170 + *     all possible inodes only up to that branch (this is an "opaqueness"
10171 + *     as per Documentation/filesystems/unionfs/concepts.txt).
10172 + *
10173 + */
10174 +static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry,
10175 +                                  struct dentry *parent)
10176 +{
10177 +       struct dentry *lower_dentry;
10178 +       struct dentry *lower_dir_dentry;
10179 +       int bindex;
10180 +       int err = 0;
10181 +
10182 +       err = unionfs_partial_lookup(dentry, parent);
10183 +       if (err)
10184 +               goto out;
10185 +
10186 +       /* trying to unlink all possible valid instances */
10187 +       for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
10188 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10189 +               if (!lower_dentry || !lower_dentry->d_inode)
10190 +                       continue;
10191 +
10192 +               lower_dir_dentry = lock_parent(lower_dentry);
10193 +
10194 +               /* avoid destroying the lower inode if the object is in use */
10195 +               dget(lower_dentry);
10196 +               err = is_robranch_super(dentry->d_sb, bindex);
10197 +               if (!err) {
10198 +                       /* see Documentation/filesystems/unionfs/issues.txt */
10199 +                       lockdep_off();
10200 +                       if (!S_ISDIR(lower_dentry->d_inode->i_mode))
10201 +                               err = vfs_unlink(lower_dir_dentry->d_inode,
10202 +                                                               lower_dentry);
10203 +                       else
10204 +                               err = vfs_rmdir(lower_dir_dentry->d_inode,
10205 +                                                               lower_dentry);
10206 +                       lockdep_on();
10207 +               }
10208 +
10209 +               /* if lower object deletion succeeds, update inode's times */
10210 +               if (!err)
10211 +                       unionfs_copy_attr_times(dentry->d_inode);
10212 +               dput(lower_dentry);
10213 +               fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10214 +               unlock_dir(lower_dir_dentry);
10215 +
10216 +               if (err)
10217 +                       break;
10218 +       }
10219 +
10220 +       /*
10221 +        * Create the whiteout in branch 0 (highest priority) only if (a)
10222 +        * there was an error in any intermediate branch other than branch 0
10223 +        * due to failure of vfs_unlink/vfs_rmdir or (b) a branch marked or
10224 +        * mounted read-only.
10225 +        */
10226 +       if (err) {
10227 +               if ((bindex == 0) ||
10228 +                   ((bindex == dbstart(dentry)) &&
10229 +                    (!IS_COPYUP_ERR(err))))
10230 +                       goto out;
10231 +               else {
10232 +                       if (!IS_COPYUP_ERR(err))
10233 +                               pr_debug("unionfs: lower object deletion "
10234 +                                            "failed in branch:%d\n", bindex);
10235 +                       err = create_whiteout(dentry, sbstart(dentry->d_sb));
10236 +               }
10237 +       }
10238 +
10239 +out:
10240 +       if (!err)
10241 +               inode_dec_link_count(dentry->d_inode);
10242 +
10243 +       /* We don't want to leave negative leftover dentries for revalidate. */
10244 +       if (!err && (dbopaque(dentry) != -1))
10245 +               update_bstart(dentry);
10246 +
10247 +       return err;
10248 +}
10249 +
10250 +int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10251 +{
10252 +       int err = 0;
10253 +       struct inode *inode = dentry->d_inode;
10254 +       struct dentry *parent;
10255 +       int valid;
10256 +
10257 +       BUG_ON(S_ISDIR(inode->i_mode));
10258 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10259 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10260 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10261 +
10262 +       valid = __unionfs_d_revalidate(dentry, parent, false);
10263 +       if (unlikely(!valid)) {
10264 +               err = -ESTALE;
10265 +               goto out;
10266 +       }
10267 +       unionfs_check_dentry(dentry);
10268 +
10269 +       err = unionfs_unlink_whiteout(dir, dentry, parent);
10270 +       /* call d_drop so the system "forgets" about us */
10271 +       if (!err) {
10272 +               unionfs_postcopyup_release(dentry);
10273 +               unionfs_postcopyup_setmnt(parent);
10274 +               if (inode->i_nlink == 0) /* drop lower inodes */
10275 +                       iput_lowers_all(inode, false);
10276 +               d_drop(dentry);
10277 +               /*
10278 +                * if unlink/whiteout succeeded, parent dir mtime has
10279 +                * changed
10280 +                */
10281 +               unionfs_copy_attr_times(dir);
10282 +       }
10283 +
10284 +out:
10285 +       if (!err) {
10286 +               unionfs_check_dentry(dentry);
10287 +               unionfs_check_inode(dir);
10288 +       }
10289 +       unionfs_unlock_dentry(dentry);
10290 +       unionfs_unlock_parent(dentry, parent);
10291 +       unionfs_read_unlock(dentry->d_sb);
10292 +       return err;
10293 +}
10294 +
10295 +static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10296 +                              struct unionfs_dir_state *namelist)
10297 +{
10298 +       int err;
10299 +       struct dentry *lower_dentry;
10300 +       struct dentry *lower_dir_dentry = NULL;
10301 +
10302 +       /* Here we need to remove whiteout entries. */
10303 +       err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10304 +       if (err)
10305 +               goto out;
10306 +
10307 +       lower_dentry = unionfs_lower_dentry(dentry);
10308 +
10309 +       lower_dir_dentry = lock_parent(lower_dentry);
10310 +
10311 +       /* avoid destroying the lower inode if the file is in use */
10312 +       dget(lower_dentry);
10313 +       err = is_robranch(dentry);
10314 +       if (!err) {
10315 +               /* see Documentation/filesystems/unionfs/issues.txt */
10316 +               lockdep_off();
10317 +               err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10318 +               lockdep_on();
10319 +       }
10320 +       dput(lower_dentry);
10321 +
10322 +       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10323 +       /* propagate number of hard-links */
10324 +       dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10325 +
10326 +out:
10327 +       if (lower_dir_dentry)
10328 +               unlock_dir(lower_dir_dentry);
10329 +       return err;
10330 +}
10331 +
10332 +int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10333 +{
10334 +       int err = 0;
10335 +       struct unionfs_dir_state *namelist = NULL;
10336 +       struct dentry *parent;
10337 +       int dstart, dend;
10338 +       bool valid;
10339 +
10340 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10341 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10342 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10343 +
10344 +       valid = __unionfs_d_revalidate(dentry, parent, false);
10345 +       if (unlikely(!valid)) {
10346 +               err = -ESTALE;
10347 +               goto out;
10348 +       }
10349 +       unionfs_check_dentry(dentry);
10350 +
10351 +       /* check if this unionfs directory is empty or not */
10352 +       err = check_empty(dentry, parent, &namelist);
10353 +       if (err)
10354 +               goto out;
10355 +
10356 +       err = unionfs_rmdir_first(dir, dentry, namelist);
10357 +       dstart = dbstart(dentry);
10358 +       dend = dbend(dentry);
10359 +       /*
10360 +        * We create a whiteout for the directory if there was an error to
10361 +        * rmdir the first directory entry in the union.  Otherwise, we
10362 +        * create a whiteout only if there is a chance that a lower
10363 +        * priority branch might also have the same named directory.  IOW,
10364 +        * if there is not another same-named directory at a lower priority
10365 +        * branch, then we don't need to create a whiteout for it.
10366 +        */
10367 +       if (!err) {
10368 +               if (dstart < dend)
10369 +                       err = create_whiteout(dentry, dstart);
10370 +       } else {
10371 +               int new_err;
10372 +
10373 +               if (dstart == 0)
10374 +                       goto out;
10375 +
10376 +               /* exit if the error returned was NOT -EROFS */
10377 +               if (!IS_COPYUP_ERR(err))
10378 +                       goto out;
10379 +
10380 +               new_err = create_whiteout(dentry, dstart - 1);
10381 +               if (new_err != -EEXIST)
10382 +                       err = new_err;
10383 +       }
10384 +
10385 +out:
10386 +       /*
10387 +        * Drop references to lower dentry/inode so storage space for them
10388 +        * can be reclaimed.  Then, call d_drop so the system "forgets"
10389 +        * about us.
10390 +        */
10391 +       if (!err) {
10392 +               iput_lowers_all(dentry->d_inode, false);
10393 +               dput(unionfs_lower_dentry_idx(dentry, dstart));
10394 +               unionfs_set_lower_dentry_idx(dentry, dstart, NULL);
10395 +               d_drop(dentry);
10396 +               /* update our lower vfsmnts, in case a copyup took place */
10397 +               unionfs_postcopyup_setmnt(dentry);
10398 +               unionfs_check_dentry(dentry);
10399 +               unionfs_check_inode(dir);
10400 +       }
10401 +
10402 +       if (namelist)
10403 +               free_rdstate(namelist);
10404 +
10405 +       unionfs_unlock_dentry(dentry);
10406 +       unionfs_unlock_parent(dentry, parent);
10407 +       unionfs_read_unlock(dentry->d_sb);
10408 +       return err;
10409 +}
10410 --- /dev/null
10411 +++ kernel-2.6.28/fs/unionfs/whiteout.c
10412 @@ -0,0 +1,577 @@
10413 +/*
10414 + * Copyright (c) 2003-2009 Erez Zadok
10415 + * Copyright (c) 2003-2006 Charles P. Wright
10416 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10417 + * Copyright (c) 2005-2006 Junjiro Okajima
10418 + * Copyright (c) 2005      Arun M. Krishnakumar
10419 + * Copyright (c) 2004-2006 David P. Quigley
10420 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10421 + * Copyright (c) 2003      Puja Gupta
10422 + * Copyright (c) 2003      Harikesavan Krishnan
10423 + * Copyright (c) 2003-2009 Stony Brook University
10424 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
10425 + *
10426 + * This program is free software; you can redistribute it and/or modify
10427 + * it under the terms of the GNU General Public License version 2 as
10428 + * published by the Free Software Foundation.
10429 + */
10430 +
10431 +#include "union.h"
10432 +
10433 +/*
10434 + * whiteout and opaque directory helpers
10435 + */
10436 +
10437 +/* What do we use for whiteouts. */
10438 +#define UNIONFS_WHPFX ".wh."
10439 +#define UNIONFS_WHLEN 4
10440 +/*
10441 + * If a directory contains this file, then it is opaque.  We start with the
10442 + * .wh. flag so that it is blocked by lookup.
10443 + */
10444 +#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10445 +#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10446 +
10447 +/* construct whiteout filename */
10448 +char *alloc_whname(const char *name, int len)
10449 +{
10450 +       char *buf;
10451 +
10452 +       buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL);
10453 +       if (unlikely(!buf))
10454 +               return ERR_PTR(-ENOMEM);
10455 +
10456 +       strcpy(buf, UNIONFS_WHPFX);
10457 +       strlcat(buf, name, len + UNIONFS_WHLEN + 1);
10458 +
10459 +       return buf;
10460 +}
10461 +
10462 +/*
10463 + * XXX: this can be inline or CPP macro, but is here to keep all whiteout
10464 + * code in one place.
10465 + */
10466 +void unionfs_set_max_namelen(long *namelen)
10467 +{
10468 +       *namelen -= UNIONFS_WHLEN;
10469 +}
10470 +
10471 +/* check if @namep is a whiteout, update @namep and @namelenp accordingly */
10472 +bool is_whiteout_name(char **namep, int *namelenp)
10473 +{
10474 +       if (*namelenp > UNIONFS_WHLEN &&
10475 +           !strncmp(*namep, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
10476 +               *namep += UNIONFS_WHLEN;
10477 +               *namelenp -= UNIONFS_WHLEN;
10478 +               return true;
10479 +       }
10480 +       return false;
10481 +}
10482 +
10483 +/* is the filename valid == !(whiteout for a file or opaque dir marker) */
10484 +bool is_validname(const char *name)
10485 +{
10486 +       if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN))
10487 +               return false;
10488 +       if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME,
10489 +                    sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1))
10490 +               return false;
10491 +       return true;
10492 +}
10493 +
10494 +/*
10495 + * Look for a whiteout @name in @lower_parent directory.  If error, return
10496 + * ERR_PTR.  Caller must dput() the returned dentry if not an error.
10497 + *
10498 + * XXX: some callers can reuse the whname allocated buffer to avoid repeated
10499 + * free then re-malloc calls.  Need to provide a different API for those
10500 + * callers.
10501 + */
10502 +struct dentry *lookup_whiteout(const char *name, struct dentry *lower_parent)
10503 +{
10504 +       char *whname = NULL;
10505 +       int err = 0, namelen;
10506 +       struct dentry *wh_dentry = NULL;
10507 +
10508 +       namelen = strlen(name);
10509 +       whname = alloc_whname(name, namelen);
10510 +       if (unlikely(IS_ERR(whname))) {
10511 +               /* whname is an ERR_PTR, not a buffer: must not kfree it */
10512 +               return ERR_PTR(PTR_ERR(whname));
10513 +       }
10514 +
10515 +       /* check if whiteout exists in this branch: lookup .wh.foo */
10516 +       wh_dentry = lookup_one_len(whname, lower_parent, strlen(whname));
10517 +       if (IS_ERR(wh_dentry)) {
10518 +               err = PTR_ERR(wh_dentry);
10519 +               goto out;
10520 +       }
10521 +
10522 +       /* negative dentry (ENOENT): returned as-is, caller must dput it */
10523 +       if (!wh_dentry->d_inode)
10524 +               goto out;
10525 +
10526 +       /* whiteout found: only regular files are valid whiteouts */
10527 +       if (!S_ISREG(wh_dentry->d_inode->i_mode)) {
10528 +               printk(KERN_ERR "unionfs: invalid whiteout %s entry type %d\n",
10529 +                      whname, wh_dentry->d_inode->i_mode);
10530 +               dput(wh_dentry);
10531 +               err = -EIO;
10532 +               goto out;
10533 +       }
10534 +
10535 +out:
10536 +       kfree(whname);
10537 +       if (err)
10538 +               wh_dentry = ERR_PTR(err);
10539 +       return wh_dentry;
10540 +}
10541 +
10542 +/* find and return first whiteout in parent directory, else ENOENT */
10543 +struct dentry *find_first_whiteout(struct dentry *dentry)
10544 +{
10545 +       int bindex, bstart, bend;
10546 +       struct dentry *parent, *lower_parent, *wh_dentry;
10547 +
10548 +       parent = dget_parent(dentry);
10549 +
10550 +       bstart = dbstart(parent);
10551 +       bend = dbend(parent);
10552 +       wh_dentry = ERR_PTR(-ENOENT);
10553 +
10554 +       for (bindex = bstart; bindex <= bend; bindex++) {
10555 +               lower_parent = unionfs_lower_dentry_idx(parent, bindex);
10556 +               if (!lower_parent)
10557 +                       continue;
10558 +               wh_dentry = lookup_whiteout(dentry->d_name.name, lower_parent);
10559 +               if (IS_ERR(wh_dentry))
10560 +                       continue;
10561 +               if (wh_dentry->d_inode)
10562 +                       break;
10563 +               dput(wh_dentry);
10564 +               wh_dentry = ERR_PTR(-ENOENT);
10565 +       }
10566 +
10567 +       dput(parent);
10568 +
10569 +       return wh_dentry;
10570 +}
10571 +
10572 +/*
10573 + * Unlink a whiteout dentry.  Returns 0 or -errno.  Caller must hold and
10574 + * release dentry reference.
10575 + */
10576 +int unlink_whiteout(struct dentry *wh_dentry)
10577 +{
10578 +       int err;
10579 +       struct dentry *lower_dir_dentry;
10580 +
10581 +       /* dget and lock parent dentry */
10582 +       lower_dir_dentry = lock_parent_wh(wh_dentry);
10583 +
10584 +       /* see Documentation/filesystems/unionfs/issues.txt */
10585 +       lockdep_off();
10586 +       err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
10587 +       lockdep_on();
10588 +
10589 +       /*
10590 +        * Whiteouts are special files and should be deleted no matter what
10591 +        * (as if they never existed), in order to allow this create
10592 +        * operation to succeed.  This is especially important in sticky
10593 +        * directories: a whiteout may have been created by one user, but
10594 +        * the newly created file may be created by another user.
10595 +        * Therefore, in order to maintain Unix semantics, if the vfs_unlink
10596 +        * above failed, then we have to try to directly unlink the
10597 +        * whiteout.  Note: in the ODF version of unionfs, whiteout are
10598 +        * handled much more cleanly.
10599 +        */
10600 +       if (err == -EPERM) {
10601 +               struct inode *inode = lower_dir_dentry->d_inode;
10602 +               err = inode->i_op->unlink(inode, wh_dentry);
10603 +       }
10604 +       /* unlock and dput the parent only after the last use of it above */
10605 +       unlock_dir(lower_dir_dentry);
10606 +       if (err)
10607 +               printk(KERN_ERR "unionfs: could not unlink whiteout %s, "
10608 +                      "err = %d\n", wh_dentry->d_name.name, err);
10609 +
10610 +       return err;
10611 +}
10612 +
10613 +/*
10614 + * Helper function when creating new objects (create, symlink, mknod, etc.).
10615 + * Checks to see if there's a whiteout in @lower_dentry's parent directory,
10616 + * whose name is taken from @dentry.  Then tries to remove that whiteout, if
10617 + * found.  If <dentry,bindex> is a branch marked readonly, return -EROFS.
10618 + * If it finds both a regular file and a whiteout, return -EIO (this should
10619 + * never happen).
10620 + *
10621 + * Return 0 if no whiteout was found.  Return 1 if one was found and
10622 + * successfully removed.  Therefore a value >= 0 tells the caller that
10623 + * @lower_dentry belongs to a good branch to create the new object in).
10624 + * Return -ERRNO if an error occurred during whiteout lookup or in trying to
10625 + * unlink the whiteout.
10626 + */
10627 +int check_unlink_whiteout(struct dentry *dentry, struct dentry *lower_dentry,
10628 +                         int bindex)
10629 +{
10630 +       int err;
10631 +       struct dentry *wh_dentry = NULL;
10632 +       struct dentry *lower_dir_dentry = NULL;
10633 +
10634 +       /* look for whiteout dentry first; parent ref is held until exit */
10635 +       lower_dir_dentry = dget_parent(lower_dentry);
10636 +       wh_dentry = lookup_whiteout(dentry->d_name.name, lower_dir_dentry);
10637 +       if (IS_ERR(wh_dentry)) {
10638 +               err = PTR_ERR(wh_dentry);
10639 +               goto out;
10640 +       }
10641 +
10642 +       if (!wh_dentry->d_inode) { /* no whiteout exists*/
10643 +               err = 0;
10644 +               goto out_dput;
10645 +       }
10646 +
10647 +       /* check if regular file and whiteout were both found */
10648 +       if (unlikely(lower_dentry->d_inode)) {
10649 +               err = -EIO;
10650 +               printk(KERN_ERR "unionfs: found both whiteout and regular "
10651 +                      "file in directory %s (branch %d)\n",
10652 +                      lower_dir_dentry->d_name.name, bindex);
10653 +               goto out_dput;
10654 +       }
10655 +
10656 +       /* check if branch is writeable */
10657 +       err = is_robranch_super(dentry->d_sb, bindex);
10658 +       if (err)
10659 +               goto out_dput;
10660 +
10661 +       /* .wh.foo has been found, so let's unlink it */
10662 +       err = unlink_whiteout(wh_dentry);
10663 +       if (!err)
10664 +               err = 1; /* a whiteout was found and successfully removed */
10665 +out_dput:
10666 +       dput(wh_dentry);
10667 +out:
10668 +       dput(lower_dir_dentry);
10669 +       return err;
10670 +}
10671 +
10672 +/*
10673 + * Create a whiteout for @dentry's name.  Branch @start is tried first; after
10674 + * a lookup, create_parents or copyup error the next branch to the left is
10675 + * tried.  Returns 0 on success or if the whiteout already exists, else -errno.
10676 + */
10677 +int create_whiteout(struct dentry *dentry, int start)
10678 +{
10679 +       int bstart, bend, bindex;
10680 +       struct dentry *lower_dir_dentry;
10681 +       struct dentry *lower_dentry;
10682 +       struct dentry *lower_wh_dentry;
10683 +       struct nameidata nd;
10684 +       char *name = NULL;
10685 +       int err = -EINVAL;
10686 +
10687 +       verify_locked(dentry);
10688 +
10689 +       bstart = dbstart(dentry);
10690 +       bend = dbend(dentry);
10691 +
10692 +       /* create dentry's whiteout equivalent */
10693 +       name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
10694 +       if (unlikely(IS_ERR(name)))
10695 +               return PTR_ERR(name); /* ERR_PTR must not reach kfree() */
10696 +
10697 +       for (bindex = start; bindex >= 0; bindex--) {
10698 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10699 +
10700 +               if (!lower_dentry) {
10701 +                       /*
10702 +                        * if lower dentry is not present, create the
10703 +                        * entire lower dentry directory structure and go
10704 +                        * ahead.  Since we want to just create whiteout, we
10705 +                        * only want the parent dentry, and hence get rid of
10706 +                        * this dentry.
10707 +                        */
10708 +                       lower_dentry = create_parents(dentry->d_inode,
10709 +                                                     dentry,
10710 +                                                     dentry->d_name.name,
10711 +                                                     bindex);
10712 +                       if (!lower_dentry || IS_ERR(lower_dentry)) {
10713 +                               int ret = PTR_ERR(lower_dentry);
10714 +                               if (!IS_COPYUP_ERR(ret))
10715 +                                       printk(KERN_ERR
10716 +                                              "unionfs: create_parents for "
10717 +                                              "whiteout failed: bindex=%d "
10718 +                                              "err=%d\n", bindex, ret);
10719 +                               continue;
10720 +                       }
10721 +               }
10722 +
10723 +               lower_wh_dentry =
10724 +                       lookup_one_len(name, lower_dentry->d_parent,
10725 +                                      dentry->d_name.len + UNIONFS_WHLEN);
10726 +               if (IS_ERR(lower_wh_dentry))
10727 +                       continue;
10728 +
10729 +               /*
10730 +                * The whiteout already exists. This used to be impossible,
10731 +                * but now is possible because of opaqueness.
10732 +                */
10733 +               if (lower_wh_dentry->d_inode) {
10734 +                       dput(lower_wh_dentry);
10735 +                       err = 0;
10736 +                       goto out;
10737 +               }
10738 +
10739 +               err = init_lower_nd(&nd, LOOKUP_CREATE);
10740 +               if (unlikely(err < 0)) {
10741 +                       dput(lower_wh_dentry); /* drop the lookup reference */
10742 +                       goto out;
10743 +               }
10744 +               lower_dir_dentry = lock_parent_wh(lower_wh_dentry);
10745 +               err = is_robranch_super(dentry->d_sb, bindex);
10746 +               if (!err)
10747 +                       err = vfs_create(lower_dir_dentry->d_inode,
10748 +                                        lower_wh_dentry,
10749 +                                        ~current->fs->umask & S_IRUGO,
10750 +                                        &nd);
10751 +               unlock_dir(lower_dir_dentry);
10752 +               dput(lower_wh_dentry);
10753 +               release_lower_nd(&nd, err);
10754 +
10755 +               if (!err || !IS_COPYUP_ERR(err))
10756 +                       break;
10757 +       }
10758 +
10759 +       /* set dbopaque so that lookup will not proceed after this branch */
10760 +       if (!err)
10761 +               dbopaque(dentry) = bindex;
10762 +
10763 +out:
10764 +       kfree(name);
10765 +       return err;
10766 +}
10767 +
10768 +/*
10769 + * Remove, for rmdir, every whiteout recorded in @namelist for branch
10770 + * @bindex from the lower directory of @dentry in that branch.  Stops at
10771 + * the first lookup or unlink failure and returns its -errno; returns
10772 + * -ENOMEM if the name buffer cannot be allocated, 0 otherwise.
10773 + *
10774 + * lower directory inode should be locked
10775 + */
10776 +static int do_delete_whiteouts(struct dentry *dentry, int bindex,
10777 +                              struct unionfs_dir_state *namelist)
10778 +{
10779 +       struct dentry *dir_dentry;
10780 +       struct dentry *wh_dentry;
10781 +       struct inode *dir_inode;
10782 +       struct filldir_node *entry;
10783 +       char *wh_name;
10784 +       char *tail;
10785 +       int bucket;
10786 +       int err = 0;
10787 +
10788 +       /* the lower directory that holds the whiteouts */
10789 +       dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10790 +       BUG_ON(!S_ISDIR(dir_dentry->d_inode->i_mode));
10791 +       dir_inode = dir_dentry->d_inode;
10792 +       BUG_ON(!S_ISDIR(dir_inode->i_mode));
10793 +
10794 +       wh_name = __getname();
10795 +       if (unlikely(!wh_name))
10796 +               return -ENOMEM;
10797 +
10798 +       /* wh_name = whiteout prefix, followed by each entry's name */
10799 +       strcpy(wh_name, UNIONFS_WHPFX);
10800 +       tail = wh_name + UNIONFS_WHLEN;
10801 +
10802 +       for (bucket = 0; !err && bucket < namelist->size; bucket++) {
10803 +               list_for_each_entry(entry, &namelist->list[bucket],
10804 +                                   file_list) {
10805 +                       /* skip non-whiteouts and other branches' entries */
10806 +                       if (entry->bindex != bindex || !entry->whiteout)
10807 +                               continue;
10808 +
10809 +                       strlcpy(tail, entry->name,
10810 +                               PATH_MAX - UNIONFS_WHLEN);
10811 +                       wh_dentry = lookup_one_len(wh_name, dir_dentry,
10812 +                                                  entry->namelen +
10813 +                                                  UNIONFS_WHLEN);
10814 +                       if (IS_ERR(wh_dentry)) {
10815 +                               err = PTR_ERR(wh_dentry);
10816 +                               break;
10817 +                       }
10818 +
10819 +                       /* negative dentry: nothing to unlink */
10820 +                       if (wh_dentry->d_inode)
10821 +                               err = vfs_unlink(dir_inode, wh_dentry);
10822 +                       dput(wh_dentry);
10823 +                       if (err)
10824 +                               break;
10825 +               }
10826 +       }
10827 +
10828 +       __putname(wh_name);
10829 +
10830 +       /* copy the lower directory's times once, after all removals */
10831 +       fsstack_copy_attr_times(dentry->d_inode, dir_dentry->d_inode);
10832 +
10833 +       return err;
10834 +}
10835 +
10836 +/* sioq work callback: run do_delete_whiteouts() and signal completion */
10837 +void __delete_whiteouts(struct work_struct *work)
10838 +{
10839 +       struct sioq_args *sa = container_of(work, struct sioq_args, work);
10840 +       struct deletewh_args *wh = &sa->deletewh;
10841 +
10842 +       sa->err = do_delete_whiteouts(wh->dentry, wh->bindex, wh->namelist);
10843 +       complete(&sa->comp);
10844 +}
10845 +
10846 +/*
10847 + * Delete whiteouts in a dir (for rmdir operation).  The removal runs directly
10848 + * when the caller may write/search the lower dir, otherwise through sioq.
10849 + */
10850 +int delete_whiteouts(struct dentry *dentry, int bindex,
10851 +                    struct unionfs_dir_state *namelist)
10852 +{
10853 +       struct dentry *dir_dentry;
10854 +       struct inode *dir_inode;
10855 +       struct sioq_args req;
10856 +       int ret;
10857 +
10858 +       BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
10859 +       BUG_ON(bindex < dbstart(dentry));
10860 +       BUG_ON(bindex > dbend(dentry));
10861 +
10862 +       /* bail out if the branch is not writeable */
10863 +       ret = is_robranch_super(dentry->d_sb, bindex);
10864 +       if (ret)
10865 +               return ret;
10866 +
10867 +       dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10868 +       BUG_ON(!S_ISDIR(dir_dentry->d_inode->i_mode));
10869 +       dir_inode = dir_dentry->d_inode;
10870 +       BUG_ON(!S_ISDIR(dir_inode->i_mode));
10871 +
10872 +       /* inode_permission() returns 0 when access is granted */
10873 +       if (inode_permission(dir_inode, MAY_WRITE | MAY_EXEC) == 0)
10874 +               return do_delete_whiteouts(dentry, bindex, namelist);
10875 +
10876 +       /* permission check failed: hand the job to sioq instead */
10877 +       req.deletewh.namelist = namelist;
10878 +       req.deletewh.dentry = dentry;
10879 +       req.deletewh.bindex = bindex;
10880 +       run_sioq(__delete_whiteouts, &req);
10881 +       return req.err;
10882 +}
10883 +
10884 +/****************************************************************************
10885 + * Opaque directory helpers                                                 *
10886 + ****************************************************************************/
10887 +
10888 +/*
10889 + * is_opaque_dir: look for the UNIONFS_DIR_OPAQUE marker in the lower
10890 + * directory of @dentry in branch @bindex.
10891 + *
10892 + * Returns 1 if the marker exists (opaque dir), 0 if it does not, and
10893 + * -errno if the lookup failed.
10894 + */
10895 +int is_opaque_dir(struct dentry *dentry, int bindex)
10896 +{
10897 +       struct dentry *dir;
10898 +       struct inode *dir_inode;
10899 +       struct dentry *marker;
10900 +       struct sioq_args req;
10901 +       int opaque;
10902 +
10903 +       dir = unionfs_lower_dentry_idx(dentry, bindex);
10904 +       dir_inode = dir->d_inode;
10905 +       BUG_ON(!S_ISDIR(dir_inode->i_mode));
10906 +
10907 +       /* the lookup is done with the lower directory's i_mutex held */
10908 +       mutex_lock(&dir_inode->i_mutex);
10909 +
10910 +       if (inode_permission(dir_inode, MAY_EXEC)) {
10911 +               /* MAY_EXEC check failed: do the lookup through sioq */
10912 +               req.is_opaque.dentry = dir;
10913 +               run_sioq(__is_opaque_dir, &req);
10914 +               marker = req.ret;
10915 +       } else {
10916 +               marker = lookup_one_len(UNIONFS_DIR_OPAQUE, dir,
10917 +                                       sizeof(UNIONFS_DIR_OPAQUE) - 1);
10918 +       }
10919 +
10920 +       mutex_unlock(&dir_inode->i_mutex);
10921 +
10922 +       if (IS_ERR(marker))
10923 +               return PTR_ERR(marker);
10924 +
10925 +       /* a positive dentry means the marker file exists */
10926 +       opaque = marker->d_inode ? 1 : 0;
10927 +       dput(marker);
10928 +
10929 +       return opaque;
10930 +}
10931 +
10932 +void __is_opaque_dir(struct work_struct *work)
10933 +{
10934 +       struct sioq_args *sa = container_of(work, struct sioq_args, work);
10935 +       const int len = sizeof(UNIONFS_DIR_OPAQUE) - 1; /* no trailing NUL */
10936 +
10937 +       sa->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, sa->is_opaque.dentry, len);
10938 +       complete(&sa->comp);
10939 +}
10940 +
10941 +int make_dir_opaque(struct dentry *dentry, int bindex)
10942 +{
10943 +       int err = 0;
10944 +       struct dentry *lower_dentry, *diropq;
10945 +       struct inode *lower_dir;
10946 +       struct nameidata nd;
10947 +       kernel_cap_t orig_cap;
10948 +
10949 +       /*
10950 +        * Mark lower directory <dentry,bindex> opaque: create the
10951 +        * UNIONFS_DIR_OPAQUE marker file unless it already exists, then
10952 +        * record bindex in dbopaque(dentry).  Returns 0 or -errno.  The
10953 +        * marker is created regardless of directory search/create
10954 +        * permissions, so DAC capabilities are raised for the duration of
10955 +        * this call only.  Note, this does not circumvent ->permission.
10956 +        */
10957 +       orig_cap = current->cap_effective;
10958 +       cap_raise(current->cap_effective, CAP_DAC_READ_SEARCH);
10959 +       cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
10960 +
10961 +       lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10962 +       lower_dir = lower_dentry->d_inode;
10963 +       BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
10964 +              !S_ISDIR(lower_dir->i_mode));
10965 +
10966 +       mutex_lock(&lower_dir->i_mutex);
10967 +       diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
10968 +                               sizeof(UNIONFS_DIR_OPAQUE) - 1);
10969 +       if (IS_ERR(diropq)) {
10970 +               err = PTR_ERR(diropq);
10971 +               goto out;
10972 +       }
10973 +
10974 +       err = init_lower_nd(&nd, LOOKUP_CREATE);
10975 +       if (unlikely(err < 0))
10976 +               goto out_dput; /* still drop the reference from the lookup */
10977 +       if (!diropq->d_inode)
10978 +               err = vfs_create(lower_dir, diropq, S_IRUGO, &nd);
10979 +       if (!err)
10980 +               dbopaque(dentry) = bindex;
10981 +       release_lower_nd(&nd, err);
10982 +
10983 +out_dput:
10984 +       dput(diropq);
10985 +out:
10986 +       mutex_unlock(&lower_dir->i_mutex);
10987 +       current->cap_effective = orig_cap;
10988 +       return err;
10989 +}
10990 --- /dev/null
10991 +++ kernel-2.6.28/fs/unionfs/xattr.c
10992 @@ -0,0 +1,173 @@
10993 +/*
10994 + * Copyright (c) 2003-2009 Erez Zadok
10995 + * Copyright (c) 2003-2006 Charles P. Wright
10996 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10997 + * Copyright (c) 2005-2006 Junjiro Okajima
10998 + * Copyright (c) 2005      Arun M. Krishnakumar
10999 + * Copyright (c) 2004-2006 David P. Quigley
11000 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
11001 + * Copyright (c) 2003      Puja Gupta
11002 + * Copyright (c) 2003      Harikesavan Krishnan
11003 + * Copyright (c) 2003-2009 Stony Brook University
11004 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
11005 + *
11006 + * This program is free software; you can redistribute it and/or modify
11007 + * it under the terms of the GNU General Public License version 2 as
11008 + * published by the Free Software Foundation.
11009 + */
11010 +
11011 +#include "union.h"
11012 +
11013 +/*
11014 + * Lifted from fs/xattr.c.  Returns a @size-byte buffer, NULL if @size is 0,
11015 + * ERR_PTR(-E2BIG) if @size > @limit, or ERR_PTR(-ENOMEM) on kmalloc failure.
11016 + */
11017 +void *unionfs_xattr_alloc(size_t size, size_t limit)
11018 +{
11019 +       void *buf;
11020 +
11021 +       if (size > limit)
11022 +               return ERR_PTR(-E2BIG);
11023 +       if (size == 0)          /* size request, no buffer is needed */
11024 +               return NULL;
11025 +
11026 +       buf = kmalloc(size, GFP_KERNEL);
11027 +       return likely(buf) ? buf : ERR_PTR(-ENOMEM);
11028 +}
11029 +
11030 +/*
11031 + * Read extended attribute @name of @dentry by forwarding the request to
11032 + * vfs_getxattr() on the lower dentry.  Returns the vfs_getxattr() result,
11033 + * or -ESTALE if @dentry fails revalidation.
11034 + *
11035 + * BKL held by caller.
11036 + * dentry->d_inode->i_mutex locked
11037 + */
11038 +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
11039 +                        size_t size)
11040 +{
11041 +       struct dentry *parent;
11042 +       int ret;
11043 +
11044 +       /* lock order: superblock, then parent, then the dentry itself */
11045 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11046 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11047 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11048 +
11049 +       if (likely(__unionfs_d_revalidate(dentry, parent, false)))
11050 +               ret = vfs_getxattr(unionfs_lower_dentry(dentry),
11051 +                                  (char *) name, value, size);
11052 +       else
11053 +               ret = -ESTALE;
11054 +
11055 +       /* sanity-check the dentry, then release in the reverse order */
11056 +       unionfs_check_dentry(dentry);
11057 +       unionfs_unlock_dentry(dentry);
11058 +       unionfs_unlock_parent(dentry, parent);
11059 +       unionfs_read_unlock(dentry->d_sb);
11060 +
11061 +       return ret;
11062 +}
11063 +
11064 +/*
11065 + * Set extended attribute @name of @dentry to the @size bytes at @value by
11066 + * forwarding the request, including @flags, to vfs_setxattr() on the lower
11067 + * dentry.  Returns the vfs_setxattr() result, or -ESTALE if @dentry fails
11068 + * revalidation.
11069 + *
11070 + * BKL held by caller.
11071 + * dentry->d_inode->i_mutex locked
11072 + */
11073 +int unionfs_setxattr(struct dentry *dentry, const char *name,
11074 +                    const void *value, size_t size, int flags)
11075 +{
11076 +       struct dentry *parent;
11077 +       int ret;
11078 +
11079 +       /* lock order: superblock, then parent, then the dentry itself */
11080 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11081 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11082 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11083 +
11084 +       if (likely(__unionfs_d_revalidate(dentry, parent, false)))
11085 +               ret = vfs_setxattr(unionfs_lower_dentry(dentry), (char *) name,
11086 +                                  (void *) value, size, flags);
11087 +       else
11088 +               ret = -ESTALE;
11089 +
11090 +       /* sanity-check the dentry, then release in the reverse order */
11091 +       unionfs_check_dentry(dentry);
11092 +       unionfs_unlock_dentry(dentry);
11093 +       unionfs_unlock_parent(dentry, parent);
11094 +       unionfs_read_unlock(dentry->d_sb);
11095 +
11096 +       return ret;
11097 +}
11098 +
11099 +/*
11100 + * Remove extended attribute @name of @dentry by forwarding the request to
11101 + * vfs_removexattr() on the lower dentry.  Returns the vfs_removexattr()
11102 + * result, or -ESTALE if @dentry fails revalidation.
11103 + *
11104 + * BKL held by caller.
11105 + * dentry->d_inode->i_mutex locked
11106 + */
11107 +int unionfs_removexattr(struct dentry *dentry, const char *name)
11108 +{
11109 +       struct dentry *parent;
11110 +       int ret;
11111 +
11112 +       /* lock order: superblock, then parent, then the dentry itself */
11113 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11114 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11115 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11116 +
11117 +       if (likely(__unionfs_d_revalidate(dentry, parent, false)))
11118 +               ret = vfs_removexattr(unionfs_lower_dentry(dentry),
11119 +                                     (char *) name);
11120 +       else
11121 +               ret = -ESTALE;
11122 +
11123 +       /* sanity-check the dentry, then release in the reverse order */
11124 +       unionfs_check_dentry(dentry);
11125 +       unionfs_unlock_dentry(dentry);
11126 +       unionfs_unlock_parent(dentry, parent);
11127 +       unionfs_read_unlock(dentry->d_sb);
11128 +
11129 +       return ret;
11130 +}
11131 +
11132 +/*
11133 + * List the extended attribute names of @dentry into the buffer @list of
11134 + * @size bytes by forwarding the request to vfs_listxattr() on the lower
11135 + * dentry.
11136 + *
11137 + * Returns the vfs_listxattr() result, or -ESTALE if @dentry fails
11138 + * revalidation.
11139 + *
11140 + * BKL held by caller.
11141 + * dentry->d_inode->i_mutex locked
11142 + */
11143 +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
11144 +{
11145 +       struct dentry *parent;
11146 +       int ret;
11147 +
11148 +       /* lock order: superblock, then parent, then the dentry itself */
11149 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11150 +       parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11151 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11152 +
11153 +       if (likely(__unionfs_d_revalidate(dentry, parent, false)))
11154 +               ret = vfs_listxattr(unionfs_lower_dentry(dentry), list, size);
11155 +       else
11156 +               ret = -ESTALE;
11157 +
11158 +       /* sanity-check the dentry, then release in the reverse order */
11159 +       unionfs_check_dentry(dentry);
11160 +       unionfs_unlock_dentry(dentry);
11161 +       unionfs_unlock_parent(dentry, parent);
11162 +       unionfs_read_unlock(dentry->d_sb);
11163 +
11164 +       return ret;
11165 +}
11166 --- kernel-2.6.28.orig/include/linux/fs_stack.h
11167 +++ kernel-2.6.28/include/linux/fs_stack.h
11168 @@ -1,17 +1,27 @@
11169 +/*
11170 + * Copyright (c) 2006-2009 Erez Zadok
11171 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
11172 + * Copyright (c) 2006-2009 Stony Brook University
11173 + * Copyright (c) 2006-2009 The Research Foundation of SUNY
11174 + *
11175 + * This program is free software; you can redistribute it and/or modify
11176 + * it under the terms of the GNU General Public License version 2 as
11177 + * published by the Free Software Foundation.
11178 + */
11179 +
11180  #ifndef _LINUX_FS_STACK_H
11181  #define _LINUX_FS_STACK_H
11182
11183 -/* This file defines generic functions used primarily by stackable
11184 +/*
11185 + * This file defines generic functions used primarily by stackable
11186   * filesystems; none of these functions require i_mutex to be held.
11187   */
11188
11189  #include <linux/fs.h>
11190
11191  /* externs for fs/stack.c */
11192 -extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
11193 -                               int (*get_nlinks)(struct inode *));
11194 -
11195 -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
11196 +extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
11197 +extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
11198
11199  /* inlines */
11200  static inline void fsstack_copy_attr_atime(struct inode *dest,
11201 --- kernel-2.6.28.orig/include/linux/magic.h
11202 +++ kernel-2.6.28/include/linux/magic.h
11203 @@ -39,6 +39,8 @@
11204  #define REISER2FS_SUPER_MAGIC_STRING   "ReIsEr2Fs"
11205  #define REISER2FS_JR_SUPER_MAGIC_STRING        "ReIsEr3Fs"
11206
11207 +#define UNIONFS_SUPER_MAGIC 0xf15f083d
11208 +
11209  #define SMB_SUPER_MAGIC                0x517B
11210  #define USBDEVICE_SUPER_MAGIC  0x9fa2
11211  #define CGROUP_SUPER_MAGIC     0x27e0eb
11212 --- kernel-2.6.28.orig/include/linux/splice.h
11213 +++ kernel-2.6.28/include/linux/splice.h
11214 @@ -70,5 +70,10 @@
11215                               struct splice_pipe_desc *);
11216  extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
11217                                       splice_direct_actor *);
11218 +extern long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
11219 +                           loff_t *ppos, size_t len, unsigned int flags);
11220 +extern long vfs_splice_to(struct file *in, loff_t *ppos,
11221 +                         struct pipe_inode_info *pipe, size_t len,
11222 +                         unsigned int flags);
11223
11224  #endif
11225 --- /dev/null
11226 +++ kernel-2.6.28/include/linux/union_fs.h
11227 @@ -0,0 +1,22 @@
11228 +/*
11229 + * Copyright (c) 2003-2009 Erez Zadok
11230 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11231 + * Copyright (c) 2003-2009 Stony Brook University
11232 + * Copyright (c) 2003-2009 The Research Foundation of SUNY
11233 + *
11234 + * This program is free software; you can redistribute it and/or modify
11235 + * it under the terms of the GNU General Public License version 2 as
11236 + * published by the Free Software Foundation.
11237 + */
11238 +
11239 +#ifndef _LINUX_UNION_FS_H
11240 +#define _LINUX_UNION_FS_H
11241 +
11242 +/*
11243 + * DEFINITIONS FOR USER AND KERNEL CODE:
11244 + */
11245 +# define UNIONFS_IOCTL_INCGEN          _IOR(0x15, 11, int)
11246 +# define UNIONFS_IOCTL_QUERYFILE       _IOR(0x15, 15, int)
11247 +
11248 +#endif /* _LINUX_UNION_FS_H */
11249 +
11250 --- kernel-2.6.28.orig/security/security.c
11251 +++ kernel-2.6.28/security/security.c
11252 @@ -448,6 +448,7 @@
11253                 return 0;
11254         return security_ops->inode_permission(inode, mask);
11255  }
11256 +EXPORT_SYMBOL(security_inode_permission);
11257
11258  int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
11259  {