<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <!-- Feed-level metadata for the gmane.linux.file-systems list.
       RSS 1.0 identifies the channel by its rdf:about URI; the attribute must
       be namespace-qualified (as on the sibling image element) or RDF-aware
       consumers will not see a subject URI for the channel. -->
  <channel rdf:about="http://blog.gmane.org/gmane.linux.file-systems">
    <title>gmane.linux.file-systems</title>
    <link>http://blog.gmane.org/gmane.linux.file-systems</link>
    <description/>
    <!-- Syndication module hints: refresh once per hourly period, with the
         schedule counted from the updateBase timestamp. -->
    <syn:updatePeriod>hourly</syn:updatePeriod>
    <syn:updateFrequency>1</syn:updateFrequency>
    <syn:updateBase>1901-01-01T00:00+00:00</syn:updateBase>
    <!-- Table of contents: one rdf:li per entry, in display order.  Each
         rdf:resource is expected to equal the rdf:about of an item element
         elsewhere in this document. -->
    <items>
      <rdf:Seq>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27841"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27831"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27827"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27826"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27814"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27813"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27812"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27811"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27810"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27809"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27808"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27807"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27806"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27805"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27804"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27763"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27746"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27742"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27701"/>
        <rdf:li rdf:resource="http://comments.gmane.org/gmane.linux.file-systems/27698"/>
      </rdf:Seq>
    </items>
    <!-- Points at the top-level image element carrying the same URI. -->
    <image rdf:resource="http://gmane.org/img/gmane-25t.png"/>
    <!-- NOTE(review): rdf:resource is empty and no matching top-level
         textinput element is visible in this part of the file; confirm
         whether this reference should be dropped or given a real URI. -->
    <textinput rdf:resource=""/>
  </channel>
  <!-- Channel logo.  The rdf:about URI matches both the url child below and
       the image rdf:resource reference inside the channel element. -->
  <image rdf:about="http://gmane.org/img/gmane-25t.png">
    <title>Gmane</title>
    <url>http://gmane.org/img/gmane-25t.png</url>
    <link>http://gmane.org</link>
  </image>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27841">
    <title>[PATCH] Update the exportfs documentation and comments and fix some exportfs issues</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27841</link>
    <description>Update the exportfs documentation and comments to fit the code, and add an
extra FID type to be used for error indication (FILEID_ERROR).

The following other code changes have been made:

 (*) The encode_fh() op and exportfs_encode_fh() return enum fid_type rather
     than int.

 (*) The exportfs_decode_fh() function has its acceptable() function pointer
     typedef'd.

 (*) The fh_to_dentry() and fh_to_parent() ops and exportfs_decode_fh() take
     enum fid_type rather than int.

 (*) The generic_fh_to_dentry() and generic_fh_to_parent() functions now take
     enum fid_type rather than int.  Their get_inode() function pointer has
     been typedef'd.

 (*) The generic_fh_to_dentry() and generic_fh_to_parent() functions no longer
     return NULL, but return -ESTALE instead.  exportfs_decode_fh() does not
     check for NULL, but will try to dereference it as IS_ERR() does not check
     for it.

 (*) Returns from encode_fh() implementations of 255 are changed to
     FILEID_ERROR.

 (*) FAT has had its fh type #defined (FAT_FILEID).

 (*) FUSE has had its fh types #defined (FUSE_FILEID, FUSE_FILEID_PARENT).

 (*) ISOFS has had its fh types #defined (ISOFS_FILEID, ISOFS_FILEID_PARENT).

 (*) OCFS2 has had its fh types #defined (OCFS2_FILEID, OCFS2_FILEID_PARENT).

 (*) The following filesystems have had their fh_to_dentry() and fh_to_parent()
     implementations made to return -ESTALE rather than NULL: FUSE, GFS2,
     ISOFS, OCFS2, UDF and XFS.

Signed-off-by: David Howells &lt;dhowells&lt; at &gt;redhat.com&gt;
---

 Documentation/filesystems/Exporting |  400 +++++++++++++++++++++++------------
 fs/efs/efs.h                        |    5 
 fs/efs/namei.c                      |    4 
 fs/exportfs/expfs.c                 |   95 ++++++--
 fs/ext2/super.c                     |    4 
 fs/ext3/super.c                     |    4 
 fs/ext4/super.c                     |    4 
 fs/fat/inode.c                      |   12 +
 fs/fuse/inode.c                     |   22 +-
 fs/gfs2/ops_export.c                |   18 +-
 fs/isofs/export.c                   |   21 +-
 fs/jffs2/super.c                    |    4 
 fs/jfs/jfs_inode.h                  |    5 
 fs/jfs/namei.c                      |    4 
 fs/libfs.c                          |   14 +
 fs/ntfs/namei.c                     |    4 
 fs/ocfs2/export.c                   |   25 +-
 fs/reiserfs/inode.c                 |   10 -
 fs/udf/namei.c                      |   18 +-
 fs/xfs/linux-2.6/xfs_export.c       |   36 ++-
 include/linux/exportfs.h            |  117 +++++++---
 include/linux/reiserfs_fs.h         |    9 -
 mm/shmem.c                          |    6 -
 23 files changed, 538 insertions(+), 303 deletions(-)


diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/Exporting
index 87019d2..5d9d769 100644
--- a/Documentation/filesystems/Exporting
+++ b/Documentation/filesystems/Exporting
&lt; at &gt;&lt; at &gt; -1,147 +1,273 &lt; at &gt;&lt; at &gt;
+ =============================
+ MAKING FILESYSTEMS EXPORTABLE
+ =============================
 
-Making Filesystems Exportable
-=============================
-
-Overview
---------
-
-All filesystem operations require a dentry (or two) as a starting
-point.  Local applications have a reference-counted hold on suitable
-dentries via open file descriptors or cwd/root.  However remote
-applications that access a filesystem via a remote filesystem protocol
-such as NFS may not be able to hold such a reference, and so need a
-different way to refer to a particular dentry.  As the alternative
-form of reference needs to be stable across renames, truncates, and
-server-reboot (among other things, though these tend to be the most
-problematic), there is no simple answer like 'filename'.
-
-The mechanism discussed here allows each filesystem implementation to
-specify how to generate an opaque (outside of the filesystem) byte
-string for any dentry, and how to find an appropriate dentry for any
-given opaque byte string.
-This byte string will be called a "filehandle fragment" as it
-corresponds to part of an NFS filehandle.
-
-A filesystem which supports the mapping between filehandle fragments
-and dentries will be termed "exportable".
-
-
-
-Dcache Issues
--------------
-
-The dcache normally contains a proper prefix of any given filesystem
-tree.  This means that if any filesystem object is in the dcache, then
-all of the ancestors of that filesystem object are also in the dcache.
-As normal access is by filename this prefix is created naturally and
-maintained easily (by each object maintaining a reference count on
-its parent).
-
-However when objects are included into the dcache by interpreting a
-filehandle fragment, there is no automatic creation of a path prefix
-for the object.  This leads to two related but distinct features of
-the dcache that are not needed for normal filesystem access.
-
-1/ The dcache must sometimes contain objects that are not part of the
-   proper prefix. i.e that are not connected to the root.
-2/ The dcache must be prepared for a newly found (via -&gt;lookup) directory
-   to already have a (non-connected) dentry, and must be able to move
-   that dentry into place (based on the parent and name in the
-   -&gt;lookup).   This is particularly needed for directories as
-   it is a dcache invariant that directories only have one dentry.
-
-To implement these features, the dcache has:
-
-a/ A dentry flag DCACHE_DISCONNECTED which is set on
-   any dentry that might not be part of the proper prefix.
-   This is set when anonymous dentries are created, and cleared when a
-   dentry is noticed to be a child of a dentry which is in the proper
-   prefix. 
-
-b/ A per-superblock list "s_anon" of dentries which are the roots of
-   subtrees that are not in the proper prefix.  These dentries, as
-   well as the proper prefix, need to be released at unmount time.  As
-   these dentries will not be hashed, they are linked together on the
-   d_hash list_head.
-
-c/ Helper routines to allocate anonymous dentries, and to help attach
-   loose directory dentries at lookup time. They are:
-    d_alloc_anon(inode) will return a dentry for the given inode.
-      If the inode already has a dentry, one of those is returned.
-      If it doesn't, a new anonymous (IS_ROOT and
-        DCACHE_DISCONNECTED) dentry is allocated and attached.
-      In the case of a directory, care is taken that only one dentry
-      can ever be attached.
-    d_splice_alias(inode, dentry) will make sure that there is a
-      dentry with the same name and parent as the given dentry, and
-      which refers to the given inode.
-      If the inode is a directory and already has a dentry, then that
-      dentry is d_moved over the given dentry.
-      If the passed dentry gets attached, care is taken that this is
-      mutually exclusive to a d_alloc_anon operation.
-      If the passed dentry is used, NULL is returned, else the used
-      dentry is returned.  This corresponds to the calling pattern of
-      -&gt;lookup.
-  
- 
-Filesystem Issues
------------------
+Contents:
+
+ (*) Overview.
+
+ (*) Dealing with the directory entry cache.
+
+ (*) Making a filesystem exportable.
+
+ (*) Filehandle fragment format.
+
+ (*) Filehandle export operations.
+
+
+========
+OVERVIEW
+========
+
+All filesystem operations require a dentry (or two) as a starting point.  Local
+applications have a reference-counted hold on suitable dentries via open file
+descriptors, the current working directory and the current root directory.
+Remote applications that access a filesystem via a remote filesystem protocol
+such as NFS, however, may not be able to hold such a reference, and so need a
+different way to refer to a particular dentry.  As the alternative form of
+reference needs to be persistent across renames, truncates, and server reboots
+(among other events, though these tend to be the most problematic), there is no
+simple answer like 'filename'.
+
+The mechanism discussed here allows each filesystem implementation to specify
+how to generate an opaque (outside of the filesystem) byte string for any
+dentry, and how to find an appropriate dentry for any given opaque byte string.
+
+This byte string will be called a "filehandle fragment" as it corresponds to
+part of an NFS filehandle.
+
+A filesystem which supports the mapping between filehandle fragments and
+dentries will be termed "exportable".
+
+
+======================================
+DEALING WITH THE DIRECTORY ENTRY CACHE
+======================================
+
+Each superblock has a tree of directory entries (dentries) that is rooted at
+its s_root pointer.  For most filesystems, every dentry it currently has cached
+in RAM is connected to this tree, meaning that all the ancestors of most files
+are fully represented in the directory entry cache (dcache).
+
+As the normal method of accessing files is by filename, the superblock root
+tree builds up in a natural manner, and is maintained simply by each object
+having a reference count on its parent.  The pathwalk algorithm works by
+walking from the root and out along the branches to the desired object.
+
+However, when an object is brought into the dcache by interpreting a filehandle
+fragment, the entries for the directories that represent that object's ancestry
+are not automatically all brought into the cache as well.  This leads to two
+related but distinct issues in the dcache that are not ordinarily seen:
+
+ (1) Sometimes the dcache for a superblock must contain objects that are not
+     connected to that superblock's root tree.
+
+ (2) The dcache must be able to handle a lookup() operation that results in a
+     dentry that already exists.  In such a case, the already extant dentry
+     must be used instead of the candidate upon which the lookup was performed.
+
+     Furthermore, the lookup procedure must be able to handle unconnected
+     dentries so found by installing them in the dentry tree under the parent
+     specified directory.
+
+     This is particularly necessary for directories as it is a requirement of
+     the dcache that directories are represented by a single dentry and don't
+     have aliases.
+
+To manage the above, the dcache has the following features:
+
+ (A) A dentry flag DCACHE_DISCONNECTED which is set on any dentry that might
+     not be connected to the superblock root.  This is set when anonymous
+     dentries are created, and cleared when a dentry is noticed to be a child
+     of a dentry which is in the proper prefix.
+
+ (B) A per-superblock list of dentries (s_anon) which are the roots of subtrees
+     that are not connected to the superblock root.  These dentries, as well as
+     the superblock's rooted tree, need to be released at unmount time.
+
+     Note that as these dentries are unhashed, the s_anon list is linked
+     together using the d_hash list_head in the dentry struct.
+
+ (C) Helper routines to allocate anonymous dentries, and to help attach loose
+     directory dentries at lookup time. They are:
+
+     (*) Find or allocate a dentry for a given inode.
+
+struct dentry *d_obtain_alias(struct inode *inode);
+
+ This will return a dentry for the given inode.  If the inode already
+       has a dentry, one of those is returned.  If it doesn't, a new
+       anonymous (IS_ROOT() and DCACHE_DISCONNECTED) dentry is allocated and
+       attached.  In the case of a directory, care is taken that only one
+       dentry can ever be attached.
+
+     (*) Splice a disconnected dentry into the tree if one exists.
+
+struct dentry *d_splice_alias(struct inode *inode,
+      struct dentry *dentry);
+
+         This will make sure that there is a dentry with the same name and
+       parent as the given dentry, and which refers to the given inode.
+
+       If the inode is a directory and already has a dentry, then that dentry
+       is d_move()'d over the given dentry.  If the passed dentry gets
+       attached, care is taken that this is mutually exclusive to a
+       d_obtain_alias() operation.  If the passed dentry is used, NULL is
+       returned, else the used dentry is returned.  This corresponds to the
+       calling pattern of the lookup procedure.
+
+
+==============================
+MAKING A FILESYSTEM EXPORTABLE
+==============================
 
 For a filesystem to be exportable it must:
- 
-   1/ provide the filehandle fragment routines described below.
-   2/ make sure that d_splice_alias is used rather than d_add
-      when -&gt;lookup finds an inode for a given parent and name.
-      Typically the -&gt;lookup routine will end with a:
 
+ (1) Provide the filehandle export operations described below.
+
+ (2) Make sure that d_splice_alias() is used rather than d_add() when its
+     lookup() operation finds an inode for a given parent and name.  Typically
+     the lookup() routine will end with a:
+
+{
+...
 return d_splice_alias(inode, dentry);
 }
 
 
+==========================
+FILEHANDLE FRAGMENT FORMAT
+==========================
+
+A filehandle fragment consists of an array of 1 or more 32-bit words.  The
+contents and the layout of the data in the array are entirely at the whim of
+the filesystem that generated it.
+
+The filehandle fragment produced also has a 'type' associated with it.  This
+is a number between 0 and 254.  0 is a special reserved value (FILEID_ROOT)
+that encode_fh() may not produce.  The remaining values in that range can be
+chosen at will, though there are possible symbols in the fid_type enum that can
+be used if desired.  Beware, though, these may be decoded by protocol decoding
+programs, such as wireshark, so using a predefined type may confuse people if
+the corresponding format is not also adhered to (see the fid struct).
 
-  A file system implementation declares that instances of the filesystem
-are exportable by setting the s_export_op field in the struct
-super_block.  This field must point to a "struct export_operations"
-struct which has the following members:
-
- encode_fh  (optional)
-    Takes a dentry and creates a filehandle fragment which can later be used
-    to find or create a dentry for the same object.  The default
-    implementation creates a filehandle fragment that encodes a 32bit inode
-    and generation number for the inode encoded, and if necessary the
-    same information for the parent.
-
-  fh_to_dentry (mandatory)
-    Given a filehandle fragment, this should find the implied object and
-    create a dentry for it (possibly with d_alloc_anon).
-
-  fh_to_parent (optional but strongly recommended)
-    Given a filehandle fragment, this should find the parent of the
-    implied object and create a dentry for it (possibly with d_alloc_anon).
-    May fail if the filehandle fragment is too small.
-
-  get_parent (optional but strongly recommended)
-    When given a dentry for a directory, this should return  a dentry for
-    the parent.  Quite possibly the parent dentry will have been allocated
-    by d_alloc_anon.  The default get_parent function just returns an error
-    so any filehandle lookup that requires finding a parent will fail.
-    -&gt;lookup("..") is *not* used as a default as it can leave ".." entries
-    in the dcache which are too messy to work with.
-
-  get_name (optional)
-    When given a parent dentry and a child dentry, this should find a name
-    in the directory identified by the parent dentry, which leads to the
-    object identified by the child dentry.  If no get_name function is
-    supplied, a default implementation is provided which uses vfs_readdir
-    to find potential names, and matches inode numbers to find the correct
-    match.
-
-
-A filehandle fragment consists of an array of 1 or more 4byte words,
-together with a one byte "type".
-The decode_fh routine should not depend on the stated size that is
-passed to it.  This size may be larger than the original filehandle
-generated by encode_fh, in which case it will have been padded with
-nuls.  Rather, the encode_fh routine should choose a "type" which
-indicates the decode_fh how much of the filehandle is valid, and how
+The fh_to_dentry() and fh_to_parent() routines should not depend on the stated
+size that is passed to them.  This size may be larger than the original
+filehandle generated by encode_fh(), in which case it will have been padded
+with NULs.  Rather, the encode_fh() routine should choose a "type" which
+indicates the fh_to_*() operations how much of the filehandle is valid, and how
 it should be interpreted.
+
+
+============================
+FILEHANDLE EXPORT OPERATIONS
+============================
+
+A filesystem implementation declares that instances of the filesystem are
+exportable by setting the s_export_op field in the super_block struct.  This
+field must point to an export_operations struct which has the following members
+filled in:
+
+ (*) enum fid_type (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
+int connectable);
+
+     This operation is used to generate a filehandle fragment that can later be
+     used to find or create a dentry for the same filesystem object.  It is
+     optional and there's a default encoder.
+
+     The fragment is written into the supplied buffer - fh - which can hold up
+     to *max_len lots of 4-byte words.  The fragment generated should represent
+     the given dentry, and if connectable is set, the parent of the given
+     dentry too.
+
+     If successful, this operation should return a value that represents to the
+     decoder operations the format of the opaque fragment produced and it
+     should set *max_len to indicate the amount of data it stored in the buffer
+     in units of 32-bit words.
+
+     The caller is required to save the type value for presentation to the
+     decoder operations.  The type value is arbitrary, but must be between 1
+     and 254.  There are some predefined types in the fid_type enum that can be
+     selected, but use of these is not mandatory.
+
+     Upon failure, a value of FILEID_ERROR should be returned.
+
+     The default encoder uses the i_ino and i_generation numbers from the
+     inode, both masked down to 32-bits.  It also adds both values from the
+     parent inode if connectable.
+
+ (*) struct dentry *(*fh_to_dentry)(struct super_block *sb, struct fid *fid,
+    int fh_len, enum fid_type fh_type);
+
+     This operation is used to look up a file, given the filehandle fragment
+     that was generated by encode_fh().  The file, if it exists, will be one of
+     the set available through the specified superblock.  This operation is
+     mandatory.
+
+     The filehandle fragment is in the buffer specified by fid and is of up to
+     fh_len 32-bit words and is of type fh_type as indicated by encode_fh().
+     Note that there may be more data than expected in the buffer as the data
+     may have been padded out by the caller.  fh_type must be used to deal with
+     this.
+
+     If successful, this operation should return a pointer to a dcache entry
+     representing one of the aliases to the file.  This may be used by the
+     caller to query other aliases of the same file to find one it prefers.
+
+     Note that this operation is not required to produce an dentry that is
+     connected to the root of the superblock.  It is permitted to produce an
+     anonymous dentry, as might happen if encode_fh() was given connectable as
+     false.
+
+     On failure, a negative error code should be produced to indicate the
+     reason.  A NULL pointer must not be returned.
+
+ (*) struct dentry *(*fh_to_parent)(struct super_block *sb, struct fid *fid,
+    int fh_len, enum fid_type fh_type);
+
+     This is very similar to fh_to_dentry(), the difference being that it
+     attempts to retrieve the parent of the file to which the filehandle
+     fragment corresponds, rather than the file itself.  This operation is
+     optional, but is strongly recommended.
+
+     This may fail if the filehandle does not contain information on the
+     original file's parentage.  It also need not produce a fully connected
+     dentry.
+
+ (*) struct dentry *(*get_parent)(struct dentry *child);
+
+     This operation should return a dentry to represent the parent directory of
+     the specified dentry (which should itself be a directory).  This operation
+     is optional, but strongly recommended.
+
+     An anonymous, unconnected dentry can be returned as the parent if so
+     desired.  This may be allocated with d_obtain_alias() or suchlike.
+
+     On success, the parent dentry should be returned.  On failure, a negative
+     error code should be returned.
+
+     The default operation just returns an error, thus making any filehandle
+     lookup that requires finding the parent fail.  The default operation does
+     not try to use 'lookup("..")' as that could leave ".." entries in the
+     dcache.
+
+ (*) int (*get_name)(struct dentry *parent, char *name, struct dentry *child);
+
+     This operation is provided to determine the name of the directory entry in
+     the parent directory that points to the child object.  This operation is
+     optional.
+
+     On success, the NUL-terminated name of the directory entry should be
+     written into the name buffer, and zero should be returned.  The caller
+     must guarantee that the buffer is at least NAME_MAX + 1 in size.
+
+     On failure, a negative error code should be returned.
+
+     The default operation is available that uses vfs_readdir() to find
+     potential names and match inode numbers to select the correct entry.
+
+This interface can be included in the code by:
+
+#include &lt;linux/exportfs.h&gt;
+
+and making the user dependent on CONFIG_EXPORTFS.
diff --git a/fs/efs/efs.h b/fs/efs/efs.h
index d8305b5..db5e1c2 100644
--- a/fs/efs/efs.h
+++ b/fs/efs/efs.h
&lt; at &gt;&lt; at &gt; -8,6 +8,7 &lt; at &gt;&lt; at &gt;
 #define _EFS_EFS_H_
 
 #include &lt;linux/fs.h&gt;
+#include &lt;linux/exportfs.h&gt;
 #include &lt;asm/uaccess.h&gt;
 
 #define EFS_VERSION "1.0a"
&lt; at &gt;&lt; at &gt; -131,9 +132,9 &lt; at &gt;&lt; at &gt; extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *);
 extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type);
+int fh_len, enum fid_type fh_type);
 extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type);
+int fh_len, enum fid_type fh_type);
 extern struct dentry *efs_get_parent(struct dentry *);
 extern int efs_bmap(struct inode *, int);
 
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index c3fb5f9..6b9c118 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
&lt; at &gt;&lt; at &gt; -97,14 +97,14 &lt; at &gt;&lt; at &gt; static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
 }
 
 struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
     efs_nfs_get_inode);
 }
 
 struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
     efs_nfs_get_inode);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index ec1fb91..c1f2399 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
&lt; at &gt;&lt; at &gt; -35,12 +35,12 &lt; at &gt;&lt; at &gt; static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
 }
 
 /*
- * Check if the dentry or any of it's aliases is acceptable.
+ * Check if the dentry or any of it's aliases are acceptable.
  */
 static struct dentry *
 find_acceptable_alias(struct dentry *result,
-int (*acceptable)(void *context, struct dentry *dentry),
-void *context)
+      exportfs_acceptable_t acceptable,
+      void *context)
 {
 struct dentry *dentry, *toput = NULL;
 
&lt; at &gt;&lt; at &gt; -303,25 +303,25 &lt; at &gt;&lt; at &gt; out:
 
 /**
  * export_encode_fh - default export_operations-&gt;encode_fh function
- * &lt; at &gt;dentry:  the dentry to encode
- * &lt; at &gt;fh:      where to store the file handle fragment
- * &lt; at &gt;max_len: maximum length to store there
- * &lt; at &gt;connectable: whether to store parent information
+ * &lt; at &gt;dentry:  The dentry to encode
+ * &lt; at &gt;fid:     Where to store the file handle fragment
+ * &lt; at &gt;max_len: Maximum length to store there in 32-bit words
+ * &lt; at &gt;connectable: Whether to store parent information
  *
- * This default encode_fh function assumes that the 32 inode number
- * is suitable for locating an inode, and that the generation number
- * can be used to check that it is still valid.  It places them in the
- * filehandle fragment where export_decode_fh expects to find them.
+ * This default encode_fh function assumes that the 32 inode number is suitable
+ * for locating an inode, and that the generation number can be used to check
+ * that it is still valid.  It places them in the filehandle fragment where
+ * export_decode_fh expects to find them.
  */
-static int export_encode_fh(struct dentry *dentry, struct fid *fid,
-int *max_len, int connectable)
+static enum fid_type export_encode_fh(struct dentry *dentry, struct fid *fid,
+      int *max_len, int connectable)
 {
 struct inode * inode = dentry-&gt;d_inode;
 int len = *max_len;
 int type = FILEID_INO32_GEN;
-
+
 if (len &lt; 2 || (connectable &amp;&amp; len &lt; 4))
-return 255;
+return FILEID_ERROR;
 
 len = 2;
 fid-&gt;i32.ino = inode-&gt;i_ino;
&lt; at &gt;&lt; at &gt; -341,24 +341,71 &lt; at &gt;&lt; at &gt; static int export_encode_fh(struct dentry *dentry, struct fid *fid,
 return type;
 }
 
-int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
-int connectable)
+/**
+ * exportfs_encode_fh - Encode a dentry to a partial persistent file handle
+ * &lt; at &gt;dentry:  The dentry to encode
+ * &lt; at &gt;fid:     Where to store the file handle fragment
+ * &lt; at &gt;max_len: Maximum length to store there in 32-bit words
+ * &lt; at &gt;connectable: Whether to store parent information
+ *
+ * This function is used to get a partial persistent file handle to represent a
+ * file object as referred to by the given dentry.  The caller may also request
+ * that parentage information be noted in the file handle produced.
+ *
+ * The fid pointer should point to a buffer of *max_len size, where the maximum
+ * length is given in 32-bit words, _not_ bytes.  The contents of the returned
+ * file handle may or may not reflect the layout of the fid structure.  That is
+ * totally up to the filesystem being queried.
+ *
+ * On return, the file handle type will be returned, or FILEID_ERROR if the
+ * filesystem was unable to represent the file.  The caller is responsible for
+ * saving the type so that it can be passed to exportfs_decode_fh().
+ *
+ * If successful, *max_len will have been updated to specify the amount of
+ * buffer actually used.
+ */
+enum fid_type exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
+ int *max_len, int connectable)
 {
 const struct export_operations *nop = dentry-&gt;d_sb-&gt;s_export_op;
-int error;
 
 if (nop-&gt;encode_fh)
-error = nop-&gt;encode_fh(dentry, fid-&gt;raw, max_len, connectable);
+return nop-&gt;encode_fh(dentry, fid-&gt;raw, max_len, connectable);
 else
-error = export_encode_fh(dentry, fid, max_len, connectable);
-
-return error;
+return export_encode_fh(dentry, fid, max_len, connectable);
 }
 EXPORT_SYMBOL_GPL(exportfs_encode_fh);
 
+/**
+ * exportfs_decode_fh - Decode a partial persistent file handle to a dentry
+ * &lt; at &gt;mnt: The mountpoint with which the dentry should be associated
+ * &lt; at &gt;fid: The partial file handle
+ * &lt; at &gt;fh_len: The length of the partial file handle
+ * &lt; at &gt;fileid_type: The type of partial file handle
+ * &lt; at &gt;acceptable: A filter routine to pick amongst the aliases for an inode
+ * &lt; at &gt;context: Context information for the acceptability filter
+ *
+ * This function function is used to turn a partial persistent file handle back
+ * into the dentry it represents.  The caller must indicate the set of dentries
+ * from which the target is to be selected by the VFS mountpoint supplied.
+ *
+ * The partial file handle is in the buffer pointed to by the fid pointer, and
+ * is fh_len 32-bit words in length.  The fileid_type as returned by the encode
+ * routine must also be presented by the caller.
+ *
+ * If a suitable inode is found, its list of aliases will be passed to the
+ * acceptability function to select a suitable one.  The context information
+ * supplied will be passed to the acceptability filter to aid in selection.
+ *
+ * If successful, a pointer to a suitable dentry will be returned.  Note that
+ * the dentry returned may not be connected to any of the superblock's roots.
+ *
+ * If unsuccessful, a negative error code will be returned.
+ */
 struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
-int fh_len, int fileid_type,
-int (*acceptable)(void *, struct dentry *), void *context)
+  int fh_len, enum fid_type fileid_type,
+  exportfs_acceptable_t acceptable,
+  void *context)
 {
 const struct export_operations *nop = mnt-&gt;mnt_sb-&gt;s_export_op;
 struct dentry *result, *alias;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 647cd88..a3ef820 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
&lt; at &gt;&lt; at &gt; -340,14 +340,14 &lt; at &gt;&lt; at &gt; static struct inode *ext2_nfs_get_inode(struct super_block *sb,
 }
 
 static struct dentry *ext2_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
     ext2_nfs_get_inode);
 }
 
 static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
     ext2_nfs_get_inode);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f6c94f2..b549bbd 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
&lt; at &gt;&lt; at &gt; -669,14 +669,14 &lt; at &gt;&lt; at &gt; static struct inode *ext3_nfs_get_inode(struct super_block *sb,
 }
 
 static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
     ext3_nfs_get_inode);
 }
 
 static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
     ext3_nfs_get_inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c..30e142b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
&lt; at &gt;&lt; at &gt; -759,14 +759,14 &lt; at &gt;&lt; at &gt; static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 }
 
 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
     ext4_nfs_get_inode);
 }
 
 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
     ext4_nfs_get_inode);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d937aaf..742ee35 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
&lt; at &gt;&lt; at &gt; -649,14 +649,16 &lt; at &gt;&lt; at &gt; static const struct super_operations fat_sops = {
  * of i_logstart is used to store the directory entry offset.
  */
 
+#define FAT_FILEID((enum fid_type) 3)
+
 static struct dentry *fat_fh_to_dentry(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct inode *inode = NULL;
 struct dentry *result;
 u32 *fh = fid-&gt;raw;
 
-if (fh_len &lt; 5 || fh_type != 3)
+if (fh_len &lt; 5 || fh_type != FAT_FILEID)
 return NULL;
 
 inode = ilookup(sb, fh[0]);
&lt; at &gt;&lt; at &gt; -704,7 +706,7 &lt; at &gt;&lt; at &gt; static struct dentry *fat_fh_to_dentry(struct super_block *sb,
 return result;
 }
 
-static int
+static enum fid_type
 fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
 {
 int len = *lenp;
&lt; at &gt;&lt; at &gt; -712,7 +714,7 &lt; at &gt;&lt; at &gt; fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
 u32 ipos_h, ipos_m, ipos_l;
 
 if (len &lt; 5)
-return 255; /* no room */
+return FILEID_ERROR; /* no room */
 
 ipos_h = MSDOS_I(inode)-&gt;i_pos &gt;&gt; 8;
 ipos_m = (MSDOS_I(inode)-&gt;i_pos &amp; 0xf0) &lt;&lt; 24;
&lt; at &gt;&lt; at &gt; -725,7 +727,7 &lt; at &gt;&lt; at &gt; fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
 spin_lock(&amp;de-&gt;d_lock);
 fh[4] = ipos_l | MSDOS_I(de-&gt;d_parent-&gt;d_inode)-&gt;i_logstart;
 spin_unlock(&amp;de-&gt;d_lock);
-return 3;
+return FAT_FILEID;
 }
 
 static struct dentry *fat_get_parent(struct dentry *child)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2e99f34..b0f6528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
&lt; at &gt;&lt; at &gt; -610,8 +610,11 &lt; at &gt;&lt; at &gt; static struct dentry *fuse_get_dentry(struct super_block *sb,
 return ERR_PTR(err);
 }
 
-static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
-   int connectable)
+#define FUSE_FILEID((enum fid_type) 0x81)
+#define FUSE_FILEID_PARENT((enum fid_type) 0x82)
+
+static enum fid_type fuse_encode_fh(struct dentry *dentry, u32 *fh,
+    int *max_len, int connectable)
 {
 struct inode *inode = dentry-&gt;d_inode;
 bool encode_parent = connectable &amp;&amp; !S_ISDIR(inode-&gt;i_mode);
&lt; at &gt;&lt; at &gt; -620,7 +623,7 &lt; at &gt;&lt; at &gt; static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 u32 generation;
 
 if (*max_len &lt; len)
-return  255;
+return FILEID_ERROR;
 
 nodeid = get_fuse_inode(inode)-&gt;nodeid;
 generation = inode-&gt;i_generation;
&lt; at &gt;&lt; at &gt; -644,16 +647,17 &lt; at &gt;&lt; at &gt; static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 }
 
 *max_len = len;
-return encode_parent ? 0x82 : 0x81;
+return encode_parent ? FUSE_FILEID_PARENT : FUSE_FILEID;
 }
 
 static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct fuse_inode_handle handle;
 
-if ((fh_type != 0x81 &amp;&amp; fh_type != 0x82) || fh_len &lt; 3)
-return NULL;
+if ((fh_type != FUSE_FILEID &amp;&amp; fh_type != FUSE_FILEID_PARENT) ||
+    fh_len &lt; 3)
+return ERR_PTR(-ESTALE);
 
 handle.nodeid = (u64) fid-&gt;raw[0] &lt;&lt; 32;
 handle.nodeid |= (u64) fid-&gt;raw[1];
&lt; at &gt;&lt; at &gt; -662,12 +666,12 &lt; at &gt;&lt; at &gt; static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
 }
 
 static struct dentry *fuse_fh_to_parent(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct fuse_inode_handle parent;
 
 if (fh_type != 0x82 || fh_len &lt; 6)
-return NULL;
+return ERR_PTR(-ESTALE);
 
 parent.nodeid = (u64) fid-&gt;raw[3] &lt;&lt; 32;
 parent.nodeid |= (u64) fid-&gt;raw[4];
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index bbb8c36..8d0d6cf 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
&lt; at &gt;&lt; at &gt; -31,8 +31,8 &lt; at &gt;&lt; at &gt;
 #define GFS2_LARGE_FH_SIZE 8
 #define GFS2_OLD_FH_SIZE 10
 
-static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
-  int connectable)
+static enum fid_type gfs2_encode_fh(struct dentry *dentry, __u32 *p,
+       int *len, int connectable)
 {
 __be32 *fh = (__force __be32 *)p;
 struct inode *inode = dentry-&gt;d_inode;
&lt; at &gt;&lt; at &gt; -41,7 +41,7 &lt; at &gt;&lt; at &gt; static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 
 if (*len &lt; GFS2_SMALL_FH_SIZE ||
     (connectable &amp;&amp; *len &lt; GFS2_LARGE_FH_SIZE))
-return 255;
+return FILEID_ERROR;
 
 fh[0] = cpu_to_be32(ip-&gt;i_no_formal_ino &gt;&gt; 32);
 fh[1] = cpu_to_be32(ip-&gt;i_no_formal_ino &amp; 0xFFFFFFFF);
&lt; at &gt;&lt; at &gt; -50,7 +50,7 &lt; at &gt;&lt; at &gt; static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 *len = GFS2_SMALL_FH_SIZE;
 
 if (!connectable || inode == sb-&gt;s_root-&gt;d_inode)
-return *len;
+return GFS2_SMALL_FH_SIZE;
 
 spin_lock(&amp;dentry-&gt;d_lock);
 inode = dentry-&gt;d_parent-&gt;d_inode;
&lt; at &gt;&lt; at &gt; -66,7 +66,7 &lt; at &gt;&lt; at &gt; static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 
 iput(inode);
 
-return *len;
+return GFS2_LARGE_FH_SIZE;
 }
 
 struct get_name_filldir {
&lt; at &gt;&lt; at &gt; -239,7 +239,7 &lt; at &gt;&lt; at &gt; fail:
 }
 
 static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 struct gfs2_inum_host this;
 __be32 *fh = (__force __be32 *)fid-&gt;raw;
&lt; at &gt;&lt; at &gt; -254,12 +254,12 &lt; at &gt;&lt; at &gt; static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
 this.no_addr |= be32_to_cpu(fh[3]);
 return gfs2_get_dentry(sb, &amp;this);
 default:
-return NULL;
+return ERR_PTR(-ESTALE);
 }
 }
 
 static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 struct gfs2_inum_host parent;
 __be32 *fh = (__force __be32 *)fid-&gt;raw;
&lt; at &gt;&lt; at &gt; -273,7 +273,7 &lt; at &gt;&lt; at &gt; static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid,
 parent.no_addr |= be32_to_cpu(fh[7]);
 return gfs2_get_dentry(sb, &amp;parent);
 default:
-return NULL;
+return ERR_PTR(-ESTALE);
 }
 }
 
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a305..b66c083 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
&lt; at &gt;&lt; at &gt; -106,7 +106,10 &lt; at &gt;&lt; at &gt; static struct dentry *isofs_export_get_parent(struct dentry *child)
 return rv;
 }
 
-static int
+#define ISOFS_FILEID((enum fid_type) 1)
+#define ISOFS_FILEID_PARENT((enum fid_type) 2)
+
+static enum fid_type
 isofs_export_encode_fh(struct dentry *dentry,
        __u32 *fh32,
        int *max_len,
&lt; at &gt;&lt; at &gt; -115,7 +118,7 &lt; at &gt;&lt; at &gt; isofs_export_encode_fh(struct dentry *dentry,
 struct inode * inode = dentry-&gt;d_inode;
 struct iso_inode_info * ei = ISOFS_I(inode);
 int len = *max_len;
-int type = 1;
+enum fid_type type = ISOFS_FILEID;
 __u16 *fh16 = (__u16*)fh32;
 
 /*
&lt; at &gt;&lt; at &gt; -126,7 +129,7 &lt; at &gt;&lt; at &gt; isofs_export_encode_fh(struct dentry *dentry,
  */
 
 if (len &lt; 3 || (connectable &amp;&amp; len &lt; 5))
-return 255;
+return FILEID_ERROR;
 
 len = 3;
 fh32[0] = ei-&gt;i_iget5_block;
&lt; at &gt;&lt; at &gt; -143,7 +146,7 &lt; at &gt;&lt; at &gt; isofs_export_encode_fh(struct dentry *dentry,
 fh32[4] = parent-&gt;i_generation;
 spin_unlock(&amp;dentry-&gt;d_lock);
 len = 5;
-type = 2;
+type = ISOFS_FILEID_PARENT;
 }
 *max_len = len;
 return type;
&lt; at &gt;&lt; at &gt; -159,23 +162,23 &lt; at &gt;&lt; at &gt; struct isofs_fid {
 };
 
 static struct dentry *isofs_fh_to_dentry(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct isofs_fid *ifid = (struct isofs_fid *)fid;
 
-if (fh_len &lt; 3 || fh_type &gt; 2)
-return NULL;
+if (fh_len &lt; 3 || fh_type &gt; ISOFS_FILEID_PARENT)
+return ERR_PTR(-ESTALE);
 
 return isofs_export_iget(sb, ifid-&gt;block, ifid-&gt;offset,
 ifid-&gt;generation);
 }
 
 static struct dentry *isofs_fh_to_parent(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct isofs_fid *ifid = (struct isofs_fid *)fid;
 
-if (fh_type != 2)
+if (fh_type != ISOFS_FILEID_PARENT)
 return NULL;
 
 return isofs_export_iget(sb,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 4c4e18c..bd4998e 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
&lt; at &gt;&lt; at &gt; -73,14 +73,14 &lt; at &gt;&lt; at &gt; static struct inode *jffs2_nfs_get_inode(struct super_block *sb, uint64_t ino,
 }
 
 static struct dentry *jffs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
+ int fh_len, enum fid_type fh_type)
 {
         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                     jffs2_nfs_get_inode);
 }
 
 static struct dentry *jffs2_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
+ int fh_len, enum fid_type fh_type)
 {
         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                     jffs2_nfs_get_inode);
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index adb2faf..aa7d573 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
&lt; at &gt;&lt; at &gt; -19,6 +19,7 &lt; at &gt;&lt; at &gt;
 #define _H_JFS_INODE
 
 struct fid;
+enum fid_type;
 
 extern struct inode *ialloc(struct inode *, umode_t);
 extern int jfs_fsync(struct file *, struct dentry *, int);
&lt; at &gt;&lt; at &gt; -35,9 +36,9 &lt; at &gt;&lt; at &gt; extern void jfs_free_zero_link(struct inode *);
 extern struct dentry *jfs_get_parent(struct dentry *dentry);
 extern void jfs_get_inode_flags(struct jfs_inode_info *);
 extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type);
+int fh_len, enum fid_type fh_type);
 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type);
+int fh_len, enum fid_type fh_type);
 extern void jfs_set_inode_flags(struct inode *);
 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index cc3cedf..b824836 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
&lt; at &gt;&lt; at &gt; -1496,14 +1496,14 &lt; at &gt;&lt; at &gt; static struct inode *jfs_nfs_get_inode(struct super_block *sb,
 }
 
 struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
     jfs_nfs_get_inode);
 }
 
 struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
     jfs_nfs_get_inode);
diff --git a/fs/libfs.c b/fs/libfs.c
index e960a83..2a9eb1d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
&lt; at &gt;&lt; at &gt; -745,18 +745,19 &lt; at &gt;&lt; at &gt; out:
  * inode for the object specified in the file handle.
  */
 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type, struct inode *(*get_inode)
-(struct super_block *sb, u64 ino, u32 gen))
+    int fh_len, enum fid_type fh_type,
+    exportfs_get_inode_t get_inode)
 {
 struct inode *inode = NULL;
 
 if (fh_len &lt; 2)
-return NULL;
+return ERR_PTR(-ESTALE);
 
 switch (fh_type) {
 case FILEID_INO32_GEN:
 case FILEID_INO32_GEN_PARENT:
 inode = get_inode(sb, fid-&gt;i32.ino, fid-&gt;i32.gen);
+default:
 break;
 }
 
&lt; at &gt;&lt; at &gt; -778,18 +779,19 &lt; at &gt;&lt; at &gt; EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
  * is specified in the file handle, or NULL otherwise.
  */
 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type, struct inode *(*get_inode)
-(struct super_block *sb, u64 ino, u32 gen))
+    int fh_len, enum fid_type fh_type,
+    exportfs_get_inode_t get_inode)
 {
 struct inode *inode = NULL;
 
 if (fh_len &lt;= 2)
-return NULL;
+return ERR_PTR(-ESTALE);
 
 switch (fh_type) {
 case FILEID_INO32_GEN_PARENT:
 inode = get_inode(sb, fid-&gt;i32.parent_ino,
   (fh_len &gt; 3 ? fid-&gt;i32.parent_gen : 0));
+default:
 break;
 }
 
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 2ca0015..b3826c0 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
&lt; at &gt;&lt; at &gt; -364,14 +364,14 &lt; at &gt;&lt; at &gt; static struct inode *ntfs_nfs_get_inode(struct super_block *sb,
 }
 
 static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
     ntfs_nfs_get_inode);
 }
 
 static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
     ntfs_nfs_get_inode);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 2f27b33..d064bfa 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
&lt; at &gt;&lt; at &gt; -116,12 +116,15 &lt; at &gt;&lt; at &gt; bail:
 return parent;
 }
 
-static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
-   int connectable)
+#define OCFS2_FILEID((enum fid_type) 1)
+#define OCFS2_FILEID_PARENT((enum fid_type) 2)
+
+static enum fid_type ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in,
+int *max_len, int connectable)
 {
 struct inode *inode = dentry-&gt;d_inode;
 int len = *max_len;
-int type = 1;
+int type = OCFS2_FILEID;
 u64 blkno;
 u32 generation;
 __le32 *fh = (__force __le32 *) fh_in;
&lt; at &gt;&lt; at &gt; -132,7 +135,7 &lt; at &gt;&lt; at &gt; static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 
 if (len &lt; 3 || (connectable &amp;&amp; len &lt; 6)) {
 mlog(ML_ERROR, "fh buffer is too small for encoding\n");
-type = 255;
+type = FILEID_ERROR;
 goto bail;
 }
 
&lt; at &gt;&lt; at &gt; -163,7 +166,7 &lt; at &gt;&lt; at &gt; static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 spin_unlock(&amp;dentry-&gt;d_lock);
 
 len = 6;
-type = 2;
+type = OCFS2_FILEID_PARENT;
 
 mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
      (unsigned long long)blkno, generation);
&lt; at &gt;&lt; at &gt; -177,12 +180,12 &lt; at &gt;&lt; at &gt; bail:
 }
 
 static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct ocfs2_inode_handle handle;
 
-if (fh_len &lt; 3 || fh_type &gt; 2)
-return NULL;
+if (fh_len &lt; 3 || fh_type &gt; OCFS2_FILEID_PARENT)
+return ERR_PTR(-ESTALE);
 
 handle.ih_blkno = (u64)le32_to_cpu(fid-&gt;raw[0]) &lt;&lt; 32;
 handle.ih_blkno |= (u64)le32_to_cpu(fid-&gt;raw[1]);
&lt; at &gt;&lt; at &gt; -191,12 +194,12 &lt; at &gt;&lt; at &gt; static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb,
 }
 
 static struct dentry *ocfs2_fh_to_parent(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct ocfs2_inode_handle parent;
 
-if (fh_type != 2 || fh_len &lt; 6)
-return NULL;
+if (fh_type != OCFS2_FILEID_PARENT || fh_len &lt; 6)
+return ERR_PTR(-ESTALE);
 
 parent.ih_blkno = (u64)le32_to_cpu(fid-&gt;raw[3]) &lt;&lt; 32;
 parent.ih_blkno |= (u64)le32_to_cpu(fid-&gt;raw[4]);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6c4c2c6..3c548eb 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
&lt; at &gt;&lt; at &gt; -1539,7 +1539,7 &lt; at &gt;&lt; at &gt; static struct dentry *reiserfs_get_dentry(struct super_block *sb,
 }
 
 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 /* fhtype happens to reflect the number of u32s encoded.
  * due to a bug in earlier code, fhtype might indicate there
&lt; at &gt;&lt; at &gt; -1566,7 +1566,7 &lt; at &gt;&lt; at &gt; struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 }
 
 struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type)
+int fh_len, enum fid_type fh_type)
 {
 if (fh_type &lt; 4)
 return NULL;
&lt; at &gt;&lt; at &gt; -1577,14 +1577,14 &lt; at &gt;&lt; at &gt; struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
 (fh_type == 6) ? fid-&gt;raw[5] : 0);
 }
 
-int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
-       int need_parent)
+enum fid_type reiserfs_encode_fh(struct dentry *dentry, __u32 * data,
+    int *lenp, int need_parent)
 {
 struct inode *inode = dentry-&gt;d_inode;
 int maxlen = *lenp;
 
 if (maxlen &lt; 3)
-return 255;
+return FILEID_ERROR;
 
 data[0] = inode-&gt;i_ino;
 data[1] = le32_to_cpu(INODE_PKEY(inode)-&gt;k_dir_id);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f84bfaa..afaf45e 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
&lt; at &gt;&lt; at &gt; -1292,38 +1292,40 &lt; at &gt;&lt; at &gt; static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
 }
 
 static struct dentry *udf_fh_to_dentry(struct super_block *sb,
-       struct fid *fid, int fh_len, int fh_type)
+       struct fid *fid, int fh_len,
+       enum fid_type fh_type)
 {
 if ((fh_len != 3 &amp;&amp; fh_len != 5) ||
     (fh_type != FILEID_UDF_WITH_PARENT &amp;&amp;
      fh_type != FILEID_UDF_WITHOUT_PARENT))
-return NULL;
+return ERR_PTR(-ESTALE);
 
 return udf_nfs_get_inode(sb, fid-&gt;udf.block, fid-&gt;udf.partref,
 fid-&gt;udf.generation);
 }
 
 static struct dentry *udf_fh_to_parent(struct super_block *sb,
-       struct fid *fid, int fh_len, int fh_type)
+       struct fid *fid, int fh_len,
+       enum fid_type fh_type)
 {
 if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT)
-return NULL;
+return ERR_PTR(-ESTALE);
 
 return udf_nfs_get_inode(sb, fid-&gt;udf.parent_block,
  fid-&gt;udf.parent_partref,
  fid-&gt;udf.parent_generation);
 }
-static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
- int connectable)
+static enum fid_type udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
+   int connectable)
 {
 int len = *lenp;
 struct inode *inode =  de-&gt;d_inode;
 kernel_lb_addr location = UDF_I(inode)-&gt;i_location;
 struct fid *fid = (struct fid *)fh;
-int type = FILEID_UDF_WITHOUT_PARENT;
+enum fid_type type = FILEID_UDF_WITHOUT_PARENT;
 
 if (len &lt; 3 || (connectable &amp;&amp; len &lt; 5))
-return 255;
+return FILEID_ERROR;
 
 *lenp = 3;
 fid-&gt;udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 7f7abec..ce053aa 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
&lt; at &gt;&lt; at &gt; -51,7 +51,7 &lt; at &gt;&lt; at &gt; static int xfs_fileid_length(int fileid_type)
 return 255; /* invalid */
 }
 
-STATIC int
+STATIC enum fid_type
 xfs_fs_encode_fh(
 struct dentry*dentry,
 __u32*fh,
&lt; at &gt;&lt; at &gt; -61,18 +61,18 &lt; at &gt;&lt; at &gt; xfs_fs_encode_fh(
 struct fid*fid = (struct fid *)fh;
 struct xfs_fid64*fid64 = (struct xfs_fid64 *)fh;
 struct inode*inode = dentry-&gt;d_inode;
-intfileid_type;
+enum fid_typefid_type;
 intlen;
 
 /* Directories don't need their parent encoded, they have ".." */
 if (S_ISDIR(inode-&gt;i_mode) || !connectable)
-fileid_type = FILEID_INO32_GEN;
+fid_type = FILEID_INO32_GEN;
 else
-fileid_type = FILEID_INO32_GEN_PARENT;
+fid_type = FILEID_INO32_GEN_PARENT;
 
 /* filesystem may contain 64bit inode numbers */
 if (!(XFS_M(inode-&gt;i_sb)-&gt;m_flags &amp; XFS_MOUNT_SMALL_INUMS))
-fileid_type |= XFS_FILEID_TYPE_64FLAG;
+fid_type |= XFS_FILEID_TYPE_64FLAG;
 
 /*
  * Only encode if there is enough space given.  In practice
&lt; at &gt;&lt; at &gt; -80,12 +80,12 &lt; at &gt;&lt; at &gt; xfs_fs_encode_fh(
  * over NFSv2 with the subtree_check export option; the other
  * seven combinations work.  The real answer is "don't use v2".
  */
-len = xfs_fileid_length(fileid_type);
+len = xfs_fileid_length(fid_type);
 if (*max_len &lt; len)
-return 255;
+return FILEID_ERROR;
 *max_len = len;
 
-switch (fileid_type) {
+switch (fid_type) {
 case FILEID_INO32_GEN_PARENT:
 spin_lock(&amp;dentry-&gt;d_lock);
 fid-&gt;i32.parent_ino = dentry-&gt;d_parent-&gt;d_inode-&gt;i_ino;
&lt; at &gt;&lt; at &gt; -106,9 +106,11 &lt; at &gt;&lt; at &gt; xfs_fs_encode_fh(
 fid64-&gt;ino = inode-&gt;i_ino;
 fid64-&gt;gen = inode-&gt;i_generation;
 break;
+default:
+break;
 }
 
-return fileid_type;
+return fid_type;
 }
 
 STATIC struct inode *
&lt; at &gt;&lt; at &gt; -144,15 +146,15 &lt; at &gt;&lt; at &gt; xfs_nfs_get_inode(
 
 STATIC struct dentry *
 xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fileid_type)
+ int fh_len, enum fid_type fid_type)
 {
 struct xfs_fid64*fid64 = (struct xfs_fid64 *)fid;
 struct inode*inode = NULL;
 
-if (fh_len &lt; xfs_fileid_length(fileid_type))
-return NULL;
+if (fh_len &lt; xfs_fileid_length(fid_type))
+return ERR_PTR(-ESTALE);
 
-switch (fileid_type) {
+switch (fid_type) {
 case FILEID_INO32_GEN_PARENT:
 case FILEID_INO32_GEN:
 inode = xfs_nfs_get_inode(sb, fid-&gt;i32.ino, fid-&gt;i32.gen);
&lt; at &gt;&lt; at &gt; -161,6 +163,8 &lt; at &gt;&lt; at &gt; xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
 inode = xfs_nfs_get_inode(sb, fid64-&gt;ino, fid64-&gt;gen);
 break;
+default:
+break;
 }
 
 return d_obtain_alias(inode);
&lt; at &gt;&lt; at &gt; -168,12 +172,12 &lt; at &gt;&lt; at &gt; xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 
 STATIC struct dentry *
 xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fileid_type)
+ int fh_len, enum fid_type fid_type)
 {
 struct xfs_fid64*fid64 = (struct xfs_fid64 *)fid;
 struct inode*inode = NULL;
 
-switch (fileid_type) {
+switch (fid_type) {
 case FILEID_INO32_GEN_PARENT:
 inode = xfs_nfs_get_inode(sb, fid-&gt;i32.parent_ino,
       fid-&gt;i32.parent_gen);
&lt; at &gt;&lt; at &gt; -182,6 +186,8 &lt; at &gt;&lt; at &gt; xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
 inode = xfs_nfs_get_inode(sb, fid64-&gt;parent_ino,
       fid64-&gt;parent_gen);
 break;
+default:
+break;
 }
 
 return d_obtain_alias(inode);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 27e772c..d6995e8 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
&lt; at &gt;&lt; at &gt; -1,3 +1,9 &lt; at &gt;&lt; at &gt;
+/* Persistent file handle encoding and decoding interface
+ *
+ * See Documentation/filesystems/Exporting for details on how to use this
+ * interface correctly.
+ */
+
 #ifndef LINUX_EXPORTFS_H
 #define LINUX_EXPORTFS_H 1
 
&lt; at &gt;&lt; at &gt; -9,12 +15,15 &lt; at &gt;&lt; at &gt; struct super_block;
 struct vfsmount;
 
 /*
- * The fileid_type identifies how the file within the filesystem is encoded.
- * In theory this is freely set and parsed by the filesystem, but we try to
- * stick to conventions so we can share some generic code and don't confuse
- * sniffers like ethereal/wireshark.
+ * The fid_type identifies how the parameters specifying a file within the
+ * filesystem are encoded.  In theory this is freely set and parsed by the
+ * filesystem, but we try to stick to conventions so we can share some generic
+ * code and don't confuse sniffers like ethereal/wireshark.
  *
- * The filesystem must not use the value '0' or '0xff'.
+ * The filesystem may use arbitrary values rather than picking the constants
+ * from this set, with the restriction that the values chosen must be between 1
+ * and 254.  0 and 255 are special purpose, and the value must fit within an
+ * unsigned byte.
  */
 enum fid_type {
 /*
&lt; at &gt;&lt; at &gt; -67,6 +76,10 &lt; at &gt;&lt; at &gt; enum fid_type {
  * 32 bit parent block number, 32 bit parent generation number
  */
 FILEID_UDF_WITH_PARENT = 0x52,
+
+/* This is returned if the encode routine was unable to represent the
+ * file */
+FILEID_ERROR = 0xff
 };
 
 struct fid {
&lt; at &gt;&lt; at &gt; -101,73 +114,97 &lt; at &gt;&lt; at &gt; struct fid {
  * this interface correctly.
  *
  * encode_fh:
- *    &lt; at &gt;encode_fh should store in the file handle fragment &lt; at &gt;fh (using at most
- *    &lt; at &gt;max_len bytes) information that can be used by &lt; at &gt;decode_fh to recover the
- *    file refered to by the &amp;struct dentry &lt; at &gt;de.  If the &lt; at &gt;connectable flag is
- *    set, the encode_fh() should store sufficient information so that a good
- *    attempt can be made to find not only the file but also it's place in the
- *    filesystem.   This typically means storing a reference to de-&gt;d_parent in
- *    the filehandle fragment.  encode_fh() should return the number of bytes
- *    stored or a negative error code such as %-ENOSPC
+ *    The &lt; at &gt;encode_fh operation should store into the file handle fragment
+ *    buffer &lt; at &gt;fh at most *&lt; at &gt;max_len 32-bit words of information that can be used
+ *    by fh_to_dentry() or fh_to_parent() to recover the file referred to by
+ *    the &amp;struct dentry &lt; at &gt;de.
+ *
+ *    If the &lt; at &gt;connectable flag is set, the encode_fh() should store sufficient
+ *    information so that a good attempt can be made to find not only the file
+ *    but also its place in the filesystem.  This typically means storing a
+ *    reference to de-&gt;d_parent in the filehandle fragment.
+ *
+ *    On success, encode_fh() should return the type of the file handle, which
+ *    the caller must retain in some manner so that it can be passed to
+ *    fh_to_dentry() or fh_to_parent().  See the comment on enum fid_type for
+ *    the permitted types.  Furthermore, *max_len should be updated upon
+ *    return to indicate the amount of buffer space actually filled.
+ *
+ *    On failure FILEID_ERROR should be returned.
  *
  * fh_to_dentry:
- *    &lt; at &gt;fh_to_dentry is given a &amp;struct super_block (&lt; at &gt;sb) and a file handle
- *    fragment (&lt; at &gt;fh, &lt; at &gt;fh_len). It should return a &amp;struct dentry which refers
- *    to the same file that the file handle fragment refers to.  If it cannot,
- *    it should return a %NULL pointer if the file was found but no acceptable
- *    &amp;dentries were available, or an %ERR_PTR error code indicating why it
- *    couldn't be found (e.g. %ENOENT or %ENOMEM).  Any suitable dentry can be
- *    returned including, if necessary, a new dentry created with d_alloc_root.
- *    The caller can then find any other extant dentries by following the
- *    d_alias links.
+ *    The &lt; at &gt;fh_to_dentry operation is given a &amp;struct super_block (&lt; at &gt;sb) and a
+ *    file handle fragment and type (&lt; at &gt;fh, &lt; at &gt;fh_len, &lt; at &gt;fh_type).  It should look
+ *    up a file by the data in the partial file handle.
+ *
+ *    On success, a pointer should be returned to a dentry that corresponds to
+ *    the file that the file handle fragment was generated from by encode_fh().
+ *    The dentry should come from the set of dentries available to the
+ *    specified superblock, whether they're in memory or in storage.
+ *
+ *    Any suitable dentry can be returned including, if necessary, a new dentry
+ *    created with d_alloc_root.  The caller can then find any other extant
+ *    dentries by following the d_alias links.
+ *
+ *    On failure, a negative error code should be returned indicating the
+ *    reason.  Note that a NULL pointer is not a valid return.
  *
  * fh_to_parent:
  *    Same as &lt; at &gt;fh_to_dentry, except that it returns a pointer to the parent
- *    dentry if it was encoded into the filehandle fragment by &lt; at &gt;encode_fh.
+ *    dentry if it was encoded into the filehandle fragment by encode_fh().
  *
  * get_name:
  *    &lt; at &gt;get_name should find a name for the given &lt; at &gt;child in the given &lt; at &gt;parent
  *    directory.  The name should be stored in the &lt; at &gt;name (with the
- *    understanding that it is already pointing to a a %NAME_MAX+1 sized
- *    buffer.   get_name() should return %0 on success, a negative error code
- *    or error.  &lt; at &gt;get_name will be called without &lt; at &gt;parent-&gt;i_mutex held.
+ *    understanding that it is already pointing to a %NAME_MAX+1 sized buffer).
+ *
+ *    get_name() should return zero on success and a negative error code on
+ *    failure.
+ *
+ *    get_name() will be called without &lt; at &gt;parent-&gt;i_mutex held.
  *
  * get_parent:
  *    &lt; at &gt;get_parent should find the parent directory for the given &lt; at &gt;child which
  *    is also a directory.  In the event that it cannot be found, or storage
- *    space cannot be allocated, a %ERR_PTR should be returned.
+ *    space cannot be allocated, a suitable negative error code should be
+ *    returned.
  *
  * Locking rules:
  *    get_parent is called with child-&gt;d_inode-&gt;i_mutex down
  *    get_name is not (which is possibly inconsistent)
  */
-
 struct export_operations {
-int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
-int connectable);
+enum fid_type (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
+   int connectable);
 struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type);
+int fh_len, enum fid_type fh_type);
 struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid,
-int fh_len, int fh_type);
+int fh_len, enum fid_type fh_type);
 int (*get_name)(struct dentry *parent, char *name,
 struct dentry *child);
 struct dentry * (*get_parent)(struct dentry *child);
 };
 
-extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
-int *max_len, int connectable);
+typedef int (*exportfs_acceptable_t)(void *context, struct dentry *dentry);
+
+extern enum fid_type exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
+int *max_len, int connectable);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
-int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *),
-void *context);
+ int fh_len, enum fid_type fileid_type,
+ exportfs_acceptable_t acceptable,
+ void *context);
 
 /*
  * Generic helpers for filesystems.
  */
+typedef struct inode *(*exportfs_get_inode_t)(struct super_block *sb,
+      u64 ino, u32 gen);
+
 extern struct dentry *generic_fh_to_dentry(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type,
-struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+struct fid *fid, int fh_len, enum fid_type fh_type,
+exportfs_get_inode_t get_inode);
 extern struct dentry *generic_fh_to_parent(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type,
-struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+struct fid *fid, int fh_len, enum fid_type fh_type,
+exportfs_get_inode_t get_inode);
 
 #endif /* LINUX_EXPORTFS_H */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index bc5114d..0783dbe 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -24,6 +24,7 @@
 #include &lt;linux/proc_fs.h&gt;
 #include &lt;linux/smp_lock.h&gt;
 #include &lt;linux/buffer_head.h&gt;
+#include &lt;linux/exportfs.h&gt;
 #include &lt;linux/reiserfs_fs_i.h&gt;
 #include &lt;linux/reiserfs_fs_sb.h&gt;
 #endif
@@ -1880,11 +1881,11 @@ int reiserfs_write_inode(struct inode *inode, int);
 int reiserfs_get_block(struct inode *inode, sector_t block,
        struct buffer_head *bh_result, int create);
 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-     int fh_len, int fh_type);
+     int fh_len, enum fid_type fh_type);
 struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
-     int fh_len, int fh_type);
-int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
-       int connectable);
+     int fh_len, enum fid_type fh_type);
+enum fid_type reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
+ int connectable);
 
 int reiserfs_truncate_file(struct inode *, int update_timestamps);
 void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
diff --git a/mm/shmem.c b/mm/shmem.c
index f1b0d48..d6e91dd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2047,7 +2047,7 @@ static int shmem_match(struct inode *ino, void *vfh)
 }
 
 static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
-struct fid *fid, int fh_len, int fh_type)
+struct fid *fid, int fh_len, enum fid_type fh_type)
 {
 struct inode *inode;
 struct dentry *dentry = NULL;
@@ -2067,8 +2067,8 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
 return dentry;
 }
 
-static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
-int connectable)
+static enum fid_type shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
+     int connectable)
 {
 struct inode *inode = dentry-&gt;d_inode;
 

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>David Howells</dc:creator>
    <dc:date>2008-12-02T21:02:33</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27831">
    <title>[0/5] New POHMELFS release.</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27831</link>
    <description>Hello.

This is a new release of the POHMEL filesystem.
POHMELFS stands for Parallel Optimized Host Message Exchange Layered File System.

This is a high performance network filesystem with local coherent cache of data
and metadata. Its main goal is distributed parallel processing of data.
Network filesystem is a client transport.

The POHMELFS protocol was proven to be superior to NFS in lots of
(if not all, then it is on the roadmap) operations [3].

Basic POHMELFS features:

    * Local coherent cache for data and metadata. (Byte-range) locking. Locks were
    prepared to be byte-range, but since all Linux filesystems lock the whole
    inode, it was decided to lock the whole object during writing. The actual messages
    sent for the locking/cache coherency protocol are byte-range, but because
    the whole inode is locked, the lock is cached, so the range is actually equal to the
    inode size. One can simultaneously write into the same page at different offsets
    from different clients, and every time the file will be coherent on all clients which
    do it and on the server itself.
    * Completely async processing of all events (hard and symlinks are the only exceptions)
    including object creation and data reading and writing.
    * Flexible object architecture optimized for network processing. Ability to create long
    paths to objects and remove arbitrarily huge directories in a single network command.
    * High performance is one of the main design goals.
    * Very fast and scalable multithreaded userspace server. Being in userspace it works
    with any underlying filesystem and still is much faster than async in-kernel NFS one.
    * Transactions support. Full failover for all operations. Resending transactions to
    different servers on timeout or error.
    * Client is able to switch between different servers
    (if one goes down, client automatically reconnects to second and so on).
    * Client parallel extensions: ability to write to multiple servers and
    balance reading between them.
    * Client dynamic server reconfiguration: ability to add/remove servers from
    working set in run-time.
    * Strong authentication and possible data encryption in network channel.
    * Extended attributes support.
    * Read-only mounts, ability to limit maximum size of the exported directory.

This release brings the following features:

    * Optimize locking commands (combine them with inode information update)
    * Added root capabilities (split from the crypto handshake, which settles the crypto
    algorithms used by client and server). This allows filesystem
    statistics to be sent from the server.
    * Read-only and xattrs server extensions. Xattrs processing optimization
    when they are not supported by server.
    * bug fixes
    * extended email documentation

Signed-off-by: Evgeniy Polyakov &lt;zbr&lt; at &gt;ioremap.net&gt;

1. POHMELFS homepage
http://www.ioremap.net/projects/pohmelfs

2. GIT trees.
http://www.ioremap.net/cgi-bin/gitweb.cgi

3. POHMELFS vs NFS benchmarks.
http://tservice.net.ru/~s0mbre/blog/devel/fs/2008_06_13_1.html

(POHMELFS cbc(aes)+hmac(sha1) vs plain NFS)
http://tservice.net.ru/~s0mbre/blog/devel/fs/2008_07_07.html
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>Evgeniy Polyakov</dc:creator>
    <dc:date>2008-12-02T19:30:49</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27827">
    <title>PROBLEM: hfsplus filesystem allows opendir() on plain files</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27827</link>
    <description>1. hfsplus filesystem allows opendir() on plain files.

2. When opendir() is called on a plain file stored in an hfsplus 
filesystem, opendir() fails to return 0 and set errno to ENOTDIR.

3. filesystem, hfsplus

4. Linux version 2.6.27-7-generic (buildd&lt; at &gt;palmer) (gcc version 4.3.2 
(Ubuntu 4.3.2-1ubuntu11) ) #1 SMP Tue Nov 4 19:33:20 UTC 2008

5. unknown

6. n/a

7. Try the following program on files and directories first under a 
mountpoint for an ext3 partition then under a mountpoint for an hfsplus 
partition.

///////////////////////////////////
#include &lt;dirent.h&gt;
#include &lt;stdio.h&gt;
#include &lt;errno.h&gt;

// usage: opendir_test &lt;file&gt;|&lt;directory&gt;

int main( int argc, char** argv )
{
     DIR* result = opendir( argv[1] );
     printf("ENOTDIR is 0x%x\n", ENOTDIR );
     printf("opendir '%s' returned 0x%x, errno 0x%x\n", argv[1], result, 
errno );
     return 0;
}
//////////////////////////////////


I get the following output:

damian&lt; at &gt;damian-laptop:~/code/opendir$ mkdir test_directory
damian&lt; at &gt;damian-laptop:~/code/opendir$ echo hello &gt; test_file.txt
damian&lt; at &gt;damian-laptop:~/code/opendir$ ./opendir_test test_directory
ENOTDIR is 0x14
opendir 'test_directory' returned 0x88f6008, errno 0x0
damian&lt; at &gt;damian-laptop:~/code/opendir$ ./opendir_test test_file.txt
ENOTDIR is 0x14
opendir 'test_file.txt' returned 0x0, errno 0x14
# /osx is an HFSPLUS mountpoint
damian&lt; at &gt;damian-laptop:~/code/opendir$ cd /media/osx
# mkdir test_directory &amp;&amp; echo hello &gt; test_file.txt performed in OSX
damian&lt; at &gt;damian-laptop:/media/osx$ ./opendir_test test_directory
ENOTDIR is 0x14
opendir 'test_directory' returned 0x919d008, errno 0x0
# erroneous result follows
damian&lt; at &gt;damian-laptop:/media/osx$ ./opendir_test test_file.txt
ENOTDIR is 0x14
opendir 'test_file.txt' returned 0x9a23008, errno 0x0
damian&lt; at &gt;damian-laptop:/media/osx$

8. If some fields are empty or look unusual you may have an old version.
Compare to the current minimal requirements in Documentation/Changes.

Linux damian-laptop 2.6.27-7-generic #1 SMP Tue Nov 4 19:33:20 UTC 2008 
i686 GNU/Linux

Gnu C                  4.3.2-1ubuntu11)
Gnu make               3.81
binutils               Binutils
util-linux             2.14
mount                  support
module-init-tools      3.3-pre11
e2fsprogs              1.41.3
reiserfsprogs          3.6.19
pcmciautils            014
Linux C Library        2.8.90
Dynamic linker (ldd)   2.8.90
Procps                 3.2.7
Net-tools              1.60
Kbd                    1.14.1
Sh-utils               6.10
udev                   124
wireless-tools         29
Modules Loaded         ipv6 aes_i586 aes_generic nls_utf8 hfsplus i915 drm 
af_packet binfmt_misc btusb sco bridge stp bnep rfcomm l2cap bluetooth 
ppdev acpi_cpufreq cpufreq_stats cpufreq_powersave cpufreq_conservative 
cpufreq_userspace cpufreq_ondemand freq_table container wmi pci_slot sbs 
sbshc iptable_filter ip_tables x_tables sbp2 parport_pc lp parport joydev 
snd_hda_intel arc4 snd_pcm_oss snd_mixer_oss iTCO_wdt ecb crypto_blkcipher 
iTCO_vendor_support snd_pcm ath9k evdev appleir pcspkr appletouch 
snd_seq_dummy mac80211 snd_seq_oss cfg80211 snd_seq_midi snd_rawmidi 
snd_seq_midi_event snd_seq video snd_timer snd_seq_device output battery 
snd ac intel_agp button soundcore agpgart shpchp pci_hotplug snd_page_alloc 
ext3 jbd mbcache sd_mod crc_t10dif sr_mod cdrom sg ata_generic ata_piix 
usbhid hid pata_acpi ohci1394 libata ieee1394 scsi_mod ehci_hcd uhci_hcd 
dock usbcore sky2 thermal processor fan fbcon tileblit font bitblit 
softcursor fuse

thanks
d

</description>
    <dc:creator>Damian Stewart</dc:creator>
    <dc:date>2008-12-02T15:26:56</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27826">
    <title>force mandatory vs. using posix advisory byte range locking</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27826</link>
    <description>Patch to force cifs to use mandatory locking instead of posix locking
even if server would otherwise support posix cifs extensions.



</description>
    <dc:creator>Steve French</dc:creator>
    <dc:date>2008-12-02T15:55:11</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27814">
    <title>[patch 11/11] VFS: lseek(fd, 0, SEEK_CUR) race condition</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27814</link>
    <description>From: Alain Knaff &lt;alain&lt; at &gt;knaff.lu&gt;

This patch fixes a race condition in lseek.  While it is expected that
unpredictable behaviour may result while repositioning the offset of a
file descriptor concurrently with reading/writing to the same file
descriptor, this should not happen when merely *reading* the file
descriptor's offset.

Unfortunately, the only portable way in Unix to read a file
descriptor's offset is lseek(fd, 0, SEEK_CUR); however executing this
concurrently with read/write may mess up the position, as shown by the
testcase below:

#include &lt;sys/types.h&gt;
#include &lt;stdio.h&gt;
#include &lt;pthread.h&gt;
#include &lt;unistd.h&gt;
#include &lt;errno.h&gt;
#include &lt;string.h&gt;
#include &lt;pthread.h&gt;

void *loop(void *ptr)
{
  fprintf(stderr, "Starting seek thread\n");
  while(1) {
    if(lseek(0, 0LL, SEEK_CUR) &lt; 0LL)
      perror("seek");
  }
}

int main(int argc, char **argv) {
  long long l=0;
  int r;
  char buf[4096];

  pthread_t thread;
  pthread_create(&amp;thread, 0, loop, 0);

  for(r=0; 1 ; r++) {
    int n = read(0, buf, 4096);
    if(n == 0)
      break;
    if(n &lt; 4096) {
      fprintf(stderr, "Short read %d %s\n", n, strerror(errno));
    }
    l+= n;
  }
  fprintf(stderr, "Read %lld bytes\n", l);

  return 0;
}

Compile this and run it on a multi-processor machine as
 ./a.out &lt;bigFile

where bigFile is a 1 Gigabyte file. It should print 1073741824.
However, on a buggy kernel, it usually produces a bigger number. The
problem only happens on a multiprocessor machine. This is because an
lseek(fd, 0, SEEK_CUR) running concurrently with a read() or write()
will reset the position back to what it used to be when the read()
started.

This behavior was observed "in the wild" when using udpcast which uses
lseek to monitor progress of reading/writing the uncompressed data.

The patch below fixes the issue by "special-casing" the lseek(fd, 0,
SEEK_CUR) pattern.

Apparently, an attempt was already made to fix the issue by the
following code:

if (offset != file-&gt;f_pos) {
file-&gt;f_pos = offset;
file-&gt;f_version = 0;
}

However, this doesn't work if file-&gt;f_pos was changed (by read() or
write()) between the time offset was computed, and the time where it
considers writing it back.

Signed-off-by: Alain Knaff &lt;alain&lt; at &gt;knaff.lu&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Cc: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 fs/read_write.c |   17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff -puN fs/read_write.c~vfs-lseekfd-0-seek_cur-race-condition fs/read_write.c
--- a/fs/read_write.c~vfs-lseekfd-0-seek_cur-race-condition
+++ a/fs/read_write.c
@@ -50,6 +50,14 @@ generic_file_llseek_unlocked(struct file
 offset += inode-&gt;i_size;
 break;
 case SEEK_CUR:
+/*
+ * Here we special-case the lseek(fd, 0, SEEK_CUR)
+ * position-querying operation.  Avoid rewriting the "same"
+ * f_pos value back to the file because a concurrent read(),
+ * write() or lseek() might have altered it
+ */
+if (offset == 0)
+return file-&gt;f_pos;
 offset += file-&gt;f_pos;
 break;
 }
@@ -105,6 +113,14 @@ loff_t default_llseek(struct file *file,
 offset += i_size_read(file-&gt;f_path.dentry-&gt;d_inode);
 break;
 case SEEK_CUR:
+/*
+ * See SEEK_CUR description in
+ * generic_file_llseek_unlocked()
+ */
+if (offset == 0) {
+retval = file-&gt;f_pos;
+goto out;
+}
 offset += file-&gt;f_pos;
 }
 retval = -EINVAL;
@@ -115,6 +131,7 @@ loff_t default_llseek(struct file *file,
 }
 retval = offset;
 }
+out:
 unlock_kernel();
 return retval;
 }
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:35:01</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27813">
    <title>[patch 10/11] vfs: expand some comments (d_path / seq_path)</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27813</link>
    <description>From: Arjan van de Ven &lt;arjan&lt; at &gt;infradead.org&gt;

Explain that you really need to use the return value of d_path rather than
the buffer you passed into it.

Also fix the comment for seq_path(), the function arguments changed
recently but the comment hadn't been updated in sync.

Signed-off-by: Arjan van de Ven &lt;arjan&lt; at &gt;linux.intel.com&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Cc: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 fs/dcache.c   |    8 ++++++--
 fs/seq_file.c |   10 ++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff -puN fs/dcache.c~vfs-expand-some-comments-d_path-seq_path fs/dcache.c
--- a/fs/dcache.c~vfs-expand-some-comments-d_path-seq_path
+++ a/fs/dcache.c
@@ -1912,7 +1912,8 @@ static int prepend_name(char **buffer, i
  * Convert a dentry into an ASCII path name. If the entry has been deleted
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
  *
@@ -1988,7 +1989,10 @@ Elong:
  * Convert a dentry into an ASCII path name. If the entry has been deleted
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
  *
  * "buflen" should be positive.
  */
diff -puN fs/seq_file.c~vfs-expand-some-comments-d_path-seq_path fs/seq_file.c
--- a/fs/seq_file.c~vfs-expand-some-comments-d_path-seq_path
+++ a/fs/seq_file.c
@@ -389,8 +389,14 @@ char *mangle_path(char *s, char *p, char
 }
 EXPORT_SYMBOL_GPL(mangle_path);
 
-/*
- * return the absolute path of 'dentry' residing in mount 'mnt'.
+/**
+ * seq_path - seq_file interface to print a pathname
+ * @m: the seq_file handle
+ * @path: the struct path to print
+ * @esc: set of characters to escape in the output
+ *
+ * return the absolute path of 'path', as represented by the
+ * dentry / mnt pair in the path parameter.
  */
 int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:35:00</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27812">
    <title>[patch 09/11] vfs: correct wrong function name of d_put in kernel document and source comment</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27812</link>
    <description>From: Zhaolei &lt;zhaolei&lt; at &gt;cn.fujitsu.com&gt;

There is no function named d_put(); it should be dput().

Impact: fix document and comment, no functionality changed

Signed-off-by: Zhao Lei &lt;zhaolei&lt; at &gt;cn.fujitsu.com&gt;
Signed-off-by: Randy Dunlap &lt;rdunlap&lt; at &gt;xenotime.net&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 Documentation/filesystems/vfs.txt |    2 +-
 fs/dcache.c                       |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff -puN Documentation/filesystems/vfs.txt~vfs-correct-wrong-function-name-of-d_put-in-kernel-document-and-source-comment Documentation/filesystems/vfs.txt
--- a/Documentation/filesystems/vfs.txt~vfs-correct-wrong-function-name-of-d_put-in-kernel-document-and-source-comment
+++ a/Documentation/filesystems/vfs.txt
@@ -931,7 +931,7 @@ manipulate dentries:
   d_lookup: look up a dentry given its parent and path name component
 It looks up the child of that given name from the dcache
 hash table. If it is found, the reference count is incremented
-and the dentry is returned. The caller must use d_put()
+and the dentry is returned. The caller must use dput()
 to free the dentry when it finishes using it.
 
 For further information on dentry locking, please refer to the document
diff -puN fs/dcache.c~vfs-correct-wrong-function-name-of-d_put-in-kernel-document-and-source-comment fs/dcache.c
--- a/fs/dcache.c~vfs-correct-wrong-function-name-of-d_put-in-kernel-document-and-source-comment
+++ a/fs/dcache.c
@@ -1337,7 +1337,7 @@ err_out:
  *
  * Searches the children of the parent dentry for the name in question. If
  * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use d_put to free the entry when it has
+ * is returned. The caller must use dput to free the entry when it has
  * finished using it. %NULL is returned on failure.
  *
  * __d_lookup is dcache_lock free. The hash list is protected using RCU.
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:58</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27811">
    <title>[patch 04/11] vfs: introduce new LSM hooks where vfsmount is available.</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27811</link>
    <description>From: Kentaro Takeda &lt;takedakn&lt; at &gt;nttdata.co.jp&gt;

----- What is this patch for? -----

There are security_inode_*() LSM hooks for attribute-based MAC, but they are not
suitable for pathname-based MAC because they don't receive "struct vfsmount"
information.

----- How was this patch developed? -----

Two pathname-based MACs, AppArmor and TOMOYO Linux, are trying to merge
upstream. But because of "struct vfsmount" problem, they have been unable to
merge upstream.

Here is the list of approaches and the reasons for their rejection.

(1) Not using LSM
 http://lwn.net/Articles/277833/

 This approach was rejected because security modules should use LSM because the
 whole idea behind LSM was to have a single set of hooks for all security
 modules; if every module now adds its own set of hooks, that purpose will have
 been defeated and the kernel will turn into a big mess of security hooks.

(2) Retrieving "struct vfsmount" from "struct task_struct".
 http://lkml.org/lkml/2007/11/5/388

 Since "struct task_struct" contains list of "struct vfsmount",
 "struct vfsmount" which corresponds to "struct dentry" can be retrieved from
 the list unless "mount --bind" is used.

 This approach turned out to cause a critical problem that getting namespace_sem
 lock from security_inode_*() triggers AB-BA deadlock.

(3) Adding "struct vfsmount" parameter to VFS helper functions.
 http://lkml.org/lkml/2008/5/29/207

 This approach adds "struct vfsmount" to VFS helper functions (e.g. vfs_mkdir()
 and vfs_symlink()) and LSM hooks inside VFS helper functions. This approach is
 helpful for not only AppArmor and TOMOYO Linux 2.x but also SELinux and
 auditing purpose, for this approach allows existent LSM users to use pathnames
 in their access control and audit logs.

 This approach was rejected by Al Viro, the VFS maintainer, because he thinks
 individual filesystem should remain "struct vfsmount"-unaware and VFS helper
 functions should not receive "struct vfsmount".

 Al Viro also suggested moving the existing security_inode_*() out of the VFS
 helper functions so that security_inode_*() can receive "struct vfsmount"
 without modifying VFS helper functions, but this suggestion was opposed by
 Stephen Smalley because changing the order of permission checks (i.e.
 MAC checks before DAC checks) is not acceptable.

(4) Passing "struct vfsmount" via "struct task_struct".
 http://lkml.org/lkml/2007/11/16/157

 Since we didn't understand the reason why accessing "struct vfsmount" from
 LSM hooks inside VFS helper functions is not acceptable, we thought the reason
 why VFS helper functions don't receive "struct vfsmount" is the amount of
 modifications needed to do so. Thus, we proposed to pass "struct vfsmount" via
 "struct task_struct" so that modifications remain minimal.

 This approach was rejected because this is an abuse of "struct task_struct".

(5) Remembering pathname of "struct vfsmount" via "struct task_struct".
 http://lkml.org/lkml/2008/8/19/16

 Since pathname of a "struct dentry" up to the mount point can be calculated
 without "struct vfsmount", absolute pathname of a "struct dentry" can be
 calculated if "struct task_struct" can remember absolute pathname of a
 "struct vfsmount" which corresponds to "struct dentry".
 As we now understand that Al Viro is opposed to accessing "struct vfsmount" from
 LSM hooks inside VFS helper functions, we gave up delivering "struct vfsmount"
 to LSM hooks inside VFS helper functions.
 Kernel 2.6.26 introduced read-only bind mount feature, and hooks for that
 feature (i.e. mnt_want_write() and mnt_drop_write()) were inserted around
 VFS helper functions call. Since mnt_want_write() receives "struct vfsmount"
 which corresponds to "struct dentry" that will be passed to subsequent VFS
 helper functions call, we associated pathname of "struct vfsmount" with
 "struct task_struct" instead of associating "struct vfsmount" itself.

 This approach was not explicitly rejected, but there seems to be a performance
 problem.

(6) Introducing new LSM hooks.
 (this patch)

 We understand that adding new LSM hooks which receive "struct vfsmount" outside
 VFS helper functions is the most straightforward approach. This approach has
 less impact on existing LSM modules and no impact on VFS helper functions.

Signed-off-by: Kentaro Takeda &lt;takedakn&lt; at &gt;nttdata.co.jp&gt;
Signed-off-by: Tetsuo Handa &lt;penguin-kernel&lt; at &gt;I-love.SAKURA.ne.jp&gt;
Signed-off-by: Toshiharu Harada &lt;haradats&lt; at &gt;nttdata.co.jp&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Cc: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Cc: Crispin Cowan &lt;crispin&lt; at &gt;crispincowan.com&gt;
Cc: Stephen Smalley &lt;sds&lt; at &gt;tycho.nsa.gov&gt;
Cc: Casey Schaufler &lt;casey&lt; at &gt;schaufler-ca.com&gt;
Cc: James Morris &lt;jmorris&lt; at &gt;namei.org&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 fs/namei.c               |   37 +++++++++
 fs/open.c                |    5 +
 include/linux/security.h |  139 +++++++++++++++++++++++++++++++++++++
 net/unix/af_unix.c       |    4 +
 security/Kconfig         |    9 ++
 security/capability.c    |   57 +++++++++++++++
 security/security.c      |   66 +++++++++++++++++
 7 files changed, 317 insertions(+)

diff -puN fs/namei.c~introduce-new-lsm-hooks-where-vfsmount-is-available fs/namei.c
--- a/fs/namei.c~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/fs/namei.c
@@ -1556,6 +1556,10 @@ int may_open(struct nameidata *nd, int a
  * Refuse to truncate files with mandatory locks held on them.
  */
 error = locks_verify_locked(inode);
+if (!error)
+error = security_path_truncate(&amp;nd-&gt;path, 0,
+       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
+       NULL);
 if (!error) {
 DQUOT_INIT(inode);
 
@@ -1586,7 +1590,11 @@ static int __open_namei_create(struct na
 
 if (!IS_POSIXACL(dir-&gt;d_inode))
 mode &amp;= ~current-&gt;fs-&gt;umask;
+error = security_path_mknod(&amp;nd-&gt;path, path-&gt;dentry, mode, 0);
+if (error)
+goto out_unlock;
 error = vfs_create(dir-&gt;d_inode, path-&gt;dentry, mode, nd);
+out_unlock:
 mutex_unlock(&amp;dir-&gt;d_inode-&gt;i_mutex);
 dput(nd-&gt;path.dentry);
 nd-&gt;path.dentry = path-&gt;dentry;
@@ -1999,6 +2007,9 @@ asmlinkage long sys_mknodat(int dfd, con
 error = mnt_want_write(nd.path.mnt);
 if (error)
 goto out_dput;
+error = security_path_mknod(&amp;nd.path, dentry, mode, dev);
+if (error)
+goto out_drop_write;
 switch (mode &amp; S_IFMT) {
 case 0: case S_IFREG:
 error = vfs_create(nd.path.dentry-&gt;d_inode,dentry,mode,&amp;nd);
@@ -2011,6 +2022,7 @@ asmlinkage long sys_mknodat(int dfd, con
 error = vfs_mknod(nd.path.dentry-&gt;d_inode,dentry,mode,0);
 break;
 }
+out_drop_write:
 mnt_drop_write(nd.path.mnt);
 out_dput:
 dput(dentry);
@@ -2070,7 +2082,11 @@ asmlinkage long sys_mkdirat(int dfd, con
 error = mnt_want_write(nd.path.mnt);
 if (error)
 goto out_dput;
+error = security_path_mkdir(&amp;nd.path, dentry, mode);
+if (error)
+goto out_drop_write;
 error = vfs_mkdir(nd.path.dentry-&gt;d_inode, dentry, mode);
+out_drop_write:
 mnt_drop_write(nd.path.mnt);
 out_dput:
 dput(dentry);
@@ -2180,7 +2196,11 @@ static long do_rmdir(int dfd, const char
 error = mnt_want_write(nd.path.mnt);
 if (error)
 goto exit3;
+error = security_path_rmdir(&amp;nd.path, dentry);
+if (error)
+goto exit4;
 error = vfs_rmdir(nd.path.dentry-&gt;d_inode, dentry);
+exit4:
 mnt_drop_write(nd.path.mnt);
 exit3:
 dput(dentry);
@@ -2265,7 +2285,11 @@ static long do_unlinkat(int dfd, const c
 error = mnt_want_write(nd.path.mnt);
 if (error)
 goto exit2;
+error = security_path_unlink(&amp;nd.path, dentry);
+if (error)
+goto exit3;
 error = vfs_unlink(nd.path.dentry-&gt;d_inode, dentry);
+exit3:
 mnt_drop_write(nd.path.mnt);
 exit2:
 dput(dentry);
@@ -2346,7 +2370,11 @@ asmlinkage long sys_symlinkat(const char
 error = mnt_want_write(nd.path.mnt);
 if (error)
 goto out_dput;
+error = security_path_symlink(&amp;nd.path, dentry, from);
+if (error)
+goto out_drop_write;
 error = vfs_symlink(nd.path.dentry-&gt;d_inode, dentry, from);
+out_drop_write:
 mnt_drop_write(nd.path.mnt);
 out_dput:
 dput(dentry);
&lt; at &gt;&lt; at &gt; -2443,7 +2471,11 &lt; at &gt;&lt; at &gt; asmlinkage long sys_linkat(int olddfd, c
 error = mnt_want_write(nd.path.mnt);
 if (error)
 goto out_dput;
+error = security_path_link(old_path.dentry, &amp;nd.path, new_dentry);
+if (error)
+goto out_drop_write;
 error = vfs_link(old_path.dentry, nd.path.dentry-&gt;d_inode, new_dentry);
+out_drop_write:
 mnt_drop_write(nd.path.mnt);
 out_dput:
 dput(new_dentry);
&lt; at &gt;&lt; at &gt; -2677,8 +2709,13 &lt; at &gt;&lt; at &gt; asmlinkage long sys_renameat(int olddfd,
 error = mnt_want_write(oldnd.path.mnt);
 if (error)
 goto exit5;
+error = security_path_rename(&amp;oldnd.path, old_dentry,
+     &amp;newnd.path, new_dentry);
+if (error)
+goto exit6;
 error = vfs_rename(old_dir-&gt;d_inode, old_dentry,
    new_dir-&gt;d_inode, new_dentry);
+exit6:
 mnt_drop_write(oldnd.path.mnt);
 exit5:
 dput(new_dentry);
diff -puN fs/open.c~introduce-new-lsm-hooks-where-vfsmount-is-available fs/open.c
--- a/fs/open.c~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/fs/open.c
&lt; at &gt;&lt; at &gt; -272,6 +272,8 &lt; at &gt;&lt; at &gt; static long do_sys_truncate(const char _
 goto put_write_and_out;
 
 error = locks_verify_truncate(inode, NULL, length);
+if (!error)
+error = security_path_truncate(&amp;path, length, 0, NULL);
 if (!error) {
 DQUOT_INIT(inode);
 error = do_truncate(path.dentry, length, 0, NULL);
&lt; at &gt;&lt; at &gt; -329,6 +331,9 &lt; at &gt;&lt; at &gt; static long do_sys_ftruncate(unsigned in
 
 error = locks_verify_truncate(inode, file, length);
 if (!error)
+error = security_path_truncate(&amp;file-&gt;f_path, length,
+       ATTR_MTIME|ATTR_CTIME, file);
+if (!error)
 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
 out_putf:
 fput(file);
diff -puN include/linux/security.h~introduce-new-lsm-hooks-where-vfsmount-is-available include/linux/security.h
--- a/include/linux/security.h~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/include/linux/security.h
&lt; at &gt;&lt; at &gt; -335,17 +335,37 &lt; at &gt;&lt; at &gt; static inline void security_free_mnt_opt
  *&lt; at &gt;dir contains the inode structure of the parent directory of the new link.
  *&lt; at &gt;new_dentry contains the dentry structure for the new link.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_link:
+ *Check permission before creating a new hard link to a file.
+ *&lt; at &gt;old_dentry contains the dentry structure for an existing link
+ *to the file.
+ *&lt; at &gt;new_dir contains the path structure of the parent directory of
+ *the new link.
+ *&lt; at &gt;new_dentry contains the dentry structure for the new link.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_unlink:
  *Check the permission to remove a hard link to a file.
  *&lt; at &gt;dir contains the inode structure of parent directory of the file.
  *&lt; at &gt;dentry contains the dentry structure for file to be unlinked.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_unlink:
+ *Check the permission to remove a hard link to a file.
+ *&lt; at &gt;dir contains the path structure of parent directory of the file.
+ *&lt; at &gt;dentry contains the dentry structure for file to be unlinked.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_symlink:
  *Check the permission to create a symbolic link to a file.
  *&lt; at &gt;dir contains the inode structure of parent directory of the symbolic link.
  *&lt; at &gt;dentry contains the dentry structure of the symbolic link.
  *&lt; at &gt;old_name contains the pathname of file.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_symlink:
+ *Check the permission to create a symbolic link to a file.
+ *&lt; at &gt;dir contains the path structure of parent directory of
+ *the symbolic link.
+ *&lt; at &gt;dentry contains the dentry structure of the symbolic link.
+ *&lt; at &gt;old_name contains the pathname of file.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_mkdir:
  *Check permissions to create a new directory in the existing directory
  *associated with inode strcture &lt; at &gt;dir.
&lt; at &gt;&lt; at &gt; -353,11 +373,25 &lt; at &gt;&lt; at &gt; static inline void security_free_mnt_opt
  *&lt; at &gt;dentry contains the dentry structure of new directory.
  *&lt; at &gt;mode contains the mode of new directory.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_mkdir:
+ *Check permissions to create a new directory in the existing directory
+ *associated with path structure &lt; at &gt;dir.
+ *&lt; at &gt;dir contains the path structure of parent of the directory
+ *to be created.
+ *&lt; at &gt;dentry contains the dentry structure of new directory.
+ *&lt; at &gt;mode contains the mode of new directory.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_rmdir:
  *Check the permission to remove a directory.
  *&lt; at &gt;dir contains the inode structure of parent of the directory to be removed.
  *&lt; at &gt;dentry contains the dentry structure of directory to be removed.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_rmdir:
+ *Check the permission to remove a directory.
+ *&lt; at &gt;dir contains the path structure of parent of the directory to be
+ *removed.
+ *&lt; at &gt;dentry contains the dentry structure of directory to be removed.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_mknod:
  *Check permissions when creating a special file (or a socket or a fifo
  *file created via the mknod system call).  Note that if mknod operation
&lt; at &gt;&lt; at &gt; -368,6 +402,15 &lt; at &gt;&lt; at &gt; static inline void security_free_mnt_opt
  *&lt; at &gt;mode contains the mode of the new file.
  *&lt; at &gt;dev contains the device number.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_mknod:
+ *Check permissions when creating a file. Note that this hook is called
+ *even if mknod operation is being done for a regular file.
+ *&lt; at &gt;dir contains the path structure of parent of the new file.
+ *&lt; at &gt;dentry contains the dentry structure of the new file.
+ *&lt; at &gt;mode contains the mode of the new file.
+ *&lt; at &gt;dev contains the undecoded device number. Use new_decode_dev() to get
+ *the decoded device number.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_rename:
  *Check for permission to rename a file or directory.
  *&lt; at &gt;old_dir contains the inode structure for parent of the old link.
&lt; at &gt;&lt; at &gt; -375,6 +418,13 &lt; at &gt;&lt; at &gt; static inline void security_free_mnt_opt
  *&lt; at &gt;new_dir contains the inode structure for parent of the new link.
  *&lt; at &gt;new_dentry contains the dentry structure of the new link.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_rename:
+ *Check for permission to rename a file or directory.
+ *&lt; at &gt;old_dir contains the path structure for parent of the old link.
+ *&lt; at &gt;old_dentry contains the dentry structure of the old link.
+ *&lt; at &gt;new_dir contains the path structure for parent of the new link.
+ *&lt; at &gt;new_dentry contains the dentry structure of the new link.
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_readlink:
  *Check the permission to read the symbolic link.
  *&lt; at &gt;dentry contains the dentry structure for the file link.
&lt; at &gt;&lt; at &gt; -403,6 +453,13 &lt; at &gt;&lt; at &gt; static inline void security_free_mnt_opt
  *&lt; at &gt;dentry contains the dentry structure for the file.
  *&lt; at &gt;attr is the iattr structure containing the new file attributes.
  *Return 0 if permission is granted.
+ * &lt; at &gt;path_truncate:
+ *Check permission before truncating a file.
+ *&lt; at &gt;path contains the path structure for the file.
+ *&lt; at &gt;length is the new length of the file.
+ *&lt; at &gt;time_attrs is the flags passed to do_truncate().
+ *&lt; at &gt;filp is the file structure (may be NULL).
+ *Return 0 if permission is granted.
  * &lt; at &gt;inode_getattr:
  *Check permission before obtaining file attributes.
  *&lt; at &gt;mnt is the vfsmount where the dentry was looked up
&lt; at &gt;&lt; at &gt; -1331,6 +1388,22 &lt; at &gt;&lt; at &gt; struct security_operations {
    struct super_block *newsb);
 int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
 
+#ifdef CONFIG_SECURITY_PATH
+int (*path_unlink) (struct path *dir, struct dentry *dentry);
+int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode);
+int (*path_rmdir) (struct path *dir, struct dentry *dentry);
+int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
+   unsigned int dev);
+int (*path_truncate) (struct path *path, loff_t length,
+      unsigned int time_attrs, struct file *filp);
+int (*path_symlink) (struct path *dir, struct dentry *dentry,
+     const char *old_name);
+int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
+  struct dentry *new_dentry);
+int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
+    struct path *new_dir, struct dentry *new_dentry);
+#endif
+
 int (*inode_alloc_security) (struct inode *inode);
 void (*inode_free_security) (struct inode *inode);
 int (*inode_init_security) (struct inode *inode, struct inode *dir,
&lt; at &gt;&lt; at &gt; -2705,6 +2778,72 &lt; at &gt;&lt; at &gt; static inline void security_skb_classify
 
 #endif/* CONFIG_SECURITY_NETWORK_XFRM */
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_unlink(struct path *dir, struct dentry *dentry);
+int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
+int security_path_rmdir(struct path *dir, struct dentry *dentry);
+int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+unsigned int dev);
+int security_path_truncate(struct path *path, loff_t length,
+   unsigned int time_attrs, struct file *filp);
+int security_path_symlink(struct path *dir, struct dentry *dentry,
+  const char *old_name);
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+       struct dentry *new_dentry);
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+ struct path *new_dir, struct dentry *new_dentry);
+#else/* CONFIG_SECURITY_PATH */
+static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
+{
+return 0;
+}
+
+static inline int security_path_mkdir(struct path *dir, struct dentry *dentry,
+      int mode)
+{
+return 0;
+}
+
+static inline int security_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+return 0;
+}
+
+static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
+      int mode, unsigned int dev)
+{
+return 0;
+}
+
+static inline int security_path_truncate(struct path *path, loff_t length,
+ unsigned int time_attrs,
+ struct file *filp)
+{
+return 0;
+}
+
+static inline int security_path_symlink(struct path *dir, struct dentry *dentry,
+const char *old_name)
+{
+return 0;
+}
+
+static inline int security_path_link(struct dentry *old_dentry,
+     struct path *new_dir,
+     struct dentry *new_dentry)
+{
+return 0;
+}
+
+static inline int security_path_rename(struct path *old_dir,
+       struct dentry *old_dentry,
+       struct path *new_dir,
+       struct dentry *new_dentry)
+{
+return 0;
+}
+#endif/* CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_KEYS
 #ifdef CONFIG_SECURITY
 
diff -puN net/unix/af_unix.c~introduce-new-lsm-hooks-where-vfsmount-is-available net/unix/af_unix.c
--- a/net/unix/af_unix.c~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/net/unix/af_unix.c
&lt; at &gt;&lt; at &gt; -836,7 +836,11 &lt; at &gt;&lt; at &gt; static int unix_bind(struct socket *sock
 err = mnt_want_write(nd.path.mnt);
 if (err)
 goto out_mknod_dput;
+err = security_path_mknod(&amp;nd.path, dentry, mode, 0);
+if (err)
+goto out_mknod_drop_write;
 err = vfs_mknod(nd.path.dentry-&gt;d_inode, dentry, mode, 0);
+out_mknod_drop_write:
 mnt_drop_write(nd.path.mnt);
 if (err)
 goto out_mknod_dput;
diff -puN security/Kconfig~introduce-new-lsm-hooks-where-vfsmount-is-available security/Kconfig
--- a/security/Kconfig~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/security/Kconfig
&lt; at &gt;&lt; at &gt; -81,6 +81,15 &lt; at &gt;&lt; at &gt; config SECURITY_NETWORK_XFRM
   IPSec.
   If you are unsure how to answer this question, answer N.
 
+config SECURITY_PATH
+bool "Security hooks for pathname based access control"
+depends on SECURITY
+help
+  This enables the security hooks for pathname based access control.
+  If enabled, a security module can use these hooks to
+  implement pathname based access controls.
+  If you are unsure how to answer this question, answer N.
+
 config SECURITY_FILE_CAPABILITIES
 bool "File POSIX Capabilities"
 default n
diff -puN security/capability.c~introduce-new-lsm-hooks-where-vfsmount-is-available security/capability.c
--- a/security/capability.c~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/security/capability.c
&lt; at &gt;&lt; at &gt; -263,6 +263,53 &lt; at &gt;&lt; at &gt; static void cap_inode_getsecid(const str
 *secid = 0;
 }
 
+#ifdef CONFIG_SECURITY_PATH
+static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+  unsigned int dev)
+{
+return 0;
+}
+
+static int cap_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
+{
+return 0;
+}
+
+static int cap_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+return 0;
+}
+
+static int cap_path_unlink(struct path *dir, struct dentry *dentry)
+{
+return 0;
+}
+
+static int cap_path_symlink(struct path *dir, struct dentry *dentry,
+    const char *old_name)
+{
+return 0;
+}
+
+static int cap_path_link(struct dentry *old_dentry, struct path *new_dir,
+ struct dentry *new_dentry)
+{
+return 0;
+}
+
+static int cap_path_rename(struct path *old_path, struct dentry *old_dentry,
+   struct path *new_path, struct dentry *new_dentry)
+{
+return 0;
+}
+
+static int cap_path_truncate(struct path *path, loff_t length,
+     unsigned int time_attrs, struct file *filp)
+{
+return 0;
+}
+#endif
+
 static int cap_file_permission(struct file *file, int mask)
 {
 return 0;
&lt; at &gt;&lt; at &gt; -883,6 +930,16 &lt; at &gt;&lt; at &gt; void security_fixup_ops(struct security_
 set_to_cap_if_null(ops, inode_setsecurity);
 set_to_cap_if_null(ops, inode_listsecurity);
 set_to_cap_if_null(ops, inode_getsecid);
+#ifdef CONFIG_SECURITY_PATH
+set_to_cap_if_null(ops, path_mknod);
+set_to_cap_if_null(ops, path_mkdir);
+set_to_cap_if_null(ops, path_rmdir);
+set_to_cap_if_null(ops, path_unlink);
+set_to_cap_if_null(ops, path_symlink);
+set_to_cap_if_null(ops, path_link);
+set_to_cap_if_null(ops, path_rename);
+set_to_cap_if_null(ops, path_truncate);
+#endif
 set_to_cap_if_null(ops, file_permission);
 set_to_cap_if_null(ops, file_alloc_security);
 set_to_cap_if_null(ops, file_free_security);
diff -puN security/security.c~introduce-new-lsm-hooks-where-vfsmount-is-available security/security.c
--- a/security/security.c~introduce-new-lsm-hooks-where-vfsmount-is-available
+++ a/security/security.c
&lt; at &gt;&lt; at &gt; -355,6 +355,72 &lt; at &gt;&lt; at &gt; int security_inode_init_security(struct 
 }
 EXPORT_SYMBOL(security_inode_init_security);
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_mknod(struct path *path, struct dentry *dentry, int mode,
+unsigned int dev)
+{
+if (unlikely(IS_PRIVATE(path-&gt;dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_mknod(path, dentry, mode, dev);
+}
+EXPORT_SYMBOL(security_path_mknod);
+
+int security_path_mkdir(struct path *path, struct dentry *dentry, int mode)
+{
+if (unlikely(IS_PRIVATE(path-&gt;dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_mkdir(path, dentry, mode);
+}
+
+int security_path_rmdir(struct path *path, struct dentry *dentry)
+{
+if (unlikely(IS_PRIVATE(path-&gt;dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_rmdir(path, dentry);
+}
+
+int security_path_unlink(struct path *path, struct dentry *dentry)
+{
+if (unlikely(IS_PRIVATE(path-&gt;dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_unlink(path, dentry);
+}
+
+int security_path_symlink(struct path *path, struct dentry *dentry,
+  const char *old_name)
+{
+if (unlikely(IS_PRIVATE(path-&gt;dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_symlink(path, dentry, old_name);
+}
+
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+       struct dentry *new_dentry)
+{
+if (unlikely(IS_PRIVATE(old_dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_link(old_dentry, new_dir, new_dentry);
+}
+
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+ struct path *new_dir, struct dentry *new_dentry)
+{
+if (unlikely(IS_PRIVATE(old_dentry-&gt;d_inode) ||
+     (new_dentry-&gt;d_inode &amp;&amp; IS_PRIVATE(new_dentry-&gt;d_inode))))
+return 0;
+return security_ops-&gt;path_rename(old_dir, old_dentry, new_dir,
+ new_dentry);
+}
+
+int security_path_truncate(struct path *path, loff_t length,
+   unsigned int time_attrs, struct file *filp)
+{
+if (unlikely(IS_PRIVATE(path-&gt;dentry-&gt;d_inode)))
+return 0;
+return security_ops-&gt;path_truncate(path, length, time_attrs, filp);
+}
+#endif
+
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 if (unlikely(IS_PRIVATE(dir)))
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:54</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27810">
    <title>[patch 03/11] fs/namespace.c: drop code after return</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27810</link>
    <description>From: Julia Lawall &lt;julia&lt; at &gt;diku.dk&gt;

The extra semicolon serves no purpose.

Signed-off-by: Julia Lawall &lt;julia&lt; at &gt;diku.dk&gt;
Reviewed-by: Richard Genoud &lt;richard.genoud&lt; at &gt;gmail.com&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Cc: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 fs/namespace.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN fs/namespace.c~fs-namespacec-drop-code-after-return fs/namespace.c
--- a/fs/namespace.c~fs-namespacec-drop-code-after-return
+++ a/fs/namespace.c
&lt; at &gt;&lt; at &gt; -1990,7 +1990,7 &lt; at &gt;&lt; at &gt; static struct mnt_namespace *dup_mnt_ns(
 if (!new_ns-&gt;root) {
 up_write(&amp;namespace_sem);
 kfree(new_ns);
-return ERR_PTR(-ENOMEM);;
+return ERR_PTR(-ENOMEM);
 }
 spin_lock(&amp;vfsmount_lock);
 list_add_tail(&amp;new_ns-&gt;list, &amp;new_ns-&gt;root-&gt;mnt_list);
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:51</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27809">
    <title>[patch 01/11] vfs: fix vfs_rename_dir for FS_RENAME_DOES_D_MOVE filesystems</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27809</link>
    <description>From: Miklos Szeredi &lt;mszeredi&lt; at &gt;suse.cz&gt;

vfs_rename_dir() doesn't properly account for filesystems with
FS_RENAME_DOES_D_MOVE.  If new_dentry has a target inode attached, it
unhashes the new_dentry prior to the rename() iop and rehashes it after,
but doesn't account for the possibility that rename() may have swapped
{old,new}_dentry.  For FS_RENAME_DOES_D_MOVE filesystems, it rehashes
new_dentry (now the old renamed-from name, which d_move() expected to go
away), such that a subsequent lookup will find it.

This was caught by the recently posted POSIX fstest suite, rename/10.t
test 62 (and others) on ceph.

The bug was introduced by: commit 349457ccf2592c14bdf13b6706170ae2e94931b1
"[PATCH] Allow file systems to manually d_move() inside of -&gt;rename()"

Fix by not rehashing the new dentry.  Rehashing used to be needed by
d_move() but isn't anymore.

Reported-by: Sage Weil &lt;sage&lt; at &gt;newdream.net&gt;
Cc: Zach Brown &lt;zach.brown&lt; at &gt;oracle.com&gt;
Signed-off-by: Miklos Szeredi &lt;mszeredi&lt; at &gt;suse.cz&gt;
Cc: Mark Fasheh &lt;mark.fasheh&lt; at &gt;oracle.com&gt;
Cc: Trond Myklebust &lt;trond.myklebust&lt; at &gt;fys.uio.no&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Cc: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 fs/namei.c |    2 --
 1 file changed, 2 deletions(-)

diff -puN fs/namei.c~vfs-fix-vfs_rename_dir-for-fs_rename_does_d_move-filesystems fs/namei.c
--- a/fs/namei.c~vfs-fix-vfs_rename_dir-for-fs_rename_does_d_move-filesystems
+++ a/fs/namei.c
&lt; at &gt;&lt; at &gt; -2528,8 +2528,6 &lt; at &gt;&lt; at &gt; static int vfs_rename_dir(struct inode *
 if (!error)
 target-&gt;i_flags |= S_DEAD;
 mutex_unlock(&amp;target-&gt;i_mutex);
-if (d_unhashed(new_dentry))
-d_rehash(new_dentry);
 dput(new_dentry);
 }
 if (!error)
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:49</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27808">
    <title>[patch 08/11] vfs: document FMODE_ constants</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27808</link>
    <description>From: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;

Ensure that all FMODE_ constants are documented, and ensure a coherent
style for the already existing comments.

Signed-off-by: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 include/linux/fs.h |   32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff -puN include/linux/fs.h~vfs-document-fmode_-constants include/linux/fs.h
--- a/include/linux/fs.h~vfs-document-fmode_-constants
+++ a/include/linux/fs.h
&lt; at &gt;&lt; at &gt; -54,21 +54,23 &lt; at &gt;&lt; at &gt; struct inodes_stat_t {
 #define MAY_ACCESS 16
 #define MAY_OPEN 32
 
-#define FMODE_READ ((__force fmode_t)1)
-#define FMODE_WRITE ((__force fmode_t)2)
-
-/* Internal kernel extensions */
-#define FMODE_LSEEK((__force fmode_t)4)
-#define FMODE_PREAD((__force fmode_t)8)
-#define FMODE_PWRITEFMODE_PREAD/* These go hand in hand */
-
-/* File is being opened for execution. Primary users of this flag are
-   distributed filesystems that can use it to achieve correct ETXTBUSY
-   behavior for cross-node execution/opening_for_writing of files */
-#define FMODE_EXEC((__force fmode_t)16)
-
-#define FMODE_NDELAY((__force fmode_t)32)
-#define FMODE_EXCL((__force fmode_t)64)
+/* file is open for reading */
+#define FMODE_READ((__force fmode_t)1)
+/* file is open for writing */
+#define FMODE_WRITE((__force fmode_t)2)
+/* file is seekable */
+#define FMODE_LSEEK((__force fmode_t)4)
+/* file can be accessed using pread/pwrite */
+#define FMODE_PREAD((__force fmode_t)8)
+#define FMODE_PWRITEFMODE_PREAD/* These go hand in hand */
+/* File is opened for execution with sys_execve / sys_uselib */
+#define FMODE_EXEC((__force fmode_t)16)
+/* File is opened with O_NDELAY (only set for block devices) */
+#define FMODE_NDELAY((__force fmode_t)32)
+/* File is opened with O_EXCL (only set for block devices) */
+#define FMODE_EXCL((__force fmode_t)64)
+/* File is opened using open(.., 3, ..) and is writeable only for ioctls
+   (special hack for floppy.c) */
 #define FMODE_WRITE_IOCTL((__force fmode_t)128)
 
 #define RW_MASK1
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:58</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27807">
    <title>[patch 06/11] kill suid bit only for regular files</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27807</link>
    <description>From: Dmitri Monakhov &lt;dmonakhov&lt; at &gt;openvz.org&gt;

We don't have to kill the suid bit for non-regular files, because it is
useless there.  In fact, a block device may trigger this path without
holding dentry-&gt;d_inode-&gt;i_mutex.

(akpm: concerns were expressed (by me) about S_ISDIR inodes)

Signed-off-by: Dmitri Monakhov &lt;dmonakhov&lt; at &gt;openvz.org&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 mm/filemap.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN mm/filemap.c~kill-suid-bit-only-for-regular-files mm/filemap.c
--- a/mm/filemap.c~kill-suid-bit-only-for-regular-files
+++ a/mm/filemap.c
&lt; at &gt;&lt; at &gt; -1766,7 +1766,7 &lt; at &gt;&lt; at &gt; int should_remove_suid(struct dentry *de
 if (unlikely((mode &amp; S_ISGID) &amp;&amp; (mode &amp; S_IXGRP)))
 kill |= ATTR_KILL_SGID;
 
-if (unlikely(kill &amp;&amp; !capable(CAP_FSETID)))
+if (unlikely(kill &amp;&amp; !capable(CAP_FSETID) &amp;&amp; S_ISREG(mode)))
 return kill;
 
 return 0;
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:56</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27806">
    <title>[patch 02/11] include: linux/fs.h: put declarations in __KERNEL__</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27806</link>
    <description>From: Jan Engelhardt &lt;jengelh&lt; at &gt;medozas.de&gt;

An anonymous user tried to use symbols from /usr/include/linux/fs.h.
Since these however are not defined in libc, but are specific to the
kernel, they should be in an #ifdef __KERNEL__ section. Move them
there.

Signed-off-by: Jan Engelhardt &lt;jengelh&lt; at &gt;medozas.de&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 include/linux/fs.h |   18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff -puN include/linux/fs.h~include-linux-fsh-put-declarations-in-__kernel__ include/linux/fs.h
--- a/include/linux/fs.h~include-linux-fsh-put-declarations-in-__kernel__
+++ a/include/linux/fs.h
&lt; at &gt;&lt; at &gt; -21,7 +21,6 &lt; at &gt;&lt; at &gt;
 
 /* Fixed constants first: */
 #undef NR_OPEN
-extern int sysctl_nr_open;
 #define INR_OPEN 1024/* Initial setting for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
&lt; at &gt;&lt; at &gt; -38,21 +37,13 &lt; at &gt;&lt; at &gt; struct files_stat_struct {
 int nr_free_files;/* read only */
 int max_files;/* tunable */
 };
-extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
 
 struct inodes_stat_t {
 int nr_inodes;
 int nr_unused;
 int dummy[5];/* padding for sysctl ABI compatibility */
 };
-extern struct inodes_stat_t inodes_stat;
 
-extern int leases_enable, lease_break_time;
-
-#ifdef CONFIG_DNOTIFY
-extern int dir_notify_enable;
-#endif
 
 #define NR_FILE  8192/* this can well be larger on a larger system */
 
&lt; at &gt;&lt; at &gt; -321,6 +312,15 &lt; at &gt;&lt; at &gt; extern void __init inode_init(void);
 extern void __init inode_init_early(void);
 extern void __init files_init(unsigned long);
 
+extern struct files_stat_struct files_stat;
+extern int get_max_files(void);
+extern int sysctl_nr_open;
+extern struct inodes_stat_t inodes_stat;
+extern int leases_enable, lease_break_time;
+#ifdef CONFIG_DNOTIFY
+extern int dir_notify_enable;
+#endif
+
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 struct buffer_head *bh_result, int create);
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:50</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27805">
    <title>[patch 05/11] fs/block_dev.c: __read_mostly improvement and sb_is_blkdev_sb utilization</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27805</link>
    <description>From: Denis ChengRq &lt;crquan&lt; at &gt;gmail.com&gt;

- iget5_locked in bdget really needs blockdev_superblock, instead of
  bd_mnt, so bd_mnt could be just a local variable;

- blockdev_superblock really needs __read_mostly, while the local variable
  bd_mnt does not;

- make use of sb_is_blkdev_sb in bd_forget, instead of direct reference
  to blockdev_superblock.

Signed-off-by: Denis ChengRq &lt;crquan&lt; at &gt;gmail.com&gt;
Cc: Al Viro &lt;viro&lt; at &gt;zeniv.linux.org.uk&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 fs/block_dev.c |    9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff -puN fs/block_dev.c~fs-block_devc-__read_mostly-improvement-and-sb_is_blkdev_sb-utilization fs/block_dev.c
--- a/fs/block_dev.c~fs-block_devc-__read_mostly-improvement-and-sb_is_blkdev_sb-utilization
+++ a/fs/block_dev.c
&lt; at &gt;&lt; at &gt; -326,12 +326,13 &lt; at &gt;&lt; at &gt; static struct file_system_type bd_type =
 .kill_sb= kill_anon_super,
 };
 
-static struct vfsmount *bd_mnt __read_mostly;
-struct super_block *blockdev_superblock;
+struct super_block *blockdev_superblock __read_mostly;
 
 void __init bdev_cache_init(void)
 {
 int err;
+struct vfsmount *bd_mnt;
+
 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
 SLAB_MEM_SPREAD|SLAB_PANIC),
&lt; at &gt;&lt; at &gt; -373,7 +374,7 &lt; at &gt;&lt; at &gt; struct block_device *bdget(dev_t dev)
 struct block_device *bdev;
 struct inode *inode;
 
-inode = iget5_locked(bd_mnt-&gt;mnt_sb, hash(dev),
+inode = iget5_locked(blockdev_superblock, hash(dev),
 bdev_test, bdev_set, &amp;dev);
 
 if (!inode)
&lt; at &gt;&lt; at &gt; -463,7 +464,7 &lt; at &gt;&lt; at &gt; void bd_forget(struct inode *inode)
 
 spin_lock(&amp;bdev_lock);
 if (inode-&gt;i_bdev) {
-if (inode-&gt;i_sb != blockdev_superblock)
+if (!sb_is_blkdev_sb(inode-&gt;i_sb))
 bdev = inode-&gt;i_bdev;
 __bd_forget(inode);
 }
_
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo&lt; at &gt;vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

</description>
    <dc:creator>akpm&lt; at &gt;linux-foundation.org</dc:creator>
    <dc:date>2008-12-01T22:34:56</dc:date>
  </item>
  <item rdf:about="http://comments.gmane.org/gmane.linux.file-systems/27804">
    <title>[patch 07/11] vfs: kill FMODE_NDELAY_NOW</title>
    <link>http://comments.gmane.org/gmane.linux.file-systems/27804</link>
    <description>From: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;

Update FMODE_NDELAY before each ioctl call so that we can kill the magic
FMODE_NDELAY_NOW.  It would be even better to do this directly in setfl(),
but for that we'd need to have FMODE_NDELAY for all files, not just block
special files.

Signed-off-by: Christoph Hellwig &lt;hch&lt; at &gt;lst.de&gt;
Signed-off-by: Andrew Morton &lt;akpm&lt; at &gt;linux-foundation.org&gt;
---

 block/compat_ioctl.c |    8 +++++++-
 drivers/scsi/sd.c    |    2 +-
 drivers/scsi/sr.c    |    2 +-
 fs/block_dev.c       |   10 +++++++++-
 include/linux/fs.h   |    1 -
 5 files changed, 18 insertions(+), 5 deletions(-)

diff -puN block/compat_ioctl.c~vfs-kill-fmode_ndelay_now block/compat_ioctl.c
--- a/block/compat_ioctl.c~vfs-kill-fmode_ndelay_now
+++ a/block/compat_ioctl.c
@@ -699,8 +699,14 @@ long compat_blkdev_ioctl(struct file *fi
 struct backing_dev_info *bdi;
 loff_t size;
 
+/*
+ * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+ * to update it before every ioctl.
+ */
 if (file-&gt;f_flags &amp; O_NDELAY)
-mode |= FMODE_NDELAY_NOW;
+mode |= FMODE_NDELAY;
+else
+mode &amp;= ~FMODE_NDELAY;
 
 switch (cmd) {
 case HDIO_GETGEO:
diff -puN drivers/scsi/sd.c~vfs-kill-fmode_ndelay_now drivers/scsi/sd.c
--- a/drivers/scsi/sd.c~vfs-kill-fmode_ndelay_now
+++ a/drivers/scsi/sd.c
@@ -757,7 +757,7 @@ static int sd_ioctl(struct block_device 
  * access to the device is prohibited.
  */
 error = scsi_nonblockable_ioctl(sdp, cmd, p,
-(mode &amp; FMODE_NDELAY_NOW) != 0);
+(mode &amp; FMODE_NDELAY) != 0);
 if (!scsi_block_when_processing_errors(sdp) || !error)
 return error;
 
diff -puN drivers/scsi/sr.c~vfs-kill-fmode_ndelay_now drivers/scsi/sr.c
--- a/drivers/scsi/sr.c~vfs-kill-fmode_ndelay_now
+++ a/drivers/scsi/sr.c
@@ -521,7 +521,7 @@ static int sr_block_ioctl(struct block_d
  * if it doesn't recognise the ioctl
  */
 ret = scsi_nonblockable_ioctl(sdev, cmd, argp,
-(mode &amp; FMODE_NDELAY_NOW) != 0);
+(mode &amp; FMODE_NDELAY) != 0);
 if (ret != -ENODEV)
 return ret;
 return scsi_ioctl(sdev, cmd, argp);
diff -puN fs/block_dev.c~vfs-kill-fmode_ndelay_now fs/block_dev.c
--- a/fs/block_dev.c~vfs-kill-fmode_ndelay_now
+++ a/fs/block_dev.c
@@ -1218,8 +1218,16 @@ static long block_ioctl(struct file *fil
 {
 struct block_device *bdev = I_BDEV(file-&gt;f_mapping-&gt;host);
 fmode_t mode = file-&gt;f_mode;
+
+/*
+ * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+ * to update it before every ioctl.
+ */
 if (file-&gt;f_flags &amp; O_NDELAY)
-mode |= FMODE_NDELAY_NOW;
+mode |= FMODE_NDELAY;
+else
+mode &amp;= ~FMODE_NDELAY;
+
 return blkdev_ioctl(bdev, mode, cmd, arg);
 }
 
diff -puN include/linux/fs.h~vfs-kill-fmode_ndelay_now include/linux/fs.h
--- a/include/linux/fs.h~vfs-kil