patch-2.3.99-pre4 linux/fs/nfs/dir.c
Next file: linux/fs/nfs/file.c
Previous file: linux/fs/nfs/Makefile
Back to the patch index
Back to the overall index
-  Lines: 1172
-  Date:
Sat Apr  1 08:04:27 2000
-  Orig file: 
v2.3.99-pre3/linux/fs/nfs/dir.c
-  Orig date: 
Mon Mar 27 08:08:29 2000
diff -u --recursive --new-file v2.3.99-pre3/linux/fs/nfs/dir.c linux/fs/nfs/dir.c
@@ -17,7 +17,6 @@
  *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
  */
 
-#define NFS_NEED_XDR_TYPES
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/stat.h>
@@ -28,7 +27,7 @@
 #include <linux/mm.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs.h>
+#include <linux/nfs_mount.h>
 #include <linux/pagemap.h>
 
 #include <asm/segment.h>	/* for fs functions */
@@ -71,202 +70,131 @@
 	setattr:	nfs_notify_change,
 };
 
-/* Each readdir response is composed of entries which look
- * like the following, as per the NFSv2 RFC:
- *
- *	__u32	not_end			zero if end of response
- *	__u32	file ID			opaque ino_t
- *	__u32	namelen			size of name string
- *	VAR	name string		the string, padded to modulo 4 bytes
- *	__u32	cookie			opaque ID of next entry
- *
- * When you hit not_end being zero, the next __u32 is non-zero if
- * this is the end of the complete set of readdir entires for this
- * directory.  This can be used, for example, to initiate pre-fetch.
- *
- * In order to know what to ask the server for, we only need to know
- * the final cookie of the previous page, and offset zero has cookie
- * zero, so we cache cookie to page offset translations in chunks.
- */
-#define COOKIES_PER_CHUNK (8 - ((sizeof(void *) / sizeof(__u32))))
-struct nfs_cookie_table {
-	struct nfs_cookie_table *next;
-	__u32	cookies[COOKIES_PER_CHUNK];
-};
-static kmem_cache_t *nfs_cookie_cachep;
+typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
 
-/* This whole scheme relies on the fact that dirent cookies
- * are monotonically increasing.
- *
- * Another invariant is that once we have a valid non-zero
- * EOF marker cached, we also have the complete set of cookie
- * table entries.
+/*
+ * Given a pointer to a buffer that has already been filled by a call
+ * to readdir, find the next entry.
  *
- * We return the page offset assosciated with the page where
- * cookie must be if it exists at all, however if we can not
- * figure that out conclusively, we return < 0.
- */
-static long __nfs_readdir_offset(struct inode *inode, __u32 cookie)
-{
-	struct nfs_cookie_table *p;
-	unsigned long ret = 0;
-
-	for(p = NFS_COOKIES(inode); p != NULL; p = p->next) {
-		int i;
-
-		for (i = 0; i < COOKIES_PER_CHUNK; i++) {
-			__u32 this_cookie = p->cookies[i];
-
-			/* End of known cookies, EOF is our only hope. */
-			if (!this_cookie)
-				goto check_eof;
-
-			/* Next cookie is larger, must be in previous page. */
-			if (this_cookie > cookie)
-				return ret;
-
-			ret += 1;
-
-			/* Exact cookie match, it must be in this page :-) */
-			if (this_cookie == cookie)
-				return ret;
+ * If the end of the buffer has been reached, return -EAGAIN, if not,
+ * return the offset within the buffer of the next entry to be
+ * read.
+ */
+static inline
+long find_dirent(struct page *page, loff_t offset,
+		 struct nfs_entry *entry,
+		 decode_dirent_t decode, int plus, int use_cookie)
+{
+	u8		*p = (u8 *)kmap(page),
+			*start = p;
+	unsigned long	base = page_offset(page),
+			pg_offset = 0;
+	int		loop_count = 0;
+
+	if (!p)
+		return -EIO;
+	for(;;) {
+		p = (u8*)decode((__u32*)p, entry, plus);
+		if (IS_ERR(p))
+			break;
+		pg_offset = p - start;
+		entry->prev = entry->offset;
+		entry->offset = base + pg_offset;
+		if ((use_cookie ? entry->cookie : entry->offset) > offset)
+			break;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
 		}
 	}
-check_eof:
-	if (NFS_DIREOF(inode) != 0)
-		return ret;
-
-	return -1L;
-}
 
-static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie)
-{
-	/* Cookie zero is always at page offset zero.   Optimize the
-	 * other common case since most directories fit entirely
-	 * in one page.
-	 */
-	if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode)))
-		return 0;
-	return __nfs_readdir_offset(inode, cookie);
+	kunmap(page);
+	return (IS_ERR(p)) ?  PTR_ERR(p) : (long)pg_offset;
 }
 
-/* Since a cookie of zero is declared special by the NFS
- * protocol, we easily can tell if a cookie in an existing
- * table chunk is valid or not.
+/*
+ * Find the given page, and call find_dirent() in order to try to
+ * return the next entry.
  *
- * NOTE: The cookies are indexed off-by-one because zero
- *       need not an entry.
+ * Returns -EIO if the page is not available, or is not up to date.
  */
-static __inline__ __u32 *find_cookie(struct inode *inode, unsigned long off)
-{
-	static __u32 cookie_zero = 0;
-	struct nfs_cookie_table *p;
-	__u32 *ret;
-
-	if (!off)
-		return &cookie_zero;
-	off -= 1;
-	p = NFS_COOKIES(inode);
-	while(off >= COOKIES_PER_CHUNK && p) {
-		off -= COOKIES_PER_CHUNK;
-		p = p->next;
-	}
-	ret = NULL;
-	if (p) {
-		ret = &p->cookies[off];
-		if (!*ret)
-			ret = NULL;
-	}
-	return ret;
-}
-
-#define NFS_NAMELEN_ALIGN(__len) ((((__len)+3)>>2)<<2)
-static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode)
-{
-	struct nfs_cookie_table **cpp;
-
-	cpp = (struct nfs_cookie_table **) &NFS_COOKIES(inode);
-	while (off >= COOKIES_PER_CHUNK && *cpp) {
-		off -= COOKIES_PER_CHUNK;
-		cpp = &(*cpp)->next;
-	}
-	if (*cpp) {
-		(*cpp)->cookies[off] = cookie;
-	} else {
-		struct nfs_cookie_table *new;
-		int i;
+static inline
+long find_dirent_page(struct inode *inode, loff_t offset,
+		      struct nfs_entry *entry)
+{
+	decode_dirent_t	decode = NFS_PROTO(inode)->decode_dirent;
+	struct page	*page;
+	unsigned long	index = entry->offset >> PAGE_CACHE_SHIFT;
+	long		status = -EIO;
+	int		plus = NFS_USE_READDIRPLUS(inode),
+			use_cookie = NFS_MONOTONE_COOKIES(inode);
+
+	dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", entry->offset & PAGE_CACHE_MASK);
+
+	if (entry->page)
+		page_cache_release(entry->page);
+
+	page = find_get_page(&inode->i_data, index);
+
+	if (page && Page_Uptodate(page))
+		status = find_dirent(page, offset, entry, decode, plus, use_cookie);
+
+	/* NB: on successful return we will be holding the page */
+	if (status < 0) {
+		entry->page = NULL;
+		if (page)
+			page_cache_release(page);
+	} else
+		entry->page = page;
 
-		new = kmem_cache_alloc(nfs_cookie_cachep, SLAB_ATOMIC);
-		if(!new)
-			return -1;
-		*cpp = new;
-		new->next = NULL;
-		for(i = 0; i < COOKIES_PER_CHUNK; i++) {
-			if (i == off) {
-				new->cookies[i] = cookie;
-			} else {
-				new->cookies[i] = 0;
-			}
-		}
-	}
-	return 0;
+	dfprintk(VFS, "NFS: find_dirent_page() returns %ld\n", status);
+	return status;
 }
 
-static struct page *try_to_get_dirent_page(struct file *, __u32, int);
 
-/* Recover from a revalidation flush.  The case here is that
- * the inode for the directory got invalidated somehow, and
- * all of our cached information is lost.  In order to get
- * a correct cookie for the current readdir request from the
- * user, we must (re-)fetch older readdir page cache entries.
+/*
+ * Iterate through the page cache pages, and return a
+ * filled nfs_entry structure of the next directory entry if possible.
+ *
+ * The target for the search is position 'offset'.
+ * The latter may either be an offset into the page cache, or (better)
+ * a cookie depending on whether we're interested in strictly following
+ * the RFC wrt. not assuming monotonicity of cookies or not.
  *
- * Returns < 0 if some error occurrs, else it is the page offset
- * to fetch.
+ * For most systems, the latter is more reliable since it naturally
+ * copes with holes in the directory.
  */
-static long refetch_to_readdir_cookie(struct file *file, struct inode *inode)
+static inline
+long search_cached_dirent_pages(struct inode *inode, loff_t offset,
+				struct nfs_entry *entry)
 {
-	struct page *page;
-	u32 goal_cookie = file->f_pos;
-	long cur_off, ret = -1L;
+	long		res = 0;
+	int		loop_count = 0;
 
-again:
-	cur_off = 0;
+	dfprintk(VFS, "NFS: search_cached_dirent_pages() searching for cookie %Ld\n", (long long)offset);
 	for (;;) {
-		page = find_get_page(&inode->i_data, cur_off);
-		if (page) {
-			if (!Page_Uptodate(page))
-				goto out_error;
-		} else {
-			__u32 *cp = find_cookie(inode, cur_off);
-
-			if (!cp)
-				goto out_error;
-
-			page = try_to_get_dirent_page(file, *cp, 0);
-			if (!page) {
-				if (!cur_off)
-					goto out_error;
-
-				/* Someone touched the dir on us. */
-				goto again;
-			}
+		res = find_dirent_page(inode, offset, entry);
+		if (res == -EAGAIN) {
+			/* Align to beginning of next page */
+			entry->offset &= PAGE_CACHE_MASK;
+			entry->offset += PAGE_CACHE_SIZE;
+		}
+		if (res != -EAGAIN)
+			break;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
 		}
-		page_cache_release(page);
-
-		if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0)
-			goto out;
-
-		cur_off += 1;
 	}
-out:
-	return ret;
-
-out_error:
-	if (page)
-		page_cache_release(page);
-	goto out;
+	if (res < 0 && entry->page) {
+		page_cache_release(entry->page);
+		entry->page = NULL;
+	}
+	dfprintk(VFS, "NFS: search_cached_dirent_pages() returned %ld\n", res);
+	return res;
 }
 
+
 /* Now we cache directories properly, by stuffing the dirent
  * data directly in the page cache.
  *
@@ -279,198 +207,240 @@
  *	 page-in of the RPC reply, nowhere else, this simplies
  *	 things substantially.
  */
+static inline
+long try_to_get_dirent_page(struct file *file, struct inode *inode,
+			    struct nfs_entry *entry)
+{
+	struct dentry	*dir = file->f_dentry;
+	struct page	*page;
+	__u32		*p;
+	unsigned long	index = entry->offset >> PAGE_CACHE_SHIFT;
+	long		res = 0;
+	unsigned int	dtsize = NFS_SERVER(inode)->dtsize;
+	int		plus = NFS_USE_READDIRPLUS(inode);
 
-static int nfs_dir_filler(struct dentry *dentry, struct page *page)
-{
-	struct nfs_readdirargs rd_args;
-	struct nfs_readdirres rd_res;
-	struct inode *inode = dentry->d_inode;
-	long offset = page->index;
-	__u32 *cookiep;
-	int err;
-
-	kmap(page);
-
-	err = -EIO;
-	cookiep = find_cookie(inode, offset);
-	if (!cookiep)
-		goto fail;
-
-	rd_args.fh = NFS_FH(dentry);
-	rd_res.buffer = (char *)page_address(page);
-	rd_res.bufsiz = PAGE_CACHE_SIZE;
-	rd_res.cookie = *cookiep;
-	do {
-		rd_args.buffer = rd_res.buffer;
-		rd_args.bufsiz = rd_res.bufsiz;
-		rd_args.cookie = rd_res.cookie;
-		err = rpc_call(NFS_CLIENT(inode),
-			     NFSPROC_READDIR, &rd_args, &rd_res, 0); 
-		if (err < 0)
-			goto fail;
-	} while(rd_res.bufsiz > 0);
-
-	err = -EIO;
-	if (rd_res.bufsiz < 0)
-		NFS_DIREOF(inode) = rd_res.cookie;
-	else if (create_cookie(rd_res.cookie, offset, inode))
-		goto fail;
+	dfprintk(VFS, "NFS: try_to_get_dirent_page() reading directory page @ index %ld\n", index);
 
-	SetPageUptodate(page);
-	kunmap(page);
-	UnlockPage(page);
-	return 0;
-fail:
-	SetPageError(page);
-	kunmap(page);
-	UnlockPage(page);
-	return err;
-}
+	page = grab_cache_page(&inode->i_data, index);
 
-static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok)
-{
-	struct dentry *dentry = file->f_dentry;
-	struct inode *inode = dentry->d_inode;
-	struct page *page;
-	long offset;
+	if (!page) {
+		res = -ENOMEM;
+		goto out;
+	}
 
-	if ((offset = nfs_readdir_offset(inode, cookie)) < 0) {
-		if (!refetch_ok ||
-		    (offset = refetch_to_readdir_cookie(file, inode)) < 0) {
-			goto fail;
-		}
+	if (Page_Uptodate(page)) {
+		dfprintk(VFS, "NFS: try_to_get_dirent_page(): page already up to date.\n");
+		goto unlock_out;
 	}
 
-	page = read_cache_page(&inode->i_data, offset,
-				(filler_t *)nfs_dir_filler, dentry);
-	if (IS_ERR(page))
-		goto fail;
-	if (!Page_Uptodate(page))
-		goto fail2;
-	return page;
+	p = (__u32 *)kmap(page);
 
-fail2:
+	if (dtsize > PAGE_CACHE_SIZE)
+		dtsize = PAGE_CACHE_SIZE;
+	res = NFS_PROTO(inode)->readdir(dir, entry->cookie, p, dtsize, plus);
+
+	kunmap(page);
+
+	if (res < 0)
+		goto error;
+	if (PageError(page))
+		ClearPageError(page);
+	SetPageUptodate(page);
+
+ unlock_out:
+	UnlockPage(page);
 	page_cache_release(page);
-fail:
-	return NULL;
+ out:
+	dfprintk(VFS, "NFS: try_to_get_dirent_page() returns %ld\n", res);
+	return res;
+ error:
+	SetPageError(page);
+	goto unlock_out;
 }
 
-/* Seek up to dirent assosciated with the passed in cookie,
- * then fill in dirents found.  Return the last cookie
- * actually given to the user, to update the file position.
+/* Recover from a revalidation flush.  The case here is that
+ * the inode for the directory got invalidated somehow, and
+ * all of our cached information is lost.  In order to get
+ * a correct cookie for the current readdir request from the
+ * user, we must (re-)fetch all the older readdir page cache
+ * entries.
+ *
+ * Returns < 0 if some error occurs.
  */
-static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie,
-				     void *dirent, filldir_t filldir)
-{
-	u32 end;
+static inline
+long refetch_to_readdir(struct file *file, struct inode *inode,
+			loff_t off, struct nfs_entry *entry)
+{
+	struct nfs_entry	my_dirent,
+				*dirent = &my_dirent;
+	long			res;
+	int			plus = NFS_USE_READDIRPLUS(inode),
+				use_cookie = NFS_MONOTONE_COOKIES(inode),
+				loop_count = 0;
+
+	dfprintk(VFS, "NFS: refetch_to_readdir() searching for cookie %Ld\n", (long long)off);
+	*dirent = *entry;
+	entry->page = NULL;
+
+	for (res = 0;res >= 0;) {
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
 
-	while((end = *p++) != 0) {
-		__u32 fileid, len, skip, this_cookie;
-		char *name;
-
-		fileid = *p++;
-		len = *p++;
-		name = (char *) p;
-		skip = NFS_NAMELEN_ALIGN(len);
-		p += (skip >> 2);
-		this_cookie = *p++;
+		/* Search for last cookie in page cache */
+		res = search_cached_dirent_pages(inode, off, dirent);
 
-		if (this_cookie < cookie)
+		if (res >= 0) {
+			/* Cookie was found */
+			if ((use_cookie?dirent->cookie:dirent->offset) > off) {
+				*entry = *dirent;
+				dirent->page = NULL;
+				break;
+			}
 			continue;
+		}
+
+		if (dirent->page)
+			page_cache_release(dirent->page);
+		dirent->page = NULL;
 
-		cookie = this_cookie;
-		if (filldir(dirent, name, len, cookie, fileid) < 0)
+		if (res != -EIO) {
+			*entry = *dirent;
 			break;
+		}
+
+		/* Read in a new page */
+		res = try_to_get_dirent_page(file, inode, dirent);
+		if (res == -EBADCOOKIE) {
+			memset(dirent, 0, sizeof(*dirent));
+			nfs_zap_caches(inode);
+			res = 0;
+		}
+		/* We requested READDIRPLUS, but the server doesn't grok it */
+		if (plus && res == -ENOTSUPP) {
+			NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+			memset(dirent, 0, sizeof(*dirent));
+			nfs_zap_caches(inode);
+			plus = 0;
+			res = 0;
+		}
 	}
+	if (dirent->page)
+		page_cache_release(dirent->page);
 
-	return cookie;
+	dfprintk(VFS, "NFS: refetch_to_readdir() returns %ld\n", res);
+	return res;
 }
 
-/* The file offset position is represented in pure bytes, to
- * make the page cache interface straight forward.
- *
- * However, some way is needed to make the connection between the
- * opaque NFS directory entry cookies and our offsets, so a per-inode
- * cookie cache table is used.
+/*
+ * Once we've found the start of the dirent within a page: fill 'er up...
  */
-static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct dentry *dentry = filp->f_dentry;
-	struct inode *inode = dentry->d_inode;
-	struct page *page;
-	long offset;
-	int res;
-
-	res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
-	if (res < 0)
-		return res;
-
-	if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode))
-		return 0;
-
-	if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0)
-		goto no_dirent_page;
-
-	page = find_get_page(&inode->i_data, offset);
-	if (!page)
-		goto no_dirent_page;
-	if (!Page_Uptodate(page))
-		goto dirent_read_error;
-success:
-	kmap(page);
-	filp->f_pos = nfs_do_filldir((__u32 *) page_address(page),
-				     filp->f_pos, dirent, filldir);
+static
+int nfs_do_filldir(struct file *file, struct inode *inode,
+		   struct nfs_entry *entry, void *dirent, filldir_t filldir)
+{
+	decode_dirent_t	decode = NFS_PROTO(inode)->decode_dirent;
+	struct page	*page = entry->page;
+	__u8		*p,
+			*start;
+	unsigned long	base = page_offset(page),
+			offset = entry->offset,
+			pg_offset,
+			fileid;
+	int		plus = NFS_USE_READDIRPLUS(inode),
+			use_cookie = NFS_MONOTONE_COOKIES(inode),
+			loop_count = 0,
+			res = 0;
+
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ offset %ld\n", entry->offset);
+	pg_offset = offset & ~PAGE_CACHE_MASK;
+	start = (u8*)kmap(page);
+	p = start + pg_offset;
+
+	for(;;) {
+		/* Note: entry->prev contains the offset of the start of the
+		 *       current dirent */
+		fileid = nfs_fileid_to_ino_t(entry->ino);
+		if (use_cookie)
+			res = filldir(dirent, entry->name, entry->len, entry->prev_cookie, fileid);
+		else
+			res = filldir(dirent, entry->name, entry->len, entry->prev, fileid);
+		if (res < 0)
+			break;
+		file->f_pos = (use_cookie) ? entry->cookie : entry->offset;
+		p = (u8*)decode((__u32*)p, entry, plus);
+		if (!p || IS_ERR(p))
+			break;
+		pg_offset = p - start;
+		entry->prev = entry->offset;
+		entry->offset = base + pg_offset;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
 	kunmap(page);
-	page_cache_release(page);
-	return 0;
 
-no_dirent_page:
-	page = try_to_get_dirent_page(filp, filp->f_pos, 1);
-	if (!page)
-		goto no_page;
-
-	if (Page_Uptodate(page))
-		goto success;
-dirent_read_error:
-	page_cache_release(page);
-no_page:
-	return -EIO;
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ offset %ld; returning = %d\n", entry->offset, res);
+	return res;
 }
 
-/* Flush directory cookie and EOF caches for an inode.
- * So we don't thrash allocating/freeing cookie tables,
- * we keep the cookies around until the inode is
- * deleted/reused.
+/* The file offset position is now represented as a true offset into the
+ * page cache as is the case in most of the other filesystems.
  */
-__inline__ void nfs_flush_dircache(struct inode *inode)
+static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct nfs_cookie_table *p = NFS_COOKIES(inode);
+	struct dentry	*dentry = filp->f_dentry;
+	struct inode	*inode = dentry->d_inode;
+	struct page	*page;
+	struct nfs_entry my_entry,
+			*entry = &my_entry;
+	loff_t		offset;
+	long		res;
 
-	while (p != NULL) {
-		int i;
+	res = nfs_revalidate(dentry);
+	if (res < 0)
+		return res;
 
-		for(i = 0; i < COOKIES_PER_CHUNK; i++)
-			p->cookies[i] = 0;
+	/*
+	 * filp->f_pos points to the file offset in the page cache,
+	 * but if the cache has meanwhile been zapped, we need to
+	 * read from the last dirent to revalidate f_pos
+	 * itself.
+	 */
+	memset(entry, 0, sizeof(*entry));
 
-		p = p->next;
-	}
-	NFS_DIREOF(inode) = 0;
-}
+	offset = filp->f_pos;
 
-/* Free up directory cache state, this happens when
- * nfs_delete_inode is called on an NFS directory.
- */
-void nfs_free_dircache(struct inode *inode)
-{
-	struct nfs_cookie_table *p = NFS_COOKIES(inode);
+	while(!entry->eof) {
+		res = search_cached_dirent_pages(inode, offset, entry);
+
+		if (res < 0) {
+			if (entry->eof)
+				break;
+			res = refetch_to_readdir(filp, inode, offset, entry);
+			if (res < 0)
+				break;
+		}
 
-	while (p != NULL) {
-		struct nfs_cookie_table *next = p->next;
-		kmem_cache_free(nfs_cookie_cachep, p);
-		p = next;
+		page = entry->page;
+		if (!page)
+			printk(KERN_ERR "NFS: Missing page...\n");
+		res = nfs_do_filldir(filp, inode, entry, dirent, filldir);
+		page_cache_release(page);
+		entry->page = NULL;
+		if (res < 0) {
+			res = 0;
+			break;
+		}
+		offset = filp->f_pos;
 	}
-	NFS_COOKIES(inode) = NULL;
-	NFS_DIREOF(inode) = 0;
+	if (entry->page)
+		page_cache_release(entry->page);
+	if (res < 0 && res != -EBADCOOKIE)
+		return res;
+	return 0;
 }
 
 /*
@@ -540,7 +510,8 @@
  */
 static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
 {
-	struct dentry * parent = dentry->d_parent;
+	struct dentry *dir = dentry->d_parent;
+	struct inode *dir_i = dir->d_inode;
 	struct inode * inode = dentry->d_inode;
 	int error;
 	struct nfs_fh fhandle;
@@ -559,7 +530,7 @@
 
 	if (is_bad_inode(inode)) {
 		dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
-			parent->d_name.name, dentry->d_name.name);
+			dir->d_name.name, dentry->d_name.name);
 		goto out_bad;
 	}
 
@@ -574,13 +545,14 @@
 	/*
 	 * Do a new lookup and check the dentry attributes.
 	 */
-	error = nfs_proc_lookup(NFS_DSERVER(parent), NFS_FH(parent),
-				dentry->d_name.name, &fhandle, &fattr);
+	error = NFS_PROTO(dir_i)->lookup(dir, &dentry->d_name, &fhandle,
+					 &fattr);
 	if (error)
 		goto out_bad;
 
 	/* Inode number matches? */
-	if (NFS_FSID(inode) != fattr.fsid ||
+	if (!(fattr.valid & NFS_ATTR_FATTR) ||
+	    NFS_FSID(inode) != fattr.fsid ||
 	    NFS_FILEID(inode) != fattr.fileid)
 		goto out_bad;
 
@@ -603,10 +575,9 @@
 		goto out_valid;
 	d_drop(dentry);
 	/* Purge readdir caches. */
-	if (dentry->d_parent->d_inode) {
-		nfs_zap_caches(dentry->d_parent->d_inode);
-		NFS_CACHEINV(dentry->d_parent->d_inode);
-	}
+	nfs_zap_caches(dir_i);
+	if (inode && S_ISDIR(inode->i_mode))
+		nfs_zap_caches(inode);
 	return 0;
 }
 
@@ -687,18 +658,19 @@
 #endif /* NFS_PARANOIA */
 #endif /* 0 */
 
-static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry)
+static struct dentry *nfs_lookup(struct inode *dir_i, struct dentry * dentry)
 {
+	struct dentry *dir = dentry->d_parent;
 	struct inode *inode;
 	int error;
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr;
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+		dir->d_name.name, dentry->d_name.name);
 
 	error = -ENAMETOOLONG;
-	if (dentry->d_name.len > NFS_MAXNAMLEN)
+	if (dentry->d_name.len > NFS_SERVER(dir_i)->namelen)
 		goto out;
 
 	error = -ENOMEM;
@@ -709,8 +681,8 @@
 	}
 	dentry->d_op = &nfs_dentry_operations;
 
-	error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dentry->d_parent), 
-				dentry->d_name.name, &fhandle, &fattr);
+	error = NFS_PROTO(dir_i)->lookup(dir, &dentry->d_name, &fhandle,
+					 &fattr);
 	inode = NULL;
 	if (error == -ENOENT)
 		goto no_entry;
@@ -743,6 +715,7 @@
 		nfs_renew_times(dentry);
 		error = 0;
 	}
+	NFS_CACHEINV(dentry->d_parent->d_inode);
 	return error;
 }
 
@@ -752,29 +725,32 @@
  * that the operation succeeded on the server, but an error in the
  * reply path made it appear to have failed.
  */
-static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_create(struct inode *dir_i, struct dentry *dentry, int mode)
 {
-	int error;
+	struct dentry *dir = dentry->d_parent;
 	struct iattr attr;
 	struct nfs_fattr fattr;
 	struct nfs_fh fhandle;
+	int error;
 
 	dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
 
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
 	/*
-	 * Invalidate the dir cache before the operation to avoid a race.
+	 * The 0 argument passed into the create function should one day
+	 * contain the O_EXCL flag if requested. This allows NFSv3 to
+	 * select the appropriate create strategy. Currently open_namei
+	 * does not pass the create flags.
 	 */
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-			dentry->d_name.name, &attr, &fhandle, &fattr);
-	if (!error)
+	nfs_zap_caches(dir_i);
+	error = NFS_PROTO(dir_i)->create(dir, &dentry->d_name,
+					 &attr, 0, &fhandle, &fattr);
+	if (!error && fhandle.size != 0)
 		error = nfs_instantiate(dentry, &fhandle, &fattr);
-	if (error)
+	if (error || fhandle.size == 0)
 		d_drop(dentry);
 	return error;
 }
@@ -782,31 +758,26 @@
 /*
  * See comments for nfs_proc_create regarding failed operations.
  */
-static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
+static int nfs_mknod(struct inode *dir_i, struct dentry *dentry, int mode, int rdev)
 {
-	int error;
+	struct dentry *dir = dentry->d_parent;
 	struct iattr attr;
 	struct nfs_fattr fattr;
 	struct nfs_fh fhandle;
+	int error;
 
 	dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
 
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
-	/* FIXME: move this to a special nfs_proc_mknod() */
-	if (S_ISCHR(mode) || S_ISBLK(mode)) {
-		attr.ia_size = rdev; /* get out your barf bag */
-		attr.ia_valid |= ATTR_SIZE;
-	}
 
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name, &attr, &fhandle, &fattr);
-	if (!error)
+	nfs_zap_caches(dir_i);
+	error = NFS_PROTO(dir_i)->mknod(dir, &dentry->d_name, &attr, rdev,
+					&fhandle, &fattr);
+	if (!error && fhandle.size != 0)
 		error = nfs_instantiate(dentry, &fhandle, &fattr);
-	if (error)
+	if (error || fhandle.size == 0)
 		d_drop(dentry);
 	return error;
 }
@@ -814,19 +785,21 @@
 /*
  * See comments for nfs_proc_create regarding failed operations.
  */
-static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_mkdir(struct inode *dir_i, struct dentry *dentry, int mode)
 {
-	int error;
+	struct dentry *dir = dentry->d_parent;
 	struct iattr attr;
 	struct nfs_fattr fattr;
 	struct nfs_fh fhandle;
+	int error;
 
 	dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
 
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
 
+#if 0
 	/*
 	 * Always drop the dentry, we can't always depend on
 	 * the fattr returned by the server (AIX seems to be
@@ -834,44 +807,48 @@
 	 * depending on potentially bogus information.
 	 */
 	d_drop(dentry);
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
-				dentry->d_name.name, &attr, &fhandle, &fattr);
-	if (!error)
-		dir->i_nlink++;
+#endif
+	nfs_zap_caches(dir_i);
+	dir_i->i_nlink++;
+	error = NFS_PROTO(dir_i)->mkdir(dir, &dentry->d_name, &attr, &fhandle,
+					&fattr);
+	if (!error && fhandle.size != 0)
+		error = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (error || fhandle.size == 0)
+		d_drop(dentry);
 	return error;
 }
 
-static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int nfs_rmdir(struct inode *dir_i, struct dentry *dentry)
 {
+	struct dentry *dir = dentry->d_parent;
 	int error;
 
 	dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name);
 
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name);
+	nfs_zap_caches(dir_i);
+	error = NFS_PROTO(dir_i)->rmdir(dir, &dentry->d_name);
 
 	/* Update i_nlink and invalidate dentry. */
 	if (!error) {
 		d_drop(dentry);
-		if (dir->i_nlink)
-			dir->i_nlink--;
+		if (dir_i->i_nlink)
+			dir_i->i_nlink--;
 	}
 
 	return error;
 }
 
-static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
+static int nfs_sillyrename(struct inode *dir_i, struct dentry *dentry)
 {
+	struct dentry *dir = dentry->d_parent;
 	static unsigned int sillycounter = 0;
-	const int      i_inosize  = sizeof(dir->i_ino)*2;
+	const int      i_inosize  = sizeof(dir_i->i_ino)*2;
 	const int      countersize = sizeof(sillycounter)*2;
 	const int      slen       = strlen(".nfs") + i_inosize + countersize;
 	char           silly[slen+1];
+	struct qstr    qsilly;
 	struct dentry *sdentry;
 	int            error = -EIO;
 
@@ -923,11 +900,10 @@
 			goto out;
 	} while(sdentry->d_inode != NULL); /* need negative lookup */
 
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_rename(NFS_SERVER(dir),
-				NFS_FH(dentry->d_parent), dentry->d_name.name,
-				NFS_FH(dentry->d_parent), silly);
+	nfs_zap_caches(dir_i);
+	qsilly.name = silly;
+	qsilly.len  = strlen(silly);
+	error = NFS_PROTO(dir_i)->rename(dir, &dentry->d_name, dir, &qsilly);
 	if (!error) {
 		nfs_renew_times(dentry);
 		d_move(dentry, sdentry);
@@ -948,7 +924,8 @@
  */
 static int nfs_safe_remove(struct dentry *dentry)
 {
-	struct inode *dir = dentry->d_parent->d_inode;
+	struct dentry *dir = dentry->d_parent;
+	struct inode *dir_i = dir->d_inode;
 	struct inode *inode = dentry->d_inode;
 	int error, rehash = 0;
 		
@@ -979,22 +956,22 @@
 		d_drop(dentry);
 		rehash = 1;
 	}
+	nfs_zap_caches(dir_i);
+	error = NFS_PROTO(dir_i)->remove(dir, &dentry->d_name);
+	if (error < 0)
+		goto out;
 	/*
-	 * Update i_nlink and free the inode before unlinking.
+	 * Update i_nlink and free the inode
 	 */
 	if (inode) {
 		if (inode->i_nlink)
 			inode->i_nlink --;
 		d_delete(dentry);
 	}
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name);
 	/*
 	 * Rehash the negative dentry if the operation succeeded.
 	 */
-	if (!error && rehash)
+	if (rehash)
 		d_add(dentry, NULL);
 out:
 	return error;
@@ -1023,16 +1000,22 @@
 }
 
 static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+nfs_symlink(struct inode *dir_i, struct dentry *dentry, const char *symname)
 {
+	struct dentry *dir = dentry->d_parent;
 	struct iattr attr;
+	struct nfs_fattr sym_attr;
+	struct nfs_fh sym_fh;
+	struct qstr qsymname;
+	unsigned int maxlen;
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
-		dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
+		dir_i->i_dev, dir_i->i_ino, dentry->d_name.name, symname);
 
 	error = -ENAMETOOLONG;
-	if (strlen(symname) > NFS_MAXPATHLEN)
+	maxlen = (NFS_PROTO(dir_i)->version==2) ? NFS2_MAXPATHLEN : NFS3_MAXPATHLEN;
+	if (strlen(symname) > maxlen)
 		goto out;
 
 #ifdef NFS_PARANOIA
@@ -1047,21 +1030,19 @@
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = S_IFLNK | S_IRWXUGO;
 
-	/*
-	 * Drop the dentry in advance to force a new lookup.
-	 * Since nfs_proc_symlink doesn't return a fattr, we
-	 * can't instantiate the new inode.
-	 */
-	d_drop(dentry);
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
-				dentry->d_name.name, symname, &attr);
-	if (!error) {
-		nfs_renew_times(dentry->d_parent);
-	} else if (error == -EEXIST) {
-		printk("nfs_proc_symlink: %s/%s already exists??\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
+	qsymname.name = symname;
+	qsymname.len  = strlen(symname);
+
+	nfs_zap_caches(dir_i);
+	error = NFS_PROTO(dir_i)->symlink(dir, &dentry->d_name, &qsymname,
+					  &attr, &sym_fh, &sym_attr);
+	if (!error && sym_fh.size != 0 && (sym_attr.valid & NFS_ATTR_FATTR)) {
+		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
+	} else {
+		if (error == -EEXIST)
+			printk("nfs_proc_symlink: %s/%s already exists??\n",
+			       dir->d_name.name, dentry->d_name.name);
+		d_drop(dentry);
 	}
 
 out:
@@ -1069,8 +1050,9 @@
 }
 
 static int 
-nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+nfs_link(struct dentry *old_dentry, struct inode *dir_i, struct dentry *dentry)
 {
+	struct dentry *dir = dentry->d_parent;
 	struct inode *inode = old_dentry->d_inode;
 	int error;
 
@@ -1084,10 +1066,8 @@
 	 * we can't use the existing dentry.
 	 */
 	d_drop(dentry);
-	invalidate_inode_pages(dir);
-	nfs_flush_dircache(dir);
-	error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry),
-				NFS_FH(dentry->d_parent), dentry->d_name.name);
+	nfs_zap_caches(dir_i);
+	error = NFS_PROTO(dir_i)->link(old_dentry, dir, &dentry->d_name);
 	if (!error) {
  		/*
 		 * Update the link count immediately, as some apps
@@ -1197,14 +1177,12 @@
 	if (new_inode)
 		d_delete(new_dentry);
 
-	invalidate_inode_pages(new_dir);
-	nfs_flush_dircache(new_dir);
-	invalidate_inode_pages(old_dir);
-	nfs_flush_dircache(old_dir);
-	error = nfs_proc_rename(NFS_DSERVER(old_dentry),
-			NFS_FH(old_dentry->d_parent), old_dentry->d_name.name,
-			NFS_FH(new_dentry->d_parent), new_dentry->d_name.name);
-
+	nfs_zap_caches(new_dir);
+	nfs_zap_caches(old_dir);
+	error = NFS_PROTO(old_dir)->rename(old_dentry->d_parent,
+					   &old_dentry->d_name,
+					   new_dentry->d_parent,
+					   &new_dentry->d_name);
 	NFS_CACHEINV(old_dir);
 	NFS_CACHEINV(new_dir);
 	/* Update the dcache if needed */
@@ -1229,14 +1207,13 @@
 	if (nfs_fh_cachep == NULL)
 		return -ENOMEM;
 
-	nfs_cookie_cachep = kmem_cache_create("nfs_dcookie",
-					      sizeof(struct nfs_cookie_table),
-					      0, SLAB_HWCACHE_ALIGN,
-					      NULL, NULL);
-	if (nfs_cookie_cachep == NULL)
-		return -ENOMEM;
-
 	return 0;
+}
+
+void nfs_destroy_fhcache(void)
+{
+	if (kmem_cache_destroy(nfs_fh_cachep))
+		printk(KERN_INFO "nfs_fh: not all structures were freed\n");
 }
 
 /*
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)