patch-2.4.0-prerelease linux/mm/filemap.c

diff -u --recursive --new-file v2.4.0-test12/linux/mm/filemap.c linux/mm/filemap.c
@@ -55,35 +55,48 @@
 #define CLUSTER_PAGES		(1 << page_cluster)
 #define CLUSTER_OFFSET(x)	(((x) >> page_cluster) << page_cluster)
 
-void __add_page_to_hash_queue(struct page * page, struct page **p)
+static void add_page_to_hash_queue(struct page * page, struct page **p)
 {
-	atomic_inc(&page_cache_size);
-	if((page->next_hash = *p) != NULL)
-		(*p)->pprev_hash = &page->next_hash;
+	struct page *next = *p;
+
 	*p = page;
+	page->next_hash = next;
 	page->pprev_hash = p;
+	if (next)
+		next->pprev_hash = &page->next_hash;
 	if (page->buffers)
 		PAGE_BUG(page);
+	atomic_inc(&page_cache_size);
 }
 
-static inline void remove_page_from_hash_queue(struct page * page)
+static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page)
 {
-	if(page->pprev_hash) {
-		if(page->next_hash)
-			page->next_hash->pprev_hash = page->pprev_hash;
-		*page->pprev_hash = page->next_hash;
-		page->pprev_hash = NULL;
-	}
-	atomic_dec(&page_cache_size);
+	struct list_head *head = &mapping->clean_pages;
+
+	mapping->nrpages++;
+	list_add(&page->list, head);
+	page->mapping = mapping;
 }
 
-static inline int sync_page(struct page *page)
+static inline void remove_page_from_inode_queue(struct page * page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space * mapping = page->mapping;
 
-	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-		return mapping->a_ops->sync_page(page);
-	return 0;
+	mapping->nrpages--;
+	list_del(&page->list);
+	page->mapping = NULL;
+}
+
+static inline void remove_page_from_hash_queue(struct page * page)
+{
+	struct page *next = page->next_hash;
+	struct page **pprev = page->pprev_hash;
+
+	if (next)
+		next->pprev_hash = pprev;
+	*pprev = next;
+	page->pprev_hash = NULL;
+	atomic_dec(&page_cache_size);
 }
 
 /*
@@ -93,6 +106,7 @@
  */
 void __remove_inode_page(struct page *page)
 {
+	if (PageDirty(page)) BUG();
 	remove_page_from_inode_queue(page);
 	remove_page_from_hash_queue(page);
 	page->mapping = NULL;
@@ -108,6 +122,30 @@
 	spin_unlock(&pagecache_lock);
 }
 
+static inline int sync_page(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+
+	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+		return mapping->a_ops->sync_page(page);
+	return 0;
+}
+
+/*
+ * Add a page to the dirty page list.
+ */
+void __set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+
+	spin_lock(&pagecache_lock);
+	list_del(&page->list);
+	list_add(&page->list, &mapping->dirty_pages);
+	spin_unlock(&pagecache_lock);
+
+	mark_inode_dirty_pages(mapping->host);
+}
+
 /**
  * invalidate_inode_pages - Invalidate all the unlocked pages of one inode
  * @inode: the inode which pages we want to invalidate
@@ -121,7 +159,7 @@
 	struct list_head *head, *curr;
 	struct page * page;
 
-	head = &inode->i_mapping->pages;
+	head = &inode->i_mapping->clean_pages;
 
 	spin_lock(&pagecache_lock);
 	spin_lock(&pagemap_lru_lock);
@@ -131,15 +169,17 @@
 		page = list_entry(curr, struct page, list);
 		curr = curr->next;
 
-		/* We cannot invalidate a locked page */
-		if (TryLockPage(page))
+		/* We cannot invalidate something in use.. */
+		if (page_count(page) != 1)
 			continue;
 
-		/* Neither can we invalidate something in use.. */
-		if (page_count(page) != 1) {
-			UnlockPage(page);
+		/* ..or dirty.. */
+		if (PageDirty(page))
+			continue;
+
+		/* ..or locked */
+		if (TryLockPage(page))
 			continue;
-		}
 
 		__lru_cache_del(page);
 		__remove_inode_page(page);
@@ -179,26 +219,12 @@
 	page_cache_release(page);
 }
 
-/**
- * truncate_inode_pages - truncate *all* the pages from an offset
- * @mapping: mapping to truncate
- * @lstart: offset from with to truncate
- *
- * Truncate the page cache at a set offset, removing the pages
- * that are beyond that offset (and zeroing out partial pages).
- * If any page is locked we wait for it to become unlocked.
- */
-void truncate_inode_pages(struct address_space * mapping, loff_t lstart)
+void truncate_list_pages(struct list_head *head, unsigned long start, unsigned partial)
 {
-	struct list_head *head, *curr;
+	struct list_head *curr;
 	struct page * page;
-	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
-	unsigned long start;
-
-	start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 repeat:
-	head = &mapping->pages;
 	spin_lock(&pagecache_lock);
 	curr = head->next;
 	while (curr != head) {
@@ -242,6 +268,26 @@
 	spin_unlock(&pagecache_lock);
 }
 
+
+/**
+ * truncate_inode_pages - truncate *all* the pages from an offset
+ * @mapping: mapping to truncate
+ * @lstart: offset from which to truncate
+ *
+ * Truncate the page cache at a set offset, removing the pages
+ * that are beyond that offset (and zeroing out partial pages).
+ * If any page is locked we wait for it to become unlocked.
+ */
+void truncate_inode_pages(struct address_space * mapping, loff_t lstart) 
+{
+	unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
+
+	truncate_list_pages(&mapping->clean_pages, start, partial);
+	truncate_list_pages(&mapping->dirty_pages, start, partial);
+	truncate_list_pages(&mapping->locked_pages, start, partial);
+}
+
 static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
 {
 	goto inside;
@@ -303,14 +349,12 @@
 	return error;
 }
 
-static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end, int (*fn)(struct page *))
+static int do_buffer_fdatasync(struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *))
 {
-	struct list_head *head, *curr;
+	struct list_head *curr;
 	struct page *page;
 	int retval = 0;
 
-	head = &inode->i_mapping->pages;
-
 	spin_lock(&pagecache_lock);
 	curr = head->next;
 	while (curr != head) {
@@ -349,11 +393,89 @@
 {
 	int retval;
 
-	retval = do_buffer_fdatasync(inode, start_idx, end_idx, writeout_one_page);
-	retval |= do_buffer_fdatasync(inode, start_idx, end_idx, waitfor_one_page);
+	/* writeout dirty buffers on pages from both clean and dirty lists */
+	retval = do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
+	retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, writeout_one_page);
+	retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, writeout_one_page);
+
+	/* now wait for locked buffers on pages from both clean and dirty lists */
+	retval |= do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, waitfor_one_page);
+
 	return retval;
 }
 
+/**
+ *      filemap_fdatasync - walk the list of dirty pages of the given address space
+ *     	and writepage() all of them.
+ * 
+ *      @mapping: address space structure to write
+ *
+ */
+void filemap_fdatasync(struct address_space * mapping)
+{
+	int (*writepage)(struct page *) = mapping->a_ops->writepage;
+
+	spin_lock(&pagecache_lock);
+
+        while (!list_empty(&mapping->dirty_pages)) {
+		struct page *page = list_entry(mapping->dirty_pages.next, struct page, list);
+
+		list_del(&page->list);
+		list_add(&page->list, &mapping->locked_pages);
+
+		if (!PageDirty(page))
+			continue;
+
+		page_cache_get(page);
+		spin_unlock(&pagecache_lock);
+
+		lock_page(page);
+
+		if (PageDirty(page)) {
+			ClearPageDirty(page);
+			writepage(page);
+		} else
+			UnlockPage(page);
+
+		page_cache_release(page);
+		spin_lock(&pagecache_lock);
+	}
+	spin_unlock(&pagecache_lock);
+}
+
+/**
+ *      filemap_fdatawait - walk the list of locked pages of the given address space
+ *     	and wait for all of them.
+ * 
+ *      @mapping: address space structure to wait for
+ *
+ */
+void filemap_fdatawait(struct address_space * mapping)
+{
+	spin_lock(&pagecache_lock);
+
+        while (!list_empty(&mapping->locked_pages)) {
+		struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
+
+		list_del(&page->list);
+		list_add(&page->list, &mapping->clean_pages);
+
+		if (!PageLocked(page))
+			continue;
+
+		page_cache_get(page);
+		spin_unlock(&pagecache_lock);
+
+		___wait_on_page(page);
+
+		page_cache_release(page);
+		spin_lock(&pagecache_lock);
+	}
+	spin_unlock(&pagecache_lock);
+}
+
 /*
  * Add a page to the inode page cache.
  *
@@ -369,7 +491,7 @@
 	spin_lock(&pagecache_lock);
 	page->index = index;
 	add_page_to_inode_queue(mapping, page);
-	__add_page_to_hash_queue(page, page_hash(mapping, index));
+	add_page_to_hash_queue(page, page_hash(mapping, index));
 	lru_cache_add(page);
 	spin_unlock(&pagecache_lock);
 }
@@ -392,7 +514,7 @@
 	page_cache_get(page);
 	page->index = offset;
 	add_page_to_inode_queue(mapping, page);
-	__add_page_to_hash_queue(page, hash);
+	add_page_to_hash_queue(page, hash);
 	lru_cache_add(page);
 }
 
@@ -542,8 +664,8 @@
  * a rather lightweight function, finding and getting a reference to a
  * hashed page atomically, waiting for it if it's locked.
  */
-static struct page * __find_get_page(struct address_space *mapping,
-				unsigned long offset, struct page **hash)
+struct page * __find_get_page(struct address_space *mapping,
+			      unsigned long offset, struct page **hash)
 {
 	struct page *page;
 
@@ -1460,72 +1582,19 @@
 	return NULL;
 }
 
-/*
- * If a task terminates while we're swapping the page, the vma and
- * and file could be released: try_to_swap_out has done a get_file.
- * vma/file is guaranteed to exist in the unmap/sync cases because
- * mmap_sem is held.
- *
- * The "mapping" test takes care of somebody having truncated the
- * page and thus made this write-page a no-op..
- */
-static int filemap_write_page(struct page * page, int wait)
-{
-	struct address_space * mapping = page->mapping;
-	int error = 0;
-
-	if (mapping && mapping->a_ops->writepage) {
-		ClearPageDirty(page);
-		error = mapping->a_ops->writepage(page);
-	}
-	return error;
-}
-
-
-/*
- * The page cache takes care of races between somebody
- * trying to swap something out and swap something in
- * at the same time..
- */
-extern void wakeup_bdflush(int);
-int filemap_swapout(struct page * page, struct file *file)
-{
-	SetPageDirty(page);
-	return 0;
-}
-
 /* Called with mm->page_table_lock held to protect against other
  * threads/the swapper from ripping pte's out from under us.
  */
 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 	unsigned long address, unsigned int flags)
 {
-	pte_t pte;
-	struct page *page;
-	int error;
-
-	pte = *ptep;
+	pte_t pte = *ptep;
 
-	if (!pte_present(pte))
-		goto out;
-	if (!ptep_test_and_clear_dirty(ptep))
-		goto out;
-
-	flush_page_to_ram(pte_page(pte));
-	flush_cache_page(vma, address);
-	flush_tlb_page(vma, address);
-	page = pte_page(pte);
-	page_cache_get(page);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-
-	lock_page(page);
-	error = filemap_write_page(page, 1);
-	page_cache_free(page);
-
-	spin_lock(&vma->vm_mm->page_table_lock);
-	return error;
-
-out:
+	if (pte_present(pte) && ptep_test_and_clear_dirty(ptep)) {
+		struct page *page = pte_page(pte);
+		flush_tlb_page(vma, address);
+		set_page_dirty(page);
+	}
 	return 0;
 }
 
@@ -1623,9 +1692,7 @@
  * backing-store for swapping..
  */
 static struct vm_operations_struct file_shared_mmap = {
-	sync:		filemap_sync,
 	nopage:		filemap_nopage,
-	swapout:	filemap_swapout,
 };
 
 /*
@@ -1667,16 +1734,19 @@
 static int msync_interval(struct vm_area_struct * vma,
 	unsigned long start, unsigned long end, int flags)
 {
-	if (vma->vm_file && vma->vm_ops && vma->vm_ops->sync) {
+	struct file * file = vma->vm_file;
+	if (file && (vma->vm_flags & VM_SHARED)) {
 		int error;
-		error = vma->vm_ops->sync(vma, start, end-start, flags);
+		error = filemap_sync(vma, start, end-start, flags);
+
 		if (!error && (flags & MS_SYNC)) {
-			struct file * file = vma->vm_file;
-			if (file && file->f_op && file->f_op->fsync) {
-				down(&file->f_dentry->d_inode->i_sem);
+			struct inode * inode = file->f_dentry->d_inode;
+			down(&inode->i_sem);
+			filemap_fdatasync(inode->i_mapping);
+			if (file->f_op && file->f_op->fsync)
 				error = file->f_op->fsync(file, file->f_dentry, 1);
-				up(&file->f_dentry->d_inode->i_sem);
-			}
+			filemap_fdatawait(inode->i_mapping);
+			up(&inode->i_sem);
 		}
 		return error;
 	}
@@ -2438,6 +2508,17 @@
 		bytes = PAGE_CACHE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
+
+		/*
+		 * Bring in the user page that we will copy from _first_.
+		 * Otherwise there's a nasty deadlock on copying from the
+		 * same page as we're writing to, without it being marked
+		 * up-to-date.
+		 */
+		{ volatile unsigned char dummy;
+			__get_user(dummy, buf);
+			__get_user(dummy, buf+bytes-1);
+		}
 
 		status = -ENOMEM;	/* we'll assign it later anyway */
 		page = __grab_cache_page(mapping, index, &cached_page);
