patch-2.4.17 linux/fs/reiserfs/inode.c

Next file: linux/fs/reiserfs/ioctl.c
Previous file: linux/fs/reiserfs/fix_node.c
Back to the patch index
Back to the overall index

diff -Naur -X /home/marcelo/lib/dontdiff linux-2.4.16/fs/reiserfs/inode.c linux/fs/reiserfs/inode.c
@@ -17,6 +17,8 @@
 #define GET_BLOCK_READ_DIRECT 4  /* read the tail if indirect item not found */
 #define GET_BLOCK_NO_ISEM     8 /* i_sem is not held, don't preallocate */
 
+static int reiserfs_get_block (struct inode * inode, long block,
+			       struct buffer_head * bh_result, int create);
 //
 // initially this function was derived from minix or ext2's analog and
 // evolved as the prototype did
@@ -40,11 +42,13 @@
 
 	reiserfs_delete_object (&th, inode);
 	pop_journal_writer(windex) ;
-	reiserfs_release_objectid (&th, inode->i_ino);
 
 	journal_end(&th, inode->i_sb, jbegin_count) ;
 
-	up (&inode->i_sem); 
+        up (&inode->i_sem);
+
+        /* all items of file are deleted, so we can remove "save" link */
+	remove_save_link (inode, 0/* not truncate */);
     } else {
 	/* no object items are in the tree */
 	;
@@ -72,9 +76,9 @@
 void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset,
 	      int type, int length )
 {
-  _make_cpu_key (key, inode_items_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
-	  	   le32_to_cpu (INODE_PKEY (inode)->k_objectid), 
-		   offset, type, length);
+  _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
+		 le32_to_cpu (INODE_PKEY (inode)->k_objectid), 
+		 offset, type, length);
 }
 
 
@@ -215,9 +219,9 @@
 // files which were created in the earlier version can not be longer,
 // than 2 gb
 //
-int file_capable (struct inode * inode, long block)
+static int file_capable (struct inode * inode, long block)
 {
-    if (inode_items_version (inode) != ITEM_VERSION_1 || // it is new file.
+    if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file.
 	block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
 	return 1;
 
@@ -511,8 +515,8 @@
 // determine which parts are derivative, if any, understanding that
 // there are only so many ways to code to a given interface.
 //
-int reiserfs_get_block (struct inode * inode, long block,
-			struct buffer_head * bh_result, int create)
+static int reiserfs_get_block (struct inode * inode, long block,
+			       struct buffer_head * bh_result, int create)
 {
     int repeat, retval;
     unsigned long tag;
@@ -540,7 +544,7 @@
 				/* bad.... */
     lock_kernel() ;
     th.t_trans_id = 0 ;
-    version = inode_items_version (inode);
+    version = get_inode_item_key_version (inode);
 
     if (block < 0) {
 	unlock_kernel();
@@ -564,7 +568,7 @@
 	return ret;
     }
 
-    inode->u.reiserfs_i.i_pack_on_close = 1 ;
+    inode->u.reiserfs_i.i_flags |= i_pack_on_close_mask;
 
     windex = push_journal_writer("reiserfs_get_block") ;
   
@@ -880,7 +884,8 @@
 	struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
 	unsigned long blocks;
 
-	inode_items_version (inode) = ITEM_VERSION_1;
+	set_inode_item_key_version (inode, KEY_FORMAT_3_5);
+        set_inode_sd_version (inode, STAT_DATA_V1);
 	inode->i_mode  = sd_v1_mode(sd);
 	inode->i_nlink = sd_v1_nlink(sd);
 	inode->i_uid   = sd_v1_uid(sd);
@@ -929,13 +934,13 @@
             inode->i_generation = sd_v2_generation(sd);
 
 	if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode))
-	    inode_items_version (inode) = ITEM_VERSION_1;
+	    set_inode_item_key_version (inode, KEY_FORMAT_3_5);
 	else
-	    inode_items_version (inode) = ITEM_VERSION_2;
+            set_inode_item_key_version (inode, KEY_FORMAT_3_6);
     }
 
     /* nopack = 0, by default */
-    inode->u.reiserfs_i.nopack = 0;
+    inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
 
     pathrelse (path);
     if (S_ISREG (inode->i_mode)) {
@@ -1115,7 +1120,7 @@
 
     /* set version 1, version 2 could be used too, because stat data
        key is the same in both versions */
-    key.version = ITEM_VERSION_1;
+    key.version = KEY_FORMAT_3_5;
     key.on_disk_key.k_dir_id = dirino;
     key.on_disk_key.k_objectid = inode->i_ino;
     key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
@@ -1138,6 +1143,30 @@
     }
 
     init_inode (inode, &path_to_sd);
+   
+    /* It is possible that knfsd is trying to access inode of a file
+       that is being removed from the disk by some other thread. As we
+       update sd on unlink all that is required is to check for nlink
+       here. This bug was first found by Sizif when debugging
+       SquidNG/Butterfly, forgotten, and found again after Philippe
+       Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 
+
+       More logical fix would require changes in fs/inode.c:iput() to
+       remove inode from hash-table _after_ fs cleaned disk stuff up and
+       in iget() to return NULL if I_FREEING inode is found in
+       hash-table. */
+    /* Currently there is one place where it's ok to meet inode with
+       nlink==0: processing of open-unlinked and half-truncated files
+       during mount (fs/reiserfs/super.c:finish_unfinished()). */
+    if( ( inode -> i_nlink == 0 ) && 
+	! inode -> i_sb -> u.reiserfs_sb.s_is_unlinked_ok ) {
+	    reiserfs_warning( "vs-13075: reiserfs_read_inode2: "
+			      "dead inode read from disk %K. "
+			      "This is likely to be race with knfsd. Ignore\n", 
+			      &key );
+	    make_bad_inode( inode );
+    }
+
     reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
 
 }
@@ -1162,7 +1191,7 @@
 }
 
 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data,
-                                     int len, int fhtype, int parent) {
+				     int len, int fhtype, int parent) {
     struct cpu_key key ;
     struct inode *inode = NULL ;
     struct list_head *lp;
@@ -1303,26 +1332,6 @@
     }
 }
 
-void reiserfs_dirty_inode (struct inode * inode) {
-    struct reiserfs_transaction_handle th ;
-
-    if (inode->i_sb->s_flags & MS_RDONLY) {
-        reiserfs_warning("clm-6006: writing inode %lu on readonly FS\n", 
-	                  inode->i_ino) ;
-        return ;
-    }
-    lock_kernel() ;
-
-    /* this is really only used for atime updates, so they don't have
-    ** to be included in O_SYNC or fsync
-    */
-    journal_begin(&th, inode->i_sb, 1) ;
-    reiserfs_update_sd (&th, inode);
-    journal_end(&th, inode->i_sb, 1) ;
-    unlock_kernel() ;
-}
-
-
 /* FIXME: no need any more. right? */
 int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
 {
@@ -1345,20 +1354,20 @@
     struct cpu_key key;
     int retval;
     
-    _make_cpu_key (&key, ITEM_VERSION_1, le32_to_cpu (ih->ih_key.k_dir_id),
+    _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id),
 		   le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);
     
     /* compose item head for new item. Directories consist of items of
        old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
        is done by reiserfs_new_inode */
     if (old_format_only (sb)) {
-	make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
+	make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
 	
 	make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
 				INODE_PKEY (dir)->k_dir_id, 
 				INODE_PKEY (dir)->k_objectid );
     } else {
-	make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
+	make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
 	
 	make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
 		   		INODE_PKEY (dir)->k_dir_id, 
@@ -1394,12 +1403,12 @@
     struct cpu_key key;
     int retval;
 
-    _make_cpu_key (&key, ITEM_VERSION_1, 
+    _make_cpu_key (&key, KEY_FORMAT_3_5, 
 		   le32_to_cpu (ih->ih_key.k_dir_id), 
 		   le32_to_cpu (ih->ih_key.k_objectid),
 		   1, TYPE_DIRECT, 3/*key length*/);
 
-    make_le_item_head (ih, 0, ITEM_VERSION_1, 1, TYPE_DIRECT, item_len, 0/*free_space*/);
+    make_le_item_head (ih, 0, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/);
 
     /* look for place in the tree for new item */
     retval = search_item (sb, &key, path);
@@ -1470,13 +1479,13 @@
       inode->i_generation = ++event;
 #endif
     if (old_format_only (sb))
-	make_le_item_head (&ih, 0, ITEM_VERSION_1, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
+	make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
     else
-	make_le_item_head (&ih, 0, ITEM_VERSION_2, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
+	make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
 
 
     /* key to search for correct place for new stat data */
-    _make_cpu_key (&key, ITEM_VERSION_2, le32_to_cpu (ih.ih_key.k_dir_id),
+    _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
 		   le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
 
     /* find proper place for inserting of stat data */
@@ -1512,9 +1521,16 @@
 
     INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
 
-    if (old_format_only (sb))
+    if (old_format_only (sb)) {
+	if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
+	    pathrelse (&path_to_key);
+	    /* i_uid or i_gid is too big to be stored in stat data v3.5 */
+	    iput (inode);
+	    *err = -EINVAL;
+	    return NULL;
+	}
 	inode2sd_v1 (&sd, inode);
-    else
+    } else
 	inode2sd (&sd, inode);
 
     // these do not go to on-disk stat data
@@ -1527,10 +1543,14 @@
     // format, other new objects will consist of new items)
     memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
     if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
-	inode_items_version (inode) = ITEM_VERSION_1;
+        set_inode_item_key_version (inode, KEY_FORMAT_3_5);
     else
-	inode_items_version (inode) = ITEM_VERSION_2;
-
+        set_inode_item_key_version (inode, KEY_FORMAT_3_6);
+    if (old_format_only (sb))
+	set_inode_sd_version (inode, STAT_DATA_V1);
+    else
+	set_inode_sd_version (inode, STAT_DATA_V2);
+    
     /* insert the stat data into the tree */
     retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
     if (retval) {
@@ -1690,12 +1710,23 @@
     ** because the truncate might pack the item anyway 
     ** (it will unmap bh if it packs).
     */
-    journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 ) ;
+    /* it is enough to reserve space in transaction for 2 balancings:
+       one for "save" link adding and another for the first
+       cut_from_item. 1 is for update_sd */
+    journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
     reiserfs_update_inode_transaction(p_s_inode) ;
     windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
+    if (update_timestamps)
+           /* we are doing real truncate: if the system crashes before the last
+              transaction of truncating gets committed - on reboot the file
+              either appears truncated properly or not truncated at all */
+           add_save_link (&th, p_s_inode, 1);
     reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
     pop_journal_writer(windex) ;
-    journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 ) ;
+    journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
+
+    if (update_timestamps)
+       remove_save_link (p_s_inode, 1/* truncate */);
 
     if (page) {
         length = offset & (blocksize - 1) ;

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)