patch-2.4.0-test6 linux/fs/ext2/inode.c
Next file: linux/fs/ext2/ioctl.c
Previous file: linux/fs/ext2/ialloc.c
Back to the patch index
Back to the overall index
- Lines: 966
- Date:
Fri Aug 4 19:08:30 2000
- Orig file:
v2.4.0-test5/linux/fs/ext2/inode.c
- Orig date:
Thu Jul 27 17:38:01 2000
diff -u --recursive --new-file v2.4.0-test5/linux/fs/ext2/inode.c linux/fs/ext2/inode.c
@@ -18,6 +18,8 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
* 64-bit file support on 64-bit platforms by Jakub Jelinek
* (jj@sunsite.ms.mff.cuni.cz)
+ *
+ * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
*/
#include <linux/fs.h>
@@ -26,8 +28,6 @@
#include <linux/sched.h>
#include <linux/highuid.h>
-
-
static int ext2_update_inode(struct inode * inode, int do_sync);
/*
@@ -35,9 +35,7 @@
*/
void ext2_put_inode (struct inode * inode)
{
- lock_kernel();
ext2_discard_prealloc (inode);
- unlock_kernel();
}
/*
@@ -66,36 +64,20 @@
clear_inode(inode); /* We must guarantee clearing of inode... */
}
-#define inode_bmap(inode, nr) (le32_to_cpu((inode)->u.ext2_i.i_data[(nr)]))
-
-static inline int block_bmap (struct buffer_head * bh, int nr)
-{
- int tmp;
-
- if (!bh)
- return 0;
- tmp = le32_to_cpu(((u32 *) bh->b_data)[nr]);
- brelse (bh);
- return tmp;
-}
-
-/*
- * ext2_discard_prealloc and ext2_alloc_block are atomic wrt. the
- * superblock in the same manner as are ext2_free_blocks and
- * ext2_new_block. We just wait on the super rather than locking it
- * here, since ext2_new_block will do the necessary locking and we
- * can't block until then.
- */
void ext2_discard_prealloc (struct inode * inode)
{
#ifdef EXT2_PREALLOCATE
- unsigned short total;
-
+ lock_kernel();
+ /* Writer: ->i_prealloc* */
if (inode->u.ext2_i.i_prealloc_count) {
- total = inode->u.ext2_i.i_prealloc_count;
+ unsigned short total = inode->u.ext2_i.i_prealloc_count;
+ unsigned long block = inode->u.ext2_i.i_prealloc_block;
inode->u.ext2_i.i_prealloc_count = 0;
- ext2_free_blocks (inode, inode->u.ext2_i.i_prealloc_block, total);
+ inode->u.ext2_i.i_prealloc_block = 0;
+ /* Writer: end */
+ ext2_free_blocks (inode, block, total);
}
+ unlock_kernel();
#endif
}
@@ -106,22 +88,26 @@
#endif
unsigned long result;
- wait_on_super (inode->i_sb);
#ifdef EXT2_PREALLOCATE
+ /* Writer: ->i_prealloc* */
if (inode->u.ext2_i.i_prealloc_count &&
(goal == inode->u.ext2_i.i_prealloc_block ||
goal + 1 == inode->u.ext2_i.i_prealloc_block))
{
result = inode->u.ext2_i.i_prealloc_block++;
inode->u.ext2_i.i_prealloc_count--;
+ /* Writer: end */
+#ifdef EXT2FS_DEBUG
ext2_debug ("preallocation hit (%lu/%lu).\n",
++alloc_hits, ++alloc_attempts);
-
+#endif
} else {
ext2_discard_prealloc (inode);
+#ifdef EXT2FS_DEBUG
ext2_debug ("preallocation miss (%lu/%lu).\n",
alloc_hits, ++alloc_attempts);
+#endif
if (S_ISREG(inode->i_mode))
result = ext2_new_block (inode, goal,
&inode->u.ext2_i.i_prealloc_count,
@@ -135,387 +121,486 @@
return result;
}
-static inline long ext2_block_map (struct inode * inode, long block)
+typedef struct {
+ u32 *p;
+ u32 key;
+ struct buffer_head *bh;
+} Indirect;
+
+static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
+{
+ p->key = *(p->p = v);
+ p->bh = bh;
+}
+
+static inline int verify_chain(Indirect *from, Indirect *to)
+{
+ while (from <= to && from->key == *from->p)
+ from++;
+ return (from > to);
+}
+
+/**
+ * ext2_block_to_path - parse the block number into array of offsets
+ * @inode: inode in question (we are only interested in its superblock)
+ * @i_block: block number to be parsed
+ * @offsets: array to store the offsets in
+ *
+ * To store the locations of file's data ext2 uses a data structure common
+ * for UNIX filesystems - tree of pointers anchored in the inode, with
+ * data blocks at leaves and indirect blocks in intermediate nodes.
+ * This function translates the block number into path in that tree -
+ * return value is the path length and @offsets[n] is the offset of
+ * pointer to (n+1)th node in the nth one. If @block is out of range
+ * (negative or too large) warning is printed and zero returned.
+ *
+ * Note: function doesn't find node addresses, so no IO is needed. All
+ * we need to know is the capacity of indirect blocks (taken from the
+ * inode->i_sb).
+ */
+
+/*
+ * Portability note: the last comparison (check that we fit into triple
+ * indirect block) is spelled differently, because otherwise on an
+ * architecture with 32-bit longs and 8Kb pages we might get into trouble
+ * if our filesystem had 8Kb blocks. We might use long long, but that would
+ * kill us on x86. Oh, well, at least the sign propagation does not matter -
+ * i_block would have to be negative in the very beginning, so we would not
+ * get there at all.
+ */
+
+static int ext2_block_to_path(struct inode *inode, long i_block, int offsets[4])
{
- int i, ret;
int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
+ const long direct_blocks = EXT2_NDIR_BLOCKS,
+ indirect_blocks = ptrs,
+ double_blocks = (1 << (ptrs_bits * 2));
+ int n = 0;
- ret = 0;
- lock_kernel();
- if (block < 0) {
- ext2_warning (inode->i_sb, "ext2_block_map", "block < 0");
- goto out;
- }
- if (block >= EXT2_NDIR_BLOCKS + ptrs +
- (1 << (ptrs_bits * 2)) +
- ((1 << (ptrs_bits * 2)) << ptrs_bits)) {
- ext2_warning (inode->i_sb, "ext2_block_map", "block > big");
- goto out;
- }
- if (block < EXT2_NDIR_BLOCKS) {
- ret = inode_bmap (inode, block);
- goto out;
- }
- block -= EXT2_NDIR_BLOCKS;
- if (block < ptrs) {
- i = inode_bmap (inode, EXT2_IND_BLOCK);
- if (!i)
- goto out;
- ret = block_bmap (bread (inode->i_dev, i,
- inode->i_sb->s_blocksize), block);
- goto out;
+ if (i_block < 0) {
+ ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0");
+ } else if (i_block < direct_blocks) {
+ offsets[n++] = i_block;
+ } else if ( (i_block -= direct_blocks) < indirect_blocks) {
+ offsets[n++] = EXT2_IND_BLOCK;
+ offsets[n++] = i_block;
+ } else if ((i_block -= indirect_blocks) < double_blocks) {
+ offsets[n++] = EXT2_DIND_BLOCK;
+ offsets[n++] = i_block >> ptrs_bits;
+ offsets[n++] = i_block & (ptrs - 1);
+ } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
+ offsets[n++] = EXT2_TIND_BLOCK;
+ offsets[n++] = i_block >> (ptrs_bits * 2);
+ offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
+ offsets[n++] = i_block & (ptrs - 1);
+ } else {
+ ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big");
}
- block -= ptrs;
- if (block < (1 << (ptrs_bits * 2))) {
- i = inode_bmap (inode, EXT2_DIND_BLOCK);
- if (!i)
- goto out;
- i = block_bmap (bread (inode->i_dev, i,
- inode->i_sb->s_blocksize),
- block >> ptrs_bits);
- if (!i)
- goto out;
- ret = block_bmap (bread (inode->i_dev, i,
- inode->i_sb->s_blocksize),
- block & (ptrs - 1));
- goto out;
+ return n;
+}
+
+/**
+ * ext2_get_branch - read the chain of indirect blocks leading to data
+ * @inode: inode in question
+ * @depth: depth of the chain (1 - direct pointer, etc.)
+ * @offsets: offsets of pointers in inode/indirect blocks
+ * @chain: place to store the result
+ * @err: here we store the error value
+ *
+ * Function fills the array of triples <key, p, bh> and returns %NULL
+ * if everything went OK or the pointer to the last filled triple
+ * (incomplete one) otherwise. Upon the return chain[i].key contains
+ * the number of (i+1)-th block in the chain (as it is stored in memory,
+ * i.e. little-endian 32-bit), chain[i].p contains the address of that
+ * number (it points into struct inode for i==0 and into the bh->b_data
+ * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
+ * block for i>0 and NULL for i==0. In other words, it holds the block
+ * numbers of the chain, addresses they were taken from (and where we can
+ * verify that chain did not change) and buffer_heads hosting these
+ * numbers.
+ *
+ * Function stops when it stumbles upon zero pointer (absent block)
+ * (pointer to last triple returned, *@err == 0)
+ * or when it gets an IO error reading an indirect block
+ * (ditto, *@err == -EIO)
+ * or when it notices that chain had been changed while it was reading
+ * (ditto, *@err == -EAGAIN)
+ * or when it reads all @depth-1 indirect blocks successfully and finds
+ * the whole chain, all way to the data (returns %NULL, *err == 0).
+ */
+static inline Indirect *ext2_get_branch(struct inode *inode,
+ int depth,
+ int *offsets,
+ Indirect chain[4],
+ int *err)
+{
+ kdev_t dev = inode->i_dev;
+ int size = inode->i_sb->s_blocksize;
+ Indirect *p = chain;
+ struct buffer_head *bh;
+
+ *err = 0;
+ /* i_data is not going away, no lock needed */
+ add_chain (chain, NULL, inode->u.ext2_i.i_data + *offsets);
+ if (!p->key)
+ goto no_block;
+ /*
+ * switch below is merely an unrolled loop - body should be
+ * repeated depth-1 times. Maybe loop would be actually better,
+ * but that way we get straight execution path in normal cases.
+ * Easy to change, anyway - all cases in switch are literally
+ * identical.
+ */
+ switch (depth) {
+ case 4:
+ bh = bread(dev, le32_to_cpu(p->key), size);
+ if (!bh)
+ goto failure;
+ /* Reader: pointers */
+ if (!verify_chain(chain, p))
+ goto changed;
+ add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
+ /* Reader: end */
+ if (!p->key)
+ goto no_block;
+ case 3:
+ bh = bread(dev, le32_to_cpu(p->key), size);
+ if (!bh)
+ goto failure;
+ /* Reader: pointers */
+ if (!verify_chain(chain, p))
+ goto changed;
+ add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
+ /* Reader: end */
+ if (!p->key)
+ goto no_block;
+ case 2:
+ bh = bread(dev, le32_to_cpu(p->key), size);
+ if (!bh)
+ goto failure;
+ /* Reader: pointers */
+ if (!verify_chain(chain, p))
+ goto changed;
+ add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
+ /* Reader: end */
+ if (!p->key)
+ goto no_block;
}
- block -= (1 << (ptrs_bits * 2));
- i = inode_bmap (inode, EXT2_TIND_BLOCK);
- if (!i)
- goto out;
- i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
- block >> (ptrs_bits * 2));
- if (!i)
- goto out;
- i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
- (block >> ptrs_bits) & (ptrs - 1));
- if (!i)
- goto out;
- ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize),
- block & (ptrs - 1));
-out:
- unlock_kernel();
- return ret;
+ return NULL;
+
+changed:
+ *err = -EAGAIN;
+ goto no_block;
+failure:
+ *err = -EIO;
+no_block:
+ return p;
}
-static struct buffer_head * inode_getblk (struct inode * inode, int nr,
- int new_block, int * err, int metadata, long *phys, int *new)
+/**
+ * ext2_find_near - find a place for allocation with sufficient locality
+ * @inode: owner
+ * @ind: descriptor of indirect block.
+ *
+ * This function returns the preferred place for block allocation.
+ * It is used when heuristic for sequential allocation fails.
+ * Rules are:
+ * + if there is a block to the left of our position - allocate near it.
+ * + if pointer will live in indirect block - allocate near that block.
+ * + if pointer will live in inode - allocate in the same cylinder group.
+ * Caller must make sure that @ind is valid and will stay that way.
+ */
+
+static inline unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
{
- u32 * p;
- int tmp, goal = 0;
- struct buffer_head * result;
- int blocksize = inode->i_sb->s_blocksize;
+ u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext2_i.i_data;
+ u32 *p;
- p = inode->u.ext2_i.i_data + nr;
-repeat:
- tmp = le32_to_cpu(*p);
- if (tmp) {
- if (metadata) {
- result = getblk (inode->i_dev, tmp, blocksize);
- if (tmp == le32_to_cpu(*p))
- return result;
- brelse (result);
- goto repeat;
- } else {
- *phys = tmp;
- return NULL;
- }
- }
+ /* Try to find previous block */
+ for (p = ind->p - 1; p >= start; p--)
+ if (*p)
+ return le32_to_cpu(*p);
+
+ /* No such thing, so let's try location of indirect block */
+ if (ind->bh)
+ return ind->bh->b_blocknr;
- if (inode->u.ext2_i.i_next_alloc_block == new_block)
- goal = inode->u.ext2_i.i_next_alloc_goal;
+ /*
+ * It is going to be referred to from the inode itself? OK, just put it into
+ * the same cylinder group then.
+ */
+ return (inode->u.ext2_i.i_block_group *
+ EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
+ le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
+}
- ext2_debug ("hint = %d,", goal);
+/**
+ * ext2_find_goal - find a preferred place for allocation.
+ * @inode: owner
+ * @block: block we want
+ * @chain: chain of indirect blocks
+ * @partial: pointer to the last triple within a chain
+ * @goal: place to store the result.
+ *
+ * Normally this function finds the preferred place for block allocation,
+ * stores it in *@goal and returns zero. If the branch had been changed
+ * under us we return -EAGAIN.
+ */
- if (!goal) {
- for (tmp = nr - 1; tmp >= 0; tmp--) {
- if (inode->u.ext2_i.i_data[tmp]) {
- goal = le32_to_cpu(inode->u.ext2_i.i_data[tmp]);
- break;
- }
- }
- if (!goal)
- goal = (inode->u.ext2_i.i_block_group *
- EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
- le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
- }
-
- ext2_debug ("goal = %d.\n", goal);
-
- tmp = ext2_alloc_block (inode, goal, err);
- if (!tmp)
- return NULL;
-
- if (metadata) {
- result = getblk (inode->i_dev, tmp, blocksize);
- if (!buffer_uptodate(result))
- wait_on_buffer(result);
- memset(result->b_data, 0, blocksize);
- mark_buffer_uptodate(result, 1);
- mark_buffer_dirty(result, 1);
- if (*p) {
- ext2_free_blocks (inode, tmp, 1);
- bforget (result);
- goto repeat;
- }
- } else {
- if (*p) {
- /*
- * Nobody is allowed to change block allocation
- * state from under us:
- */
- ext2_error (inode->i_sb, "block_getblk",
- "data block filled under us");
- BUG();
- ext2_free_blocks (inode, tmp, 1);
- goto repeat;
- }
- *phys = tmp;
- result = NULL;
- *err = 0;
- *new = 1;
+static inline int ext2_find_goal(struct inode *inode,
+ long block,
+ Indirect chain[4],
+ Indirect *partial,
+ unsigned long *goal)
+{
+ /* Writer: ->i_next_alloc* */
+ if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
+ inode->u.ext2_i.i_next_alloc_block++;
+ inode->u.ext2_i.i_next_alloc_goal++;
+ }
+ /* Writer: end */
+ /* Reader: pointers, ->i_next_alloc* */
+ if (verify_chain(chain, partial)) {
+ /*
+ * try the heuristic for sequential allocation,
+ * failing that at least try to get decent locality.
+ */
+ if (block == inode->u.ext2_i.i_next_alloc_block)
+ *goal = inode->u.ext2_i.i_next_alloc_goal;
+ if (!*goal)
+ *goal = ext2_find_near(inode, partial);
+ return 0;
}
- *p = cpu_to_le32(tmp);
-
- inode->u.ext2_i.i_next_alloc_block = new_block;
- inode->u.ext2_i.i_next_alloc_goal = tmp;
- inode->i_ctime = CURRENT_TIME;
- inode->i_blocks += blocksize/512;
- if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
- ext2_sync_inode (inode);
- else
- mark_inode_dirty(inode);
- return result;
+ /* Reader: end */
+ return -EAGAIN;
}
-/*
- * metadata / data
- * possibly create / access
- * can fail due to: - not present
- * - out of space
+/**
+ * ext2_alloc_branch - allocate and set up a chain of blocks.
+ * @inode: owner
+ * @num: depth of the chain (number of blocks to allocate)
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
*
- * NULL return in the data case is mandatory.
+ * This function allocates @num blocks, zeroes out all but the last one,
+ * links them into chain and (if we are synchronous) writes them to disk.
+ * In other words, it prepares a branch that can be spliced onto the
+ * inode. It stores the information about that chain in the branch[], in
+ * the same format as ext2_get_branch() would do. We are calling it after
+ * we had read the existing part of chain and partial points to the last
+ * triple of that (one with zero ->key). Upon the exit we have the same
+ * picture as after the successful ext2_get_block(), except that in one
+ * place chain is disconnected - *branch->p is still zero (we did not
+ * set the last link), but branch->key contains the number that should
+ * be placed into *branch->p to fill that gap.
+ *
+ * If allocation fails we free all blocks we've allocated (and forget
+ * their buffer_heads) and return the error value from the failed
+ * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+ * as described above and return 0.
*/
-static struct buffer_head * block_getblk (struct inode * inode,
- struct buffer_head * bh, int nr,
- int new_block, int * err, int metadata, long *phys, int *new)
-{
- int tmp, goal = 0;
- u32 * p;
- struct buffer_head * result;
+
+static int ext2_alloc_branch(struct inode *inode,
+ int num,
+ unsigned long goal,
+ int *offsets,
+ Indirect *branch)
+{
int blocksize = inode->i_sb->s_blocksize;
+ int n = 0;
+ int err;
+ int i;
+ int parent = ext2_alloc_block(inode, goal, &err);
- result = NULL;
- if (!bh)
- goto out;
- if (!buffer_uptodate(bh)) {
- ll_rw_block (READ, 1, &bh);
- wait_on_buffer (bh);
+ branch[0].key = cpu_to_le32(parent);
+ if (parent) for (n = 1; n < num; n++) {
+ struct buffer_head *bh;
+ /* Allocate the next block */
+ int nr = ext2_alloc_block(inode, parent, &err);
+ if (!nr)
+ break;
+ branch[n].key = cpu_to_le32(nr);
+ /*
+ * Get buffer_head for parent block, zero it out and set
+ * the pointer to new one, then send parent to disk.
+ */
+ bh = getblk(inode->i_dev, parent, blocksize);
if (!buffer_uptodate(bh))
- goto out;
- }
- p = (u32 *) bh->b_data + nr;
-repeat:
- tmp = le32_to_cpu(*p);
- if (tmp) {
- if (metadata) {
- result = getblk (bh->b_dev, tmp, blocksize);
- if (tmp == le32_to_cpu(*p))
- goto out;
- brelse (result);
- goto repeat;
- } else {
- *phys = tmp;
- /* result == NULL */
- goto out;
+ wait_on_buffer(bh);
+ memset(bh->b_data, 0, blocksize);
+ branch[n].bh = bh;
+ branch[n].p = (u32*) bh->b_data + offsets[n];
+ *branch[n].p = branch[n].key;
+ mark_buffer_uptodate(bh, 1);
+ mark_buffer_dirty(bh, 1);
+ if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
}
+ parent = nr;
}
+ if (n == num)
+ return 0;
- if (inode->u.ext2_i.i_next_alloc_block == new_block)
- goal = inode->u.ext2_i.i_next_alloc_goal;
- if (!goal) {
- for (tmp = nr - 1; tmp >= 0; tmp--) {
- if (le32_to_cpu(((u32 *) bh->b_data)[tmp])) {
- goal = le32_to_cpu(((u32 *)bh->b_data)[tmp]);
- break;
- }
- }
- if (!goal)
- goal = bh->b_blocknr;
- }
- tmp = ext2_alloc_block (inode, goal, err);
- if (!tmp)
- goto out;
- if (metadata) {
- result = getblk (bh->b_dev, tmp, blocksize);
- if (!buffer_uptodate(result))
- wait_on_buffer(result);
- memset(result->b_data, 0, inode->i_sb->s_blocksize);
- mark_buffer_uptodate(result, 1);
- mark_buffer_dirty(result, 1);
- if (*p) {
- ext2_free_blocks (inode, tmp, 1);
- bforget (result);
- goto repeat;
- }
- } else {
- if (*p) {
- /*
- * Nobody is allowed to change block allocation
- * state from under us:
- */
- ext2_error (inode->i_sb, "block_getblk",
- "data block filled under us");
- BUG();
- ext2_free_blocks (inode, tmp, 1);
- goto repeat;
- }
- *phys = tmp;
- *new = 1;
- }
- *p = le32_to_cpu(tmp);
- mark_buffer_dirty(bh, 1);
- if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
- ll_rw_block (WRITE, 1, &bh);
- wait_on_buffer (bh);
- }
- inode->i_ctime = CURRENT_TIME;
- inode->i_blocks += blocksize/512;
- mark_inode_dirty(inode);
- inode->u.ext2_i.i_next_alloc_block = new_block;
- inode->u.ext2_i.i_next_alloc_goal = tmp;
- *err = 0;
-out:
- brelse (bh);
- return result;
+ /* Allocation failed, free what we already allocated */
+ for (i = 1; i < n; i++)
+ bforget(branch[i].bh);
+ for (i = 0; i < n; i++)
+ ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1);
+ return err;
}
-static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
+/**
+ * ext2_splice_branch - splice the allocated branch onto inode.
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ * ext2_alloc_branch)
+ * @where: location of missing link
+ * @num: number of blocks we are adding
+ *
+ * This function verifies that chain (up to the missing link) had not
+ * changed, fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0. Otherwise (== chain had been changed)
+ * we free the new blocks (forgetting their buffer_heads, indeed) and
+ * return -EAGAIN.
+ */
+
+static inline int ext2_splice_branch(struct inode *inode,
+ long block,
+ Indirect chain[4],
+ Indirect *where,
+ int num)
{
- int ret, err, new;
- struct buffer_head *bh;
- unsigned long ptr, phys;
- /*
- * block pointers per block
- */
- unsigned long ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
- int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
- const int direct_blocks = EXT2_NDIR_BLOCKS,
- indirect_blocks = ptrs,
- double_blocks = (1 << (ptrs_bits * 2)),
- triple_blocks = (1 << (ptrs_bits * 3));
+ int i;
- if (!create) {
- /*
- * Will clean this up further, ext2_block_map() should use the
- * bh instead of an integer block-number interface.
- */
- phys = ext2_block_map(inode, iblock);
- if (phys) {
- bh_result->b_dev = inode->i_dev;
- bh_result->b_blocknr = phys;
- bh_result->b_state |= (1UL << BH_Mapped);
+ /* Verify that place we are splicing to is still there and vacant */
+
+ /* Writer: pointers, ->i_next_alloc*, ->i_blocks */
+ if (!verify_chain(chain, where-1) || *where->p)
+ /* Writer: end */
+ goto changed;
+
+ /* That's it */
+
+ *where->p = where->key;
+ inode->u.ext2_i.i_next_alloc_block = block;
+ inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);
+ inode->i_blocks += num * inode->i_sb->s_blocksize/512;
+
+ /* Writer: end */
+
+ /* We are done with atomic stuff, now do the rest of housekeeping */
+
+ inode->i_ctime = CURRENT_TIME;
+
+ /* had we spliced it onto indirect block? */
+ if (where->bh) {
+ mark_buffer_dirty(where->bh, 1);
+ if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
+ ll_rw_block (WRITE, 1, &where->bh);
+ wait_on_buffer(where->bh);
}
- return 0;
}
- err = -EIO;
- new = 0;
- ret = 0;
- bh = NULL;
+ if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
+ ext2_sync_inode (inode);
+ else
+ mark_inode_dirty(inode);
+ return 0;
- lock_kernel();
+changed:
+ for (i = 1; i < num; i++)
+ bforget(where[i].bh);
+ for (i = 0; i < num; i++)
+ ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1);
+ return -EAGAIN;
+}
- if (iblock < 0)
- goto abort_negative;
- if (iblock > direct_blocks + indirect_blocks +
- double_blocks + triple_blocks)
- goto abort_too_big;
+/*
+ * Allocation strategy is simple: if we have to allocate something, we will
+ * have to go the whole way to leaf. So let's do it before attaching anything
+ * to tree, set linkage between the newborn blocks, write them if sync is
+ * required, recheck the path, free and repeat if check fails, otherwise
+ * set the last missing link (that will protect us from any truncate-generated
+ * removals - all blocks on the path are immune now) and possibly force the
+ * write on the parent block.
+ * That has a nice additional property: no special recovery from the failed
+ * allocations is needed - we simply release blocks and do not touch anything
+ * reachable from inode.
+ */
- /*
- * If this is a sequential block allocation, set the next_alloc_block
- * to this block now so that all the indblock and data block
- * allocations use the same goal zone
- */
+static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
+{
+ int err = -EIO;
+ int offsets[4];
+ Indirect chain[4];
+ Indirect *partial;
+ unsigned long goal;
+ int left;
+ int depth = ext2_block_to_path(inode, iblock, offsets);
- ext2_debug ("block %lu, next %lu, goal %lu.\n", iblock,
- inode->u.ext2_i.i_next_alloc_block,
- inode->u.ext2_i.i_next_alloc_goal);
+ if (depth == 0)
+ goto out;
- if (iblock == inode->u.ext2_i.i_next_alloc_block + 1) {
- inode->u.ext2_i.i_next_alloc_block++;
- inode->u.ext2_i.i_next_alloc_goal++;
- }
+ lock_kernel();
+reread:
+ partial = ext2_get_branch(inode, depth, offsets, chain, &err);
- err = 0;
- ptr = iblock;
+ /* Simplest case - block found, no allocation needed */
+ if (!partial) {
+got_it:
+ bh_result->b_dev = inode->i_dev;
+ bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key);
+ bh_result->b_state |= (1UL << BH_Mapped);
+ /* Clean up and exit */
+ partial = chain+depth-1; /* the whole chain */
+ goto cleanup;
+ }
+
+ /* Next simple case - plain lookup or failed read of indirect block */
+ if (!create || err == -EIO) {
+cleanup:
+ while (partial > chain) {
+ brelse(partial->bh);
+ partial--;
+ }
+ unlock_kernel();
+out:
+ return err;
+ }
/*
- * ok, these macros clean the logic up a bit and make
- * it much more readable:
+ * Indirect block might be removed by truncate while we were
+ * reading it. Handling of that case (forget what we've got and
+ * reread) is taken out of the main path.
*/
-#define GET_INODE_DATABLOCK(x) \
- inode_getblk(inode, x, iblock, &err, 0, &phys, &new)
-#define GET_INODE_PTR(x) \
- inode_getblk(inode, x, iblock, &err, 1, NULL, NULL)
-#define GET_INDIRECT_DATABLOCK(x) \
- block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new);
-#define GET_INDIRECT_PTR(x) \
- block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL);
+ if (err == -EAGAIN)
+ goto changed;
- if (ptr < direct_blocks) {
- bh = GET_INODE_DATABLOCK(ptr);
- goto out;
- }
- ptr -= direct_blocks;
- if (ptr < indirect_blocks) {
- bh = GET_INODE_PTR(EXT2_IND_BLOCK);
- goto get_indirect;
- }
- ptr -= indirect_blocks;
- if (ptr < double_blocks) {
- bh = GET_INODE_PTR(EXT2_DIND_BLOCK);
- goto get_double;
- }
- ptr -= double_blocks;
- bh = GET_INODE_PTR(EXT2_TIND_BLOCK);
- bh = GET_INDIRECT_PTR(ptr >> (ptrs_bits * 2));
-get_double:
- bh = GET_INDIRECT_PTR((ptr >> ptrs_bits) & (ptrs - 1));
-get_indirect:
- bh = GET_INDIRECT_DATABLOCK(ptr & (ptrs - 1));
-
-#undef GET_INODE_DATABLOCK
-#undef GET_INODE_PTR
-#undef GET_INDIRECT_DATABLOCK
-#undef GET_INDIRECT_PTR
+ if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0)
+ goto changed;
-out:
- if (bh)
- BUG(); // temporary debugging check
+ left = (chain + depth) - partial;
+ err = ext2_alloc_branch(inode, left, goal,
+ offsets+(partial-chain), partial);
if (err)
- goto abort;
- if (!phys)
- BUG(); // must not happen either
-
- bh_result->b_dev = inode->i_dev;
- bh_result->b_blocknr = phys;
- bh_result->b_state |= (1UL << BH_Mapped); /* safe */
- if (new)
- bh_result->b_state |= (1UL << BH_New);
-abort:
- unlock_kernel();
- return err;
+ goto cleanup;
+
+ if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
+ goto changed;
-abort_negative:
- ext2_warning (inode->i_sb, "ext2_get_block", "block < 0");
- goto abort;
-
-abort_too_big:
- ext2_warning (inode->i_sb, "ext2_get_block", "block > big");
- goto abort;
+ bh_result->b_state |= (1UL << BH_New);
+ goto got_it;
+
+changed:
+ while (partial > chain) {
+ bforget(partial->bh);
+ partial--;
+ }
+ goto reread;
}
struct buffer_head * ext2_getblk(struct inode * inode, long block, int create, int * err)
@@ -749,7 +834,7 @@
inode->i_attr_flags = 0;
if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS;
- inode->i_flags |= MS_SYNCHRONOUS;
+ inode->i_flags |= S_SYNC;
}
if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) {
inode->i_attr_flags |= ATTR_FLAG_APPEND;
@@ -761,7 +846,7 @@
}
if (inode->u.ext2_i.i_flags & EXT2_NOATIME_FL) {
inode->i_attr_flags |= ATTR_FLAG_NOATIME;
- inode->i_flags |= MS_NOATIME;
+ inode->i_flags |= S_NOATIME;
}
return;
@@ -915,17 +1000,17 @@
flags = iattr->ia_attr_flags;
if (flags & ATTR_FLAG_SYNCRONOUS) {
- inode->i_flags |= MS_SYNCHRONOUS;
+ inode->i_flags |= S_SYNC;
inode->u.ext2_i.i_flags |= EXT2_SYNC_FL;
} else {
- inode->i_flags &= ~MS_SYNCHRONOUS;
+ inode->i_flags &= ~S_SYNC;
inode->u.ext2_i.i_flags &= ~EXT2_SYNC_FL;
}
if (flags & ATTR_FLAG_NOATIME) {
- inode->i_flags |= MS_NOATIME;
+ inode->i_flags |= S_NOATIME;
inode->u.ext2_i.i_flags |= EXT2_NOATIME_FL;
} else {
- inode->i_flags &= ~MS_NOATIME;
+ inode->i_flags &= ~S_NOATIME;
inode->u.ext2_i.i_flags &= ~EXT2_NOATIME_FL;
}
if (flags & ATTR_FLAG_APPEND) {
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)