patch-2.4.10 linux/fs/ntfs/super.c
Next file: linux/fs/ntfs/super.h
Previous file: linux/fs/ntfs/struct.h
Back to the patch index
Back to the overall index
-  Lines: 1358
-  Date:
Sat Sep  8 12:24:40 2001
-  Orig file: 
v2.4.9/linux/fs/ntfs/super.c
-  Orig date: 
Wed Jul 25 17:10:24 2001
diff -u --recursive --new-file v2.4.9/linux/fs/ntfs/super.c linux/fs/ntfs/super.c
@@ -7,16 +7,18 @@
  * Copyright (C) 2000-2001 Anton Altparmakov (AIA)
  */
 
+#include <linux/ntfs_fs.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
 #include "ntfstypes.h"
 #include "struct.h"
 #include "super.h"
-
-#include <linux/errno.h>
-#include <linux/bitops.h>
 #include "macros.h"
 #include "inode.h"
 #include "support.h"
 #include "util.h"
+#include <linux/smp_lock.h>
 
 /* All important structures in NTFS use 2 consistency checks:
  * . a magic structure identifier (FILE, INDX, RSTR, RCRD...)
@@ -24,14 +26,18 @@
  *   structure's record should end with the word at offset <n> of the first
  *   sector, and if it is the case, must be replaced with the words following
  *   <n>. The value of <n> and the number of fixups is taken from the fields
- *   at the offsets 4 and 6.
+ *   at the offsets 4 and 6. Note that the sector size is defined as
+ *   NTFS_SECTOR_SIZE and not as the hardware sector size (this is concordant
+ *   with what the Windows NTFS driver does).
  *
- * This function perform these 2 checks, and _fails_ if :
+ * This function performs these 2 checks, and _fails_ if:
+ * . the input size is invalid
+ * . the fixup header is invalid
+ * . the size does not match the number of sectors
  * . the magic identifier is wrong
- * . the size is given and does not match the number of sectors
  * . a fixup is invalid
  */
-int ntfs_fixup_record(ntfs_volume *vol, char *record, char *magic, int size)
+int ntfs_fixup_record(char *record, char *magic, int size)
 {
 	int start, count, offset;
 	ntfs_u16 fixup;
@@ -39,19 +45,24 @@
 	if (!IS_MAGIC(record, magic))
 		return 0;
 	start = NTFS_GETU16(record + 4);
-	count = NTFS_GETU16(record + 6);
-	count--;
-	if(size && vol->sector_size * count != size)
+	count = NTFS_GETU16(record + 6) - 1;
+	if (size & (NTFS_SECTOR_SIZE - 1) || start & 1 ||
+			start + count * 2 > size || size >> 9 != count) {
+		if (size <= 0)
+			printk(KERN_ERR "NTFS: BUG: ntfs_fixup_record() got "
+					"zero size! Please report this to "
+					"linux-ntfs-dev@lists.sf.net\n");
 		return 0;
+	}
 	fixup = NTFS_GETU16(record + start);
 	start += 2;
-	offset = vol->sector_size - 2;
-	while (count--){
+	offset = NTFS_SECTOR_SIZE - 2;
+	while (count--) {
 		if (NTFS_GETU16(record + offset) != fixup)
 			return 0;
 		NTFS_PUTU16(record + offset, NTFS_GETU16(record + start));
 		start += 2;
-		offset += vol->sector_size;
+		offset += NTFS_SECTOR_SIZE;
 	}
 	return 1;
 }
@@ -63,8 +74,10 @@
 int ntfs_init_volume(ntfs_volume *vol, char *boot)
 {
 	int sectors_per_cluster_bits;
+	__s64 ll;
+	ntfs_cluster_t mft_zone_size, tc;
 
-	/* Historical default values, in case we don't load $AttrDef. */
+	/* System defined default values, in case we don't load $AttrDef. */
 	vol->at_standard_information = 0x10;
 	vol->at_attribute_list = 0x20;
 	vol->at_file_name = 0x30;
@@ -77,7 +90,7 @@
 	vol->at_index_allocation = 0xA0;
 	vol->at_bitmap = 0xB0;
 	vol->at_symlink = 0xC0;
-	/* Sector size */
+	/* Sector size. */
 	vol->sector_size = NTFS_GETU16(boot + 0xB);
 	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->sector_size = 0x%x\n",
 				vol->sector_size);
@@ -127,22 +140,87 @@
 		vol->index_record_size = 1 << -vol->index_clusters_per_record;
 	vol->index_record_size_bits = ffs(vol->index_record_size) - 1;
 	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->index_record_size = "
-				"0x%x\n", vol->index_record_size); 
+				"0x%x\n", vol->index_record_size);
 	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->index_record_size_bits "
-				"= 0x%x\n", vol->index_record_size_bits); 
+				"= 0x%x\n", vol->index_record_size_bits);
 	/*
-	 * Check mft and mftmirr locations for 64-bit-ness. NOTE: This is a
-	 * crude check only as many other things could be out of bounds later
-	 * on, but it will catch at least some of the cases, since the mftmirr
-	 * is located in the middle of the volume so if the volume is very
-	 * large the mftmirr probably will be out of 32-bit bounds.
+	 * Get the size of the volume in clusters (ofs 0x28 is nr_sectors) and
+	 * check for 64-bit-ness. Windows currently only uses 32 bits to save
+	 * the clusters so we do the same as it is much faster on 32-bit CPUs.
 	 */
-	vol->mft_lcn = NTFS_GETS64(boot + 0x30);
-	if (vol->mft_lcn >= (__s64)1 << 31 ||
-	    NTFS_GETS64(boot + 0x38) >= (__s64)1 << 31) {
-		ntfs_error("Cannot handle 64-bit clusters yet.\n");
+	ll = NTFS_GETS64(boot + 0x28) >> sectors_per_cluster_bits;
+	if (ll >= (__s64)1 << 31) {
+		ntfs_error("Cannot handle 64-bit clusters. Please inform "
+				"linux-ntfs-dev@lists.sf.net that you got this "
+				"error.\n");
 		return -1;
 	}
+	vol->nr_clusters = (ntfs_cluster_t)ll;
+	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->nr_clusters = 0x%x\n",
+			vol->nr_clusters);
+	vol->mft_lcn = (ntfs_cluster_t)NTFS_GETS64(boot + 0x30);
+	vol->mft_mirr_lcn = (ntfs_cluster_t)NTFS_GETS64(boot + 0x38);
+	/* Determine MFT zone size. */
+	mft_zone_size = vol->nr_clusters;
+	switch (vol->mft_zone_multiplier) {  /* % of volume size in clusters */
+	case 4:
+		mft_zone_size >>= 1;			/* 50%   */
+		break;
+	case 3:
+		mft_zone_size = mft_zone_size * 3 >> 3;	/* 37.5% */
+		break;
+	case 2:
+		mft_zone_size >>= 2;			/* 25%   */
+		break;
+	/* case 1: */
+	default:
+		mft_zone_size >>= 3;			/* 12.5% */
+		break;
+	}
+	/* Setup mft zone. */
+	vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn;
+	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_zone_pos = %x\n",
+			vol->mft_zone_pos);
+	/*
+	 * Calculate the mft_lcn for an unmodified NTFS volume (see mkntfs
+	 * source) and if the actual mft_lcn is in the expected place or even
+	 * further to the front of the volume, extend the mft_zone to cover the
+	 * beginning of the volume as well. This is in order to protect the
+	 * area reserved for the mft bitmap as well within the mft_zone itself.
+	 * On non-standard volumes we don't protect it as well as the overhead
+	 * would be higher than the speed increase we would get by doing it.
+	 */
+	tc = (8192 + 2 * vol->cluster_size - 1) / vol->cluster_size;
+	if (tc * vol->cluster_size < 16 * 1024)
+		tc = (16 * 1024 + vol->cluster_size - 1) / vol->cluster_size;
+	if (vol->mft_zone_start <= tc)
+		vol->mft_zone_start = (ntfs_cluster_t)0;
+	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_zone_start = %x\n",
+			vol->mft_zone_start);
+	/*
+	 * Need to cap the mft zone on non-standard volumes so that it does
+	 * not point outside the boundaries of the volume, we do this by
+	 * halving the zone size until we are inside the volume.
+	 */
+	vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
+	while (vol->mft_zone_end >= vol->nr_clusters) {
+		mft_zone_size >>= 1;
+		vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
+	}
+	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_zone_end = %x\n",
+			vol->mft_zone_end);
+	/*
+	 * Set the current position within each data zone to the start of the
+	 * respective zone.
+	 */
+	vol->data1_zone_pos = vol->mft_zone_end;
+	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->data1_zone_pos = %x\n",
+			vol->data1_zone_pos);
+	vol->data2_zone_pos = (ntfs_cluster_t)0;
+	ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->data2_zone_pos = %x\n",
+			vol->data2_zone_pos);
+	/* Set the mft data allocation position to mft record 24. */
+	vol->mft_data_pos = 24UL;
 	/* This will be initialized later. */
 	vol->upcase = 0;
 	vol->upcase_length = 0;
@@ -304,29 +382,28 @@
 	error = -ENOMEM;
 	ntfs_debug(DEBUG_BSD, "Going to load MFT\n");
 	if (!vol->mft_ino || (error = ntfs_init_inode(vol->mft_ino, vol,
-						      FILE_$Mft)))
-	{
+			FILE_Mft))) {
 		ntfs_error("Problem loading MFT\n");
 		return error;
 	}
 	ntfs_debug(DEBUG_BSD, "Going to load MIRR\n");
-	if ((error = ntfs_init_inode(vol->mftmirr, vol, FILE_$MftMirr))) {
+	if ((error = ntfs_init_inode(vol->mftmirr, vol, FILE_MftMirr))) {
 		ntfs_error("Problem %d loading MFTMirr\n", error);
 		return error;
 	}
 	ntfs_debug(DEBUG_BSD, "Going to load BITMAP\n");
-	if ((error = ntfs_init_inode(vol->bitmap, vol, FILE_$BitMap))) {
+	if ((error = ntfs_init_inode(vol->bitmap, vol, FILE_BitMap))) {
 		ntfs_error("Problem loading Bitmap\n");
 		return error;
 	}
 	ntfs_debug(DEBUG_BSD, "Going to load UPCASE\n");
-	error = ntfs_init_inode(&upcase, vol, FILE_$UpCase);
+	error = ntfs_init_inode(&upcase, vol, FILE_UpCase);
 	if (error)
 		return error;
 	ntfs_init_upcase(&upcase);
 	ntfs_clear_inode(&upcase);
 	ntfs_debug(DEBUG_BSD, "Going to load ATTRDEF\n");
-	error = ntfs_init_inode(&attrdef, vol, FILE_$AttrDef);
+	error = ntfs_init_inode(&attrdef, vol, FILE_AttrDef);
 	if (error)
 		return error;
 	error = ntfs_init_attrdef(&attrdef);
@@ -337,7 +414,7 @@
 	/* Check for NTFS version and if Win2k version (ie. 3.0+) do not allow
 	 * write access since the driver write support is broken. */
 	ntfs_debug(DEBUG_BSD, "Going to load VOLUME\n");
-	error = ntfs_init_inode(&volume, vol, FILE_$Volume);
+	error = ntfs_init_inode(&volume, vol, FILE_Volume);
 	if (error)
 		return error;
 	if ((error = ntfs_get_version(&volume)) >= 0x0300 &&
@@ -349,8 +426,8 @@
 	ntfs_clear_inode(&volume);
 	if (error < 0)
 		return error;
-	ntfs_debug(DEBUG_BSD, "NTFS volume is version %d.%d\n", error >> 8,
-								error & 0xff);
+	ntfs_debug(DEBUG_BSD, "NTFS volume is v%d.%d\n", error >> 8,
+			error & 0xff);
 	return 0;
 }
 
@@ -443,254 +520,894 @@
 	return clusters;
 }
 
-/* Insert the fixups for the record. The number and location of the fixes
- * is obtained from the record header */
-void ntfs_insert_fixups(unsigned char *rec, int secsize)
+/*
+ * Insert the fixups for the record. The number and location of the fixes
+ * is obtained from the record header but we double check with @rec_size and
+ * use that as the upper boundary, if necessary overwriting the count value in
+ * the record header.
+ *
+ * We return 0 on success or -1 if fixup header indicated the beginning of the
+ * update sequence array to be beyond the valid limit.
+ */
+int ntfs_insert_fixups(unsigned char *rec, int rec_size)
 {
-	int first = NTFS_GETU16(rec + 4);
-	int count = NTFS_GETU16(rec + 6);
+	int first;
+	int count;
 	int offset = -2;
-	ntfs_u16 fix = NTFS_GETU16(rec + first);
+	ntfs_u16 fix;
 	
-	fix++;
+	first = NTFS_GETU16(rec + 4);
+	count = (rec_size >> NTFS_SECTOR_BITS) + 1;
+	if (first + count * 2 > NTFS_SECTOR_SIZE - 2) {
+		printk(KERN_CRIT "NTFS: ntfs_insert_fixups() detected corrupt "
+				"NTFS record update sequence array position. - "
+				"Cannot hotfix.\n");
+		return -1;
+	}
+	if (count != NTFS_GETU16(rec + 6)) {
+		printk(KERN_ERR "NTFS: ntfs_insert_fixups() detected corrupt "
+				"NTFS record update sequence array size. - "
+				"Applying hotfix.\n");
+		NTFS_PUTU16(rec + 6, count);
+	}
+	fix = (NTFS_GETU16(rec + first) + 1) & 0xffff;
 	if (fix == 0xffff || !fix)
 		fix = 1;
 	NTFS_PUTU16(rec + first, fix);
 	count--;
 	while (count--) {
 		first += 2;
-		offset += secsize;
+		offset += NTFS_SECTOR_SIZE;
 		NTFS_PUTU16(rec + first, NTFS_GETU16(rec + offset));
 		NTFS_PUTU16(rec + offset, fix);
-	};
+	}
+	return 0;
 }
 
-/* Search the bitmap bits of l bytes for *cnt zero bits. Return the bit number
- * in *loc, which is initially set to the number of the first bit. Return the
- * largest block found in *cnt. Return 0 on success, -ENOSPC if all bits are
- * used. */
-static int search_bits(unsigned char* bits, ntfs_cluster_t *loc, int *cnt,int l)
-{
-	unsigned char c = 0;
-	int bc = 0;
-	int bstart = 0, bstop = 0, found = 0;
-	int start, stop = 0, in = 0;
-	/* special case searching for a single block */
-	if (*cnt == 1) {
-		while (l && *bits == 0xFF) {
-			bits++;
-			*loc += 8;
-			l--;
-		}
-		if (!l)
-			return -ENOSPC;
-		for (c = *bits; c & 1; c >>= 1)
-			(*loc)++;
-		return 0;
+/**
+ * ntfs_allocate_clusters - allocate logical clusters on an ntfs volume
+ * @vol:	volume on which to allocate clusters
+ * @location:	preferred location for first allocated cluster
+ * @count:	number of clusters to allocate
+ * @rl:		address of pointer in which to return the allocated run list
+ * @rl_len:	the number of elements returned in @*rl
+ *
+ * Allocate @*count clusters (LCNs), preferably beginning at @*location in the
+ * bitmap of the volume @vol. If @*location is -1, it does not matter where the
+ * clusters are. @rl is the address of a ntfs_runlist pointer which this
+ * function will allocate and fill with the runlist of the allocated clusters.
+ * It is the callers responsibility to ntfs_vfree() @*rl after she is finished
+ * with it. If the function was not successful, @*rl will be set to NULL.
+ * @*rl_len will contain the number of ntfs_runlist elements in @*rl or 0 if
+ * @*rl is NULL.
+ *
+ * Return 0 on success, or -errno on error. On success, @*location and @*count
+ * say what was really allocated. On -ENOSPC, @*location and @*count say what
+ * could have been allocated. If nothing could be allocated or a different
+ * error occured, @*location = -1 and @*count = 0.
+ *
+ * There are two data zones. First is the area between the end of the mft zone
+ * and the end of the volume, and second is the area between the start of the
+ * volume and the start of the mft zone. On unmodified/standard volumes, the
+ * second mft zone doesn't exist due to the mft zone being expanded to cover
+ * the start of volume in order to reserve space for the mft bitmap attribute.
+ *
+ * This is not the prettiest function but the complexity stems from the need of
+ * implementing the mft vs data zoned approach and from the fact that we have
+ * access to the lcn bitmap in portions of PAGE_SIZE bytes at a time, so we
+ * need to cope with crossing over boundaries of two pages. Further, the fact
+ * that the allocator allows for caller supplied hints as to the location of
+ * where allocation should begin and the fact that the allocator keeps track of
+ * where in the data zones the next natural allocation should occur, contribute
+ * to the complexity of the function. But it should all be worthwhile, because
+ * this allocator should: 1) be a full implementation of the MFT zone approach
+ * used by Windows, 2) cause reduction in fragmentation as much as possible,
+ * and 3) be speedy in allocations (the code is not optimized for speed, but
+ * the algorithm is, so further speed improvements are probably possible).
+ *
+ * FIXME: Really need finer-grained locking but this will do for the moment. I
+ * just want to kill all races and have a working allocator. When that is done,
+ * we can beautify... (AIA)
+ * 
+ * FIXME: We should be monitoring cluster allocation and increment the MFT zone
+ * size dynamically but this is something for the future. We will just cause
+ * heavier fragmentation by not doing it and I am not even sure Windows would
+ * grow the MFT zone dynamically, so might even be correct not doing this. The
+ * overhead in doing dynamic MFT zone expansion would be very large and unlikely
+ * worth the effort. (AIA)
+ *
+ * TODO: I have added in double the required zone position pointer wrap around
+ * logic which can be optimized to having only one of the two logic sets.
+ * However, having the double logic will work fine, but if we have only one of
+ * the sets and we get it wrong somewhere, then we get into trouble, so
+ * removing the duplicate logic requires _very_ careful consideration of _all_
+ * possible code paths. So at least for now, I am leaving the double logic -
+ * better safe than sorry... (AIA)
+ */
+int ntfs_allocate_clusters(ntfs_volume *vol, ntfs_cluster_t *location,
+		ntfs_cluster_t *count, ntfs_runlist **rl, int *rl_len,
+		const NTFS_CLUSTER_ALLOCATION_ZONES zone)
+{
+	ntfs_runlist *rl2 = NULL, *rlt;
+	ntfs_attribute *data;
+	ntfs_cluster_t buf_pos, zone_start, zone_end, mft_zone_size;
+	ntfs_cluster_t lcn, last_read_pos, prev_lcn = (ntfs_cluster_t)0;
+	ntfs_cluster_t initial_location, prev_run_len = (ntfs_cluster_t)0;
+	ntfs_cluster_t clusters = (ntfs_cluster_t)0;
+	unsigned char *buf, *byte, bit, search_zone, done_zones;
+	unsigned char pass, need_writeback;
+	int rlpos = 0, rlsize, buf_size, err = 0;
+	ntfs_io io;
+
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Entering with *location = "
+			"0x%x, *count = 0x%x, zone = %s_ZONE.\n", *location,
+			*count, zone == DATA_ZONE ? "DATA" : "MFT");
+	buf = (char*)__get_free_page(GFP_NOFS);
+	if (!buf) {
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Returning "
+				"-ENOMEM.\n");
+		return -ENOMEM;
+	}
+	io.fn_put = ntfs_put;
+	io.fn_get = ntfs_get;
+	lock_kernel();
+	/* Get the $DATA attribute of $Bitmap. */
+	data = ntfs_find_attr(vol->bitmap, vol->at_data, 0);
+	if (!data) {
+		err = -EINVAL;
+		goto err_ret;
+	}
+	/*
+	 * If no specific location was requested, use the current data zone
+	 * position, otherwise use the requested location but make sure it lies
+	 * outside the mft zone. Also set done_zones to 0 (no zones done) and
+	 * pass depending on whether we are starting inside a zone (1) or
+	 * at the beginning of a zone (2). If requesting from the MFT_ZONE, then
+	 * we either start at the current position within the mft zone or at the
+	 * specified position and if the latter is out of bounds then we start
+	 * at the beginning of the MFT_ZONE.
+	 */
+	done_zones = 0;
+	pass = 1;
+	/*
+	 * zone_start and zone_end are the current search range. search_zone
+	 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
+	 * volume) and 4 for data zone 2 (start of volume till start of mft
+	 * zone).
+	 */
+	zone_start = *location;
+	if (zone_start < 0) {
+		if (zone == DATA_ZONE)
+			zone_start = vol->data1_zone_pos;
+		else
+			zone_start = vol->mft_zone_pos;
+		if (!zone_start)
+			/*
+			 * Zone starts at beginning of volume which means a
+			 * single pass is sufficient.
+			 */
+			pass = 2;
+	} else if (zone_start >= vol->mft_zone_start && zone_start <
+			vol->mft_zone_end && zone == DATA_ZONE) {
+		zone_start = vol->mft_zone_end;
+		pass = 2;
+	} else if ((zone_start < vol->mft_zone_start || zone_start >=
+			vol->mft_zone_end) && zone == MFT_ZONE) {
+		zone_start = vol->mft_lcn;
+		if (!vol->mft_zone_end)
+			zone_start = (ntfs_cluster_t)0;
+		pass = 2;
+	}
+	if (zone == DATA_ZONE) {
+		/* Skip searching the mft zone. */
+		done_zones |= 1;
+		if (zone_start >= vol->mft_zone_end) {
+			zone_end = vol->nr_clusters;
+			search_zone = 2;
+		} else {
+			zone_end = vol->mft_zone_start;
+			search_zone = 4;
+		}
+	} else /* if (zone == MFT_ZONE) */ {
+		zone_end = vol->mft_zone_end;
+		search_zone = 1;
 	}
-	start = *loc;
-	while (l || bc) {
-		if (bc == 0) {
-			c = *bits;
-			if (l) {
-				l--;
-				bits++;
+	/*
+	 * buf_pos is the current bit position inside the bitmap. We use
+	 * initial_location to determine whether or not to do a zone switch.
+	 */
+	buf_pos = initial_location = zone_start;
+	/* Loop until all clusters are allocated, i.e. clusters == 0. */
+	clusters = *count;
+	rlpos = rlsize = 0;
+	if (*count <= 0) {
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): *count <= 0, "
+				"returning -EINVAL.\n");
+		err = -EINVAL;
+		goto err_ret;
+	}
+	while (1) {
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Start of outer while "
+				"loop: done_zones = 0x%x, search_zone = %i, "
+				"pass = %i, zone_start = 0x%x, zone_end = "
+				"0x%x, initial_location = 0x%x, buf_pos = "
+				"0x%x, rlpos = %i, rlsize = %i.\n",
+				done_zones, search_zone, pass, zone_start,
+				zone_end, initial_location, buf_pos, rlpos,
+				rlsize);
+		/* Loop until we run out of free clusters. */
+		io.param = buf;
+		io.size = PAGE_SIZE;
+		io.do_read = 1;
+		last_read_pos = buf_pos >> 3;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): last_read_pos = "
+				"0x%x.\n", last_read_pos);
+		err = ntfs_readwrite_attr(vol->bitmap, data, last_read_pos,
+				&io);
+		if (err) {
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+					"ntfs_read_attr failed with error "
+					"code %i, going to err_ret.\n", -err);
+			goto err_ret;
+		}
+		if (!io.size) {
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): !io.size, "
+					"going to zone_pass_done.\n");
+			goto zone_pass_done;
+		}
+		buf_size = io.size << 3;
+		lcn = buf_pos & 7;
+		buf_pos &= ~7;
+		need_writeback = 0;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Before inner while "
+				"loop: buf_size = 0x%x, lcn = 0x%x, buf_pos = "
+				"0x%x, need_writeback = %i.\n", buf_size, lcn,
+				buf_pos, need_writeback);
+		while (lcn < buf_size && lcn + buf_pos < zone_end) {
+			byte = buf + (lcn >> 3);
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): In inner "
+					"while loop: buf_size = 0x%x, lcn = "
+					"0x%x, buf_pos = 0x%x, need_writeback "
+					"= %i, byte ofs = 0x%x, *byte = "
+					"0x%x.\n", buf_size, lcn, buf_pos,
+					need_writeback, lcn >> 3, *byte);
+			/* Skip full bytes. */
+			if (*byte == 0xff) {
+				lcn += 8;
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"continuing while loop 1.\n");
+				continue;
+			}
+			bit = 1 << (lcn & 7);
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): bit = %i.\n",
+					bit);
+			/* If the bit is already set, go onto the next one. */
+			if (*byte & bit) {
+				lcn++;
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"continuing while loop 2.\n");
+				continue;
 			}
-			bc = 8;
+			/* Allocate the bitmap bit. */
+			*byte |= bit;
+			/* We need to write this bitmap buffer back to disk! */
+			need_writeback = 1;
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): *byte = "
+					"0x%x, need_writeback = %i.\n", *byte,
+					need_writeback);
+			/* Reallocate memory if necessary. */
+			if ((rlpos + 2) * sizeof(ntfs_runlist) >= rlsize) {
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Reallocating space.\n");
+				/* Setup first free bit return value. */
+				if (!rl2) {
+					*location = lcn + buf_pos;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): *location = "
+							"0x%x.\n", *location);
+				}
+				rlsize += PAGE_SIZE;
+				rlt = ntfs_vmalloc(rlsize);
+				if (!rlt) {
+					err = -ENOMEM;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Failed to "
+							"allocate memory, "
+							"returning -ENOMEM, "
+							"going to "
+							"wb_err_ret.\n");
+					goto wb_err_ret;
+				}
+				if (rl2) {
+					ntfs_memcpy(rlt, rl2, rlsize -
+							PAGE_SIZE);
+					ntfs_vfree(rl2);
+				}
+				rl2 = rlt;
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Reallocated memory, rlsize = "
+						"0x%x.\n", rlsize);
+			}
+			/*
+			 * Coalesce with previous run if adjacent LCNs.
+			 * Otherwise, append a new run.
+			 */
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Adding run "
+					"(lcn 0x%x, len 0x%x), prev_lcn = "
+					"0x%x, lcn = 0x%x, buf_pos = 0x%x, "
+					"prev_run_len = 0x%x, rlpos = %i.\n",
+					lcn + buf_pos, 1, prev_lcn, lcn,
+					buf_pos, prev_run_len, rlpos);
+			if (prev_lcn == lcn + buf_pos - prev_run_len && rlpos) {
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Coalescing to run (lcn 0x%x, "
+						"len 0x%x).\n",
+						rl2[rlpos - 1].lcn,
+						rl2[rlpos - 1].len);
+				rl2[rlpos - 1].len = ++prev_run_len;
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Run now (lcn 0x%x, len 0x%x), "
+						"prev_run_len = 0x%x.\n",
+						rl2[rlpos - 1].lcn,
+						rl2[rlpos - 1].len,
+						prev_run_len);
+			} else {
+				if (rlpos)
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Adding new run, "
+							"(previous run lcn "
+							"0x%x, len 0x%x).\n",
+							rl2[rlpos - 1].lcn,
+							rl2[rlpos - 1].len);
+				else
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Adding new run, "
+							"is first run.\n");
+				rl2[rlpos].lcn = prev_lcn = lcn + buf_pos;
+				rl2[rlpos].len = prev_run_len =
+						(ntfs_cluster_t)1;
+				
+				rlpos++;
+			}
+			/* Done? */
+			if (!--clusters) {
+				ntfs_cluster_t tc;
+				/*
+				 * Update the current zone position. Positions
+				 * of already scanned zones have been updated
+				 * during the respective zone switches.
+				 */
+				tc = lcn + buf_pos + 1;
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Done. Updating current zone "
+						"position, tc = 0x%x, "
+						"search_zone = %i.\n", tc,
+						search_zone);
+				switch (search_zone) {
+				case 1:
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Before checks, "
+							"vol->mft_zone_pos = "
+							"0x%x.\n",
+							vol->mft_zone_pos);
+					if (tc >= vol->mft_zone_end) {
+						vol->mft_zone_pos =
+								vol->mft_lcn;
+						if (!vol->mft_zone_end)
+							vol->mft_zone_pos =
+							     (ntfs_cluster_t)0;
+					} else if ((initial_location >=
+							vol->mft_zone_pos ||
+							tc > vol->mft_zone_pos)
+							&& tc >= vol->mft_lcn)
+						vol->mft_zone_pos = tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): After checks, "
+							"vol->mft_zone_pos = "
+							"0x%x.\n",
+							vol->mft_zone_pos);
+					break;
+				case 2:
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Before checks, "
+							"vol->data1_zone_pos = "
+							"0x%x.\n",
+							vol->data1_zone_pos);
+					if (tc >= vol->nr_clusters)
+						vol->data1_zone_pos =
+							     vol->mft_zone_end;
+					else if ((initial_location >=
+						    vol->data1_zone_pos ||
+						    tc > vol->data1_zone_pos)
+						    && tc >= vol->mft_zone_end)
+						vol->data1_zone_pos = tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): After checks, "
+							"vol->data1_zone_pos = "
+							"0x%x.\n",
+							vol->data1_zone_pos);
+					break;
+				case 4:
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Before checks, "
+							"vol->data2_zone_pos = "
+							"0x%x.\n",
+							vol->data2_zone_pos);
+					if (tc >= vol->mft_zone_start)
+						vol->data2_zone_pos =
+							(ntfs_cluster_t)0;
+					else if (initial_location >=
+						      vol->data2_zone_pos ||
+						      tc > vol->data2_zone_pos)
+						vol->data2_zone_pos = tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): After checks, "
+							"vol->data2_zone_pos = "
+							"0x%x.\n",
+							vol->data2_zone_pos);
+					break;
+				default:
+					BUG();
+				}
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Going to done_ret.\n");
+				goto done_ret;
+			}
+			lcn++;
 		}
-		if (in) {
-			if ((c & 1) == 0)
-				stop++;
-			else { /* end of sequence of zeroes */
-				in = 0;
-				if (!found || bstop-bstart < stop-start) {
-					bstop = stop; bstart = start;
-					found = 1;
-					if (bstop - bstart > *cnt)
-						break;
+		buf_pos += buf_size;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After inner while "
+				"loop: buf_size = 0x%x, lcn = 0x%x, buf_pos = "
+				"0x%x, need_writeback = %i.\n", buf_size, lcn,
+				buf_pos, need_writeback);
+		if (need_writeback) {
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Writing "
+					"back.\n");
+			need_writeback = 0;
+			io.param = buf;
+			io.do_read = 0;
+			err = ntfs_readwrite_attr(vol->bitmap, data,
+					last_read_pos, &io);
+			if (err) {
+				ntfs_error(__FUNCTION__ "(): Bitmap writeback "
+						"failed in read next buffer "
+						"code path with error code "
+						"%i.\n", -err);
+				goto err_ret;
+			}
+		}
+		if (buf_pos < zone_end) {
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Continuing "
+					"outer while loop, buf_pos = 0x%x, "
+					"zone_end = 0x%x.\n", buf_pos,
+					zone_end);
+			continue;
+		}
+zone_pass_done:	/* Finished with the current zone pass. */
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At zone_pass_done, "
+				"pass = %i.\n", pass);
+		if (pass == 1) {
+			/*
+			 * Now do pass 2, scanning the first part of the zone
+			 * we omitted in pass 1.
+			 */
+			pass = 2;
+			zone_end = zone_start;
+			switch (search_zone) {
+			case 1: /* mft_zone */
+				zone_start = vol->mft_zone_start;
+				break;
+			case 2: /* data1_zone */
+				zone_start = vol->mft_zone_end;
+				break;
+			case 4: /* data2_zone */
+				zone_start = (ntfs_cluster_t)0;
+				break;
+			default:
+				BUG();
+			}
+			/* Sanity check. */
+			if (zone_end < zone_start)
+				zone_end = zone_start;
+			buf_pos = zone_start;
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Continuing "
+					"outer while loop, pass = 2, "
+					"zone_start = 0x%x, zone_end = 0x%x, "
+					"buf_pos = 0x%x.\n");
+			continue;
+		} /* pass == 2 */
+done_zones_check:
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At done_zones_check, "
+				"search_zone = %i, done_zones before = 0x%x, "
+				"done_zones after = 0x%x.\n",
+				search_zone, done_zones, done_zones |
+				search_zone);
+		done_zones |= search_zone;
+		if (done_zones < 7) {
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Switching "
+					"zone.\n");
+			/* Now switch to the next zone we haven't done yet. */
+			pass = 1;
+			switch (search_zone) {
+			case 1:
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Switching from mft zone to "
+						"data1 zone.\n");
+				/* Update mft zone position. */
+				if (rlpos) {
+					ntfs_cluster_t tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Before checks, "
+							"vol->mft_zone_pos = "
+							"0x%x.\n",
+							vol->mft_zone_pos);
+					tc = rl2[rlpos - 1].lcn +
+							rl2[rlpos - 1].len;
+					if (tc >= vol->mft_zone_end) {
+						vol->mft_zone_pos =
+								vol->mft_lcn;
+						if (!vol->mft_zone_end)
+							vol->mft_zone_pos =
+							     (ntfs_cluster_t)0;
+					} else if ((initial_location >=
+							vol->mft_zone_pos ||
+							tc > vol->mft_zone_pos)
+							&& tc >= vol->mft_lcn)
+						vol->mft_zone_pos = tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): After checks, "
+							"vol->mft_zone_pos = "
+							"0x%x.\n",
+							vol->mft_zone_pos);
+				}
+				/* Switch from mft zone to data1 zone. */
+switch_to_data1_zone:		search_zone = 2;
+				zone_start = initial_location =
+						vol->data1_zone_pos;
+				zone_end = vol->nr_clusters;
+				if (zone_start == vol->mft_zone_end)
+					pass = 2;
+				if (zone_start >= zone_end) {
+					vol->data1_zone_pos = zone_start =
+							vol->mft_zone_end;
+					pass = 2;
 				}
-				start = stop + 1;
+				break;
+			case 2:
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Switching from data1 zone to "
+						"data2 zone.\n");
+				/* Update data1 zone position. */
+				if (rlpos) {
+					ntfs_cluster_t tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Before checks, "
+							"vol->data1_zone_pos = "
+							"0x%x.\n",
+							vol->data1_zone_pos);
+					tc = rl2[rlpos - 1].lcn +
+							rl2[rlpos - 1].len;
+					if (tc >= vol->nr_clusters)
+						vol->data1_zone_pos =
+							     vol->mft_zone_end;
+					else if ((initial_location >=
+						    vol->data1_zone_pos ||
+						    tc > vol->data1_zone_pos)
+						    && tc >= vol->mft_zone_end)
+						vol->data1_zone_pos = tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): After checks, "
+							"vol->data1_zone_pos = "
+							"0x%x.\n",
+							vol->data1_zone_pos);
+				}
+				/* Switch from data1 zone to data2 zone. */
+				search_zone = 4;
+				zone_start = initial_location =
+						vol->data2_zone_pos;
+				zone_end = vol->mft_zone_start;
+				if (!zone_start)
+					pass = 2;
+				if (zone_start >= zone_end) {
+					vol->data2_zone_pos = zone_start =
+							initial_location =
+							(ntfs_cluster_t)0;
+					pass = 2;
+				}
+				break;
+			case 4:
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Switching from data2 zone to "
+						"data1 zone.\n");
+				/* Update data2 zone position. */
+				if (rlpos) {
+					ntfs_cluster_t tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): Before checks, "
+							"vol->data2_zone_pos = "
+							"0x%x.\n",
+							vol->data2_zone_pos);
+					tc = rl2[rlpos - 1].lcn +
+							rl2[rlpos - 1].len;
+					if (tc >= vol->mft_zone_start)
+						vol->data2_zone_pos =
+							     (ntfs_cluster_t)0;
+					else if (initial_location >=
+						      vol->data2_zone_pos ||
+						      tc > vol->data2_zone_pos)
+						vol->data2_zone_pos = tc;
+					ntfs_debug(DEBUG_OTHER, __FUNCTION__
+							"(): After checks, "
+							"vol->data2_zone_pos = "
+							"0x%x.\n",
+							vol->data2_zone_pos);
+				}
+				/* Switch from data2 zone to data1 zone. */
+				goto switch_to_data1_zone; /* See above. */
+			default:
+				BUG();
 			}
-		} else {
-			if (c & 1)
-				start++;
-			else { /*start of sequence*/
-				in = 1;
-				stop = start + 1;
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After zone "
+					"switch, search_zone = %i, pass = %i, "
+					"initial_location = 0x%x, zone_start "
+					"= 0x%x, zone_end = 0x%x.\n",
+					search_zone, pass, initial_location,
+					zone_start, zone_end);
+			buf_pos = zone_start;
+			if (zone_start == zone_end) {
+				ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): "
+						"Empty zone, going to "
+						"done_zones_check.\n");
+				/* Empty zone. Don't bother searching it. */
+				goto done_zones_check;
 			}
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Continuing "
+					"outer while loop.\n");
+			continue;
+		} /* done_zones == 7 */
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): All zones are "
+				"finished.\n");
+		/*
+		 * All zones are finished! If DATA_ZONE, shrink mft zone. If
+		 * MFT_ZONE, we have really run out of space.
+		 */
+		mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): vol->mft_zone_start "
+				"= 0x%x, vol->mft_zone_end = 0x%x, "
+				"mft_zone_size = 0x%x.\n", vol->mft_zone_start,
+				vol->mft_zone_end, mft_zone_size);
+		if (zone == MFT_ZONE || mft_zone_size <= (ntfs_cluster_t)0) {
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): No free "
+					"clusters left, returning -ENOSPC, "
+					"going to fail_ret.\n");
+			/* Really no more space left on device. */
+			err = -ENOSPC;
+			goto fail_ret;
+		} /* zone == DATA_ZONE && mft_zone_size > 0 */
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Shrinking mft "
+				"zone.\n");
+		zone_end = vol->mft_zone_end;
+		mft_zone_size >>= 1;
+		if (mft_zone_size > (ntfs_cluster_t)0)
+			vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
+		else /* mft zone and data2 zone no longer exist. */
+			vol->data2_zone_pos = vol->mft_zone_start =
+					vol->mft_zone_end = (ntfs_cluster_t)0;
+		if (vol->mft_zone_pos >= vol->mft_zone_end) {
+			vol->mft_zone_pos = vol->mft_lcn;
+			if (!vol->mft_zone_end)
+				vol->mft_zone_pos = (ntfs_cluster_t)0;
+		}
+		buf_pos = zone_start = initial_location =
+				vol->data1_zone_pos = vol->mft_zone_end;
+		search_zone = 2;
+		pass = 2;
+		done_zones &= ~2;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After shrinking mft "
+				"zone, mft_zone_size = 0x%x, "
+				"vol->mft_zone_start = 0x%x, vol->mft_zone_end "
+				"= 0x%x, vol->mft_zone_pos = 0x%x, search_zone "
+				"= 2, pass = 2, dones_zones = 0x%x, zone_start "
+				"= 0x%x, zone_end = 0x%x, vol->data1_zone_pos "
+				"= 0x%x, continuing outer while loop.\n",
+				mft_zone_size, vol->mft_zone_start,
+				vol->mft_zone_end, vol->mft_zone_pos,
+				search_zone, pass, done_zones, zone_start,
+				zone_end, vol->data1_zone_pos);
+	}
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): After outer while loop.\n");
+done_ret:
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At done_ret.\n");
+	rl2[rlpos].lcn = (ntfs_cluster_t)-1;
+	rl2[rlpos].len = (ntfs_cluster_t)0;
+	*rl = rl2;
+	*rl_len = rlpos;
+	if (need_writeback) {
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Writing back.\n");
+		need_writeback = 0;
+		io.param = buf;
+		io.do_read = 0;
+		err = ntfs_readwrite_attr(vol->bitmap, data, last_read_pos,
+				&io);
+		if (err) {
+			ntfs_error(__FUNCTION__ "(): Bitmap writeback failed "
+					"in done code path with error code "
+					"%i.\n", -err);
+			goto err_ret;
 		}
-		bc--;
-		c >>= 1;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Wrote 0x%Lx bytes.\n",
+				io.size);
 	}
-	if (in && (!found || bstop - bstart < stop - start)) {
-		bstop = stop; bstart = start; found = 1;
+done_fail_ret:
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At done_fail_ret (follows "
+			"done_ret).\n");
+	unlock_kernel();
+	free_page((unsigned long)buf);
+	if (err)
+		ntfs_debug(DEBUG_FILE3, __FUNCTION__ "(): Failed to allocate "
+				"clusters. Returning with error code %i.\n",
+				-err);
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Syncing $Bitmap inode.\n");
+	if (ntfs_update_inode(vol->bitmap))
+		ntfs_error(__FUNCTION__ "(): Failed to sync inode $Bitmap. "
+				"Continuing anyway.\n");
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Returning with code %i.\n",
+			err);
+	return err;
+fail_ret:
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At fail_ret.\n");
+	if (rl2) {
+		if (err == -ENOSPC) {
+			/* Return first free lcn and count of free clusters. */
+			*location = rl2[0].lcn;
+			*count -= clusters;
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): err = "
+					"-ENOSPC, *location = 0x%x, *count = "
+					"0x%x.\n", *location, *count);
+		}
+		/* Deallocate all allocated clusters. */
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Deallocating "
+				"allocated clusters.\n");
+		ntfs_deallocate_clusters(vol, rl2, rlpos);
+		/* Free the runlist. */
+		ntfs_vfree(rl2);
+	} else {
+		if (err == -ENOSPC) {
+			/* Nothing free at all. */
+			*location = vol->data1_zone_pos; /* Irrelevant... */
+			*count = 0;
+			ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): No space "
+					"left at all, err = -ENOSPC, *location "
+					"= 0x%x, *count = 0.\n", *location);
+		}
 	}
-	if (!found)
-		return -ENOSPC;
-	*loc = bstart;
-	if (*cnt > bstop - bstart)
-		*cnt = bstop - bstart;
-	return 0;
+	*rl = NULL;
+	*rl_len = 0;
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): *rl = NULL, *rl_len = 0, "
+			"going to done_fail_ret.\n");
+	goto done_fail_ret;
+wb_err_ret:
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At wb_err_ret.\n");
+	if (need_writeback) {
+		int __err;
+		ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): Writing back.\n");
+		io.param = buf;
+		io.do_read = 0;
+		__err = ntfs_readwrite_attr(vol->bitmap, data, last_read_pos,
+				&io);
+		if (__err)
+			ntfs_error(__FUNCTION__ "(): Bitmap writeback failed "
+					"in error code path with error code "
+					"%i.\n", -__err);
+		need_writeback = 0;
+	}
+err_ret:
+	ntfs_debug(DEBUG_OTHER, __FUNCTION__ "(): At err_ret, *location = -1, "
+			"*count = 0, going to fail_ret.\n");
+	*location = -1;
+	*count = 0;
+	goto fail_ret;
 }
 
-int ntfs_set_bitrange(ntfs_inode* bitmap, ntfs_cluster_t loc, int cnt, int bit)
+/*
+ * IMPORTANT: Caller has to hold big kernel lock or the race monster will come
+ * to get you! (-;
+ * TODO: Need our own lock for bitmap accesses but BKL is more secure for now,
+ * considering we might not have covered all places with a lock yet. In that
+ * case the BKL offers a one way exclusion which is better than no exclusion
+ * at all... (AIA)
+ */
+static int ntfs_clear_bitrange(ntfs_inode *bitmap,
+		const ntfs_cluster_t start_bit, const ntfs_cluster_t count)
 {
-	int bsize, locit, error;
-	unsigned char *bits, *it;
+	ntfs_cluster_t buf_size, bit, nr_bits = count;
+	unsigned char *buf, *byte;
+	int err;
 	ntfs_io io;
 
 	io.fn_put = ntfs_put;
 	io.fn_get = ntfs_get;
-	bsize = (cnt + (loc & 7) + 7) >> 3; /* round up to multiple of 8*/
-	bits = ntfs_malloc(bsize);
-	if (!bits)
-		return -ENOMEM;
-	io.param = bits;
-	io.size = bsize;
-	error = ntfs_read_attr(bitmap, bitmap->vol->at_data, 0, loc >> 3, &io);
-	if (error || io.size != bsize){
-		ntfs_free(bits);
-		return error ? error : -EIO;
-	}
-	/* Now set the bits. */
-	it = bits;
-	locit = loc;
-	while (locit % 8 && cnt) { /* process first byte */
-		if (bit)
-			*it |= 1 << (locit % 8);
-		else
-			*it &= ~(1 << (locit % 8));
-		cnt--;
-		locit++;
-		if (locit % 8 == 0)
-			it++;
-	}
-	while (cnt > 8) { /*process full bytes */
-		*it = bit ? 0xFF : 0;
-		cnt -= 8;
-		locit += 8;
-		it++;
-	}
-	while (cnt) { /*process last byte */
-		if (bit)
-			*it |= 1 << (locit % 8);
-		else
-			*it &= ~(1 << (locit % 8));
-		cnt--;
-		locit++;
-	}
-	/* reset to start */
-	io.param = bits;
-	io.size = bsize;
-	error = ntfs_write_attr(bitmap, bitmap->vol->at_data, 0, loc >> 3, &io);
-	ntfs_free(bits);
-	if (error)
-		return error;
-	if (io.size != bsize)
-		return -EIO;
-	return 0;
-}
-	
-/* Allocate count clusters around location. If location is -1, it does not
- * matter where the clusters are. Result is 0 if success, in which case
- * location and count says what they really got. */
-int ntfs_search_bits(ntfs_inode* bitmap, ntfs_cluster_t *location, int *count,
-		     int flags)
-{
-	unsigned char *bits;
-	ntfs_io io;
-	int error = 0, found = 0;
-	int cnt, bloc = -1, bcnt = 0;
-	int start;
-	ntfs_cluster_t loc;
-
-	bits = ntfs_malloc(2048);
-	if (!bits)
+	/* Calculate the required buffer size in bytes. */
+	buf_size = (ntfs_cluster_t)((start_bit & 7) + nr_bits + 7) >> 3;
+	if (buf_size <= (ntfs_cluster_t)(64 * 1024))
+		buf = ntfs_malloc(buf_size);
+	else
+		buf = ntfs_vmalloc(buf_size);
+	if (!buf)
 		return -ENOMEM;
-	io.fn_put = ntfs_put;
-	io.fn_get = ntfs_get;
-	io.param = bits;
-
-	/* first search within +/- 8192 clusters */
-	start = *location >> 3;
-	start = start > 1024 ? start - 1024 : 0;
-	io.size = 2048;
-	error = ntfs_read_attr(bitmap, bitmap->vol->at_data, 0, start, &io);
-	if (error)
-		goto fail;
-	loc = start * 8;
-	cnt = *count;
-	error = search_bits(bits, &loc, &cnt, io.size);
-	if (error)
-		goto fail;
-	if (*count == cnt){
-		bloc = loc;
-		bcnt = cnt;
-		goto success;
-	}
-	/* Now search from the beginning. */
-	for (start = 0; 1; start += 2048) {
-		io.param = bits;
-		io.size = 2048;
-		error = ntfs_read_attr(bitmap, bitmap->vol->at_data, 0, start,
-									&io);
-		if (error)
-			goto fail;
-		if (io.size == 0) {
-			if(found)
-				goto success;
-			else {
-				error = -ENOSPC;
-				goto fail;
-			}
-		}
-		loc = start * 8;
-		cnt = *count;
-		error = search_bits(bits, &loc, &cnt, io.size);
-		if (error)
-			goto fail;
-		if (*count == cnt)
-			goto success;
-		if (bcnt < cnt) {
-			bcnt = cnt;
-			bloc = loc;
-			found = 1;
-		}
-	}
- success:
-	ntfs_free(bits);
-	/* check flags */
-	if ((flags & ALLOC_REQUIRE_LOCATION) && *location != bloc)
-		error = -ENOSPC;
-	else if ((flags & ALLOC_REQUIRE_SIZE) && *count != bcnt)
-		error = -ENOSPC;
+	/* Read the bitmap from the data attribute. */
+	io.param = byte = buf;
+	io.size = buf_size;
+	err = ntfs_read_attr(bitmap, bitmap->vol->at_data, 0, start_bit >> 3,
+			&io);
+	if (err || io.size != buf_size)
+		goto err_out;
+	/* Now clear the bits in the read bitmap. */
+	bit = start_bit & 7;
+	while (bit && nr_bits) { /* Process first partial byte, if present. */
+		*byte &= ~(1 << bit++);
+		nr_bits--;
+		bit &= 7;
+		if (!bit)
+			byte++;
+	}
+	while (nr_bits >= 8) { /* Process full bytes. */
+		*byte = 0;
+		nr_bits -= 8;
+		byte++;
+	}
+	bit = 0;
+	while (nr_bits) { /* Process last partial byte, if present. */
+		*byte &= ~(1 << bit);
+		nr_bits--;
+		bit++;
+	}
+	/* Write the modified bitmap back to disk. */
+	io.param = buf;
+	io.size = buf_size;
+	err = ntfs_write_attr(bitmap, bitmap->vol->at_data, 0, start_bit >> 3,
+			&io);
+err_out:
+	if (buf_size <= (ntfs_cluster_t)(64 * 1024))
+		ntfs_free(buf);
 	else
-		ntfs_set_bitrange(bitmap, bloc, bcnt, 1);
-	/* If allocation failed due to the flags, tell the caller what he could
-	 * have gotten */
-	*location = bloc;
-	*count = bcnt;
-	return 0;
- fail:
-	*location = -1;
-	*count = 0;
-	ntfs_free(bits);
-	return error;
+		ntfs_vfree(buf);
+	if (!err && io.size != buf_size)
+		err = -EIO;
+	return err;
 }
 
-int ntfs_allocate_clusters(ntfs_volume *vol, ntfs_cluster_t *location,
-			   int *count, int flags)
+/*
+ * See comments for lack of zone adjustments below in the description of the
+ * function ntfs_deallocate_clusters().
+ */
+int ntfs_deallocate_cluster_run(const ntfs_volume *vol,
+		const ntfs_cluster_t lcn, const ntfs_cluster_t len)
 {
-	int error;
-	error = ntfs_search_bits(vol->bitmap, location, count, flags);
-	return error;
+	int err;
+
+	lock_kernel();
+	err = ntfs_clear_bitrange(vol->bitmap, lcn, len);
+	unlock_kernel();
+	return err;
 }
 
-int ntfs_deallocate_clusters(ntfs_volume *vol, ntfs_cluster_t location,
-			     int count)
+/*
+ * This is inefficient, but logically trivial, so will do for now. Note, we
+ * do not touch the mft nor the data zones here because we want to minimize
+ * recycling of clusters to enhance the chances of data being undeleteable.
+ * Also we don't want the overhead. Instead we do one additional sweep of the
+ * current data zone during cluster allocation to check for freed clusters.
+ */
+int ntfs_deallocate_clusters(const ntfs_volume *vol, const ntfs_runlist *rl,
+		const int rl_len)
 {
-	int error;
-	error = ntfs_set_bitrange(vol->bitmap, location, count, 0);
-	return error;
+	int i, err;
+
+	lock_kernel();
+	for (i = err = 0; i < rl_len && !err; i++)
+		err = ntfs_clear_bitrange(vol->bitmap, rl[i].lcn, rl[i].len);
+	unlock_kernel();
+	return err;
 }
 
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)