patch-2.4.0-prerelease linux/drivers/md/lvm-snap.c

diff -u --recursive --new-file v2.4.0-test12/linux/drivers/md/lvm-snap.c linux/drivers/md/lvm-snap.c
@@ -2,13 +2,14 @@
  * kernel/lvm-snap.c
  *
  * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *                    Heinz Mauelshagen, Sistina Software (persistent snapshots)
  *
  * LVM snapshot driver is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  * 
- * LVM driver is distributed in the hope that it will be useful,
+ * LVM snapshot driver is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
@@ -29,13 +30,27 @@
 #include <linux/lvm.h>
 
 
-static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.8final (15/02/2000)\n";
+static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9 snapshot code (13/11/2000)\n";
 
 extern const char *const lvm_name;
 extern int lvm_blocksizes[];
 
 void lvm_snapshot_release(lv_t *);
 
+uint lvm_pv_get_number(vg_t * vg, kdev_t rdev)
+{
+	uint p;
+
+	for ( p = 0; p < vg->pv_max; p++)
+	{
+		if ( vg->pv[p] == NULL) continue;
+		if ( vg->pv[p]->pv_dev == rdev) break;
+	}
+
+	return vg->pv[p]->pv_number;
+}
+
+
 #define hashfn(dev,block,mask,chunk_size) \
 	((HASHDEV(dev)^((block)/(chunk_size))) & (mask))
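
The hashfn macro above keys the snapshot exception hash table on the
pair (origin device, chunk index): the device number is XORed with the
block's chunk index and masked down to a power-of-two bucket count, so
every sector of a given chunk lands in the same bucket. A minimal
userspace sketch of the same bucketing arithmetic; the kdev_t and
HASHDEV stand-ins here are simplified assumptions, only the macro body
is taken from the driver:

#include <stdio.h>

/* Simplified stand-ins: in the driver, dev is a kdev_t and HASHDEV()
 * derives a hashable integer from it. */
typedef unsigned int kdev_t;
#define HASHDEV(dev) ((unsigned long)(dev))

/* Bucketing arithmetic as in the macro above. */
#define hashfn(dev,block,mask,chunk_size) \
	((HASHDEV(dev)^((block)/(chunk_size))) & (mask))

int main(void)
{
	kdev_t org_dev = 0x0801;	/* hypothetical device number */
	unsigned long chunk_size = 128;	/* sectors per snapshot chunk */
	unsigned long mask = 1024 - 1;	/* power-of-two bucket count - 1 */
	unsigned long sector;

	/* Sectors 0..127 share a bucket, 128..255 share the next, ... */
	for (sector = 0; sector < 512; sector += 64)
		printf("sector %3lu -> bucket %lu\n", sector,
		       hashfn(org_dev, sector, mask, chunk_size));
	return 0;
}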
 
@@ -72,9 +87,9 @@
 	return ret;
 }
 
-static inline void lvm_hash_link(lv_block_exception_t * exception,
-				 kdev_t org_dev, unsigned long org_start,
-				 lv_t * lv)
+inline void lvm_hash_link(lv_block_exception_t * exception,
+			  kdev_t org_dev, unsigned long org_start,
+			  lv_t * lv)
 {
 	struct list_head * hash_table = lv->lv_snapshot_hash_table;
 	unsigned long mask = lv->lv_snapshot_hash_mask;
@@ -97,7 +112,6 @@
 	pe_adjustment = (*org_sector-pe_off) % chunk_size;
 	__org_start = *org_sector - pe_adjustment;
 	__org_dev = *org_dev;
-
 	ret = 0;
 	exception = lvm_find_exception_table(__org_dev, __org_start, lv);
 	if (exception)
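
The pe_adjustment arithmetic above rounds the faulting sector down to
the start of its chunk before probing the exception table: exceptions
are keyed on chunk-aligned origin addresses, and chunks are laid out
from the physical extent offset pe_off rather than from sector 0. A
standalone sketch of that rounding, with assumed geometry:

#include <stdio.h>

/* Round a sector down to its chunk boundary as in
 * lvm_snapshot_remap_block(); pe_off is where the physical extent's
 * data begins, so chunks are aligned relative to it. */
static unsigned long chunk_align(unsigned long sector, unsigned long pe_off,
				 unsigned long chunk_size)
{
	unsigned long pe_adjustment = (sector - pe_off) % chunk_size;

	return sector - pe_adjustment;
}

int main(void)
{
	/* Assumed: PE data starts at sector 384, 128-sector chunks. */
	printf("%lu\n", chunk_align(384, 384, 128));	/* 384, aligned    */
	printf("%lu\n", chunk_align(500, 384, 128));	/* 384, same chunk */
	printf("%lu\n", chunk_align(515, 384, 128));	/* 512, next chunk */
	return 0;
}
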
@@ -109,7 +123,7 @@
 	return ret;
 }
 
-static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason)
+void lvm_drop_snapshot(lv_t * lv_snap, const char * reason)
 {
 	kdev_t last_dev;
 	int i;
@@ -118,8 +132,7 @@
 	   or error on this snapshot --> release it */
 	invalidate_buffers(lv_snap->lv_dev);
 
-	last_dev = 0;
-	for (i = 0; i < lv_snap->lv_remap_ptr; i++) {
+	for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) {
 		if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) {
 			last_dev = lv_snap->lv_block_exception[i].rdev_new;
 			invalidate_buffers(last_dev);
@@ -149,7 +162,7 @@
 		blocks[i] = start++;
 }
 
-static inline int get_blksize(kdev_t dev)
+inline int lvm_get_blksize(kdev_t dev)
 {
 	int correct_size = BLOCK_SIZE, i, major;
 
@@ -185,6 +198,133 @@
 }
 #endif
 
+
+void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
+{
+	int	id = 0, is = lv_snap->lv_remap_ptr;
+	ulong	blksize_snap;
+	lv_COW_table_disk_t * lv_COW_table =
+	   ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
+
+	if (is == 0) return;
+	is--;
+	blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
+	is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
+
+	memset(lv_COW_table, 0, blksize_snap);
+	for ( ; is < lv_snap->lv_remap_ptr; is++, id++) {
+		/* store new COW_table entry */
+		lv_COW_table[id].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org));
+		lv_COW_table[id].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_org);
+		lv_COW_table[id].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new));
+		lv_COW_table[id].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_new);
+	}
+}
+
+
+/*
+ * writes a COW exception table sector to disk (HM)
+ *
+ */
+
+int lvm_write_COW_table_block(vg_t * vg,
+			      lv_t * lv_snap)
+{
+	int blksize_snap;
+	int end_of_table;
+	int idx = lv_snap->lv_remap_ptr, idx_COW_table;
+	int nr_pages_tmp;
+	int length_tmp;
+	ulong snap_pe_start, COW_table_sector_offset,
+	      COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
+	ulong blocks[1];
+	const char * reason;
+	kdev_t snap_phys_dev;
+	struct kiobuf * iobuf = lv_snap->lv_iobuf;
+	struct page * page_tmp;
+	lv_COW_table_disk_t * lv_COW_table =
+	   ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
+
+	idx--;
+
+	COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap);
+	COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
+
+	/* get physical address of destination chunk */
+	snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
+	snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
+
+	blksize_snap = lvm_get_blksize(snap_phys_dev);
+
+	COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
+	idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
+
+	if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
+
+	/* sector offset into the on disk COW table */
+	COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
+
+	/* COW table block to write next */
+	blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
+
+	/* store new COW_table entry */
+	lv_COW_table[idx_COW_table].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org));
+	lv_COW_table[idx_COW_table].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_org);
+	lv_COW_table[idx_COW_table].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, snap_phys_dev));
+	lv_COW_table[idx_COW_table].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_new);
+
+	length_tmp = iobuf->length;
+	iobuf->length = blksize_snap;
+	page_tmp = iobuf->maplist[0];
+	iobuf->maplist[0] = lv_snap->lv_COW_table_page;
+	nr_pages_tmp = iobuf->nr_pages;
+	iobuf->nr_pages = 1;
+
+	if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
+		       blocks, blksize_snap) != blksize_snap)
+		goto fail_raw_write;
+
+
+	/* initialization of next COW exception table block with zeroes */
+	end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
+	if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
+	{
+		/* don't go beyond the end */
+		if (idx + 1 >= lv_snap->lv_remap_end) goto good_out;
+
+		memset(lv_COW_table, 0, blksize_snap);
+
+		if (end_of_table)
+		{
+			idx++;
+			snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
+			snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
+			blksize_snap = lvm_get_blksize(snap_phys_dev);
+			blocks[0] = snap_pe_start >> (blksize_snap >> 10);
+		} else blocks[0]++;
+
+		if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
+			       blocks, blksize_snap) != blksize_snap)
+			goto fail_raw_write;
+	}
+
+
+ good_out:
+	iobuf->length = length_tmp;
+	iobuf->maplist[0] = page_tmp;
+	iobuf->nr_pages = nr_pages_tmp;
+	return 0;
+
+	/* slow path */
+ out:
+	lvm_drop_snapshot(lv_snap, reason);
+	return 1;
+
+ fail_raw_write:
+	reason = "write error";
+	goto out;
+}
+
 /*
  * copy on write handler for one snapshot logical volume
  *
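
Each exception that lvm_write_COW_table_block() persists above occupies
one lv_COW_table_disk_t slot of four 64-bit values (origin PV number
and sector, snapshot PV number and sector), converted to on-disk byte
order by LVM_TO_DISK64. The slot and sector arithmetic can be checked
in isolation; the sketch below mirrors it with an assumed block size
and PE geometry, and a same-shape stand-in for the kernel struct:

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE 512

/* Same shape as lv_COW_table_disk_t: 32 bytes per exception. */
typedef struct {
	uint64_t pv_org_number;
	uint64_t pv_org_rsector;
	uint64_t pv_snap_number;
	uint64_t pv_snap_rsector;
} cow_entry_t;

int main(void)
{
	/* Assumed geometry, for illustration only. */
	unsigned long blksize_snap = 1024;	/* snapshot block size */
	unsigned long COW_entries_per_pe = 512;	/* from PE/chunk sizes */
	unsigned long idx = 1234;		/* current exception */

	/* Slot of this exception within the block-sized table buffer,
	 * and the sector offset of that block in the on-disk table,
	 * as computed in lvm_write_COW_table_block(). */
	unsigned long entries_per_block = blksize_snap / sizeof(cow_entry_t);
	unsigned long idx_COW_table =
		idx % COW_entries_per_pe % entries_per_block;
	unsigned long sector_offset =
		(idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(cow_entry_t));

	printf("entry %lu -> slot %lu, sector offset %lu\n",
	       idx, idx_COW_table, sector_offset);
	return 0;
}
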
@@ -200,9 +340,8 @@
 		     lv_t * lv_snap)
 {
 	const char * reason;
-	unsigned long org_start, snap_start, virt_start, pe_off;
+	unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
 	int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
-	kdev_t snap_phys_dev;
 	struct kiobuf * iobuf;
 	unsigned long blocks[KIO_MAX_SECTORS];
 	int blksize_snap, blksize_org, min_blksize, max_blksize;
@@ -238,8 +377,8 @@
 
 	iobuf = lv_snap->lv_iobuf;
 
-	blksize_org = get_blksize(org_phys_dev);
-	blksize_snap = get_blksize(snap_phys_dev);
+	blksize_org = lvm_get_blksize(org_phys_dev);
+	blksize_snap = lvm_get_blksize(snap_phys_dev);
 	max_blksize = max(blksize_org, blksize_snap);
 	min_blksize = min(blksize_org, blksize_snap);
 	max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
@@ -268,7 +407,7 @@
 	}
 
 #ifdef DEBUG_SNAPSHOT
-	/* invalidate the logcial snapshot buffer cache */
+	/* invalidate the logical snapshot buffer cache */
 	invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size,
 			      lv_snap->lv_dev);
 #endif
@@ -277,15 +416,20 @@
 	   so update the exception table */
 	lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev;
 	lv_snap->lv_block_exception[idx].rsector_org = org_start;
+
 	lvm_hash_link(lv_snap->lv_block_exception + idx,
 		      org_phys_dev, org_start, lv_snap);
 	lv_snap->lv_remap_ptr = idx + 1;
-	return 1;
+	if (lv_snap->lv_snapshot_use_rate > 0) {
+		if (lv_snap->lv_remap_ptr * 100 / lv_snap->lv_remap_end >= lv_snap->lv_snapshot_use_rate)
+			wake_up_interruptible(&lv_snap->lv_snapshot_wait);
+	}
+	return 0;
 
 	/* slow path */
  out:
 	lvm_drop_snapshot(lv_snap, reason);
-	return -1;
+	return 1;
 
  fail_out_of_space:
 	reason = "out of space";
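
The lv_snapshot_use_rate check added above wakes any process sleeping
on lv_snapshot_wait once the snapshot is filled past a configured
percentage of its exception table; the comparison is plain integer
arithmetic on lv_remap_ptr and lv_remap_end. A minimal sketch of the
threshold test, with assumed table sizes:

#include <stdio.h>

/* Mirrors the test in lvm_snapshot_COW(): nonzero once the table is
 * use_rate percent full; a rate of zero disables the wakeup. */
static int past_use_rate(unsigned long remap_ptr, unsigned long remap_end,
			 int use_rate)
{
	if (use_rate <= 0)
		return 0;
	return remap_ptr * 100 / remap_end >= (unsigned long) use_rate;
}

int main(void)
{
	/* Assumed: 5000-entry exception table, 80% threshold. */
	printf("%d\n", past_use_rate(3999, 5000, 80));	/* 0: 79% full */
	printf("%d\n", past_use_rate(4000, 5000, 80));	/* 1: 80% full */
	return 0;
}
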
@@ -301,7 +445,7 @@
 	goto out;
 }
 
-static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
+int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
 {
 	int bytes, nr_pages, err, i;
 
@@ -312,33 +456,17 @@
 		goto out;
 
 	err = -ENOMEM;
-	iobuf->locked = 1;
+	iobuf->locked = 0;
 	iobuf->nr_pages = 0;
 	for (i = 0; i < nr_pages; i++)
 	{
 		struct page * page;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27)
 		page = alloc_page(GFP_KERNEL);
 		if (!page)
 			goto out;
-#else
-		{
-			unsigned long addr = __get_free_page(GFP_USER);
-			if (!addr)
-				goto out;
-			iobuf->pagelist[i] = addr;
-			page = virt_to_page(addr);
-		}
-#endif
 
 		iobuf->maplist[i] = page;
-		/* the only point to lock the page here is to be allowed
-		   to share unmap_kiobuf() in the fail-path */
-#ifndef LockPage
-#define LockPage(map) set_bit(PG_locked, &(map)->flags)
-#endif
-		LockPage(page);
 		iobuf->nr_pages++;
 	}
 	iobuf->offset = 0;
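
lvm_snapshot_alloc_iobuf_pages() now simply fills the kiobuf with one
alloc_page(GFP_KERNEL) page per maplist slot and no longer locks the
pages (iobuf->locked is cleared above). The page count comes from
rounding a 512-byte sector count up to whole pages; a userspace sketch
of that rounding, assuming a 4 KiB page and plain-C arithmetic in place
of the kernel's PAGE_MASK expression:

#include <stdio.h>

#define SECTOR_SHIFT	9	/* 512-byte sectors */
#define PAGE_SIZE	4096UL	/* assumed page size */

/* Pages needed to map a given number of sectors, rounding up. */
static unsigned long pages_for_sectors(unsigned long sectors)
{
	unsigned long bytes = sectors << SECTOR_SHIFT;

	return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	printf("%lu\n", pages_for_sectors(1));	/* 1: 512 B needs a page */
	printf("%lu\n", pages_for_sectors(8));	/* 1: exactly 4 KiB */
	printf("%lu\n", pages_for_sectors(9));	/* 2: spills into a 2nd */
	return 0;
}
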
@@ -360,7 +488,7 @@
 	return mem;
 }
 
-static int lvm_snapshot_alloc_hash_table(lv_t * lv)
+int lvm_snapshot_alloc_hash_table(lv_t * lv)
 {
 	int err;
 	unsigned long buckets, max_buckets, size;
@@ -380,6 +508,7 @@
 
 	if (!hash)
 		goto out;
+	lv->lv_snapshot_hash_table_size = size;
 
 	lv->lv_snapshot_hash_mask = buckets-1;
 	while (buckets--)
@@ -407,12 +536,20 @@
 	err = lvm_snapshot_alloc_hash_table(lv_snap);
 	if (err)
 		goto out_free_kiovec;
+
+
+	lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL);
+	if (!lv_snap->lv_COW_table_page)
+		goto out_free_kiovec;
+
  out:
 	return err;
 
  out_free_kiovec:
 	unmap_kiobuf(lv_snap->lv_iobuf);
 	free_kiovec(1, &lv_snap->lv_iobuf);
+	vfree(lv_snap->lv_snapshot_hash_table);
+	lv_snap->lv_snapshot_hash_table = NULL;
 	goto out;
 }
 
@@ -427,10 +564,17 @@
 	{
 		vfree(lv->lv_snapshot_hash_table);
 		lv->lv_snapshot_hash_table = NULL;
+		lv->lv_snapshot_hash_table_size = 0;
 	}
 	if (lv->lv_iobuf)
 	{
+		unmap_kiobuf(lv->lv_iobuf);
 		free_kiovec(1, &lv->lv_iobuf);
 		lv->lv_iobuf = NULL;
+	}
+	if (lv->lv_COW_table_page)
+	{
+		free_page((ulong)lv->lv_COW_table_page);
+		lv->lv_COW_table_page = NULL;
 	}
 }
