[PATCH] Mostly 4K IO for RAW (BUG FIX)

Badari Pulavarty (pbadari@us.ibm.com)
Wed, 16 Jan 2002 11:19:07 -0800 (PST)


I found a few problems with my RAW VARY patch and have fixed them.

1) My patch was broken for architectures where PAGE_SIZE is not 4K.

In brw_kiovec() I had the following:

iosize = RAWIO_BLOCKSIZE - offset;

where RAWIO_BLOCKSIZE is defined as 4K.

Now it does the following:

Issue 512-byte buffer heads until the offset is aligned to PAGE_SIZE
(offset == 0), then start issuing 4K buffer heads. This approach
generates more buffer heads, but it is safe on all architectures.

2) I further restricted my patch to work only with raw devices whose
corresponding block device is not mounted. If the block device is
mounted, RAW VARY is not done; the block size of the filesystem is
used for IO (as is currently the case).

Here is the latest patch for 2.4.17.

Thanks,
Badari

diff -Nur -X dontdiff linux/drivers/block/ll_rw_blk.c linux.new/drivers/block/ll_rw_blk.c
--- linux/drivers/block/ll_rw_blk.c Mon Oct 29 12:11:17 2001
+++ linux.new/drivers/block/ll_rw_blk.c Wed Jan 16 17:55:28 2002
@@ -118,6 +118,13 @@
int * max_sectors[MAX_BLKDEV];

/*
+ * blkdev_varyio indicates if variable size IO can be done on a device.
+ *
+ * Currently used for doing variable size IO on RAW devices.
+ */
+char * blkdev_varyio[MAX_BLKDEV];
+
+/*
* How many reqeusts do we allocate per queue,
* and how many do we "batch" on freeing them?
*/
@@ -902,6 +909,38 @@
*/
bh->b_rdev = bh->b_dev;
bh->b_rsector = bh->b_blocknr * count;
+
+ generic_make_request(rw, bh);
+
+ switch (rw) {
+ case WRITE:
+ kstat.pgpgout += count;
+ break;
+ default:
+ kstat.pgpgin += count;
+ break;
+ }
+}
+
+/*
+ * submit_bh_blknr() - same as submit_bh() except that b_rsector is
+ * set to b_blocknr. Used for RAW VARY.
+ */
+void submit_bh_blknr(int rw, struct buffer_head * bh)
+{
+ int count = bh->b_size >> 9;
+
+ if (!test_bit(BH_Lock, &bh->b_state))
+ BUG();
+
+ set_bit(BH_Req, &bh->b_state);
+
+ /*
+ * First step, 'identity mapping' - RAID or LVM might
+ * further remap this.
+ */
+ bh->b_rdev = bh->b_dev;
+ bh->b_rsector = bh->b_blocknr;

generic_make_request(rw, bh);

diff -Nur -X dontdiff linux/drivers/char/raw.c linux.new/drivers/char/raw.c
--- linux/drivers/char/raw.c Sat Sep 22 20:35:43 2001
+++ linux.new/drivers/char/raw.c Wed Jan 16 17:55:28 2002
@@ -23,6 +23,7 @@
struct block_device *binding;
int inuse, sector_size, sector_bits;
struct semaphore mutex;
+ int can_do_vary;
} raw_device_data_t;

static raw_device_data_t raw_devices[256];
@@ -117,6 +118,8 @@
if (raw_devices[minor].inuse++)
goto out;

+ raw_devices[minor].can_do_vary =
+ get_blkdev_varyio(MAJOR(rdev), MINOR(rdev));
/*
* Don't interfere with mounted devices: we cannot safely set
* the blocksize on a device which is already mounted.
@@ -126,6 +129,7 @@
if (is_mounted(rdev)) {
if (blksize_size[MAJOR(rdev)])
sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)];
+ raw_devices[minor].can_do_vary = 0;
} else {
if (hardsect_size[MAJOR(rdev)])
sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)];
@@ -133,6 +137,7 @@

set_blocksize(rdev, sector_size);
raw_devices[minor].sector_size = sector_size;
+ filp->f_iobuf->dovary = raw_devices[minor].can_do_vary;

for (sector_bits = 0; !(sector_size & 1); )
sector_size>>=1, sector_bits++;
@@ -301,6 +306,7 @@
if (err)
goto out;
new_iobuf = 1;
+ iobuf->dovary = raw_devices[minor].can_do_vary;
}

dev = to_kdev_t(raw_devices[minor].binding->bd_dev);
diff -Nur -X dontdiff linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux.new/drivers/scsi/aic7xxx/aic7xxx_linux_host.h
--- linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Thu Oct 25 13:53:49 2001
+++ linux.new/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Wed Jan 16 17:55:28 2002
@@ -89,7 +89,8 @@
present: 0, /* number of 7xxx's present */\
unchecked_isa_dma: 0, /* no memory DMA restrictions */\
use_clustering: ENABLE_CLUSTERING, \
- use_new_eh_code: 1 \
+ use_new_eh_code: 1, \
+ can_do_varyio: 1 \
}

#endif /* _AIC7XXX_LINUX_HOST_H_ */
diff -Nur -X dontdiff linux/drivers/scsi/hosts.h linux.new/drivers/scsi/hosts.h
--- linux/drivers/scsi/hosts.h Thu Nov 22 11:49:15 2001
+++ linux.new/drivers/scsi/hosts.h Wed Jan 16 17:55:28 2002
@@ -292,6 +292,11 @@
unsigned emulated:1;

/*
+ * True for drivers which can handle variable length IO
+ */
+ unsigned can_do_varyio:1;
+
+ /*
* Name of proc directory
*/
char *proc_name;
diff -Nur -X dontdiff linux/drivers/scsi/qlogicisp.h linux.new/drivers/scsi/qlogicisp.h
--- linux/drivers/scsi/qlogicisp.h Fri Nov 12 04:40:46 1999
+++ linux.new/drivers/scsi/qlogicisp.h Wed Jan 16 17:55:28 2002
@@ -84,7 +84,8 @@
cmd_per_lun: 1, \
present: 0, \
unchecked_isa_dma: 0, \
- use_clustering: DISABLE_CLUSTERING \
+ use_clustering: DISABLE_CLUSTERING, \
+ can_do_varyio: 1 \
}

#endif /* _QLOGICISP_H */
diff -Nur -X dontdiff linux/drivers/scsi/sd.c linux.new/drivers/scsi/sd.c
--- linux/drivers/scsi/sd.c Fri Nov 9 14:05:06 2001
+++ linux.new/drivers/scsi/sd.c Wed Jan 16 17:55:28 2002
@@ -91,6 +91,7 @@
static int *sd_blocksizes;
static int *sd_hardsizes; /* Hardware sector size */
static int *sd_max_sectors;
+static char *sd_varyio;

static int check_scsidisk_media_change(kdev_t);
static int fop_revalidate_scsidisk(kdev_t);
@@ -1110,6 +1111,12 @@
if (!sd_max_sectors)
goto cleanup_max_sectors;

+ sd_varyio = kmalloc((sd_template.dev_max << 4), GFP_ATOMIC);
+ if (!sd_varyio)
+ goto cleanup_varyio;
+
+ memset(sd_varyio, 0, (sd_template.dev_max << 4));
+
for (i = 0; i < sd_template.dev_max << 4; i++) {
sd_blocksizes[i] = 1024;
sd_hardsizes[i] = 512;
@@ -1179,6 +1186,8 @@
cleanup_sd_gendisks:
kfree(sd);
cleanup_sd:
+ kfree(sd_varyio);
+cleanup_varyio:
kfree(sd_max_sectors);
cleanup_max_sectors:
kfree(sd_hardsizes);
@@ -1241,6 +1250,8 @@
return 1;
}

+#define SD_DISK_MAJOR(i) SD_MAJOR((i) >> 4)
+
static int sd_attach(Scsi_Device * SDp)
{
unsigned int devnum;
@@ -1274,6 +1285,14 @@
printk("Attached scsi %sdisk %s at scsi%d, channel %d, id %d, lun %d\n",
SDp->removable ? "removable " : "",
nbuff, SDp->host->host_no, SDp->channel, SDp->id, SDp->lun);
+
+ if (SDp->host->hostt->can_do_varyio) {
+ if (blkdev_varyio[SD_DISK_MAJOR(i)] == NULL) {
+ blkdev_varyio[SD_DISK_MAJOR(i)] =
+ sd_varyio + ((i / SCSI_DISKS_PER_MAJOR) >> 8);
+ }
+ memset(blkdev_varyio[SD_DISK_MAJOR(i)] + (devnum << 4), 1, 16);
+ }
return 0;
}

@@ -1399,6 +1418,7 @@
kfree(sd_sizes);
kfree(sd_blocksizes);
kfree(sd_hardsizes);
+ kfree(sd_varyio);
kfree((char *) sd);
}
for (i = 0; i < N_USED_SD_MAJORS; i++) {
diff -Nur -X dontdiff linux/fs/buffer.c linux.new/fs/buffer.c
--- linux/fs/buffer.c Wed Jan 16 17:59:42 2002
+++ linux.new/fs/buffer.c Wed Jan 16 17:55:28 2002
@@ -2071,11 +2071,11 @@
err = 0;

for (i = nr; --i >= 0; ) {
- iosize += size;
tmp = bh[i];
if (buffer_locked(tmp)) {
wait_on_buffer(tmp);
}
+ iosize += tmp->b_size;

if (!buffer_uptodate(tmp)) {
/* We are traversing bh'es in reverse order so
@@ -2118,6 +2118,7 @@
struct kiobuf * iobuf = NULL;
struct page * map;
struct buffer_head *tmp, **bhs = NULL;
+ int iosize = size;

if (!nr)
return 0;
@@ -2154,7 +2155,7 @@
}

while (length > 0) {
- blocknr = b[bufind++];
+ blocknr = b[bufind];
if (blocknr == -1UL) {
if (rw == READ) {
/* there was an hole in the filesystem */
@@ -2167,9 +2168,15 @@
} else
BUG();
}
+ if (iobuf->dovary && (offset == 0)) {
+ iosize = RAWIO_BLOCKSIZE;
+ if (iosize > length)
+ iosize = length;
+ }
+ bufind += (iosize/size);
tmp = bhs[bhind++];

- tmp->b_size = size;
+ tmp->b_size = iosize;
set_bh_page(tmp, map, offset);
tmp->b_this_page = tmp;

@@ -2185,7 +2192,10 @@
set_bit(BH_Uptodate, &tmp->b_state);

atomic_inc(&iobuf->io_count);
- submit_bh(rw, tmp);
+ if (iobuf->dovary)
+ submit_bh_blknr(rw, tmp);
+ else
+ submit_bh(rw, tmp);
/*
* Wait for IO if we have got too much
*/
@@ -2200,8 +2210,8 @@
}

skip_block:
- length -= size;
- offset += size;
+ length -= iosize;
+ offset += iosize;

if (offset >= PAGE_SIZE) {
offset = 0;
diff -Nur -X dontdiff linux/include/linux/blkdev.h linux.new/include/linux/blkdev.h
--- linux/include/linux/blkdev.h Mon Nov 26 05:29:17 2001
+++ linux.new/include/linux/blkdev.h Wed Jan 16 17:55:29 2002
@@ -175,6 +175,8 @@

extern int * max_segments[MAX_BLKDEV];

+extern char * blkdev_varyio[MAX_BLKDEV];
+
#define MAX_SEGMENTS 128
#define MAX_SECTORS 255

@@ -228,4 +230,12 @@
return retval;
}

+static inline int get_blkdev_varyio(int major, int minor)
+{
+ int retval = 0;
+ if (blkdev_varyio[major]) {
+ retval = blkdev_varyio[major][minor];
+ }
+ return retval;
+}
#endif
diff -Nur -X dontdiff linux/include/linux/fs.h linux.new/include/linux/fs.h
--- linux/include/linux/fs.h Wed Jan 16 17:59:42 2002
+++ linux.new/include/linux/fs.h Wed Jan 16 17:55:29 2002
@@ -1350,6 +1350,7 @@
extern struct buffer_head * getblk(kdev_t, int, int);
extern void ll_rw_block(int, int, struct buffer_head * bh[]);
extern void submit_bh(int, struct buffer_head *);
+extern void submit_bh_blknr(int, struct buffer_head *);
extern int is_read_only(kdev_t);
extern void __brelse(struct buffer_head *);
static inline void brelse(struct buffer_head *buf)
diff -Nur -X dontdiff linux/include/linux/iobuf.h linux.new/include/linux/iobuf.h
--- linux/include/linux/iobuf.h Thu Nov 22 11:46:26 2001
+++ linux.new/include/linux/iobuf.h Wed Jan 16 17:55:29 2002
@@ -28,6 +28,8 @@
#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)

+#define RAWIO_BLOCKSIZE 4096
+
/* The main kiobuf struct used for all our IO! */

struct kiobuf
@@ -44,7 +46,8 @@

struct page ** maplist;

- unsigned int locked : 1; /* If set, pages has been locked */
+ unsigned int locked : 1, /* If set, pages has been locked */
+ dovary : 1; /* If set, do variable size IO */

/* Always embed enough struct pages for atomic IO */
struct page * map_array[KIO_STATIC_PAGES];
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/