[PATCH] Mostly PAGE_SIZE IO for RAW for -ac kernel

Badari Pulavarty (pbadari@us.ibm.com)
Tue, 15 Jan 2002 17:02:50 -0800 (PST)


Alan,

Could you please consider this patch for your next -ac kernel?
My 2.4.17 version of the patch applied cleanly on 2.4.18-pre3-ac2.

If you want me to make the patch available for another -ac kernel,
let me know. I will be happy to assist you.

Regards,
Badari

>
>
>
> On Mon, 14 Jan 2002, Badari Pulavarty wrote:
>
> >
> > Here is the final version of the patch for doing mostly 4K size IO
> > for RAW (against 2.4.17). This version incorporates all of the code
> > review comments from Andrea. Thanks to Andrea.
> >
> > Marcelo, would you please consider this patch for 2.4.18-pre4?
> > Please let me know if you want me to make the patch for 2.4.18-pre3.
>
> I want to make sure that the drivers able to do 4K IO really can do so
> reliably.
>
> I think we can test that on -ac kernels.
>
>

diff -Nur -X dontdiff linux/drivers/block/ll_rw_blk.c linux-2417newvary/drivers/block/ll_rw_blk.c
--- linux/drivers/block/ll_rw_blk.c Mon Oct 29 12:11:17 2001
+++ linux-2417newvary/drivers/block/ll_rw_blk.c Mon Jan 14 20:30:01 2002
@@ -118,6 +118,13 @@
int * max_sectors[MAX_BLKDEV];

/*
+ * blkdev_varyio indicates if variable size IO can be done on a device.
+ *
+ * Currently used for doing variable size IO on RAW devices.
+ */
+char * blkdev_varyio[MAX_BLKDEV];
+
+/*
* How many reqeusts do we allocate per queue,
* and how many do we "batch" on freeing them?
*/
@@ -902,6 +909,38 @@
*/
bh->b_rdev = bh->b_dev;
bh->b_rsector = bh->b_blocknr * count;
+
+ generic_make_request(rw, bh);
+
+ switch (rw) {
+ case WRITE:
+ kstat.pgpgout += count;
+ break;
+ default:
+ kstat.pgpgin += count;
+ break;
+ }
+}
+
+/*
+ * submit_bh_blknr() - same as submit_bh() except that b_rsector is set to
+ * b_blocknr directly instead of b_blocknr * count. Used for RAW VARY.
+ */
+void submit_bh_blknr(int rw, struct buffer_head * bh)
+{
+ int count = bh->b_size >> 9;
+
+ if (!test_bit(BH_Lock, &bh->b_state))
+ BUG();
+
+ set_bit(BH_Req, &bh->b_state);
+
+ /*
+ * First step, 'identity mapping' - RAID or LVM might
+ * further remap this.
+ */
+ bh->b_rdev = bh->b_dev;
+ bh->b_rsector = bh->b_blocknr;

generic_make_request(rw, bh);

diff -Nur -X dontdiff linux/drivers/char/raw.c linux-2417newvary/drivers/char/raw.c
--- linux/drivers/char/raw.c Sat Sep 22 20:35:43 2001
+++ linux-2417newvary/drivers/char/raw.c Mon Jan 14 17:58:42 2002
@@ -23,6 +23,7 @@
struct block_device *binding;
int inuse, sector_size, sector_bits;
struct semaphore mutex;
+ int can_do_vary;
} raw_device_data_t;

static raw_device_data_t raw_devices[256];
@@ -137,6 +138,7 @@
for (sector_bits = 0; !(sector_size & 1); )
sector_size>>=1, sector_bits++;
raw_devices[minor].sector_bits = sector_bits;
+ filp->f_iobuf->dovary = raw_devices[minor].can_do_vary;

out:
up(&raw_devices[minor].mutex);
@@ -225,6 +227,8 @@
bdput(raw_devices[minor].binding);
raw_devices[minor].binding =
bdget(kdev_t_to_nr(MKDEV(rq.block_major, rq.block_minor)));
+ raw_devices[minor].can_do_vary =
+ get_blkdev_varyio(rq.block_major, rq.block_minor);
up(&raw_devices[minor].mutex);
} else {
struct block_device *bdev;
@@ -301,6 +305,7 @@
if (err)
goto out;
new_iobuf = 1;
+ iobuf->dovary = raw_devices[minor].can_do_vary;
}

dev = to_kdev_t(raw_devices[minor].binding->bd_dev);
diff -Nur -X dontdiff linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux-2417newvary/drivers/scsi/aic7xxx/aic7xxx_linux_host.h
--- linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Thu Oct 25 13:53:49 2001
+++ linux-2417newvary/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Fri Jan 11 21:39:18 2002
@@ -89,7 +89,8 @@
present: 0, /* number of 7xxx's present */\
unchecked_isa_dma: 0, /* no memory DMA restrictions */\
use_clustering: ENABLE_CLUSTERING, \
- use_new_eh_code: 1 \
+ use_new_eh_code: 1, \
+ can_do_varyio: 1 \
}

#endif /* _AIC7XXX_LINUX_HOST_H_ */
diff -Nur -X dontdiff linux/drivers/scsi/hosts.h linux-2417newvary/drivers/scsi/hosts.h
--- linux/drivers/scsi/hosts.h Thu Nov 22 11:49:15 2001
+++ linux-2417newvary/drivers/scsi/hosts.h Mon Jan 14 20:27:43 2002
@@ -292,6 +292,11 @@
unsigned emulated:1;

/*
+ * True for drivers which can handle variable length IO
+ */
+ unsigned can_do_varyio:1;
+
+ /*
* Name of proc directory
*/
char *proc_name;
diff -Nur -X dontdiff linux/drivers/scsi/qlogicisp.h linux-2417newvary/drivers/scsi/qlogicisp.h
--- linux/drivers/scsi/qlogicisp.h Fri Nov 12 04:40:46 1999
+++ linux-2417newvary/drivers/scsi/qlogicisp.h Thu Jan 10 22:51:55 2002
@@ -84,7 +84,8 @@
cmd_per_lun: 1, \
present: 0, \
unchecked_isa_dma: 0, \
- use_clustering: DISABLE_CLUSTERING \
+ use_clustering: DISABLE_CLUSTERING, \
+ can_do_varyio: 1 \
}

#endif /* _QLOGICISP_H */
diff -Nur -X dontdiff linux/drivers/scsi/sd.c linux-2417newvary/drivers/scsi/sd.c
--- linux/drivers/scsi/sd.c Fri Nov 9 14:05:06 2001
+++ linux-2417newvary/drivers/scsi/sd.c Mon Jan 14 20:26:30 2002
@@ -91,6 +91,7 @@
static int *sd_blocksizes;
static int *sd_hardsizes; /* Hardware sector size */
static int *sd_max_sectors;
+static char *sd_varyio;

static int check_scsidisk_media_change(kdev_t);
static int fop_revalidate_scsidisk(kdev_t);
@@ -1110,6 +1111,12 @@
if (!sd_max_sectors)
goto cleanup_max_sectors;

+ sd_varyio = kmalloc((sd_template.dev_max << 4), GFP_ATOMIC);
+ if (!sd_varyio)
+ goto cleanup_varyio;
+
+ memset(sd_varyio, 0, (sd_template.dev_max << 4));
+
for (i = 0; i < sd_template.dev_max << 4; i++) {
sd_blocksizes[i] = 1024;
sd_hardsizes[i] = 512;
@@ -1179,6 +1186,8 @@
cleanup_sd_gendisks:
kfree(sd);
cleanup_sd:
+ kfree(sd_varyio);
+cleanup_varyio:
kfree(sd_max_sectors);
cleanup_max_sectors:
kfree(sd_hardsizes);
@@ -1241,6 +1250,8 @@
return 1;
}

+#define SD_DISK_MAJOR(i) SD_MAJOR((i) >> 4)
+
static int sd_attach(Scsi_Device * SDp)
{
unsigned int devnum;
@@ -1274,6 +1285,14 @@
printk("Attached scsi %sdisk %s at scsi%d, channel %d, id %d, lun %d\n",
SDp->removable ? "removable " : "",
nbuff, SDp->host->host_no, SDp->channel, SDp->id, SDp->lun);
+
+ if (SDp->host->hostt->can_do_varyio) {
+ if (blkdev_varyio[SD_DISK_MAJOR(i)] == NULL) {
+ blkdev_varyio[SD_DISK_MAJOR(i)] =
+ sd_varyio + ((i / SCSI_DISKS_PER_MAJOR) >> 8);
+ }
+ memset(blkdev_varyio[SD_DISK_MAJOR(i)] + (devnum << 4), 1, 16);
+ }
return 0;
}

@@ -1399,6 +1418,7 @@
kfree(sd_sizes);
kfree(sd_blocksizes);
kfree(sd_hardsizes);
+ kfree(sd_varyio);
kfree((char *) sd);
}
for (i = 0; i < N_USED_SD_MAJORS; i++) {
diff -Nur -X dontdiff linux/fs/buffer.c linux-2417newvary/fs/buffer.c
--- linux/fs/buffer.c Fri Jan 11 18:11:37 2002
+++ linux-2417newvary/fs/buffer.c Mon Jan 14 20:23:20 2002
@@ -2071,11 +2071,11 @@
err = 0;

for (i = nr; --i >= 0; ) {
- iosize += size;
tmp = bh[i];
if (buffer_locked(tmp)) {
wait_on_buffer(tmp);
}
+ iosize += tmp->b_size;

if (!buffer_uptodate(tmp)) {
/* We are traversing bh'es in reverse order so
@@ -2118,6 +2118,7 @@
struct kiobuf * iobuf = NULL;
struct page * map;
struct buffer_head *tmp, **bhs = NULL;
+ int iosize = size;

if (!nr)
return 0;
@@ -2154,7 +2155,7 @@
}

while (length > 0) {
- blocknr = b[bufind++];
+ blocknr = b[bufind];
if (blocknr == -1UL) {
if (rw == READ) {
/* there was an hole in the filesystem */
@@ -2167,9 +2168,15 @@
} else
BUG();
}
+ if (iobuf->dovary) {
+ iosize = RAWIO_BLOCKSIZE - offset;
+ if (iosize > length)
+ iosize = length;
+ }
+ bufind += (iosize/size);
tmp = bhs[bhind++];

- tmp->b_size = size;
+ tmp->b_size = iosize;
set_bh_page(tmp, map, offset);
tmp->b_this_page = tmp;

@@ -2185,7 +2192,10 @@
set_bit(BH_Uptodate, &tmp->b_state);

atomic_inc(&iobuf->io_count);
- submit_bh(rw, tmp);
+ if (iobuf->dovary)
+ submit_bh_blknr(rw, tmp);
+ else
+ submit_bh(rw, tmp);
/*
* Wait for IO if we have got too much
*/
@@ -2200,8 +2210,8 @@
}

skip_block:
- length -= size;
- offset += size;
+ length -= iosize;
+ offset += iosize;

if (offset >= PAGE_SIZE) {
offset = 0;
diff -Nur -X dontdiff linux/include/linux/blkdev.h linux-2417newvary/include/linux/blkdev.h
--- linux/include/linux/blkdev.h Mon Nov 26 05:29:17 2001
+++ linux-2417newvary/include/linux/blkdev.h Mon Jan 14 20:26:02 2002
@@ -175,6 +175,8 @@

extern int * max_segments[MAX_BLKDEV];

+extern char * blkdev_varyio[MAX_BLKDEV];
+
#define MAX_SEGMENTS 128
#define MAX_SECTORS 255

@@ -228,4 +230,12 @@
return retval;
}

+/* Varyio flag for (major, minor); 0 if major is out of range or has no table. */
+static inline int get_blkdev_varyio(int major, int minor)
+{
+	/* raw.c passes rq.block_major/minor here: stay inside blkdev_varyio[MAX_BLKDEV] */
+	if (major < 0 || major >= MAX_BLKDEV || minor < 0 || !blkdev_varyio[major])
+		return 0;
+	return blkdev_varyio[major][minor];
+}
#endif
diff -Nur -X dontdiff linux/include/linux/fs.h linux-2417newvary/include/linux/fs.h
--- linux/include/linux/fs.h Fri Jan 11 18:11:38 2002
+++ linux-2417newvary/include/linux/fs.h Mon Jan 14 20:24:00 2002
@@ -1350,6 +1350,7 @@
extern struct buffer_head * getblk(kdev_t, int, int);
extern void ll_rw_block(int, int, struct buffer_head * bh[]);
extern void submit_bh(int, struct buffer_head *);
+extern void submit_bh_blknr(int, struct buffer_head *);
extern int is_read_only(kdev_t);
extern void __brelse(struct buffer_head *);
static inline void brelse(struct buffer_head *buf)
diff -Nur -X dontdiff linux/include/linux/iobuf.h linux-2417newvary/include/linux/iobuf.h
--- linux/include/linux/iobuf.h Thu Nov 22 11:46:26 2001
+++ linux-2417newvary/include/linux/iobuf.h Mon Jan 14 20:26:02 2002
@@ -28,6 +28,8 @@
#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)

+#define RAWIO_BLOCKSIZE 4096
+
/* The main kiobuf struct used for all our IO! */

struct kiobuf
@@ -44,7 +46,8 @@

struct page ** maplist;

- unsigned int locked : 1; /* If set, pages has been locked */
+ unsigned int locked : 1, /* If set, pages has been locked */
+ dovary : 1; /* If set, do variable size IO */

/* Always embed enough struct pages for atomic IO */
struct page * map_array[KIO_STATIC_PAGES];
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/