Re: Patch??: linux-2.5.20/fs/bio.c - ll_rw_kio could generate bio's bigger than queue could handle

Adam J. Richter (adam@yggdrasil.com)
Thu, 6 Jun 2002 16:26:02 -0700


Here is version 4 of my changes to fs/bio.c (ll_rw_kio) and
fs/mpage.c. Andrew, I believe it this accomodates all of your
requests. Please let me know if I missed anything. If it looks
good, I would apperciate your recommendation on how to proceed with
getting this into Linus's tree.

The one new behavior in this version is that bio_max_iovecs()
will treat q->max_sectors == 0 as meaning that there is no limit.
This should accomodate any drivers that set max_sectors == 0.

We might want to keep the max_sectors == 0 behavior anyhow.
It is useful to have a way for drivers to indicate that they
don't care about the total number of sectors, rather than require
drivers that have no inherent limit to pick something arbitrarily.

Adam J. Richter __ ______________ 575 Oroville Road
adam@yggdrasil.com \ / Milpitas, California 95035
+1 408 309-6081 | g g d r a s i l United States of America
"Free Software For The Rest Of Us."

--- linux-2.5.20/include/linux/bio.h 2002-06-02 18:44:52.000000000 -0700
+++ linux/include/linux/bio.h 2002-06-06 15:52:29.000000000 -0700
@@ -34,11 +34,8 @@
#define BIO_BUG_ON
#endif

-#define BIO_MAX_SECTORS 128
-#define BIO_MAX_SIZE (BIO_MAX_SECTORS << 9)
-
/*
* was unsigned short, but we might as well be ready for > 64kB I/O pages
*/
--- linux-2.5.20/include/linux/blkdev.h 2002-06-02 18:44:44.000000000 -0700
+++ linux/include/linux/blkdev.h 2002-06-06 15:52:50.000000000 -0700
@@ -418,5 +428,7 @@
page_cache_release(p.v);
}

+extern int bio_max_iovecs(request_queue_t *q, int *iovec_size);
+
#endif
--- linux-2.5.20/fs/bio.c 2002-06-02 18:44:40.000000000 -0700
+++ linux/fs/bio.c 2002-06-06 16:17:03.000000000 -0700
@@ -50,8 +50,6 @@
};
#undef BV

-#define BIO_MAX_PAGES (bvec_array[BIOVEC_NR_POOLS - 1].size)
-
static void *slab_pool_alloc(int gfp_mask, void *data)
{
return kmem_cache_alloc(data, gfp_mask);
@@ -316,6 +314,22 @@
return NULL;
}

+int bio_max_iovecs(request_queue_t *q, int *iovec_size)
+{
+ unsigned max_iovecs = min(q->max_phys_segments, q->max_hw_segments);
+
+ if (q->max_sectors != 0) {
+ unsigned int max_bytes = q->max_sectors << 9;
+ if (*iovec_size > max_bytes) {
+ *iovec_size = max_bytes;
+ return 1;
+ }
+ max_iovecs = min(max_iovecs, max_bytes / *iovec_size);
+ }
+
+ return max_iovecs;
+}
+
static void bio_end_io_kio(struct bio *bio)
{
struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
@@ -338,10 +352,11 @@
**/
void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t sector)
{
- int i, offset, size, err, map_i, total_nr_pages, nr_pages;
+ int offset, size, err, map_i, total_nr_pages, nr_bvecs;
struct bio_vec *bvec;
struct bio *bio;
kdev_t dev = to_kdev_t(bdev->bd_dev);
+ int bytes_per_iovec, max_bvecs;

err = 0;
if ((rw & WRITE) && is_read_only(dev)) {
@@ -367,17 +382,20 @@

map_i = 0;

+ bytes_per_iovec = PAGE_SIZE;
+ max_bvecs = bio_max_iovecs(bdev->bd_queue, &bytes_per_iovec);
+
next_chunk:
- nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - 9);
- if (nr_pages > total_nr_pages)
- nr_pages = total_nr_pages;
+ nr_bvecs = max_bvecs;
+ if (nr_bvecs > total_nr_pages)
+ nr_bvecs = total_nr_pages;

atomic_inc(&kio->io_count);

/*
* allocate bio and do initial setup
*/
- if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) {
+ if ((bio = bio_alloc(GFP_NOIO, nr_bvecs)) == NULL) {
err = -ENOMEM;
goto out;
}
@@ -389,15 +407,17 @@
bio->bi_private = kio;

bvec = bio->bi_io_vec;
- for (i = 0; i < nr_pages; i++, bvec++, map_i++) {
+ for (;total_nr_pages > 0; bvec++, nr_bvecs--) {
int nbytes = PAGE_SIZE - offset;

if (nbytes > size)
nbytes = size;
+ if (nbytes > bytes_per_iovec)
+ nbytes = bytes_per_iovec;

BUG_ON(kio->maplist[map_i] == NULL);

- if (bio->bi_size + nbytes > (BIO_MAX_SECTORS << 9))
+ if (nr_bvecs == 0)
goto queue_io;

bio->bi_vcnt++;
@@ -407,14 +427,14 @@
bvec->bv_len = nbytes;
bvec->bv_offset = offset;

- /*
- * kiobuf only has an offset into the first page
- */
- offset = 0;
+ offset = (offset + nbytes) & PAGE_MASK;
+ if (offset == 0) {
+ total_nr_pages--;
+ map_i++;
+ }

sector += nbytes >> 9;
size -= nbytes;
- total_nr_pages--;
kio->offset += nbytes;
}

--- linux-2.5.20/fs/mpage.c 2002-06-06 12:30:14.000000000 -0700
+++ linux/fs/mpage.c 2002-06-06 15:44:24.000000000 -0700
@@ -21,12 +21,6 @@
#include <linux/mpage.h>

/*
- * The largest-sized BIO which this code will assemble, in bytes. Set this
- * to PAGE_CACHE_SIZE if your drivers are broken.
- */
-#define MPAGE_BIO_MAX_SIZE BIO_MAX_SIZE
-
-/*
* I/O completion handler for multipage BIOs.
*
* The mpage code never puts partial pages into a BIO (except for end-of-file).
@@ -220,9 +214,13 @@
bio = mpage_bio_submit(READ, bio);

if (bio == NULL) {
- unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE;
+ int bvec_size = PAGE_SIZE;
+ unsigned nr_bvecs = bio_max_iovecs(bdev->bd_queue,&bvec_size);

- if (nr_bvecs > nr_pages)
+ if (bvec_size != PAGE_SIZE)
+ goto confused;
+
+ if (nr_bvecs > nr_pages)
nr_bvecs = nr_pages;
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
nr_bvecs, GFP_KERNEL);
@@ -422,7 +420,11 @@
bio = mpage_bio_submit(WRITE, bio);

if (bio == NULL) {
- unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE;
+ int bvec_size = PAGE_SIZE;
+ unsigned nr_bvecs = bio_max_iovecs(bdev->bd_queue,&bvec_size);
+
+ if (bvec_size != PAGE_SIZE)
+ goto confused;

bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
nr_bvecs, GFP_NOFS);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/