/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

void blk_recalc_rq_sectors(struct request *rq, int nsect)
{
	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
		rq->hard_sector += nsect;
		rq->hard_nr_sectors -= nsect;

		/*
		 * Move the I/O submission pointers ahead if required.
		 */
		if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
		    (rq->sector <= rq->hard_sector)) {
			rq->sector = rq->hard_sector;
			rq->nr_sectors = rq->hard_nr_sectors;
			rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
			rq->current_nr_sectors = rq->hard_cur_sectors;
			rq->buffer = bio_data(rq->bio);
		}

		/*
		 * if total number of sectors is less than the first segment
		 * size, something has gone terribly wrong
		 */
		if (rq->nr_sectors < rq->current_nr_sectors) {
		printk(KERN_ERR "blk: request botched\n");
			rq->nr_sectors = rq->current_nr_sectors;
		}
	}
}

static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
					     struct bio *bio)
{
	unsigned int phys_size;
	struct bio_vec *bv, *bvprv = NULL;
	int cluster, i, high, highprv = 1;
	unsigned int seg_size, nr_phys_segs;
	struct bio *fbio, *bbio;

	if (!bio)
		return 0;

	fbio = bio;
	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
	seg_size = 0;
	phys_size = nr_phys_segs = 0;
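	/*
	 * Walk every biovec in the bio chain and count physical segments.
	 * With clustering enabled, adjacent biovecs are folded into one
	 * segment as long as the combined length stays within
	 * max_segment_size, the pages are physically contiguous and the
	 * queue's segment boundary mask is not crossed.
	 */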
	for_each_bio(bio) {
		bio_for_each_segment(bv, bio, i) {
			/*
			 * the trick here is making sure that a high page is
			 * never considered part of another segment, since that
			 * might change with the bounce page.
			 */
			high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
			if (high || highprv)
				goto new_segment;
			if (cluster) {
				if (seg_size + bv->bv_len > q->max_segment_size)
					goto new_segment;
				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
					goto new_segment;

				seg_size += bv->bv_len;
				bvprv = bv;
				continue;
			}
new_segment:
			if (nr_phys_segs == 1 && seg_size >
			    fbio->bi_seg_front_size)
				fbio->bi_seg_front_size = seg_size;

			nr_phys_segs++;
			bvprv = bv;
			seg_size = bv->bv_len;
			highprv = high;
		}
		bbio = bio;
	}

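	/*
	 * Record the size of the first and last physical segment so that
	 * request merging can later check whether the segments that meet
	 * at a merge boundary would exceed max_segment_size.
	 */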
	if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
		fbio->bi_seg_front_size = seg_size;
	if (seg_size > bbio->bi_seg_back_size)
		bbio->bi_seg_back_size = seg_size;

	return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
	struct bio *nxt = bio->bi_next;

	bio->bi_next = NULL;
	bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
	bio->bi_next = nxt;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);

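/*
 * Check whether the tail of @bio and the head of @nxt could be folded into
 * a single physical segment: clustering must be enabled, the combined
 * back/front segment size must fit in max_segment_size, and the two
 * biovecs at the boundary must be physically mergeable without crossing
 * the queue's segment boundary.
 */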
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
				   struct bio *nxt)
{
	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
		return 0;

	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
	    q->max_segment_size)
		return 0;

	if (!bio_has_data(bio))
		return 1;

	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
		return 0;

	/*
	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIO_SEG_BOUNDARY(q, bio, nxt))
		return 1;

	return 0;
}

/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct bio_vec *bvec, *bvprv;
	struct req_iterator iter;
	struct scatterlist *sg;
	int nsegs, cluster;

	nsegs = 0;
	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);

	/*
	 * for each bio in rq
	 */
	bvprv = NULL;
	sg = NULL;
	rq_for_each_segment(bvec, rq, iter) {
		int nbytes = bvec->bv_len;

		if (bvprv && cluster) {
			if (sg->length + nbytes > q->max_segment_size)
				goto new_segment;

			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
				goto new_segment;

			sg->length += nbytes;
		} else {
new_segment:
			if (!sg)
				sg = sglist;
			else {
				/*
				 * If the driver previously mapped a shorter
				 * list, we could see a termination bit
				 * prematurely unless it fully inits the sg
				 * table on each mapping. We KNOW that there
				 * must be more entries here or the driver
				 * would be buggy, so force clear the
				 * termination bit to avoid doing a full
				 * sg_init_table() in drivers for each command.
				 */
				sg->page_link &= ~0x02;
				sg = sg_next(sg);
			}

			sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
			nsegs++;
		}
		bvprv = bvec;
	} /* segments in rq */


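	/*
	 * For requests whose data was copied from user space, pad the last
	 * sg entry so the total transfer length is aligned to
	 * dma_pad_mask + 1, and account for the padding in extra_len.
	 */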
	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
	    (rq->data_len & q->dma_pad_mask)) {
		unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1;

		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

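	/*
	 * If the device needs a drain buffer for this request, append it
	 * as one more sg entry; for a write the drain buffer is zeroed
	 * first so no stale data is transferred.
	 */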
	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (rq->cmd_flags & REQ_RW)
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);

		sg->page_link &= ~0x02;
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	if (sg)
		sg_mark_end(sg);

	return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);

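/*
 * Check whether @bio's physical segments can be added to @req without
 * exceeding the queue's hw/phys segment limits.  If not, mark the request
 * unmergeable; otherwise update the request's segment count.
 */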
static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}

	/*
	 * This will form the start of a new hw segment.  Bump the
	 * physical segment count.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;
}

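/*
 * Check whether @bio can be appended to the back of @req: the merged
 * request must not exceed max_sectors (max_hw_sectors for pc requests)
 * and the new segment count must fit the queue limits.
 */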
int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	unsigned short max_sectors;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
	else
		max_sectors = q->max_sectors;

	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);

	return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	unsigned short max_sectors;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
	else
		max_sectors = q->max_sectors;


	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);

	return ll_new_hw_segment(q, req, bio);
}

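/*
 * Check whether @next can be appended to @req: neither request may be
 * re-queued, the combined size must stay within max_sectors, and the
 * total number of physical segments (minus one if the requests are
 * physically contiguous at the boundary) must stay within the queue's
 * segment limits.
 */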
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

	/*
	 * First check if either of the requests has been re-queued;
	 * we can't merge them if so.
	 */
	if (req->special || next->special)
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors)
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
		if (req->nr_phys_segments == 1)
			req->bio->bi_seg_front_size = seg_size;
		if (next->nr_phys_segments == 1)
			next->biotail->bi_seg_back_size = seg_size;
		total_phys_segments--;
	}

	if (total_phys_segments > q->max_phys_segments)
		return 0;

	if (total_phys_segments > q->max_hw_segments)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}

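/*
 * Called when a request is dissolved into another by a merge: drop it
 * from the per-partition in-flight count and update the time-in-queue
 * statistics.
 */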
static void blk_account_io_merge(struct request *req)
{
	if (blk_do_io_stat(req)) {
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));

		part_round_stats(cpu, part);
		part_dec_in_flight(part);

		part_stat_unlock();
	}
}

/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
			  struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	/*
	 * not contiguous
	 */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return 0;

	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || next->special)
		return 0;

	if (blk_integrity_rq(req) != blk_integrity_rq(next))
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * At this point we have either done a back merge
	 * or front merge. We need the smaller start_time of
	 * the merged requests to be the current request
	 * for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;

	elv_merge_requests(q, req, next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge(next);

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
	if (blk_rq_cpu_valid(next))
		req->cpu = next->cpu;

	/* ownership of bio passed from next to req */
	next->bio = NULL;
	__blk_put_request(q, next);
	return 1;
}

int attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return 0;
}