/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 *	- July 2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - May 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
#include <trace/block.h>

#include "blk.h"

DEFINE_TRACE(block_plug);
DEFINE_TRACE(block_unplug_io);
DEFINE_TRACE(block_unplug_timer);
DEFINE_TRACE(block_getrq);
DEFINE_TRACE(block_sleeprq);
DEFINE_TRACE(block_rq_requeue);
DEFINE_TRACE(block_bio_backmerge);
DEFINE_TRACE(block_bio_frontmerge);
DEFINE_TRACE(block_bio_queue);
DEFINE_TRACE(block_rq_complete);
DEFINE_TRACE(block_remap);	/* Also used in drivers/md/dm.c */
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);

static int __make_request(struct request_queue *q, struct bio *bio);

/*
 * For the allocated request tables
 */
static struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

static void drive_stat_acct(struct request *rq, int new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

	if (!new_io)
		part_stat_inc(cpu, part, merges[rw]);
	else {
		part_round_stats(cpu, part);
		part_inc_in_flight(part);
	}

	part_stat_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}
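
/*
 * Worked example (illustrative, not part of blk-core.c): with the default
 * q->nr_requests of 128, the queue is marked congested once
 * 128 - 128/8 + 1 = 113 requests are allocated, and the congestion flag is
 * cleared again when the count drops below 128 - 128/8 - 128/16 - 1 = 103,
 * so the on/off thresholds have some hysteresis.
 */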

/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info
 *
 * Will return NULL if the request queue cannot be located.
 */
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct backing_dev_info *ret = NULL;
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		ret = &q->backing_dev_info;
	return ret;
}
EXPORT_SYMBOL(blk_get_backing_dev_info);
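
/*
 * Illustrative sketch (not part of blk-core.c): a submitter backing off
 * when the underlying queue is congested.  example_should_throttle() is
 * hypothetical; bdi_write_congested() is assumed to be available from
 * <linux/backing-dev.h>.
 */
static int example_should_throttle(struct block_device *bdev)
{
	struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);

	/* NULL means the request queue could not be located */
	if (!bdi)
		return 0;

	return bdi_write_congested(bdi);
}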

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->timeout_list);
	rq->cpu = -1;
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->cmd = rq->__cmd;
	rq->cmd_len = BLK_MAX_CDB;
	rq->tag = -1;
	rq->ref_count = 1;
	rq->start_time = jiffies;
}
EXPORT_SYMBOL(blk_rq_init);

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	struct request_queue *q = rq->q;

	if (&q->bar_rq != rq) {
		if (error)
			clear_bit(BIO_UPTODATE, &bio->bi_flags);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			error = -EIO;

		if (unlikely(nbytes > bio->bi_size)) {
			printk(KERN_ERR "%s: want %u bytes done, %u left\n",
			       __func__, nbytes, bio->bi_size);
			nbytes = bio->bi_size;
		}

		if (unlikely(rq->cmd_flags & REQ_QUIET))
			set_bit(BIO_QUIET, &bio->bi_flags);

		bio->bi_size -= nbytes;
		bio->bi_sector += (nbytes >> 9);

		if (bio_integrity(bio))
			bio_integrity_advance(bio, nbytes);

		if (bio->bi_size == 0)
			bio_endio(bio, error);
	} else {

		/*
		 * Okay, this is the barrier request in progress, just
		 * record the error;
		 */
		if (error && !q->orderr)
			q->orderr = error;
	}
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	int bit;

	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
		rq->cmd_flags);

	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

	if (blk_pc_request(rq)) {
		printk(KERN_INFO "  cdb: ");
		for (bit = 0; bit < BLK_MAX_CDB; bit++)
			printk("%02x ", rq->cmd[bit]);
		printk("\n");
	}
}
EXPORT_SYMBOL(blk_dump_rq_flags);

/*
 * "plug" the device if there are no outstanding requests: this will
 * force the transfer to start only after we have put all the requests
 * on the list.
 *
 * This is called with interrupts off and no requests on the queue and
 * with the queue lock held.
 */
void blk_plug_device(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	/*
	 * don't plug a stopped queue, it must be paired with blk_start_queue()
	 * which will restart the queueing
	 */
	if (blk_queue_stopped(q))
		return;

	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
		trace_block_plug(q);
	}
}
EXPORT_SYMBOL(blk_plug_device);

/**
 * blk_plug_device_unlocked - plug a device without queue lock held
 * @q:    The &struct request_queue to plug
 *
 * Description:
 *   Like @blk_plug_device(), but grabs the queue lock and disables
 *   interrupts.
 **/
void blk_plug_device_unlocked(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_plug_device(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_plug_device_unlocked);

/*
 * remove the queue from the plugged list, if present. called with
 * queue lock held and interrupts disabled.
 */
int blk_remove_plug(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
		return 0;

	del_timer(&q->unplug_timer);
	return 1;
}
EXPORT_SYMBOL(blk_remove_plug);

/*
 * remove the plug and let it rip..
 */
void __generic_unplug_device(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;
	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
		return;

	q->request_fn(q);
}

/**
 * generic_unplug_device - fire a request queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   Linux uses plugging to build bigger request queues before letting
 *   the device have at them. If a queue is plugged, the I/O scheduler
 *   is still adding and merging requests on the queue. Once the queue
 *   gets unplugged, the request_fn defined for the queue is invoked and
 *   transfers started.
 **/
void generic_unplug_device(struct request_queue *q)
{
	if (blk_queue_plugged(q)) {
		spin_lock_irq(q->queue_lock);
		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
	}
}
EXPORT_SYMBOL(generic_unplug_device);

static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
				   struct page *page)
{
	struct request_queue *q = bdi->unplug_io_data;

	blk_unplug(q);
}

void blk_unplug_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, unplug_work);

	trace_block_unplug_io(q);
	q->unplug_fn(q);
}

void blk_unplug_timeout(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	trace_block_unplug_timer(q);
	kblockd_schedule_work(q, &q->unplug_work);
}

void blk_unplug(struct request_queue *q)
{
	/*
	 * devices don't necessarily have an ->unplug_fn defined
	 */
	if (q->unplug_fn) {
		trace_block_unplug_io(q);
		q->unplug_fn(q);
	}
}
EXPORT_SYMBOL(blk_unplug);
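
/*
 * Illustrative sketch (not part of blk-core.c): a caller that has queued
 * I/O behind a (possibly plugged) queue and is about to wait for it can
 * force dispatch to start.  example_kick_bdev() is hypothetical.
 */
static void example_kick_bdev(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		blk_unplug(q);
}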

/**
 * blk_start_queue - restart a previously stopped queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   blk_start_queue() will clear the stop flag on the queue, and call
 *   the request_fn for the queue if it was in a stopped state when
 *   entered. Also see blk_stop_queue(). Queue lock must be held.
 **/
void blk_start_queue(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	__blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);

/**
 * blk_stop_queue - stop a queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   The Linux block layer assumes that a block driver will consume all
 *   entries on the request queue when the request_fn strategy is called.
 *   Often this will not happen, because of hardware limitations (queue
 *   depth settings). If a device driver gets a 'queue full' response,
 *   or if it simply chooses not to queue more I/O at one point, it can
 *   call this function to prevent the request_fn from being called until
 *   the driver has signalled it's ready to go again. This happens by calling
 *   blk_start_queue() to restart queue operations. Queue lock must be held.
 **/
void blk_stop_queue(struct request_queue *q)
{
	blk_remove_plug(q);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
EXPORT_SYMBOL(blk_stop_queue);
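
/*
 * Illustrative driver-side sketch (not part of blk-core.c): stopping the
 * queue when the hardware reports "queue full" and restarting it from the
 * completion interrupt, as described above.  struct example_dev,
 * example_hw_queue_full() and example_irq_done() are hypothetical;
 * irqreturn_t/IRQ_HANDLED are assumed from <linux/interrupt.h>.
 */
struct example_dev {
	struct request_queue	*queue;
	spinlock_t		lock;	/* also used as the queue lock */
};

static void example_hw_queue_full(struct example_dev *dev)
{
	/* called from the request_fn, so the queue lock is already held */
	blk_stop_queue(dev->queue);
}

static irqreturn_t example_irq_done(int irq, void *data)
{
	struct example_dev *dev = data;
	unsigned long flags;

	spin_lock_irqsave(&dev->lock, flags);
	/* the hardware has room again: let the request_fn be called */
	blk_start_queue(dev->queue);
	spin_unlock_irqrestore(&dev->lock, flags);

	return IRQ_HANDLED;
}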

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 *
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->unplug_timer);
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->unplug_work);
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * __blk_run_queue - run a single device queue
 * @q:	The queue to run
 *
 * Description:
 *    See @blk_run_queue. This variant must be called with the queue lock
 *    held and interrupts disabled.
 *
 */
void __blk_run_queue(struct request_queue *q)
{
	blk_remove_plug(q);

	if (unlikely(blk_queue_stopped(q)))
		return;

	if (elv_queue_empty(q))
		return;

	/*
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
		q->request_fn(q);
		queue_flag_clear(QUEUE_FLAG_REENTER, q);
	} else {
		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
		kblockd_schedule_work(q, &q->unplug_work);
	}
}
EXPORT_SYMBOL(__blk_run_queue);

/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 *    Invoke request handling on this queue, if it has pending work to do.
 *    May be used to restart queueing when a request has completed.
 */
void blk_run_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__blk_run_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}

void blk_cleanup_queue(struct request_queue *q)
{
	/*
	 * We know we have process context here, so we can be a little
	 * cautious and ensure that pending block actions on this device
	 * are done before moving on. Going into this function, we should
	 * not have processes doing IO to this device.
	 */
	blk_sync_queue(q);

	mutex_lock(&q->sysfs_lock);
	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
	mutex_unlock(&q->sysfs_lock);

	if (q->elevator)
		elevator_exit(q->elevator);

	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

static int blk_init_free_list(struct request_queue *q)
{
	struct request_list *rl = &q->rq;

	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->elvpriv = 0;
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);

	if (!rl->rq_pool)
		return -ENOMEM;

	return 0;
}

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, -1);
}
EXPORT_SYMBOL(blk_alloc_queue);

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
	q->backing_dev_info.unplug_io_data = q;
	err = bdi_init(&q->backing_dev_info);
	if (err) {
		kmem_cache_free(blk_requestq_cachep, q);
		return NULL;
	}

	init_timer(&q->unplug_timer);
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_WORK(&q->unplug_work, blk_unplug_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

/**
 * blk_init_queue  - prepare a request queue for use with a block device
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sorts requests and coalesces adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    The queue spin lock must be held while manipulating the requests on the
 *    request queue; this lock will be taken also from interrupt context, so irq
 *    disabling is needed for it.
 *
 *    Function returns a pointer to the initialized request queue, or %NULL if
 *    it didn't succeed.
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 **/

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	return blk_init_queue_node(rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_queue);

struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

	if (!q)
		return NULL;

	q->node = node_id;
	if (blk_init_free_list(q)) {
		kmem_cache_free(blk_requestq_cachep, q);
		return NULL;
	}

	/*
	 * if caller didn't supply a lock, they get per-queue locking with
	 * our embedded lock
	 */
	if (!lock)
		lock = &q->__queue_lock;

	q->request_fn		= rfn;
	q->prep_rq_fn		= NULL;
	q->unplug_fn		= generic_unplug_device;
	q->queue_flags		= QUEUE_FLAG_DEFAULT;
	q->queue_lock		= lock;

	/*
	 * This also sets hw/phys segments, boundary and size
	 */
	blk_queue_make_request(q, __make_request);

	q->sg_reserved_size = INT_MAX;

	blk_set_cmd_filter_defaults(&q->cmd_filter);

	/*
	 * all done
	 */
	if (!elevator_init(q, NULL)) {
		blk_queue_congestion_threshold(q);
		return q;
	}

	blk_put_queue(q);
	return NULL;
}
EXPORT_SYMBOL(blk_init_queue_node);
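
/*
 * Illustrative sketch (not part of blk-core.c): pairing blk_init_queue()
 * with blk_cleanup_queue() as the note above requires, reusing the
 * hypothetical struct example_dev from the earlier sketch.
 * example_request_fn() is a stand-in for the driver's request handler.
 */
static void example_request_fn(struct request_queue *q)
{
	/* dequeue and dispatch requests here */
}

static int example_setup_queue(struct example_dev *dev)
{
	spin_lock_init(&dev->lock);

	dev->queue = blk_init_queue(example_request_fn, &dev->lock);
	if (!dev->queue)
		return -ENOMEM;

	dev->queue->queuedata = dev;	/* so the request_fn can find us */
	return 0;
}

static void example_teardown_queue(struct example_dev *dev)
{
	/* once the device is deactivated, e.g. at module unload */
	blk_cleanup_queue(dev->queue);
}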

int blk_get_queue(struct request_queue *q)
{
	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
		kobject_get(&q->kobj);
		return 0;
	}

	return 1;
}

static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_flags & REQ_ELVPRIV)
		elv_put_request(q, rq);
	mempool_free(rq, q->rq.rq_pool);
}

static struct request *
blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
{
	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

	if (!rq)
		return NULL;

	blk_rq_init(q, rq);

	rq->cmd_flags = flags | REQ_ALLOCED;

	if (priv) {
		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
			mempool_free(rq, q->rq.rq_pool);
			return NULL;
		}
		rq->cmd_flags |= REQ_ELVPRIV;
	}

	return rq;
}

/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 */
static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc)
		return 0;

	/*
	 * Make sure the process is able to allocate at least 1 request
	 * even if the batch times out, otherwise we could theoretically
	 * lose wakeups.
	 */
	return ioc->nr_batch_requests == q->nr_batching ||
		(ioc->nr_batch_requests > 0
		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}

/*
 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
 * will cause the process to be a "batcher" on all queues in the system. This
 * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc || ioc_batching(q, ioc))
		return;

	ioc->nr_batch_requests = q->nr_batching;
	ioc->last_waited = jiffies;
}

static void __freed_request(struct request_queue *q, int sync)
{
	struct request_list *rl = &q->rq;

	if (rl->count[sync] < queue_congestion_off_threshold(q))
		blk_clear_queue_congested(q, sync);

	if (rl->count[sync] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[sync]))
			wake_up(&rl->wait[sync]);

		blk_clear_queue_full(q, sync);
	}
}

/*
 * A request has just been released.  Account for it, update the full and
 * congestion status, wake up any waiters.   Called under q->queue_lock.
 */
static void freed_request(struct request_queue *q, int sync, int priv)
{
	struct request_list *rl = &q->rq;

	rl->count[sync]--;
	if (priv)
		rl->elvpriv--;

	__freed_request(q, sync);

	if (unlikely(rl->starved[sync ^ 1]))
		__freed_request(q, sync ^ 1);
}

/*
 * Get a free request, queue_lock must be held.
 * Returns NULL on failure, with queue_lock held.
 * Returns !NULL on success, with queue_lock *not held*.
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	int may_queue, priv;

	may_queue = elv_may_queue(q, rw_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_queue_full(q, is_sync)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, is_sync);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, is_sync);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		goto out;

	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;

	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, is_sync, priv);

		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction was pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
rq_starved:
		if (unlikely(rl->count[is_sync] == 0))
			rl->starved[is_sync] = 1;

		goto out;
	}

	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	trace_block_getrq(q, bio, rw_flags & 1);
out:
	return rq;
}

/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked.
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
					struct bio *bio)
{
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	struct request *rq;

	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct io_context *ioc;
		struct request_list *rl = &q->rq;

		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				TASK_UNINTERRUPTIBLE);

		trace_block_sleeprq(q, bio, rw_flags & 1);

		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
		io_schedule();

		/*
		 * After sleeping, we become a "batching" process and
		 * will be able to allocate at least one request, and
		 * up to a big batch of them for a small period time.
		 * See ioc_batching, ioc_set_batching
		 */
		ioc = current_io_context(GFP_NOIO, q->node);
		ioc_set_batching(q, ioc);

		spin_lock_irq(q->queue_lock);
		finish_wait(&rl->wait[is_sync], &wait);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	};

	return rq;
}

struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT) {
		rq = get_request_wait(q, rw, NULL);
	} else {
		rq = get_request(q, rw, NULL, gfp_mask);
		if (!rq)
			spin_unlock_irq(q->queue_lock);
	}
	/* q->queue_lock is unlocked at this point */

	return rq;
}
EXPORT_SYMBOL(blk_get_request);
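
/*
 * Illustrative sketch (not part of blk-core.c): allocating a request,
 * using it as a BLOCK_PC command and releasing it with blk_put_request().
 * The helper name and CDB are hypothetical, and blk_execute_rq() is
 * assumed from the block PC execution helpers (block/blk-exec.c).
 */
static int example_send_test_unit_ready(struct request_queue *q,
					struct gendisk *disk)
{
	struct request *rq;
	int err;

	/* GFP_KERNEL includes __GFP_WAIT, so this may sleep for a request */
	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd_len = 6;
	rq->cmd[0] = 0x00;	/* TEST UNIT READY, no data transfer */
	rq->timeout = 30 * HZ;

	err = blk_execute_rq(q, disk, rq, 0);
	blk_put_request(rq);
	return err;
}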

/**
 * blk_make_request - given a bio, allocate a corresponding struct request.
 *
 * @bio:  The bio describing the memory mappings that will be submitted for IO.
 *        It may be a chained-bio properly constructed by block/bio layer.
 *
 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
 * type commands, where the struct request needs to be further initialized by
 * the caller. It is passed a &struct bio, which describes the memory info of
 * the I/O transfer.
 *
 * The caller of blk_make_request must make sure that bi_io_vec
 * are set to describe the memory buffers, so that bio_data_dir() will return
 * the needed direction of the request. (And all bio's in the passed bio-chain
 * are properly set accordingly)
 *
 * If called under non-sleepable conditions, mapped bio buffers must not
 * need bouncing, by calling the appropriate masked or flagged allocator,
 * suitable for the target device. Otherwise the call to blk_queue_bounce will
 * BUG.
 *
 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
 * anything but the first bio in the chain. Otherwise you risk waiting for IO
 * completion of a bio that hasn't been submitted yet, thus resulting in a
 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
 * of bio_alloc(), as that avoids the mempool deadlock.
 * If possible a big IO should be split into smaller parts when allocation
 * fails. Partial allocation should not be an error, or you risk a live-lock.
 */
struct request *blk_make_request(struct request_queue *q, struct bio *bio,
				 gfp_t gfp_mask)
{
	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);

	if (unlikely(!rq))
		return ERR_PTR(-ENOMEM);

	for_each_bio(bio) {
		struct bio *bounce_bio = bio;
		int ret;

		blk_queue_bounce(q, &bounce_bio);
		ret = blk_rq_append_bio(q, rq, bounce_bio);
		if (unlikely(ret)) {
			blk_put_request(rq);
			return ERR_PTR(ret);
		}
	}

	return rq;
}
EXPORT_SYMBOL(blk_make_request);
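
/*
 * Illustrative sketch (not part of blk-core.c): turning an already built
 * bio chain into a BLOCK_PC request.  Per the description above, the
 * returned request still has to be further initialized by the caller;
 * the CDB setup and helper name below are hypothetical (cdb_len must not
 * exceed BLK_MAX_CDB).
 */
static struct request *example_bio_to_pc_request(struct request_queue *q,
						 struct bio *bio_chain,
						 const u8 *cdb,
						 unsigned int cdb_len)
{
	struct request *rq = blk_make_request(q, bio_chain, GFP_KERNEL);

	if (IS_ERR(rq))
		return rq;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd_len = cdb_len;
	memcpy(rq->cmd, cdb, cdb_len);
	return rq;
}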

Linus Torvalds's avatar
Linus Torvalds committed
947 948 949 950 951 952 953 954 955 956
/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue. Must be called with queue lock held.
 */
957
void blk_requeue_request(struct request_queue *q, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
958
{
959 960
	BUG_ON(blk_queued_rq(rq));

961 962
	blk_delete_timer(rq);
	blk_clear_rq_complete(rq);
963
	trace_block_rq_requeue(q, rq);
964

Linus Torvalds's avatar
Linus Torvalds committed
965 966 967 968 969 970 971 972
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);
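
/*
 * Illustrative sketch (not part of blk-core.c): a driver that already
 * dequeued a request but found the hardware busy puts it back and stops
 * the queue until a completion makes room again.  Called with
 * q->queue_lock held; example_requeue_on_busy() is hypothetical.
 */
static void example_requeue_on_busy(struct request_queue *q, struct request *rq)
{
	blk_requeue_request(q, rq);
	blk_stop_queue(q);	/* restarted later via blk_start_queue() */
}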

/**
973
 * blk_insert_request - insert a special request into a request queue
Linus Torvalds's avatar
Linus Torvalds committed
974 975 976 977 978 979 980 981 982
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 * @at_head:	insert request at head or tail of queue
 * @data:	private data
 *
 * Description:
 *    Many block devices need to execute commands asynchronously, so they don't
 *    block the whole kernel from preemption during request execution.  This is
 *    accomplished normally by inserting aritficial requests tagged as
983 984
 *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
 *    be scheduled for actual execution by the request queue.
Linus Torvalds's avatar
Linus Torvalds committed
985 986 987 988 989 990
 *
 *    We have the option of inserting the head or the tail of the queue.
 *    Typically we use the tail for new ioctls and so forth.  We use the head
 *    of the queue for things like a QUEUE_FULL message from a device, or a
 *    host that is unable to accept a particular command.
 */
991
void blk_insert_request(struct request_queue *q, struct request *rq,
992
			int at_head, void *data)
Linus Torvalds's avatar
Linus Torvalds committed
993
{
994
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
Linus Torvalds's avatar
Linus Torvalds committed
995 996 997 998 999 1000 1001
	unsigned long flags;

	/*
	 * tell I/O scheduler that this isn't a regular read/write (ie it
	 * must not attempt merges on this) and that it acts as a soft
	 * barrier
	 */
1002
	rq->cmd_type = REQ_TYPE_SPECIAL;
Linus Torvalds's avatar
Linus Torvalds committed
1003 1004 1005 1006 1007 1008 1009 1010

	rq->special = data;

	spin_lock_irqsave(q->queue_lock, flags);

	/*
	 * If command is tagged, release the tag
	 */
1011 1012
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);
Linus Torvalds's avatar
Linus Torvalds committed
1013

1014
	drive_stat_acct(rq, 1);
1015
	__elv_add_request(q, rq, where, 0);
Tejun Heo's avatar
Tejun Heo committed
1016
	__blk_run_queue(q);
Linus Torvalds's avatar
Linus Torvalds committed
1017 1018 1019 1020 1021 1022 1023 1024 1025
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_insert_request);
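
/*
 * Illustrative sketch (not part of blk-core.c): queueing a driver-special
 * command at the head of the queue, as described above for e.g. a
 * QUEUE_FULL condition.  example_issue_special() and the command cookie
 * are hypothetical.
 */
static int example_issue_special(struct request_queue *q, void *cmd_cookie)
{
	struct request *rq = blk_get_request(q, WRITE, GFP_KERNEL);

	if (!rq)
		return -ENOMEM;

	/* blk_insert_request() marks it REQ_TYPE_SPECIAL and runs the queue */
	blk_insert_request(q, rq, 1, cmd_cookie);
	return 0;
}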

/*
 * add-request adds a request to the linked list.
 * queue lock is held and interrupts disabled, as we muck with the
 * request queue list.
 */
static inline void add_request(struct request_queue *q, struct request *req)
{
	drive_stat_acct(req, 1);

	/*
	 * elevator indicated where it wants this request to be
	 * inserted at elevator_merge time
	 */
	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
}

static void part_round_stats_single(int cpu, struct hd_struct *part,
				    unsigned long now)
{
	if (now == part->stamp)
		return;

	if (part->in_flight) {
		__part_stat_add(cpu, part, time_in_queue,
				part->in_flight * (now - part->stamp));
		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
	}
	part->stamp = now;
}

/**
 * part_round_stats() - Round off the performance stats on a struct disk_stats.
 * @cpu: cpu number for stats access
 * @part: target partition
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
 * time it has been in this state for.
 *
 * Normally, that accounting is done on IO completion, but that can result
 * in more than a second's worth of IO being accounted for within any one
 * second, leading to >100% utilisation.  To deal with that, we call this
 * function to do a round-off before returning the results when reading
 * /proc/diskstats.  This accounts immediately for all queue usage up to
 * the current jiffies and restarts the counters again.
 */
void part_round_stats(int cpu, struct hd_struct *part)
{
	unsigned long now = jiffies;

	if (part->partno)
		part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
	part_round_stats_single(cpu, part, now);
}
EXPORT_SYMBOL_GPL(part_round_stats);
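
/*
 * Worked example (illustrative, not part of blk-core.c): if a partition
 * has had 3 requests in flight since part->stamp and 10 jiffies have
 * passed, a call to part_round_stats() adds 3 * 10 = 30 to time_in_queue
 * and 10 to io_ticks before the counters are read, so the reported
 * utilisation never exceeds the wall-clock time actually covered.
 */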

/*
 * queue lock must be held
 */
void __blk_put_request(struct request_queue *q, struct request *req)
{
	if (unlikely(!q))
		return;
	if (unlikely(--req->ref_count))
		return;

	elv_completed_request(q, req);

	/* this is a bio leak */
	WARN_ON(req->bio != NULL);

	/*
	 * Request may not have originated from ll_rw_blk. if not,
	 * it didn't come out of our reserved rq pools
	 */
	if (req->cmd_flags & REQ_ALLOCED) {
		int is_sync = rq_is_sync(req) != 0;
		int priv = req->cmd_flags & REQ_ELVPRIV;

		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(!hlist_unhashed(&req->hash));

		blk_free_request(q, req);
		freed_request(q, is_sync, priv);
	}
}
EXPORT_SYMBOL_GPL(__blk_put_request);

void blk_put_request(struct request *req)
{
	unsigned long flags;
	struct request_queue *q = req->q;

	spin_lock_irqsave(q->queue_lock, flags);
	__blk_put_request(q, req);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_put_request);

void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->cpu = bio->bi_comp_cpu;
	req->cmd_type = REQ_TYPE_FS;

	/*
	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
	 */
	if (bio_rw_ahead(bio))
		req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
				   REQ_FAILFAST_DRIVER);
	if (bio_failfast_dev(bio))
		req->cmd_flags |= REQ_FAILFAST_DEV;
	if (bio_failfast_transport(bio))
		req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	if (bio_failfast_driver(bio))
		req->cmd_flags |= REQ_FAILFAST_DRIVER;

	if (unlikely(bio_discard(bio))) {
		req->cmd_flags |= REQ_DISCARD;
		if (bio_barrier(bio))
			req->cmd_flags |= REQ_SOFTBARRIER;
		req->q->prepare_discard_fn(req->q, req);
	} else if (unlikely(bio_barrier(bio)))
		req->cmd_flags |= REQ_HARDBARRIER;

	if (bio_sync(bio))
		req->cmd_flags |= REQ_RW_SYNC;
	if (bio_rw_meta(bio))
		req->cmd_flags |= REQ_RW_META;
	if (bio_noidle(bio))
		req->cmd_flags |= REQ_NOIDLE;

	req->errors = 0;
	req->__sector = bio->bi_sector;
	req->ioprio = bio_prio(bio);
	blk_rq_bio_prep(req->q, req, bio);
}

/*
 * Only disabling plugging for non-rotational devices if it does tagging
 * as well, otherwise we do need the proper merging
 */
static inline bool queue_should_plug(struct request_queue *q)
{
	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
}

static int __make_request(struct request_queue *q, struct bio *bio)
{
	struct request *req;
	int el_ret;
	unsigned int bytes = bio->bi_size;
	const unsigned short prio = bio_prio(bio);
	const int sync = bio_sync(bio);
	const int unplug = bio_unplug(bio);
	int rw_flags;

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	spin_lock_irq(q->queue_lock);

	if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
		goto get_rq;

	el_ret = elv_merge(q, &req, bio);
	switch (el_ret) {
	case ELEVATOR_BACK_MERGE:
		BUG_ON(!rq_mergeable(req));

		if (!ll_back_merge_fn(q, req, bio))
			break;

		trace_block_bio_backmerge(q, bio);

		req->biotail->bi_next = bio;
		req->biotail = bio;
		req->__data_len += bytes;
		req->ioprio = ioprio_best(req->ioprio, prio);
		if (!blk_rq_cpu_valid(req))
			req->cpu = bio->bi_comp_cpu;
		drive_stat_acct(req, 0);
		if (!attempt_back_merge(q, req))
			elv_merged_request(q, req, el_ret);
		goto out;

	case ELEVATOR_FRONT_MERGE:
		BUG_ON(!rq_mergeable(req));

		if (!ll_front_merge_fn(q, req, bio))
			break;

		trace_block_bio_frontmerge(q, bio);

		bio->bi_next = req->bio;
		req->bio = bio;

		/*
		 * may not be valid. if the low level driver said
		 * it didn't need a bounce buffer then it better
		 * not touch req->buffer either...
		 */
		req->buffer = bio_data(bio);
		req->__sector = bio->bi_sector;
		req->__data_len += bytes;
		req->ioprio = ioprio_best(req->ioprio, prio);
		if (!blk_rq_cpu_valid(req))
			req->cpu = bio->bi_comp_cpu;