deadline-iosched.c 11.7 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3
/*
 *  Deadline i/o scheduler.
 *
4
 *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
Linus Torvalds's avatar
Linus Torvalds committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
20 21 22 23
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
Linus Torvalds's avatar
Linus Torvalds committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
				     by the above parameters. For throughput. */

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];	
	struct list_head fifo_list[2];
	
	/*
	 * next in sort order. read, write or both are NULL
	 */
40
	struct request *next_rq[2];
Linus Torvalds's avatar
Linus Torvalds committed
41 42 43 44 45 46 47 48 49 50 51 52 53
	unsigned int batching;		/* number of sequential requests made */
	sector_t last_sector;		/* head position */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;
};

54
static void deadline_move_request(struct deadline_data *, struct request *);
Linus Torvalds's avatar
Linus Torvalds committed
55

56
#define RQ_RB_ROOT(dd, rq)	(&(dd)->sort_list[rq_data_dir((rq))])
Linus Torvalds's avatar
Linus Torvalds committed
57

58 59 60 61 62 63 64 65 66 67 68 69 70 71
/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
	struct rb_node *node = rb_next(&rq->rb_node);

	if (node)
		return rb_entry_rq(node);

	return NULL;
}

Linus Torvalds's avatar
Linus Torvalds committed
72
static void
73
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
74
{
75 76
	struct rb_root *root = RQ_RB_ROOT(dd, rq);
	struct request *__alias;
Linus Torvalds's avatar
Linus Torvalds committed
77 78

retry:
79 80
	__alias = elv_rb_add(root, rq);
	if (unlikely(__alias)) {
81
		deadline_move_request(dd, __alias);
82
		goto retry;
Linus Torvalds's avatar
Linus Torvalds committed
83 84 85 86
	}
}

static inline void
87
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
88
{
89
	const int data_dir = rq_data_dir(rq);
Linus Torvalds's avatar
Linus Torvalds committed
90

91 92
	if (dd->next_rq[data_dir] == rq)
		dd->next_rq[data_dir] = deadline_latter_request(rq);
Linus Torvalds's avatar
Linus Torvalds committed
93

94
	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
Linus Torvalds's avatar
Linus Torvalds committed
95 96 97
}

/*
98
 * add rq to rbtree and fifo
Linus Torvalds's avatar
Linus Torvalds committed
99
 */
100
static void
Linus Torvalds's avatar
Linus Torvalds committed
101 102 103
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
104
	const int data_dir = rq_data_dir(rq);
Linus Torvalds's avatar
Linus Torvalds committed
105

106
	deadline_add_rq_rb(dd, rq);
107

Linus Torvalds's avatar
Linus Torvalds committed
108 109 110
	/*
	 * set expire time (only used for reads) and add to fifo list
	 */
111 112
	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
Linus Torvalds's avatar
Linus Torvalds committed
113 114 115
}

/*
116
 * remove rq from rbtree and fifo.
Linus Torvalds's avatar
Linus Torvalds committed
117
 */
118
static void deadline_remove_request(struct request_queue *q, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
119
{
120
	struct deadline_data *dd = q->elevator->elevator_data;
Linus Torvalds's avatar
Linus Torvalds committed
121

122 123
	rq_fifo_clear(rq);
	deadline_del_rq_rb(dd, rq);
Linus Torvalds's avatar
Linus Torvalds committed
124 125 126
}

static int
127
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
Linus Torvalds's avatar
Linus Torvalds committed
128 129 130 131 132 133 134 135 136
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;
	int ret;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
137
		sector_t sector = bio->bi_sector + bio_sectors(bio);
Linus Torvalds's avatar
Linus Torvalds committed
138

139
		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
Linus Torvalds's avatar
Linus Torvalds committed
140
		if (__rq) {
141
			BUG_ON(sector != __rq->sector);
Linus Torvalds's avatar
Linus Torvalds committed
142 143 144 145 146 147 148 149 150 151 152 153 154 155

			if (elv_rq_merge_ok(__rq, bio)) {
				ret = ELEVATOR_FRONT_MERGE;
				goto out;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
out:
	*req = __rq;
	return ret;
}

156 157
static void deadline_merged_request(struct request_queue *q,
				    struct request *req, int type)
Linus Torvalds's avatar
Linus Torvalds committed
158 159 160 161 162 163
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
164 165
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(RQ_RB_ROOT(dd, req), req);
166
		deadline_add_rq_rb(dd, req);
Linus Torvalds's avatar
Linus Torvalds committed
167 168 169 170
	}
}

static void
171
deadline_merged_requests(struct request_queue *q, struct request *req,
Linus Torvalds's avatar
Linus Torvalds committed
172 173 174
			 struct request *next)
{
	/*
175 176
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
Linus Torvalds's avatar
Linus Torvalds committed
177
	 */
178 179 180 181
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
			list_move(&req->queuelist, &next->queuelist);
			rq_set_fifo_time(req, rq_fifo_time(next));
Linus Torvalds's avatar
Linus Torvalds committed
182 183 184 185 186 187 188 189 190 191 192 193 194
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
195
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
196
{
197
	struct request_queue *q = rq->q;
Linus Torvalds's avatar
Linus Torvalds committed
198

199 200
	deadline_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
Linus Torvalds's avatar
Linus Torvalds committed
201 202 203 204 205 206
}

/*
 * move an entry to dispatch queue
 */
static void
207
deadline_move_request(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
208
{
209
	const int data_dir = rq_data_dir(rq);
Linus Torvalds's avatar
Linus Torvalds committed
210

211 212
	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;
213
	dd->next_rq[data_dir] = deadline_latter_request(rq);
Linus Torvalds's avatar
Linus Torvalds committed
214

215
	dd->last_sector = rq->sector + rq->nr_sectors;
Linus Torvalds's avatar
Linus Torvalds committed
216 217 218 219 220

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
221
	deadline_move_to_dispatch(dd, rq);
Linus Torvalds's avatar
Linus Torvalds committed
222 223 224 225 226 227 228 229
}

/*
 * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
230
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
Linus Torvalds's avatar
Linus Torvalds committed
231 232

	/*
233
	 * rq is expired!
Linus Torvalds's avatar
Linus Torvalds committed
234
	 */
235
	if (time_after(jiffies, rq_fifo_time(rq)))
Linus Torvalds's avatar
Linus Torvalds committed
236 237 238 239 240 241 242 243 244
		return 1;

	return 0;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
245
static int deadline_dispatch_requests(struct request_queue *q, int force)
Linus Torvalds's avatar
Linus Torvalds committed
246
{
247
	struct deadline_data *dd = q->elevator->elevator_data;
Linus Torvalds's avatar
Linus Torvalds committed
248 249
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
250
	struct request *rq;
251
	int data_dir;
Linus Torvalds's avatar
Linus Torvalds committed
252 253 254 255

	/*
	 * batches are currently reads XOR writes
	 */
256 257
	if (dd->next_rq[WRITE])
		rq = dd->next_rq[WRITE];
258
	else
259
		rq = dd->next_rq[READ];
Linus Torvalds's avatar
Linus Torvalds committed
260

261
	if (rq) {
Linus Torvalds's avatar
Linus Torvalds committed
262 263
		/* we have a "next request" */
		
264
		if (dd->last_sector != rq->sector)
Linus Torvalds's avatar
Linus Torvalds committed
265 266 267 268 269 270 271 272 273 274 275 276 277 278
			/* end the batch on a non sequential request */
			dd->batching += dd->fifo_batch;
		
		if (dd->batching < dd->fifo_batch)
			/* we are still entitled to batch */
			goto dispatch_request;
	}

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
279
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
Linus Torvalds's avatar
Linus Torvalds committed
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294

		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
295
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
Linus Torvalds's avatar
Linus Torvalds committed
296 297 298 299 300 301 302 303 304 305 306 307 308

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
309
	 * and start a new batch
Linus Torvalds's avatar
Linus Torvalds committed
310 311 312
	 */
	if (deadline_check_fifo(dd, data_dir)) {
		/* An expired request exists - satisfy it */
313 314
		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
	} else if (dd->next_rq[data_dir]) {
Linus Torvalds's avatar
Linus Torvalds committed
315 316 317 318
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
319
		rq = dd->next_rq[data_dir];
Linus Torvalds's avatar
Linus Torvalds committed
320
	} else {
321
		struct rb_node *node;
Linus Torvalds's avatar
Linus Torvalds committed
322 323 324 325 326
		/*
		 * The last req was the other direction or we have run out of
		 * higher-sectored requests. Go back to the lowest sectored
		 * request (1 way elevator) and start a new batch.
		 */
327 328 329
		node = rb_first(&dd->sort_list[data_dir]);
		if (node)
			rq = rb_entry_rq(node);
Linus Torvalds's avatar
Linus Torvalds committed
330 331
	}

332 333
	dd->batching = 0;

Linus Torvalds's avatar
Linus Torvalds committed
334 335
dispatch_request:
	/*
336
	 * rq is the selected appropriate request.
Linus Torvalds's avatar
Linus Torvalds committed
337 338
	 */
	dd->batching++;
339
	deadline_move_request(dd, rq);
Linus Torvalds's avatar
Linus Torvalds committed
340 341 342 343

	return 1;
}

344
static int deadline_queue_empty(struct request_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
345 346 347
{
	struct deadline_data *dd = q->elevator->elevator_data;

348 349
	return list_empty(&dd->fifo_list[WRITE])
		&& list_empty(&dd->fifo_list[READ]);
Linus Torvalds's avatar
Linus Torvalds committed
350 351 352 353 354 355 356 357 358 359 360 361 362
}

static void deadline_exit_queue(elevator_t *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}

/*
363
 * initialize elevator private data (deadline_data).
Linus Torvalds's avatar
Linus Torvalds committed
364
 */
365
static void *deadline_init_queue(struct request_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
366 367 368
{
	struct deadline_data *dd;

369
	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node);
Linus Torvalds's avatar
Linus Torvalds committed
370
	if (!dd)
Jens Axboe's avatar
Jens Axboe committed
371
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
372 373 374 375 376 377 378 379 380 381

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;
Jens Axboe's avatar
Jens Axboe committed
382
	return dd;
Linus Torvalds's avatar
Linus Torvalds committed
383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
}

/*
 * sysfs parts below
 */

static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
	return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
405
static ssize_t __FUNC(elevator_t *e, char *page)			\
Linus Torvalds's avatar
Linus Torvalds committed
406
{									\
407 408
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
Linus Torvalds's avatar
Linus Torvalds committed
409 410 411 412
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
413 414 415 416 417
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
Linus Torvalds's avatar
Linus Torvalds committed
418 419 420
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
421
static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
Linus Torvalds's avatar
Linus Torvalds committed
422
{									\
423
	struct deadline_data *dd = e->elevator_data;			\
Linus Torvalds's avatar
Linus Torvalds committed
424 425 426 427 428 429 430 431 432 433 434 435
	int __data;							\
	int ret = deadline_var_store(&__data, (page), count);		\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
436 437 438 439 440
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
Linus Torvalds's avatar
Linus Torvalds committed
441 442
#undef STORE_FUNCTION

443 444 445 446 447 448 449 450 451 452 453
#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
				      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
Linus Torvalds's avatar
Linus Torvalds committed
454 455 456 457 458 459 460
};

static struct elevator_type iosched_deadline = {
	.ops = {
		.elevator_merge_fn = 		deadline_merge,
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
461 462
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_add_req_fn =		deadline_add_request,
Linus Torvalds's avatar
Linus Torvalds committed
463
		.elevator_queue_empty_fn =	deadline_queue_empty,
464 465
		.elevator_former_req_fn =	elv_rb_former_request,
		.elevator_latter_req_fn =	elv_rb_latter_request,
Linus Torvalds's avatar
Linus Torvalds committed
466 467 468 469
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
	},

470
	.elevator_attrs = deadline_attrs,
Linus Torvalds's avatar
Linus Torvalds committed
471 472 473 474 475 476
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
477
	return elv_register(&iosched_deadline);
Linus Torvalds's avatar
Linus Torvalds committed
478 479 480 481 482 483 484 485 486 487 488 489 490
}

static void __exit deadline_exit(void)
{
	elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");