/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-bio-record.h"

#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-dirty-log.h>
#include <linux/dm-kcopyd.h>
#include <linux/dm-region-hash.h>

#define DM_MSG_PREFIX "raid1"

#define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */

#define DM_RAID1_HANDLE_ERRORS	0x01
#define DM_RAID1_KEEP_LOG	0x02
#define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
#define keep_log(p)		((p)->features & DM_RAID1_KEEP_LOG)

static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);

/*-----------------------------------------------------------------
 * Mirror set structures.
 *---------------------------------------------------------------*/
enum dm_raid1_error {
	DM_RAID1_WRITE_ERROR,
	DM_RAID1_FLUSH_ERROR,
	DM_RAID1_SYNC_ERROR,
	DM_RAID1_READ_ERROR
};

struct mirror {
	struct mirror_set *ms;
	atomic_t error_count;
	unsigned long error_type;
	struct dm_dev *dev;
	sector_t offset;
};

struct mirror_set {
	struct dm_target *ti;
	struct list_head list;

	uint64_t features;

	spinlock_t lock;	/* protects the lists */
	struct bio_list reads;
	struct bio_list writes;
	struct bio_list failures;
	struct bio_list holds;	/* bios are waiting until suspend */

	struct dm_region_hash *rh;
	struct dm_kcopyd_client *kcopyd_client;
	struct dm_io_client *io_client;

	/* recovery */
	region_t nr_regions;
	int in_sync;
	int log_failure;
	int leg_failure;
	atomic_t suspend;

	atomic_t default_mirror;	/* Default mirror */

	struct workqueue_struct *kmirrord_wq;
	struct work_struct kmirrord_work;
	struct timer_list timer;
	unsigned long timer_pending;

	struct work_struct trigger_event;

	unsigned nr_mirrors;
	struct mirror mirror[0];
};

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle,
		"A percentage of time allocated for raid resynchronization");

static void wakeup_mirrord(void *context)
{
	struct mirror_set *ms = context;

	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
}

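/*
 * Delayed wakeup support: delayed_wake() arms a one-shot timer (HZ / 5,
 * i.e. 200ms) unless one is already pending; when it fires,
 * delayed_wake_fn() clears the pending bit and wakes kmirrord.  This
 * coalesces bursts of wakeup requests into a single work item.
 */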
static void delayed_wake_fn(unsigned long data)
{
	struct mirror_set *ms = (struct mirror_set *) data;

	clear_bit(0, &ms->timer_pending);
	wakeup_mirrord(ms);
}

static void delayed_wake(struct mirror_set *ms)
{
	if (test_and_set_bit(0, &ms->timer_pending))
		return;

	ms->timer.expires = jiffies + HZ / 5;
	ms->timer.data = (unsigned long) ms;
	ms->timer.function = delayed_wake_fn;
	add_timer(&ms->timer);
}

static void wakeup_all_recovery_waiters(void *context)
{
	wake_up_all(&_kmirrord_recovery_stopped);
}

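/*
 * Add a bio to the appropriate input list and wake kmirrord only on
 * the empty -> non-empty transition, so a burst of queued bios
 * triggers a single wakeup.
 */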
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
{
	unsigned long flags;
	int should_wake = 0;
	struct bio_list *bl;

	bl = (rw == WRITE) ? &ms->writes : &ms->reads;
	spin_lock_irqsave(&ms->lock, flags);
	should_wake = !(bl->head);
	bio_list_add(bl, bio);
	spin_unlock_irqrestore(&ms->lock, flags);

	if (should_wake)
		wakeup_mirrord(ms);
}

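/* Region hash callback: resubmit delayed bios as writes via kmirrord. */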
static void dispatch_bios(void *context, struct bio_list *bio_list)
{
	struct mirror_set *ms = context;
	struct bio *bio;

	while ((bio = bio_list_pop(bio_list)))
		queue_bio(ms, bio, WRITE);
}

struct dm_raid1_bio_record {
	struct mirror *m;
	struct dm_bio_details details;
	region_t write_region;
};

/*
 * Every mirror should look like this one.
 */
#define DEFAULT_MIRROR 0

/*
 * This is yucky.  We squirrel the mirror struct away inside
 * bi_next for read/write buffers.  This is safe since the bio
 * doesn't get submitted to the lower levels of the block layer.
 */
static struct mirror *bio_get_m(struct bio *bio)
{
	return (struct mirror *) bio->bi_next;
}

static void bio_set_m(struct bio *bio, struct mirror *m)
{
	bio->bi_next = (struct bio *) m;
}

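/*
 * The default mirror is tracked by array index so that it can be read
 * and switched atomically when the current default leg fails.
 */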
static struct mirror *get_default_mirror(struct mirror_set *ms)
{
	return &ms->mirror[atomic_read(&ms->default_mirror)];
}

static void set_default_mirror(struct mirror *m)
{
	struct mirror_set *ms = m->ms;
	struct mirror *m0 = &(ms->mirror[0]);

	atomic_set(&ms->default_mirror, m - m0);
}

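/* Return the first leg with no recorded errors, or NULL if none remain. */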
static struct mirror *get_valid_mirror(struct mirror_set *ms)
{
	struct mirror *m;

	for (m = ms->mirror; m < ms->mirror + ms->nr_mirrors; m++)
		if (!atomic_read(&m->error_count))
			return m;

	return NULL;
}

/* fail_mirror
 * @m: mirror device to fail
 * @error_type: one of the dm_raid1_error enum values, DM_RAID1_*_ERROR
 *
 * If errors are being handled, record the type of
 * error encountered for this device.  If this type
 * of error has already been recorded, we can return;
 * otherwise, we must signal userspace by triggering
 * an event.  Additionally, if the device is the
 * primary device, we must choose a new primary, but
 * only if the mirror is in-sync.
 *
 * This function must not block.
 */
static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
{
	struct mirror_set *ms = m->ms;
	struct mirror *new;

	ms->leg_failure = 1;

	/*
	 * error_count is used for nothing more than a
	 * simple way to tell if a device has encountered
	 * errors.
	 */
	atomic_inc(&m->error_count);

	if (test_and_set_bit(error_type, &m->error_type))
		return;

	if (!errors_handled(ms))
		return;

	if (m != get_default_mirror(ms))
		goto out;

	if (!ms->in_sync && !keep_log(ms)) {
		/*
		 * Better to issue requests to same failing device
		 * than to risk returning corrupt data.
		 */
		DMERR("Primary mirror (%s) failed while out-of-sync: "
		      "Reads may fail.", m->dev->name);
		goto out;
	}

	new = get_valid_mirror(ms);
	if (new)
		set_default_mirror(new);
	else
		DMWARN("All sides of mirror have failed.");

out:
	schedule_work(&ms->trigger_event);
}

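/*
 * Issue an empty REQ_PREFLUSH to every leg in parallel; any leg whose
 * flush fails is marked with DM_RAID1_FLUSH_ERROR and -EIO is returned.
 */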
static int mirror_flush(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	unsigned long error_bits;

	unsigned int i;
	struct dm_io_region io[ms->nr_mirrors];
	struct mirror *m;
	struct dm_io_request io_req = {
		.bi_op = REQ_OP_WRITE,
		.bi_op_flags = REQ_PREFLUSH,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = ms->io_client,
	};

	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
		io[i].bdev = m->dev->bdev;
		io[i].sector = 0;
		io[i].count = 0;
	}

	error_bits = -1;
	dm_io(&io_req, ms->nr_mirrors, io, &error_bits);
	if (unlikely(error_bits != 0)) {
		for (i = 0; i < ms->nr_mirrors; i++)
			if (test_bit(i, &error_bits))
				fail_mirror(ms->mirror + i,
					    DM_RAID1_FLUSH_ERROR);
		return -EIO;
	}

	return 0;
}

/*-----------------------------------------------------------------
 * Recovery.
 *
 * When a mirror is first activated we may find that some regions
 * are in the no-sync state.  We have to recover these by
 * recopying from the default mirror to all the others.
 *---------------------------------------------------------------*/
static void recovery_complete(int read_err, unsigned long write_err,
			      void *context)
{
	struct dm_region *reg = context;
	struct mirror_set *ms = dm_rh_region_context(reg);
	int m, bit = 0;

	if (read_err) {
		/* Read error means the failure of default mirror. */
		DMERR_LIMIT("Unable to read primary mirror during recovery");
		fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR);
	}

	if (write_err) {
		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
			    write_err);
		/*
		 * Bits correspond to devices (excluding default mirror).
		 * The default mirror cannot change during recovery.
		 */
		for (m = 0; m < ms->nr_mirrors; m++) {
			if (&ms->mirror[m] == get_default_mirror(ms))
				continue;
			if (test_bit(bit, &write_err))
				fail_mirror(ms->mirror + m,
					    DM_RAID1_SYNC_ERROR);
			bit++;
		}
	}

	dm_rh_recovery_end(reg, !(read_err || write_err));
}

static int recover(struct mirror_set *ms, struct dm_region *reg)
{
	int r;
	unsigned i;
	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
	struct mirror *m;
	unsigned long flags = 0;
	region_t key = dm_rh_get_region_key(reg);
	sector_t region_size = dm_rh_get_region_size(ms->rh);

	/* fill in the source */
	m = get_default_mirror(ms);
	from.bdev = m->dev->bdev;
	from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
	if (key == (ms->nr_regions - 1)) {
		/*
		 * The final region may be smaller than
		 * region_size.
		 */
		from.count = ms->ti->len & (region_size - 1);
		if (!from.count)
			from.count = region_size;
	} else
		from.count = region_size;

	/* fill in the destinations */
	for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
		if (&ms->mirror[i] == get_default_mirror(ms))
			continue;

		m = ms->mirror + i;
		dest->bdev = m->dev->bdev;
		dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
		dest->count = from.count;
		dest++;
	}

	/* hand to kcopyd */
	if (!errors_handled(ms))
		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);

	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
			   flags, recovery_complete, reg);

	return r;
}

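/* Clear the per-leg error state once a resynchronization completes. */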
static void reset_ms_flags(struct mirror_set *ms)
{
	unsigned int m;

	ms->leg_failure = 0;
	for (m = 0; m < ms->nr_mirrors; m++) {
		atomic_set(&(ms->mirror[m].error_count), 0);
		ms->mirror[m].error_type = 0;
	}
}

static void do_recovery(struct mirror_set *ms)
{
	struct dm_region *reg;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	int r;

	/*
	 * Start quiescing some regions.
	 */
	dm_rh_recovery_prepare(ms->rh);

	/*
	 * Copy any already quiesced regions.
	 */
	while ((reg = dm_rh_recovery_start(ms->rh))) {
		r = recover(ms, reg);
		if (r)
			dm_rh_recovery_end(reg, 0);
	}

	/*
	 * Update the in sync flag.
	 */
	if (!ms->in_sync &&
	    (log->type->get_sync_count(log) == ms->nr_regions)) {
		/* the sync is complete */
		dm_table_event(ms->ti->table);
		ms->in_sync = 1;
		reset_ms_flags(ms);
	}
}

/*-----------------------------------------------------------------
 * Reads
 *---------------------------------------------------------------*/
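/*
 * Starting with the default mirror, scan backwards (wrapping around)
 * for a leg with no recorded errors.  Returns NULL if every leg has
 * failed.
 */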
static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
{
	struct mirror *m = get_default_mirror(ms);

	do {
		if (likely(!atomic_read(&m->error_count)))
			return m;

		if (m-- == ms->mirror)
			m += ms->nr_mirrors;
	} while (m != get_default_mirror(ms));

	return NULL;
}

static int default_ok(struct mirror *m)
{
	struct mirror *default_mirror = get_default_mirror(m->ms);

	return !atomic_read(&default_mirror->error_count);
}

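/*
 * A failed read may be retried on another leg only if the region is
 * in-sync (every leg holds the data) and an error-free leg remains.
 */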
static int mirror_available(struct mirror_set *ms, struct bio *bio)
{
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	region_t region = dm_rh_bio_to_region(ms->rh, bio);

	if (log->type->in_sync(log, region, 0))
		return choose_mirror(ms, bio->bi_iter.bi_sector) ? 1 : 0;

	return 0;
}

/*
 * remap a buffer to a particular mirror.
 */
static sector_t map_sector(struct mirror *m, struct bio *bio)
{
	if (unlikely(!bio->bi_iter.bi_size))
		return 0;
	return m->offset + dm_target_offset(m->ms->ti, bio->bi_iter.bi_sector);
}

static void map_bio(struct mirror *m, struct bio *bio)
{
	bio->bi_bdev = m->dev->bdev;
	bio->bi_iter.bi_sector = map_sector(m, bio);
}

static void map_region(struct dm_io_region *io, struct mirror *m,
		       struct bio *bio)
{
	io->bdev = m->dev->bdev;
	io->sector = map_sector(m, bio);
	io->count = bio_sectors(bio);
}

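/*
 * Park a bio on the holds list until the suspend completes, or finish
 * it immediately if the set is already suspended.
 */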
static void hold_bio(struct mirror_set *ms, struct bio *bio)
{
	/*
	 * Lock is required to avoid race condition during suspend
	 * process.
	 */
	spin_lock_irq(&ms->lock);

	if (atomic_read(&ms->suspend)) {
		spin_unlock_irq(&ms->lock);

		/*
		 * If device is suspended, complete the bio.
		 */
		if (dm_noflush_suspending(ms->ti))
			bio->bi_error = DM_ENDIO_REQUEUE;
		else
			bio->bi_error = -EIO;

		bio_endio(bio);
		return;
	}

	/*
	 * Hold bio until the suspend is complete.
	 */
	bio_list_add(&ms->holds, bio);
	spin_unlock_irq(&ms->lock);
}

/*-----------------------------------------------------------------
 * Reads
 *---------------------------------------------------------------*/
static void read_callback(unsigned long error, void *context)
{
	struct bio *bio = context;
	struct mirror *m;

	m = bio_get_m(bio);
	bio_set_m(bio, NULL);

	if (likely(!error)) {
		bio_endio(bio);
		return;
	}

	fail_mirror(m, DM_RAID1_READ_ERROR);

	if (likely(default_ok(m)) || mirror_available(m->ms, bio)) {
		DMWARN_LIMIT("Read failure on mirror device %s.  "
			     "Trying alternative device.",
			     m->dev->name);
		queue_bio(m->ms, bio, bio_data_dir(bio));
		return;
	}

	DMERR_LIMIT("Read failure on mirror device %s.  Failing I/O.",
		    m->dev->name);
	bio_io_error(bio);
}

/* Asynchronous read. */
static void read_async_bio(struct mirror *m, struct bio *bio)
{
	struct dm_io_region io;
	struct dm_io_request io_req = {
		.bi_op = REQ_OP_READ,
		.bi_op_flags = 0,
		.mem.type = DM_IO_BIO,
		.mem.ptr.bio = bio,
		.notify.fn = read_callback,
		.notify.context = bio,
		.client = m->ms->io_client,
	};

	map_region(&io, m, bio);
	bio_set_m(bio, m);
	BUG_ON(dm_io(&io_req, 1, &io, NULL));
}

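/*
 * CLEAN and DIRTY regions are fully recovered, so reads may be
 * balanced across the legs; NOSYNC and RECOVERING regions are only
 * valid on the default mirror.
 */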
static inline int region_in_sync(struct mirror_set *ms, region_t region,
				 int may_block)
{
	int state = dm_rh_get_state(ms->rh, region, may_block);
	return state == DM_RH_CLEAN || state == DM_RH_DIRTY;
}

static void do_reads(struct mirror_set *ms, struct bio_list *reads)
{
	region_t region;
	struct bio *bio;
	struct mirror *m;

	while ((bio = bio_list_pop(reads))) {
		region = dm_rh_bio_to_region(ms->rh, bio);
		m = get_default_mirror(ms);

		/*
		 * We can only read balance if the region is in sync.
		 */
		if (likely(region_in_sync(ms, region, 1)))
			m = choose_mirror(ms, bio->bi_iter.bi_sector);
		else if (m && atomic_read(&m->error_count))
			m = NULL;

		if (likely(m))
			read_async_bio(m, bio);
		else
			bio_io_error(bio);
	}
}

/*-----------------------------------------------------------------
 * Writes.
 *
 * We do different things with the write io depending on the
 * state of the region that it's in:
 *
 * SYNC: 	increment pending, use kcopyd to write to *all* mirrors
 * RECOVERING:	delay the io until recovery completes
 * NOSYNC:	increment pending, just write to the default mirror
 *---------------------------------------------------------------*/


static void write_callback(unsigned long error, void *context)
{
	unsigned i;
	struct bio *bio = (struct bio *) context;
	struct mirror_set *ms;
	int should_wake = 0;
	unsigned long flags;

	ms = bio_get_m(bio)->ms;
	bio_set_m(bio, NULL);

	/*
	 * NOTE: We don't decrement the pending count here,
	 * instead it is done by the targets endio function.
	 * This way we handle both writes to SYNC and NOSYNC
	 * regions with the same code.
	 */
	if (likely(!error)) {
		bio_endio(bio);
		return;
	}

	/*
	 * If the bio is discard, return an error, but do not
	 * degrade the array.
	 */
	if (bio_op(bio) == REQ_OP_DISCARD) {
		bio->bi_error = -EOPNOTSUPP;
		bio_endio(bio);
		return;
	}

	for (i = 0; i < ms->nr_mirrors; i++)
		if (test_bit(i, &error))
			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);

	/*
	 * Need to raise event.  Since raising
	 * events can block, we need to do it in
	 * the main thread.
	 */
	spin_lock_irqsave(&ms->lock, flags);
	if (!ms->failures.head)
		should_wake = 1;
	bio_list_add(&ms->failures, bio);
	spin_unlock_irqrestore(&ms->lock, flags);
	if (should_wake)
		wakeup_mirrord(ms);
}

static void do_write(struct mirror_set *ms, struct bio *bio)
{
	unsigned int i;
	struct dm_io_region io[ms->nr_mirrors], *dest = io;
	struct mirror *m;
	struct dm_io_request io_req = {
		.bi_op = REQ_OP_WRITE,
		.bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH),
		.mem.type = DM_IO_BIO,
		.mem.ptr.bio = bio,
		.notify.fn = write_callback,
		.notify.context = bio,
		.client = ms->io_client,
	};

	if (bio_op(bio) == REQ_OP_DISCARD) {
		io_req.bi_op = REQ_OP_DISCARD;
		io_req.mem.type = DM_IO_KMEM;
		io_req.mem.ptr.addr = NULL;
	}

	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
		map_region(dest++, m, bio);

	/*
	 * Use default mirror because we only need it to retrieve the reference
	 * to the mirror set in write_callback().
	 */
	bio_set_m(bio, get_default_mirror(ms));

	BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL));
}

static void do_writes(struct mirror_set *ms, struct bio_list *writes)
{
	int state;
	struct bio *bio;
	struct bio_list sync, nosync, recover, *this_list = NULL;
	struct bio_list requeue;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	region_t region;

	if (!writes->head)
		return;

	/*
	 * Classify each write.
	 */
	bio_list_init(&sync);
	bio_list_init(&nosync);
	bio_list_init(&recover);
	bio_list_init(&requeue);

	while ((bio = bio_list_pop(writes))) {
		if ((bio->bi_opf & REQ_PREFLUSH) ||
		    (bio_op(bio) == REQ_OP_DISCARD)) {
			bio_list_add(&sync, bio);
			continue;
		}

		region = dm_rh_bio_to_region(ms->rh, bio);

		if (log->type->is_remote_recovering &&
		    log->type->is_remote_recovering(log, region)) {
			bio_list_add(&requeue, bio);
			continue;
		}

		state = dm_rh_get_state(ms->rh, region, 1);
		switch (state) {
		case DM_RH_CLEAN:
		case DM_RH_DIRTY:
			this_list = &sync;
			break;

		case DM_RH_NOSYNC:
			this_list = &nosync;
			break;

		case DM_RH_RECOVERING:
			this_list = &recover;
			break;
		}

		bio_list_add(this_list, bio);
	}

	/*
	 * Add bios that are delayed due to remote recovery
	 * back on to the write queue
	 */
	if (unlikely(requeue.head)) {
		spin_lock_irq(&ms->lock);
		bio_list_merge(&ms->writes, &requeue);
		spin_unlock_irq(&ms->lock);
		delayed_wake(ms);
	}

	/*
	 * Increment the pending counts for any regions that will
	 * be written to (writes to recover regions are going to
	 * be delayed).
	 */
	dm_rh_inc_pending(ms->rh, &sync);
	dm_rh_inc_pending(ms->rh, &nosync);

	/*
	 * If the flush fails on a previous call and succeeds here,
	 * we must not reset the log_failure variable.  We need
	 * userspace interaction to do that.
	 */
	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;

	/*
	 * Dispatch io.
	 */
	if (unlikely(ms->log_failure) && errors_handled(ms)) {
		spin_lock_irq(&ms->lock);
		bio_list_merge(&ms->failures, &sync);
		spin_unlock_irq(&ms->lock);
		wakeup_mirrord(ms);
	} else
		while ((bio = bio_list_pop(&sync)))
			do_write(ms, bio);

	while ((bio = bio_list_pop(&recover)))
		dm_rh_delay(ms->rh, bio);

	while ((bio = bio_list_pop(&nosync))) {
		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
			spin_lock_irq(&ms->lock);
			bio_list_add(&ms->failures, bio);
			spin_unlock_irq(&ms->lock);
			wakeup_mirrord(ms);
		} else {
			map_bio(get_default_mirror(ms), bio);
			generic_make_request(bio);
		}
	}
}

static void do_failures(struct mirror_set *ms, struct bio_list *failures)
{
	struct bio *bio;

	if (likely(!failures->head))
		return;

	/*
	 * If the log has failed, unattempted writes are being
	 * put on the holds list.  We can't issue those writes
	 * until a log has been marked, so we must store them.
	 *
	 * If a 'noflush' suspend is in progress, we can requeue
	 * the I/Os to the core.  This gives userspace a chance
	 * to reconfigure the mirror, at which point the core
	 * will reissue the writes.  If the 'noflush' flag is
	 * not set, we have no choice but to return errors.
	 *
	 * Some writes on the failures list may have been
	 * submitted before the log failure and represent a
	 * failure to write to one of the devices.  It is ok
	 * for us to treat them the same and requeue them
	 * as well.
	 */
	while ((bio = bio_list_pop(failures))) {
		if (!ms->log_failure) {
			ms->in_sync = 0;
			dm_rh_mark_nosync(ms->rh, bio);
		}

		/*
		 * If all the legs are dead, fail the I/O.
		 * If the device has failed and keep_log is enabled,
		 * fail the I/O.
		 *
		 * If we have been told to handle errors, and keep_log
		 * isn't enabled, hold the bio and wait for userspace to
		 * deal with the problem.
		 *
		 * Otherwise pretend that the I/O succeeded. (This would
		 * be wrong if the failed leg returned after reboot and
		 * got replicated back to the good legs.)
		 */
		if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure)))
			bio_io_error(bio);
		else if (errors_handled(ms) && !keep_log(ms))
			hold_bio(ms, bio);
		else
			bio_endio(bio);
	}
}

static void trigger_event(struct work_struct *work)
{
	struct mirror_set *ms =
		container_of(work, struct mirror_set, trigger_event);

	dm_table_event(ms->ti->table);
}

/*-----------------------------------------------------------------
 * kmirrord
 *---------------------------------------------------------------*/
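/*
 * Work function: snapshot and reset the input lists under the lock,
 * then process recovery, reads, writes and failures outside it.
 */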
static void do_mirror(struct work_struct *work)
{
	struct mirror_set *ms = container_of(work, struct mirror_set,
					     kmirrord_work);
	struct bio_list reads, writes, failures;
	unsigned long flags;

	spin_lock_irqsave(&ms->lock, flags);
	reads = ms->reads;
	writes = ms->writes;
	failures = ms->failures;
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	spin_unlock_irqrestore(&ms->lock, flags);

	dm_rh_update_states(ms->rh, errors_handled(ms));
	do_recovery(ms);
	do_reads(ms, &reads);
	do_writes(ms, &writes);
	do_failures(ms, &failures);
}

/*-----------------------------------------------------------------
 * Target functions
 *---------------------------------------------------------------*/
static struct mirror_set *alloc_context(unsigned int nr_mirrors,
					uint32_t region_size,
					struct dm_target *ti,
					struct dm_dirty_log *dl)
{
	size_t len;
	struct mirror_set *ms = NULL;

	len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);

	ms = kzalloc(len, GFP_KERNEL);
	if (!ms) {
		ti->error = "Cannot allocate mirror context";
		return NULL;
	}

	spin_lock_init(&ms->lock);
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	bio_list_init(&ms->holds);

	ms->ti = ti;
	ms->nr_mirrors = nr_mirrors;
	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
	ms->in_sync = 0;
	ms->log_failure = 0;
	ms->leg_failure = 0;
	atomic_set(&ms->suspend, 0);
	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);

	ms->io_client = dm_io_client_create();
	if (IS_ERR(ms->io_client)) {
		ti->error = "Error creating dm_io client";
		kfree(ms);
		return NULL;
	}

	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
				       wakeup_all_recovery_waiters,
				       ms->ti->begin, MAX_RECOVERY,
				       dl, region_size, ms->nr_regions);
	if (IS_ERR(ms->rh)) {
		ti->error = "Error creating dirty region hash";
		dm_io_client_destroy(ms->io_client);
		kfree(ms);
		return NULL;
	}

	return ms;
}

static void free_context(struct mirror_set *ms, struct dm_target *ti,
			 unsigned int m)
{
	while (m--)
		dm_put_device(ti, ms->mirror[m].dev);

	dm_io_client_destroy(ms->io_client);
	dm_region_hash_destroy(ms->rh);
	kfree(ms);
}

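/* Parse one "mirror_path offset" argument pair and bind the device. */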
static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
		      unsigned int mirror, char **argv)
{
	unsigned long long offset;
	char dummy;
	int ret;

	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) {
		ti->error = "Invalid offset";
		return -EINVAL;
	}

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			    &ms->mirror[mirror].dev);
	if (ret) {
		ti->error = "Device lookup failure";
		return ret;
	}

	ms->mirror[mirror].ms = ms;
	atomic_set(&(ms->mirror[mirror].error_count), 0);
	ms->mirror[mirror].error_type = 0;
	ms->mirror[mirror].offset = offset;

	return 0;
}

/*
 * Create dirty log: log_type #log_params <log_params>
 */
static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
					     unsigned argc, char **argv,
					     unsigned *args_used)
{
	unsigned param_count;
	struct dm_dirty_log *dl;
	char dummy;

	if (argc < 2) {
		ti->error = "Insufficient mirror log arguments";
		return NULL;
	}

	if (sscanf(argv[1], "%u%c", &param_count, &dummy) != 1) {
		ti->error = "Invalid mirror log argument count";
		return NULL;
	}

	*args_used = 2 + param_count;

	if (argc < *args_used) {
		ti->error = "Insufficient mirror log arguments";
		return NULL;
	}

	dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count,
				 argv + 2);
	if (!dl) {
		ti->error = "Error creating mirror dirty log";
		return NULL;
	}

	return dl;
}

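/*
 * Parse the optional feature list: #features <feature>+, where each
 * feature is "handle_errors" or "keep_log" ("keep_log" additionally
 * requires "handle_errors").
 */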
static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
			  unsigned *args_used)
{
	unsigned num_features;
	struct dm_target *ti = ms->ti;
	char dummy;
	int i;

	*args_used = 0;

	if (!argc)
		return 0;

	if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) {
		ti->error = "Invalid number of features";
		return -EINVAL;
	}

	argc--;
	argv++;
	(*args_used)++;

	if (num_features > argc) {
		ti->error = "Not enough arguments to support feature count";
		return -EINVAL;
	}

	for (i = 0; i < num_features; i++) {
		if (!strcmp("handle_errors", argv[0]))
			ms->features |= DM_RAID1_HANDLE_ERRORS;
		else if (!strcmp("keep_log", argv[0]))
			ms->features |= DM_RAID1_KEEP_LOG;
		else {
			ti->error = "Unrecognised feature requested";
			return -EINVAL;
		}

		argc--;
		argv++;
		(*args_used)++;
	}
	if (!errors_handled(ms) && keep_log(ms)) {
		ti->error = "keep_log feature requires the handle_errors feature";
		return -EINVAL;
	}

	return 0;
}

/*
 * Construct a mirror mapping:
 *
 * log_type #log_params <log_params>
 * #mirrors [mirror_path offset]{2,}
 * [#features <features>]
 *
 * log_type is "core" or "disk"
 * #log_params is between 1 and 3
 *
 * If present, supported features are "handle_errors" and "keep_log".
 */
static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned int nr_mirrors, m, args_used;
	struct mirror_set *ms;
	struct dm_dirty_log *dl;
	char dummy;

	dl = create_dirty_log(ti, argc, argv, &args_used);
	if (!dl)
		return -EINVAL;

	argv += args_used;
	argc -= args_used;

	if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 ||
	    nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
		ti->error = "Invalid number of mirrors";
		dm_dirty_log_destroy(dl);
		return -EINVAL;
	}

	argv++, argc--;

	if (argc < nr_mirrors * 2) {
		ti->error = "Too few mirror arguments";
		dm_dirty_log_destroy(dl);
		return -EINVAL;
	}

	ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
	if (!ms) {
		dm_dirty_log_destroy(dl);
		return -ENOMEM;
	}

	/* Get the mirror parameter sets */
	for (m = 0; m < nr_mirrors; m++) {
		r = get_mirror(ms, ti, m, argv);
		if (r) {
			free_context(ms, ti, m);
			return r;
		}
		argv += 2;
		argc -= 2;
	}

	ti->private = ms;

	r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh));
	if (r)
		goto err_free_context;

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->per_io_data_size = sizeof(struct dm_raid1_bio_record);

	ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0);
	if (!ms->kmirrord_wq) {
		DMERR("couldn't start kmirrord");
		r = -ENOMEM;
		goto err_free_context;
	}
	INIT_WORK(&ms->kmirrord_work, do_mirror);
	init_timer(&ms->timer);
	ms->timer_pending = 0;
	INIT_WORK(&ms->trigger_event, trigger_event);

	r = parse_features(ms, argc, argv, &args_used);
	if (r)
		goto err_destroy_wq;

	argv += args_used;
	argc -= args_used;

	/*
	 * Any read-balancing addition depends on the
	 * DM_RAID1_HANDLE_ERRORS flag being present.
	 * This is because the decision to balance depends
	 * on the sync state of a region.  If the above
	 * flag is not present, we ignore errors; and
	 * the sync state may be inaccurate.
	 */

	if (argc) {
		ti->error = "Too many mirror arguments";
		r = -EINVAL;
		goto err_destroy_wq;
	}

	ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(ms->kcopyd_client)) {
		r = PTR_ERR(ms->kcopyd_client);
		goto err_destroy_wq;
	}

	wakeup_mirrord(ms);
	return 0;

err_destroy_wq:
	destroy_workqueue(ms->kmirrord_wq);
err_free_context:
	free_context(ms, ti, ms->nr_mirrors);
	return r;
}

static void mirror_dtr(struct dm_target *ti)
{
	struct mirror_set *ms = (struct mirror_set *) ti->private;

	del_timer_sync(&ms->timer);
	flush_workqueue(ms->kmirrord_wq);
	flush_work(&ms->trigger_event);
	dm_kcopyd_client_destroy(ms->kcopyd_client);
	destroy_workqueue(ms->kmirrord_wq);
	free_context(ms, ti, ms->nr_mirrors);
}

/*
 * Mirror mapping function
 */
static int mirror_map(struct dm_target *ti, struct bio *bio)
{
	int r, rw = bio_data_dir(bio);
	struct mirror *m;
	struct mirror_set *ms = ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	struct dm_raid1_bio_record *bio_record =
	  dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));

	if (rw == WRITE) {
		/* Save region for mirror_end_io() handler */
		bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
		queue_bio(ms, bio, rw);
		return DM_MAPIO_SUBMITTED;
	}

	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
	if (r < 0 && r != -EWOULDBLOCK)
		return DM_MAPIO_KILL;

	/*
	 * If region is not in-sync queue the bio.
	 */
	if (!r || (r == -EWOULDBLOCK)) {
		if (bio->bi_opf & REQ_RAHEAD)
			return DM_MAPIO_KILL;

		queue_bio(ms, bio, rw);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * The region is in-sync and we can perform reads directly.
	 * Store enough information so we can retry if it fails.
	 */
	m = choose_mirror(ms, bio->bi_iter.bi_sector);
	if (unlikely(!m))
		return DM_MAPIO_KILL;

	dm_bio_record(&bio_record->details, bio);
	bio_record->m = m;

	map_bio(m, bio);

	return DM_MAPIO_REMAPPED;
}