Commit b29d4986, authored by Joe Thornber, committed by Mike Snitzer

dm cache: significant rework to leverage dm-bio-prison-v2

The cache policy interfaces have been updated to work well with the new
bio-prison v2 interface's ability to queue work immediately (promotion,
demotion, etc) -- overriding benefit being reduced latency on processing
IO through the cache.  Previously such work would be left for the DM
cache core to queue on various lists and then process in batches later
-- this caused a serious delay in latency for IO driven by the cache.

The background tracker code was factored out so that all cache policies
can make use of it.

Also, the "cleaner" policy has been removed and is now a variant of the
smq policy that simply disallows migrations.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 742c8fdc
......@@ -325,14 +325,6 @@ config DM_CACHE_SMQ
of less memory utilization, improved performance and increased
adaptability in the face of changing workloads.
config DM_CACHE_CLEANER
tristate "Cleaner Cache Policy (EXPERIMENTAL)"
depends on DM_CACHE
default y
---help---
A simple cache policy that writes back all data to the
origin. Used when decommissioning a dm-cache.
config DM_ERA
tristate "Era target (EXPERIMENTAL)"
depends on BLK_DEV_DM
......
......@@ -13,9 +13,9 @@ dm-log-userspace-y \
+= dm-log-userspace-base.o dm-log-userspace-transfer.o
dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
dm-cache-background-tracker.o
dm-cache-smq-y += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y += dm-era-target.o
dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
......@@ -57,7 +57,6 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o
obj-$(CONFIG_DM_ERA) += dm-era.o
obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
......
/*
* Copyright (C) 2017 Red Hat. All rights reserved.
*
* This file is released under the GPL.
*/
#include "dm-cache-background-tracker.h"
/*----------------------------------------------------------------*/
#define DM_MSG_PREFIX "dm-background-tracker"
/*
 * One tracked item of background work.  Instances are allocated from the
 * tracker's work_cache by btracker_queue() and released by
 * btracker_complete().
 */
struct bt_work {
/* Links the item into either the tracker's 'issued' or 'queued' list. */
struct list_head list;
/* Links the item into the tracker's 'pending' rb-tree (keyed by oblock). */
struct rb_node node;
/* Copy of the caller's work description; callers are handed a pointer to this. */
struct policy_work work;
};
/*
 * Book-keeping for background work (promotions, demotions, writebacks)
 * that has been queued by a policy but has not yet completed.
 */
struct background_tracker {
/* Limit on outstanding work, as passed to btracker_create(). */
unsigned max_work;
/* Per-operation counts of tracked items, maintained by update_stats(). */
atomic_t pending_promotes;
atomic_t pending_writebacks;
atomic_t pending_demotes;
/* Work that has been handed out to the caller and awaits btracker_complete(). */
struct list_head issued;
/* Work waiting to be handed out by btracker_issue(). */
struct list_head queued;
/* Every tracked item, issued or queued, indexed by origin block. */
struct rb_root pending;
/* Slab cache that struct bt_work items are allocated from. */
struct kmem_cache *work_cache;
};
/*
 * Allocates and initialises an empty background tracker.
 *
 * max_work is recorded in the tracker as the limit on outstanding work.
 *
 * Returns the new tracker, or NULL if either the tracker itself or the
 * slab cache used for its work items could not be allocated.  Release
 * with btracker_destroy().
 */
struct background_tracker *btracker_create(unsigned max_work)
{
	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);

	/* kmalloc() can fail; bail out before touching any field. */
	if (!b) {
		DMERR("couldn't create background_tracker");
		return NULL;
	}

	b->max_work = max_work;
	atomic_set(&b->pending_promotes, 0);
	atomic_set(&b->pending_writebacks, 0);
	atomic_set(&b->pending_demotes, 0);

	INIT_LIST_HEAD(&b->issued);
	INIT_LIST_HEAD(&b->queued);

	b->pending = RB_ROOT;
	b->work_cache = KMEM_CACHE(bt_work, 0);
	if (!b->work_cache) {
		DMERR("couldn't create mempool for background work items");
		kfree(b);
		b = NULL;
	}

	return b;
}
EXPORT_SYMBOL_GPL(btracker_create);
void btracker_destroy(struct background_tracker *b)
{
kmem_cache_destroy(b->work_cache);
kfree(b);
}
EXPORT_SYMBOL_GPL(btracker_destroy);
/*
 * Three-way comparison of two origin blocks.
 *
 * Returns -1 if lhs sorts before rhs, 1 if it sorts after, 0 if equal.
 */
static int cmp_oblock(dm_oblock_t lhs, dm_oblock_t rhs)
{
	if (from_oblock(lhs) < from_oblock(rhs))
		return -1;

	return (from_oblock(rhs) < from_oblock(lhs)) ? 1 : 0;
}
/*
 * Links nw into the tracker's pending tree, which is keyed by origin
 * block.
 *
 * Returns true once nw has been inserted, or false (leaving the tree
 * untouched) if an entry for the same oblock is already present.
 */
static bool __insert_pending(struct background_tracker *b,
			     struct bt_work *nw)
{
	struct rb_node **link = &b->pending.rb_node;
	struct rb_node *parent = NULL;

	while (*link) {
		struct bt_work *cur = container_of(*link, struct bt_work, node);
		int order = cmp_oblock(cur->work.oblock, nw->work.oblock);

		if (!order)
			/* already present */
			return false;

		parent = *link;
		/* The descent direction must match the one in __find_pending(). */
		link = (order < 0) ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&nw->node, parent, link);
	rb_insert_color(&nw->node, &b->pending);

	return true;
}
/*
 * Searches the pending tree for work on the given origin block.
 *
 * Returns the matching item, or NULL if nothing is tracked for oblock.
 */
static struct bt_work *__find_pending(struct background_tracker *b,
				      dm_oblock_t oblock)
{
	struct rb_node *n = b->pending.rb_node;

	while (n) {
		struct bt_work *cur = container_of(n, struct bt_work, node);
		int order = cmp_oblock(cur->work.oblock, oblock);

		if (!order)
			return cur;

		/* Same descent direction as used when inserting. */
		n = (order < 0) ? n->rb_left : n->rb_right;
	}

	return NULL;
}
static void update_stats(struct background_tracker *b, struct policy_work *w, int delta)
{
switch (w->op) {
case POLICY_PROMOTE:
atomic_add(delta, &b->pending_promotes);
break;
case POLICY_DEMOTE:
atomic_add(delta, &b->pending_demotes);
break;
case POLICY_WRITEBACK:
atomic_add(delta, &b->pending_writebacks);
break;
}
}
/*
 * Returns the current value of the pending-writebacks counter, i.e. the
 * number of writeback items queued or issued and not yet completed.
 */
unsigned btracker_nr_writebacks_queued(struct background_tracker *b)
{
	return (unsigned) atomic_read(&b->pending_writebacks);
}
EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued);
/*
 * Returns the current value of the pending-demotions counter, i.e. the
 * number of demotion items queued or issued and not yet completed.
 */
unsigned btracker_nr_demotions_queued(struct background_tracker *b)
{
	return (unsigned) atomic_read(&b->pending_demotes);
}
EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);
static bool max_work_reached(struct background_tracker *b)
{
// FIXME: finish
return false;
}
/*
 * Starts tracking a new piece of background work.
 *
 * The contents of *work are copied, so the caller's struct need not
 * outlive the call.  If pwork is non-NULL the work is treated as issued
 * straight away and *pwork is pointed at the tracker's copy; otherwise
 * the work is parked on the queued list for a later btracker_issue().
 * On any failure *pwork (when supplied) is left as NULL.
 *
 * Returns 0 on success, -ENOMEM if max_work_reached() says the tracker
 * is full or the allocation fails, and -EINVAL if work for the same
 * oblock is already being tracked.
 */
int btracker_queue(struct background_tracker *b,
		   struct policy_work *work,
		   struct policy_work **pwork)
{
	struct bt_work *item;

	if (pwork)
		*pwork = NULL;

	if (max_work_reached(b))
		return -ENOMEM;

	/* GFP_NOWAIT: the allocation must not sleep. */
	item = kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
	if (!item)
		return -ENOMEM;

	memcpy(&item->work, work, sizeof(*work));

	if (!__insert_pending(b, item)) {
		/*
		 * Somebody beat us to it: work for this oblock is already
		 * tracked, so drop the duplicate.
		 */
		kmem_cache_free(b->work_cache, item);
		return -EINVAL;
	}

	list_add(&item->list, pwork ? &b->issued : &b->queued);
	if (pwork)
		*pwork = &item->work;

	update_stats(b, &item->work, 1);

	return 0;
}
EXPORT_SYMBOL_GPL(btracker_queue);
/*
 * Hands out the work item at the front of the queued list, moving it to
 * the issued list and storing a pointer to it in *work.
 *
 * Returns 0 on success, or -ENODATA (leaving *work untouched) if there's
 * no queued work.
 */
int btracker_issue(struct background_tracker *b, struct policy_work **work)
{
	struct bt_work *next;

	if (!list_empty(&b->queued)) {
		next = list_first_entry(&b->queued, struct bt_work, list);
		list_move(&next->list, &b->issued);
		*work = &next->work;
		return 0;
	}

	return -ENODATA;
}
EXPORT_SYMBOL_GPL(btracker_issue);
/*
 * Stops tracking a piece of work and frees it.
 *
 * op must be the pointer the tracker handed out (via btracker_queue()'s
 * pwork or btracker_issue()), not a copy: the enclosing bt_work is
 * recovered from it with container_of().
 */
void btracker_complete(struct background_tracker *b,
		       struct policy_work *op)
{
	struct bt_work *done = container_of(op, struct bt_work, work);

	update_stats(b, op, -1);

	/* Unhook from both the oblock index and whichever list holds it. */
	rb_erase(&done->node, &b->pending);
	list_del(&done->list);

	kmem_cache_free(b->work_cache, done);
}
EXPORT_SYMBOL_GPL(btracker_complete);
/*
 * Returns true if the tracker holds any work item for the given origin
 * block.  Note the lookup is by oblock only; the item's operation type
 * is not examined.
 */
bool btracker_promotion_already_present(struct background_tracker *b,
					dm_oblock_t oblock)
{
	struct bt_work *found = __find_pending(b, oblock);

	return found ? true : false;
}
EXPORT_SYMBOL_GPL(btracker_promotion_already_present);
/*----------------------------------------------------------------*/
/*
* Copyright (C) 2017 Red Hat. All rights reserved.
*
* This file is released under the GPL.
*/
#ifndef DM_CACHE_BACKGROUND_WORK_H
#define DM_CACHE_BACKGROUND_WORK_H
#include <linux/vmalloc.h>
#include "dm-cache-policy.h"
/*----------------------------------------------------------------*/
struct background_work;
/* Opaque handle; the layout is private to the tracker implementation. */
struct background_tracker;
/*
 * FIXME: discuss lack of locking in all methods.
 *
 * NOTE(review): none of these functions take a lock themselves, so
 * callers presumably have to serialise access to a tracker - confirm
 * against the policies that use it.
 */
/*
 * Creates an empty tracker.  max_work is recorded as the limit on
 * outstanding work.  Returns NULL on allocation failure.
 */
struct background_tracker *btracker_create(unsigned max_work);
/*
 * Frees a tracker obtained from btracker_create().
 */
void btracker_destroy(struct background_tracker *b);
/*
 * Number of writeback / demotion work items currently tracked, ie. queued
 * or issued but not yet passed to btracker_complete().
 */
unsigned btracker_nr_writebacks_queued(struct background_tracker *b);
unsigned btracker_nr_demotions_queued(struct background_tracker *b);
/*
 * Copies *work into the tracker.  If pwork is non-NULL the work is
 * considered issued immediately and *pwork is set to the tracker's copy
 * (or to NULL on failure); if pwork is NULL the work waits for
 * btracker_issue().
 *
 * returns -EINVAL iff work for the same oblock is already tracked.
 * -ENOMEM if the work couldn't be queued for another reason.
 */
int btracker_queue(struct background_tracker *b,
struct policy_work *work,
struct policy_work **pwork);
/*
 * Moves one queued work item to the issued state and returns it in *work.
 *
 * Returns -ENODATA if there's no work.
 */
int btracker_issue(struct background_tracker *b, struct policy_work **work);
/*
 * Releases a work item.  op must be the pointer handed out by
 * btracker_queue() or btracker_issue(), not a copy of the struct.
 */
void btracker_complete(struct background_tracker *b,
struct policy_work *op);
/*
 * Returns true if any work is tracked for oblock.  Despite the name the
 * check is on the oblock alone, not on the kind of work.
 */
bool btracker_promotion_already_present(struct background_tracker *b,
dm_oblock_t oblock);
/*----------------------------------------------------------------*/
#endif
......@@ -50,6 +50,8 @@
#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL
#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL
struct dm_cache_metadata;
/*
* Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on
* failure. If reopening then features must match.
......
This diff is collapsed.
......@@ -12,70 +12,65 @@
/*----------------------------------------------------------------*/
/*
* Little inline functions that simplify calling the policy methods.
*/
static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
struct bio *bio, struct policy_locker *locker,
struct policy_result *result)
static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
int data_dir, bool fast_copy, bool *background_queued)
{
return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
return p->lookup(p, oblock, cblock, data_dir, fast_copy, background_queued);
}
static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
static inline int policy_lookup_with_work(struct dm_cache_policy *p,
dm_oblock_t oblock, dm_cblock_t *cblock,
int data_dir, bool fast_copy,
struct policy_work **work)
{
BUG_ON(!p->lookup);
return p->lookup(p, oblock, cblock);
}
if (!p->lookup_with_work) {
*work = NULL;
return p->lookup(p, oblock, cblock, data_dir, fast_copy, NULL);
}
static inline void policy_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
if (p->set_dirty)
p->set_dirty(p, oblock);
return p->lookup_with_work(p, oblock, cblock, data_dir, fast_copy, work);
}
static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
static inline int policy_get_background_work(struct dm_cache_policy *p,
bool idle, struct policy_work **result)
{
if (p->clear_dirty)
p->clear_dirty(p, oblock);
return p->get_background_work(p, idle, result);
}
static inline int policy_load_mapping(struct dm_cache_policy *p,
dm_oblock_t oblock, dm_cblock_t cblock,
uint32_t hint, bool hint_valid)
static inline void policy_complete_background_work(struct dm_cache_policy *p,
struct policy_work *work,
bool success)
{
return p->load_mapping(p, oblock, cblock, hint, hint_valid);
return p->complete_background_work(p, work, success);
}
static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
dm_cblock_t cblock)
static inline void policy_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
return p->get_hint ? p->get_hint(p, cblock) : 0;
p->set_dirty(p, cblock);
}
static inline int policy_writeback_work(struct dm_cache_policy *p,
dm_oblock_t *oblock,
dm_cblock_t *cblock,
bool critical_only)
static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
return p->writeback_work ? p->writeback_work(p, oblock, cblock, critical_only) : -ENOENT;
p->clear_dirty(p, cblock);
}
static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
static inline int policy_load_mapping(struct dm_cache_policy *p,
dm_oblock_t oblock, dm_cblock_t cblock,
bool dirty, uint32_t hint, bool hint_valid)
{
p->remove_mapping(p, oblock);
return p->load_mapping(p, oblock, cblock, dirty, hint, hint_valid);
}
static inline int policy_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
static inline int policy_invalidate_mapping(struct dm_cache_policy *p,
dm_cblock_t cblock)
{
return p->remove_cblock(p, cblock);
return p->invalidate_mapping(p, cblock);
}
static inline void policy_force_mapping(struct dm_cache_policy *p,
dm_oblock_t current_oblock, dm_oblock_t new_oblock)
static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
dm_cblock_t cblock)
{
return p->force_mapping(p, current_oblock, new_oblock);
return p->get_hint ? p->get_hint(p, cblock) : 0;
}
static inline dm_cblock_t policy_residency(struct dm_cache_policy *p)
......@@ -107,6 +102,11 @@ static inline int policy_set_config_value(struct dm_cache_policy *p,
return p->set_config_value ? p->set_config_value(p, key, value) : -EINVAL;
}
/*
 * Forwards the allow/disallow-migrations setting to the policy.
 *
 * The method pointer is called unconditionally, so every policy must
 * provide allow_migrations.
 */
static inline void policy_allow_migrations(struct dm_cache_policy *p, bool allow)
{
	/* void function: call the method, don't 'return' its (void) result. */
	p->allow_migrations(p, allow);
}
/*----------------------------------------------------------------*/
/*
......
This diff is collapsed.
......@@ -13,183 +13,100 @@
/*----------------------------------------------------------------*/
/* FIXME: make it clear which methods are optional. Get debug policy to
* double check this at start.
*/
/*
* The cache policy makes the important decisions about which blocks get to
* live on the faster cache device.
*
* When the core target has to remap a bio it calls the 'map' method of the
* policy. This returns an instruction telling the core target what to do.
*
* POLICY_HIT:
* That block is in the cache. Remap to the cache and carry on.
*
* POLICY_MISS:
* This block is on the origin device. Remap and carry on.
*
* POLICY_NEW:
* This block is currently on the origin device, but the policy wants to
* move it. The core should:
*
* - hold any further io to this origin block
* - copy the origin to the given cache block
* - release all the held blocks
* - remap the original block to the cache
*
* POLICY_REPLACE:
* This block is currently on the origin device. The policy wants to
* move it to the cache, with the added complication that the destination
* cache block needs a writeback first. The core should:
*
* - hold any further io to this origin block
* - hold any further io to the origin block that's being written back
* - writeback
* - copy new block to cache
* - release held blocks
* - remap bio to cache and reissue.
*
* Should the core run into trouble while processing a POLICY_NEW or
* POLICY_REPLACE instruction it will roll back the policies mapping using
* remove_mapping() or force_mapping(). These methods must not fail. This
* approach avoids having transactional semantics in the policy (ie, the
* core informing the policy when a migration is complete), and hence makes
* it easier to write new policies.
*
* In general policy methods should never block, except in the case of the
* map function when can_migrate is set. So be careful to implement using
* bounded, preallocated memory.
*/
enum policy_operation {
POLICY_HIT,
POLICY_MISS,
POLICY_NEW,
POLICY_REPLACE
};
/*
* When issuing a POLICY_REPLACE the policy needs to make a callback to
* lock the block being demoted. This doesn't need to occur during a
* writeback operation since the block remains in the cache.
*/
struct policy_locker;
typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock);
struct policy_locker {
policy_lock_fn fn;
POLICY_PROMOTE,
POLICY_DEMOTE,
POLICY_WRITEBACK
};
/*
* This is the instruction passed back to the core target.
*/
struct policy_result {
struct policy_work {
enum policy_operation op;
dm_oblock_t old_oblock; /* POLICY_REPLACE */
dm_cblock_t cblock; /* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */
dm_oblock_t oblock;
dm_cblock_t cblock;
};
/*
* The cache policy object. Just a bunch of methods. It is envisaged that
* this structure will be embedded in a bigger, policy specific structure
* (ie. use container_of()).
* The cache policy object. It is envisaged that this structure will be
* embedded in a bigger, policy specific structure (ie. use container_of()).
*/
struct dm_cache_policy {
/*
* FIXME: make it clear which methods are optional, and which may
* block.
*/
/*
* Destroys this object.
*/
void (*destroy)(struct dm_cache_policy *p);
/*
* See large comment above.
*
* oblock - the origin block we're interested in.
*
* can_block - indicates whether the current thread is allowed to
* block. -EWOULDBLOCK returned if it can't and would.
*
* can_migrate - gives permission for POLICY_NEW or POLICY_REPLACE
* instructions. If denied and the policy would have
* returned one of these instructions it should
* return -EWOULDBLOCK.
* Find the location of a block.
*
* discarded_oblock - indicates whether the whole origin block is
* in a discarded state (FIXME: better to tell the
* policy about this sooner, so it can recycle that
* cache block if it wants.)
* bio - the bio that triggered this call.
* result - gets filled in with the instruction.
* Must not block.
*
* May only return 0, or -EWOULDBLOCK (if !can_migrate)
* Returns 0 if in cache (cblock will be set), -ENOENT if not, < 0 for
* other errors (-EWOULDBLOCK would be typical). data_dir should be
* READ or WRITE. fast_copy should be set if migrating this block would
* be 'cheap' somehow (eg, discarded data). background_queued will be set
* if a migration has just been queued.
*/
int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
struct bio *bio, struct policy_locker *locker,
struct policy_result *result);
int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
int data_dir, bool fast_copy, bool *background_queued);
/*
* Sometimes we want to see if a block is in the cache, without
* triggering any update of stats. (ie. it's not a real hit).
*
* Must not block.
* Sometimes the core target can optimise a migration, eg, the
* block may be discarded, or the bio may cover an entire block.
* In order to optimise it needs the migration immediately though
* so it knows to do something different with the bio.
*
* Returns 0 if in cache, -ENOENT if not, < 0 for other errors
* (-EWOULDBLOCK would be typical).
* This method is optional (policy-internal will fallback to using
* lookup).
*/
int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
int (*lookup_with_work)(struct dm_cache_policy *p,
dm_oblock_t oblock, dm_cblock_t *cblock,
int data_dir, bool fast_copy,
struct policy_work **work);
/*
* Called when a cache target is first created. Used to load a
* mapping from the metadata device into the policy.
* Retrieves background work. Returns -ENODATA when there's no
* background work.
*/
int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
dm_cblock_t cblock, uint32_t hint, bool hint_valid);
int (*get_background_work)(struct dm_cache_policy *p, bool idle,
struct policy_work **result);
/*
* Gets the hint for a given cblock. Called in a single threaded
* context. So no locking required.
* You must pass in the same work pointer that you were given, not
* a copy.
*/
uint32_t (*get_hint)(struct dm_cache_policy *p, dm_cblock_t cblock);
void (*complete_background_work)(struct dm_cache_policy *p,
struct policy_work *work,
bool success);
void (*set_dirty)(struct dm_cache_policy *p, dm_cblock_t cblock);
void (*clear_dirty)(struct dm_cache_policy *p, dm_cblock_t cblock);
/*
* Override functions used on the error paths of the core target.
* They must succeed.
* Called when a cache target is first created. Used to load a
* mapping from the metadata device into the policy.
*/
void (*remove_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock);
void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock,
dm_oblock_t new_oblock);
int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
dm_cblock_t cblock, bool dirty,
uint32_t hint, bool hint_valid);
/*
* This is called via the invalidate_cblocks message. It is
* possible the particular cblock has already been removed due to a
* write io in passthrough mode. In which case this should return
* -ENODATA.
* Drops the mapping, irrespective of whether it's clean or dirty.
* Returns -ENODATA if cblock is not mapped.
*/
int (*remove_cblock)(struct dm_cache_policy *p, dm_cblock_t cblock);
int (*invalidate_mapping)(struct dm_cache_policy *p, dm_cblock_t cblock);
/*
* Provide a dirty block to be written back by the core target. If
* critical_only is set then the policy should only provide work if
* it urgently needs it.
*
* Returns:
*
* 0 and @cblock,@oblock: block to write back provided
*
* -ENODATA: no dirty blocks available
* Gets the hint for a given cblock. Called in a single threaded
* context. So no locking required.
*/
int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock,
bool critical_only);
uint32_t (*get_hint)(struct dm_cache_policy *p, dm_cblock_t cblock);
/*
* How full is the cache?
......@@ -202,6 +119,8 @@ struct dm_cache_policy {
* queue merging has occurred). To stop the policy being fooled by
* these, the core target sends regular tick() calls to the policy.
* The policy should only count an entry as hit once per tick.
*
* This method is optional.
*/
void (*tick)(struct dm_cache_policy *p, bool can_block);
......@@ -213,6 +132,8 @@ struct dm_cache_policy {
int (*set_config_value)(struct dm_cache_policy *p,
const char *key, const char *value);
void (*allow_migrations)(struct dm_cache_policy *p, bool allow);
/*
* Book keeping ptr for the policy register, not for general use.
*/
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment