Commit e9d8faf9 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Disable BH while holding list spinlock in nf_conncount, from
   Taehee Yoo.

2) List corruption in nf_conncount, also from Taehee.

3) Fix race that results in leaving around an empty list node in
   nf_conncount, from Taehee Yoo.

4) Proper chain handling for inactive chains from the commit path,
   from Florian Westphal. This includes a selftest for this.

5) Do duplicate rule handles when replacing rules, also from Florian.

6) Remove net_exit path in xt_RATEEST that results in splat, from Taehee.

7) Possible use-after-free in nft_compat when releasing extensions.
   From Florian.

8) Memory leak in xt_hashlimit, from Taehee.

9) Call ip_vs_dst_notifier after ipv6_dev_notf, from Xin Long.

10) Fix cttimeout with udplite and gre, from Florian.

11) Preserve oif for IPv6 link-local generated traffic from mangle
    table, from Alin Nastac.

12) Missing error handling in masquerade notifiers, from Taehee Yoo.

13) Use mutex to protect registration/unregistration of masquerade
    extensions in order to prevent a race, from Taehee.

14) Incorrect condition check in tree_nodes_free(), also from Taehee.

15) Fix chain counter leak in rule replacement path, from Taehee.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents c7589401 ca089878
......@@ -21,6 +21,19 @@ struct nf_ct_gre_keymap {
struct nf_conntrack_tuple tuple;
};
enum grep_conntrack {
GRE_CT_UNREPLIED,
GRE_CT_REPLIED,
GRE_CT_MAX
};
struct netns_proto_gre {
struct nf_proto_net nf;
rwlock_t keymap_lock;
struct list_head keymap_list;
unsigned int gre_timeouts[GRE_CT_MAX];
};
/* add new tuple->key_reply pair to keymap */
int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
struct nf_conntrack_tuple *t);
......
......@@ -9,7 +9,7 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct nf_nat_range2 *range,
const struct net_device *out);
void nf_nat_masquerade_ipv4_register_notifier(void);
int nf_nat_masquerade_ipv4_register_notifier(void);
void nf_nat_masquerade_ipv4_unregister_notifier(void);
#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
......@@ -5,7 +5,7 @@
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
const struct net_device *out);
void nf_nat_masquerade_ipv6_register_notifier(void);
int nf_nat_masquerade_ipv6_register_notifier(void);
void nf_nat_masquerade_ipv6_unregister_notifier(void);
#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
......@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
int ret;
ret = xt_register_target(&masquerade_tg_reg);
if (ret)
return ret;
if (ret == 0)
nf_nat_masquerade_ipv4_register_notifier();
ret = nf_nat_masquerade_ipv4_register_notifier();
if (ret)
xt_unregister_target(&masquerade_tg_reg);
return ret;
}
......
......@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
static int masq_refcnt;
static DEFINE_MUTEX(masq_mutex);
void nf_nat_masquerade_ipv4_register_notifier(void)
int nf_nat_masquerade_ipv4_register_notifier(void)
{
int ret = 0;
mutex_lock(&masq_mutex);
/* check if the notifier was already set */
if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
return;
if (++masq_refcnt > 1)
goto out_unlock;
/* Register for device down reports */
register_netdevice_notifier(&masq_dev_notifier);
ret = register_netdevice_notifier(&masq_dev_notifier);
if (ret)
goto err_dec;
/* Register IP address change reports */
register_inetaddr_notifier(&masq_inet_notifier);
ret = register_inetaddr_notifier(&masq_inet_notifier);
if (ret)
goto err_unregister;
mutex_unlock(&masq_mutex);
return ret;
err_unregister:
unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
masq_refcnt--;
out_unlock:
mutex_unlock(&masq_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
return;
if (--masq_refcnt > 0)
goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
out_unlock:
mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
......@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
if (ret < 0)
return ret;
nf_nat_masquerade_ipv4_register_notifier();
ret = nf_nat_masquerade_ipv4_register_notifier();
if (ret)
nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
......
......@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
.flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
......
......@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
if (err == 0)
nf_nat_masquerade_ipv6_register_notifier();
if (err)
return err;
err = nf_nat_masquerade_ipv6_register_notifier();
if (err)
xt_unregister_target(&masquerade_tg6_reg);
return err;
}
......
......@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
* of ipv6 addresses being deleted), we also need to add an upper
* limit to the number of queued work items.
*/
static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr)
static int masq_inet6_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
......@@ -171,30 +171,53 @@ static int masq_inet_event(struct notifier_block *this,
return NOTIFY_DONE;
}
static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
static struct notifier_block masq_inet6_notifier = {
.notifier_call = masq_inet6_event,
};
static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
static int masq_refcnt;
static DEFINE_MUTEX(masq_mutex);
void nf_nat_masquerade_ipv6_register_notifier(void)
int nf_nat_masquerade_ipv6_register_notifier(void)
{
int ret = 0;
mutex_lock(&masq_mutex);
/* check if the notifier is already set */
if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
return;
if (++masq_refcnt > 1)
goto out_unlock;
ret = register_netdevice_notifier(&masq_dev_notifier);
if (ret)
goto err_dec;
ret = register_inet6addr_notifier(&masq_inet6_notifier);
if (ret)
goto err_unregister;
register_netdevice_notifier(&masq_dev_notifier);
register_inet6addr_notifier(&masq_inet_notifier);
mutex_unlock(&masq_mutex);
return ret;
err_unregister:
unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
masq_refcnt--;
out_unlock:
mutex_unlock(&masq_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
return;
if (--masq_refcnt > 0)
goto out_unlock;
unregister_inet6addr_notifier(&masq_inet_notifier);
unregister_inet6addr_notifier(&masq_inet6_notifier);
unregister_netdevice_notifier(&masq_dev_notifier);
out_unlock:
mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
......@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
if (ret < 0)
return ret;
nf_nat_masquerade_ipv6_register_notifier();
ret = nf_nat_masquerade_ipv6_register_notifier();
if (ret)
nft_unregister_expr(&nft_masq_ipv6_type);
return ret;
}
......
......@@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
#ifdef CONFIG_IP_VS_IPV6
.priority = ADDRCONF_NOTIFY_PRIORITY + 5,
#endif
};
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
......
......@@ -49,6 +49,7 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
bool dead;
struct rcu_head rcu_head;
};
......@@ -106,15 +107,16 @@ nf_conncount_add(struct nf_conncount_list *list,
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
spin_lock(&list->list_lock);
conn->dead = false;
spin_lock_bh(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
spin_unlock(&list->list_lock);
spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
spin_unlock(&list->list_lock);
spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
......@@ -132,19 +134,22 @@ static bool conn_free(struct nf_conncount_list *list,
{
bool free_entry = false;
spin_lock(&list->list_lock);
spin_lock_bh(&list->list_lock);
if (list->count == 0) {
spin_unlock(&list->list_lock);
return free_entry;
if (conn->dead) {
spin_unlock_bh(&list->list_lock);
return free_entry;
}
list->count--;
conn->dead = true;
list_del_rcu(&conn->node);
if (list->count == 0)
if (list->count == 0) {
list->dead = true;
free_entry = true;
}
spin_unlock(&list->list_lock);
spin_unlock_bh(&list->list_lock);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry;
}
......@@ -245,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 1;
list->count = 0;
list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
......@@ -259,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conn *found_ct;
unsigned int collected = 0;
bool free_entry = false;
bool ret = false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
......@@ -288,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net,
if (collected > CONNCOUNT_GC_MAX_NODES)
return false;
}
return false;
spin_lock_bh(&list->list_lock);
if (!list->count) {
list->dead = true;
ret = true;
}
spin_unlock_bh(&list->list_lock);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
......@@ -309,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
if (rbconn->list.count == 0 && rbconn->list.dead == false) {
rbconn->list.dead = true;
rb_erase(&rbconn->node, root);
call_rcu(&rbconn->rcu_head, __tree_nodes_free);
}
rb_erase(&rbconn->node, root);
call_rcu(&rbconn->rcu_head, __tree_nodes_free);
spin_unlock(&rbconn->list.list_lock);
}
}
......@@ -414,6 +425,7 @@ insert_tree(struct net *net,
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
rbconn->list.count = count;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
......
......@@ -43,24 +43,12 @@
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
enum grep_conntrack {
GRE_CT_UNREPLIED,
GRE_CT_REPLIED,
GRE_CT_MAX
};
static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
static unsigned int proto_gre_net_id __read_mostly;
struct netns_proto_gre {
struct nf_proto_net nf;
rwlock_t keymap_lock;
struct list_head keymap_list;
unsigned int gre_timeouts[GRE_CT_MAX];
};
static inline struct netns_proto_gre *gre_pernet(struct net *net)
{
......@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
{
int ret;
BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
ret = register_pernet_subsys(&proto_gre_net_ops);
if (ret < 0)
goto out_pernet;
......
......@@ -2457,7 +2457,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
struct nft_expr *expr;
struct nft_expr *expr, *next;
/*
* Careful: some expressions might not be initialized in case this
......@@ -2465,8 +2465,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
*/
expr = nft_expr_first(rule);
while (expr != nft_expr_last(rule) && expr->ops) {
next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
expr = next;
}
kfree(rule);
}
......@@ -2589,17 +2590,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (chain->use == UINT_MAX)
return -EOVERFLOW;
}
if (nla[NFTA_RULE_POSITION]) {
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
return -EOPNOTSUPP;
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
}
}
}
......@@ -2669,21 +2667,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (!nft_is_active_next(net, old_rule)) {
err = -ENOENT;
goto err2;
}
trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
old_rule);
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
nft_deactivate_next(net, old_rule);
chain->use--;
if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
err = nft_delrule(&ctx, old_rule);
if (err < 0) {
nft_trans_destroy(trans);
goto err2;
}
......@@ -6324,7 +6315,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}
static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
{
struct nft_rule **g0, **g1;
bool next_genbit;
......@@ -6441,11 +6432,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* step 2. Make rules_gen_X visible to packet path */
list_for_each_entry(table, &net->nft.tables, list) {
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
nf_tables_commit_chain_active(net, chain);
}
list_for_each_entry(chain, &table->chains, list)
nf_tables_commit_chain(net, chain);
}
/*
......
......@@ -455,7 +455,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
case IPPROTO_TCP:
timeouts = nf_tcp_pernet(net)->timeouts;
break;
case IPPROTO_UDP:
case IPPROTO_UDP: /* fallthrough */
case IPPROTO_UDPLITE:
timeouts = nf_udp_pernet(net)->timeouts;
break;
case IPPROTO_DCCP:
......@@ -469,13 +470,23 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
case IPPROTO_SCTP:
#ifdef CONFIG_NF_CT_PROTO_SCTP
timeouts = nf_sctp_pernet(net)->timeouts;
#endif
break;
case IPPROTO_GRE:
#ifdef CONFIG_NF_CT_PROTO_GRE
if (l4proto->net_id) {
struct netns_proto_gre *net_gre;
net_gre = net_generic(net, *l4proto->net_id);
timeouts = net_gre->gre_timeouts;
}
#endif
break;
case 255:
timeouts = &nf_generic_pernet(net)->timeout;
break;
default:
WARN_ON_ONCE(1);
WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
break;
}
......
......@@ -520,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
void *info)
{
struct xt_match *match = expr->ops->data;
struct module *me = match->me;
struct xt_mtdtor_param par;
par.net = ctx->net;
......@@ -530,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
par.match->destroy(&par);
if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
module_put(match->me);
module_put(me);
}
static void
......
......@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
{
int err;
register_netdevice_notifier(&flow_offload_netdev_notifier);
err = register_netdevice_notifier(&flow_offload_netdev_notifier);
if (err)
goto err;
err = nft_register_expr(&nft_flow_offload_type);
if (err < 0)
......@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
register_expr:
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
err:
return err;
}
......
......@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
return 0;
}
static void __net_exit xt_rateest_net_exit(struct net *net)
{
struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
int i;
for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
}
static struct pernet_operations xt_rateest_net_ops = {
.init = xt_rateest_net_init,
.exit = xt_rateest_net_exit,
.id = &xt_rateest_id,
.size = sizeof(struct xt_rateest_net),
};
......
......@@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
/* copy match config into hashtable config */
ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
if (ret)
if (ret) {
vfree(hinfo);
return ret;
}
hinfo->cfg.size = size;
if (hinfo->cfg.max == 0)
......@@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
if (ret)
return ret;
......@@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
if (ret)
return ret;
......@@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)