diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ada1296c87d50..72f5ebc5c97a9 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -197,7 +197,7 @@ struct ip_set_region { }; /* Max range where every element is added/deleted in one step */ -#define IPSET_MAX_RANGE (1<<20) +#define IPSET_MAX_RANGE (1<<14) /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 241e005f290ad..e9a9ab34a7ccc 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -45,7 +45,6 @@ struct nfnetlink_subsystem { int (*commit)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action); - void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9b9c0f30e558..343ce45bc35c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -261,6 +261,7 @@ struct nf_bridge_info { u8 pkt_otherhost:1; u8 in_prerouting:1; u8 bridged_dnat:1; + u8 sabotage_in_done:1; __u16 frag_max_size; struct net_device *physindev; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3cd3a6e631aa5..ff315e3ae75fc 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -94,7 +94,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) { if (timeout > INT_MAX) timeout = INT_MAX; - WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); + + if (nf_ct_is_confirmed(ct)) + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); + else + ct->timeout = (u32)timeout; } int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 26411f1386227..a4fc4322f3f24 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -197,12 +197,12 @@ enum nft_data_desc_flags { struct nft_data_desc { enum nft_data_types type; + unsigned int size; unsigned int len; unsigned int flags; }; -int nft_data_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla); void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); @@ -243,6 +243,9 @@ struct nft_userdata { unsigned char data[]; }; +/* placeholder structure for opaque set element backend representation. */ +struct nft_elem_priv { }; + /** * struct nft_set_elem - generic representation of set elements * @@ -263,9 +266,14 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; - void *priv; + struct nft_elem_priv *priv; }; +static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) +{ + return (void *)priv; +} + struct nft_set; struct nft_set_iter { u8 genmask; @@ -360,7 +368,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); struct nft_set_ext; @@ -393,7 +401,8 @@ struct nft_set_ops { const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *), const struct nft_expr *expr, @@ -409,23 +418,24 @@ struct nft_set_ops { void (*activate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct net *net, + struct nft_elem_priv * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*flush)(const struct net *net, + void (*flush)(const struct net *net, const struct nft_set *set, - void *priv); + struct nft_elem_priv *priv); void (*remove)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); - void * (*get)(const struct net *net, + struct nft_elem_priv * (*get)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); - + void (*commit)(const struct nft_set *set); + void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, @@ -434,7 +444,8 @@ struct nft_set_ops { int (*init)(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]); - void (*destroy)(const struct nft_set *set); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_set *set); void (*gc_init)(const struct nft_set *set); unsigned int elemsize; @@ -473,6 +484,7 @@ struct nft_set_elem_expr { * * @list: table set list node * @bindings: list of set bindings + * @refs: internal refcounting for async set destruction * @table: table this set belongs to * @net: netnamespace this set belongs to * @name: name of the set @@ -494,6 +506,7 @@ struct nft_set_elem_expr { * @expr: stateful expression * @ops: set ops * @flags: set flags + * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length @@ -502,6 +515,7 @@ struct nft_set_elem_expr { struct nft_set { struct list_head list; struct list_head bindings; + refcount_t refs; struct nft_table *table; possible_net_t net; char *name; @@ -520,9 +534,11 @@ struct nft_set { u16 policy; u16 udlen; unsigned char *udata; + struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + dead:1, genmask:2; u8 klen; u8 dlen; @@ -543,6 +559,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline bool nft_set_gc_is_pending(const struct nft_set *s) +{ + return refcount_read(&s->refs) != 1; +} + static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); @@ -556,7 +577,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net, struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, const struct nft_set *set); -void *nft_set_catchall_gc(const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -639,6 +659,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[]; struct nft_set_ext_tmpl { u16 len; u8 offset[NFT_SET_EXT_NUM]; + u8 ext_len[NFT_SET_EXT_NUM]; }; /** @@ -668,7 +689,8 @@ static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, return -EINVAL; tmpl->offset[id] = tmpl->len; - tmpl->len += nft_set_ext_types[id].len + len; + tmpl->ext_len[id] = nft_set_ext_types[id].len + len; + tmpl->len += tmpl->ext_len[id]; return 0; } @@ -746,9 +768,9 @@ static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, - void *elem) + const struct nft_elem_priv *elem_priv) { - return elem + set->ops->elemsize; + return (void *)elem_priv + set->ops->elemsize; } static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) @@ -760,70 +782,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr); -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, const u32 *data, - u64 timeout, u64 expiration, gfp_t gfp); +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr); - -/** - * struct nft_set_gc_batch_head - nf_tables set garbage collection batch - * - * @rcu: rcu head - * @set: set the elements belong to - * @cnt: count of elements - */ -struct nft_set_gc_batch_head { - struct rcu_head rcu; - const struct nft_set *set; - unsigned int cnt; -}; - -#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ - sizeof(struct nft_set_gc_batch_head)) / \ - sizeof(void *)) - -/** - * struct nft_set_gc_batch - nf_tables set garbage collection batch - * - * @head: GC batch head - * @elems: garbage collection elements - */ -struct nft_set_gc_batch { - struct nft_set_gc_batch_head head; - void *elems[NFT_SET_GC_BATCH_SIZE]; -}; - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp); -void nft_set_gc_batch_release(struct rcu_head *rcu); - -static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) -{ - if (gcb != NULL) - call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); -} - -static inline struct nft_set_gc_batch * -nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, - gfp_t gfp) -{ - if (gcb != NULL) { - if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) - return gcb; - nft_set_gc_batch_complete(gcb); - } - return nft_set_gc_batch_alloc(set, gfp); -} - -static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, - void *elem) -{ - gcb->elems[gcb->head.cnt++] = elem; -} +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_elem_priv *elem_priv); struct nft_expr_ops; /** @@ -903,7 +874,8 @@ struct nft_expr_ops { void (*destroy_clone)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, + bool reset); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); @@ -1137,6 +1109,29 @@ int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); +static inline bool nft_use_inc(u32 *use) +{ + if (*use == UINT_MAX) + return false; + + (*use)++; + + return true; +} + +static inline void nft_use_dec(u32 *use) +{ + WARN_ON_ONCE((*use)-- == 0); +} + +/* For error and abort path: restore use counter to previous state. */ +static inline void nft_use_inc_restore(u32 *use) +{ + WARN_ON_ONCE(!nft_use_inc(use)); +} + +#define nft_use_dec_restore nft_use_dec + /** * struct nft_table - nf_tables table * @@ -1220,8 +1215,8 @@ struct nft_object { struct list_head list; struct rhlist_head rhlhead; struct nft_object_hash_key key; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; u16 udlen; u8 *udata; @@ -1323,8 +1318,8 @@ struct nft_flowtable { char *name; int hooknum; int ops_len; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; /* runtime data below here */ struct list_head hook_list ____cacheline_aligned; @@ -1464,45 +1459,37 @@ static inline void nft_set_elem_change_active(const struct net *net, #endif /* IS_ENABLED(CONFIG_NF_TABLES) */ -/* - * We use a free bit in the genmask field to indicate the element - * is busy, meaning it is currently being processed either by - * the netlink API or GC. - * - * Even though the genmask is only a single byte wide, this works - * because the extension structure if fully constant once initialized, - * so there are no non-atomic write accesses unless it is already - * marked busy. - */ -#define NFT_SET_ELEM_BUSY_MASK (1 << 2) +#define NFT_SET_ELEM_DEAD_MASK (1 << 2) #if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT 2 +#define NFT_SET_ELEM_DEAD_BIT 2 #elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) #else #error #endif -static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) +static inline void nft_set_elem_dead(struct nft_set_ext *ext) { unsigned long *word = (unsigned long *)ext; BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); - return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); + set_bit(NFT_SET_ELEM_DEAD_BIT, word); } -static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) +static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) { unsigned long *word = (unsigned long *)ext; - clear_bit(NFT_SET_ELEM_BUSY_BIT, word); + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_bit(NFT_SET_ELEM_DEAD_BIT, word); } /** * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @binding_list: list of objects with possible bindings * @msg_type: message type * @put_net: ctx->net needs to be put * @ctx: transaction context @@ -1510,6 +1497,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) */ struct nft_trans { struct list_head list; + struct list_head binding_list; int msg_type; bool put_net; struct nft_ctx ctx; @@ -1628,6 +1616,39 @@ struct nft_trans_flowtable { #define nft_trans_flowtable_flags(trans) \ (((struct nft_trans_flowtable *)trans->data)->flags) +#define NFT_TRANS_GC_BATCHCOUNT 256 + +struct nft_trans_gc { + struct list_head list; + struct net *net; + struct nft_set *set; + u32 seq; + u16 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; +}; + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_destroy(struct nft_trans_gc *trans); + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); + +void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); + +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); + +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); @@ -1648,11 +1669,13 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head binding_list; struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; u64 table_handle; unsigned int base_seq; + unsigned int gc_seq; u8 validate_state; }; diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 82d0e41b76f22..faa108b1ba675 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) return false; } +static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) +{ + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); +} + /* assign a socket to the skb -- consumes sk */ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 237f3757637e1..73482be7d61fa 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -18,7 +18,7 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } -int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 2dce55c736f40..8e923794c91bc 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -23,10 +23,10 @@ int nft_meta_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]); int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 56b123a42220e..6d9ba62efd750 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -22,7 +22,8 @@ int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); -int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_reject_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset); int nft_reject_icmp_code(u8 code); int nft_reject_icmpv6_code(u8 code); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 08db4ee06ab6f..67b73b5a8d5a5 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -97,6 +97,15 @@ enum nft_verdicts { * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETRULE_RESET: get rules and reset stateful expressions (enum nft_obj_attributes) + * @NFT_MSG_DESTROYTABLE: destroy a table (enum nft_table_attributes) + * @NFT_MSG_DESTROYCHAIN: destroy a chain (enum nft_chain_attributes) + * @NFT_MSG_DESTROYRULE: destroy a rule (enum nft_rule_attributes) + * @NFT_MSG_DESTROYSET: destroy a set (enum nft_set_attributes) + * @NFT_MSG_DESTROYSETELEM: destroy a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DESTROYOBJ: destroy a stateful object (enum nft_object_attributes) + * @NFT_MSG_DESTROYFLOWTABLE: destroy flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETSETELEM_RESET: get set elements and reset attached stateful expressions (enum nft_set_elem_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -124,6 +133,15 @@ enum nf_tables_msg_types { NFT_MSG_NEWFLOWTABLE, NFT_MSG_GETFLOWTABLE, NFT_MSG_DELFLOWTABLE, + NFT_MSG_GETRULE_RESET, + NFT_MSG_DESTROYTABLE, + NFT_MSG_DESTROYCHAIN, + NFT_MSG_DESTROYRULE, + NFT_MSG_DESTROYSET, + NFT_MSG_DESTROYSETELEM, + NFT_MSG_DESTROYOBJ, + NFT_MSG_DESTROYFLOWTABLE, + NFT_MSG_GETSETELEM_RESET, NFT_MSG_MAX, }; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index f20f4373ff408..812bd7e1750b6 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -868,11 +868,17 @@ static unsigned int ip_sabotage_in(void *priv, { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - if (nf_bridge && !nf_bridge->in_prerouting && - !netif_is_l3_master(skb->dev) && - !netif_is_l3_slave(skb->dev)) { - state->okfn(state->net, state->sk, skb); - return NF_STOLEN; + if (nf_bridge) { + if (nf_bridge->sabotage_in_done) + return NF_ACCEPT; + + if (!nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev) && + !netif_is_l3_slave(skb->dev)) { + nf_bridge->sabotage_in_done = 1; + state->okfn(state->net, state->sk, skb); + return NF_STOLEN; + } } return NF_ACCEPT; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index efacd565a4e35..0f863762131c5 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1005,8 +1005,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_iterate; } - if (repl->valid_hooks != t->valid_hooks) + if (repl->valid_hooks != t->valid_hooks) { + ret = -EINVAL; goto free_unlock; + } if (repl->num_counters && repl->num_counters != t->private->nentries) { ret = -EINVAL; @@ -1053,7 +1055,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REPLACE, GFP_KERNEL); - return ret; + return 0; free_unlock: mutex_unlock(&ebt_mutex); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index dd2cbe74a810a..56973719e07fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1044,7 +1044,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_counters *counters; struct ipt_entry *iter; - ret = 0; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; @@ -1090,7 +1089,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); } vfree(counters); - return ret; + return 0; put_module: module_put(t->me); diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index b22b2c745c76c..69e3317996043 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, hp->source, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index aeb631760eb9e..cae5b38335b3a 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -52,7 +52,8 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx, return err; } -static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_dup_ipv4_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_dup_ipv4 *priv = nft_expr_priv(expr); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 728b995561615..33b086d1c578d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1061,7 +1061,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_counters *counters; struct ip6t_entry *iter; - ret = 0; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; @@ -1107,7 +1106,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); } vfree(counters); - return ret; + return 0; put_module: module_put(t->me); diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 929502e51203b..52f828bb5a83d 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 3a00d95e964e9..e859beb29bb11 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -50,7 +50,8 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx, return err; } -static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_dup_ipv6_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9a28c0a88aae9..a3e380050ebe7 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -684,6 +684,14 @@ __ip_set_put(struct ip_set *set) /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need * a separate reference counter */ +static void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + static void __ip_set_put_netlink(struct ip_set *set) { @@ -1698,13 +1706,22 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { + if (retried) { + __ip_set_get_netlink(set); + nfnl_unlock(NFNL_SUBSYS_IPSET); + cond_resched(); + nfnl_lock(NFNL_SUBSYS_IPSET); + __ip_set_put_netlink(set); + } + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); retried = true; - } while (ret == -EAGAIN && - set->variant->resize && - (ret = set->variant->resize(set, retried)) == 0); + } while (ret == -ERANGE || + (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried)) == 0)); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 75d556d71652d..24adcdd7a0b16 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -98,11 +98,11 @@ static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip4 *h = set->data; + struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, hosts; + u32 ip = 0, ip_to = 0, hosts, i = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) @@ -147,14 +147,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - /* 64bit division is not allowed on 32bit */ - if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to;) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 153de3457423e..a22ec1a6f6ec8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -97,11 +97,11 @@ static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark4 *h = set->data; + struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0; + u32 ip, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to; ip++, i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ipmark4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7303138e46be1..10481760a9b25 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -105,11 +105,11 @@ static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport4 *h = set->data; + struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -173,17 +173,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 334fb1ad0e86c..39a01934b1536 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -108,11 +108,11 @@ static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip4 *h = set->data; + struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipportip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7df94f437f600..5c6de605a9fb7 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -160,12 +160,12 @@ static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet4 *h = set->data; + struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); @@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip2 = htonl(ip2); ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; + if (i > IPSET_MAX_RANGE) { + hash_ipportnet4_data_next(&h->next, + &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1422739d9aa25..ce0a9ce5a91f1 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -136,11 +136,11 @@ static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net4 *h = set->data; + struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_net4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 9810f5bf63f5e..0310732862362 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_netiface4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 3d09eefe998a7..c07b70bf32db4 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -163,13 +163,12 @@ static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netnet4 *h = set->data; + struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; - u64 n = 0, m = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -245,19 +244,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -270,7 +256,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip[0] = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); do { + i++; e.ip[1] = htonl(ip2); + if (i > IPSET_MAX_RANGE) { + hash_netnet4_data_next(&h->next, &e); + return -ERANGE; + } ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09cf72eb37f8d..d1a0628df4ef3 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -154,12 +154,11 @@ static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport4 *h = set->data; + struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; - u64 n = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); - n++; - } while (ipn++ < ip_to); - - if (n*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip); @@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_netport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index f9b16f2b22191..fdacbc3c15bef 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); - proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, - sizeof(struct seq_net_private)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, + &ip_vs_app_seq_ops, + sizeof(struct seq_net_private))) + return -ENOMEM; +#endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /* all */); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); +#endif } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index db13288fddfad..cb6d68220c265 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { atomic_set(&ipvs->conn_count, 0); - proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, - &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); - proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, - &ip_vs_conn_sync_seq_ops, - sizeof(struct ip_vs_iter_state)); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, + &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn; + + if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state))) + goto err_conn_sync; +#endif + return 0; + +#ifdef CONFIG_PROC_FS +err_conn_sync: + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); +err_conn: + return -ENOMEM; +#endif } void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ ip_vs_conn_flush(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); +#endif } int __init ip_vs_conn_init(void) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 1cd87b28c9b05..c4108e3ce637b 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -339,6 +339,9 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + if (!nf_ct_is_confirmed(nfct)) + nfct->timeout += nfct_time_stamp; + nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { nf_conntrack_free(nfct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e8843c397b0b7..e9f43f320221c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -96,8 +96,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex); #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) #define GC_SCAN_EXPIRED_MAX (64000u / HZ) -#define MIN_CHAINLEN 8u -#define MAX_CHAINLEN (32u - MIN_CHAINLEN) +#define MIN_CHAINLEN 50u +#define MAX_CHAINLEN (80u - MIN_CHAINLEN) static struct conntrack_gc_work conntrack_gc_work; @@ -890,10 +890,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone = nf_ct_zone(ct); - if (!nf_ct_ext_valid_pre(ct->ext)) { - NF_CT_STAT_INC_ATOMIC(net, insert_failed); - return -ETIMEDOUT; - } + if (!nf_ct_ext_valid_pre(ct->ext)) + return -EAGAIN; local_bh_disable(); do { @@ -928,6 +926,18 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) goto chaintoolong; } + /* If genid has changed, we can't insert anymore because ct + * extensions could have stale pointers and nf_ct_iterate_destroy + * might have completed its table scan already. + * + * Increment of the ext genid right after this check is fine: + * nf_ct_iterate_destroy blocks until locks are released. + */ + if (!nf_ct_ext_valid_post(ct->ext)) { + err = -EAGAIN; + goto out; + } + smp_wmb(); /* The caller holds a reference to this object */ refcount_set(&ct->ct_general.use, 2); @@ -936,12 +946,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) NF_CT_STAT_INC(net, insert); local_bh_enable(); - if (!nf_ct_ext_valid_post(ct->ext)) { - nf_ct_kill(ct); - NF_CT_STAT_INC_ATOMIC(net, drop); - return -ETIMEDOUT; - } - return 0; chaintoolong: NF_CT_STAT_INC(net, chaintoolong); @@ -2291,6 +2295,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, return 0; helper = rcu_dereference(help->helper); + if (!helper) + return 0; + if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) return 0; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 04169b54f2a2b..c5a7668535db2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -176,7 +176,12 @@ static int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct, bool skip_zero) { - long timeout = nf_ct_expires(ct) / HZ; + long timeout; + + if (nf_ct_is_confirmed(ct)) + timeout = nf_ct_expires(ct) / HZ; + else + timeout = ct->timeout / HZ; if (skip_zero && timeout == 0) return 0; @@ -2248,9 +2253,6 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - __nf_ct_set_timeout(ct, timeout); - rcu_read_lock(); if (cda[CTA_HELP]) { char *helpname = NULL; @@ -2319,6 +2321,9 @@ ctnetlink_create_conntrack(struct net *net, /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; + timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; + __nf_ct_set_timeout(ct, timeout); + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); if (err < 0) @@ -2375,12 +2380,15 @@ ctnetlink_create_conntrack(struct net *net, err = nf_conntrack_hash_check_insert(ct); if (err < 0) - goto err2; + goto err3; rcu_read_unlock(); return ct; +err3: + if (ct->master) + nf_ct_put(ct->master); err2: rcu_read_unlock(); err1: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5264437d3b89b..0b97c0a744227 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); +static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); +static DEFINE_SPINLOCK(nf_tables_gc_list_lock); enum { NFT_VALIDATE_SKIP = 0, @@ -122,6 +124,9 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); +static void nft_trans_gc_work(struct work_struct *work); +static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); + static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, @@ -153,6 +158,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -165,9 +171,15 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } @@ -248,8 +260,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) if (chain->bound) return -EBUSY; + if (!nft_use_inc(&chain->use)) + return -EMFILE; + chain->bound = true; - chain->use++; nft_chain_trans_bind(ctx, chain); return 0; @@ -359,6 +373,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr { struct nftables_pernet *nft_net = nft_pernet(net); + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + list_add_tail(&trans->list, &nft_net->commit_list); } @@ -419,7 +446,7 @@ static int nft_delchain(struct nft_ctx *ctx) if (IS_ERR(trans)) return PTR_ERR(trans); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return 0; @@ -458,7 +485,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; @@ -561,6 +588,54 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, return __nft_trans_set_add(ctx, msg_type, set, NULL); } +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +struct nft_set_elem_catchall { + struct list_head list; + struct rcu_head rcu; + struct nft_elem_priv *elem; +}; + +static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_deactivate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_deactivate(ctx, set); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -569,8 +644,11 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -602,7 +680,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; nft_deactivate_next(ctx->net, obj); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -637,7 +715,7 @@ static int nft_delflowtable(struct nft_ctx *ctx, return err; nft_deactivate_next(ctx->net, flowtable); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -1081,6 +1159,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx) flags & NFT_TABLE_F_OWNER)) return -EOPNOTSUPP; + /* No dormant off/on/off/on games in single transaction */ + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -1293,7 +1375,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -1307,8 +1389,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -1338,7 +1419,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx->chain = chain; @@ -1413,6 +1494,10 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, } if (IS_ERR(table)) { + if (PTR_ERR(table) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYTABLE) + return 0; + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(table); } @@ -1600,6 +1685,10 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (nft_base_chain_netdev(family, ops->hooknum)) { nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); + + if (!nest_devs) + goto nla_put_failure; + list_for_each_entry(hook, &basechain->hook_list, list) { if (!first) first = hook; @@ -2215,9 +2304,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_rule **rules; int err; - if (table->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_CHAIN_HOOK]) { struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; @@ -2314,6 +2400,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err_destroy_chain; + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err_use; + } + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2330,10 +2421,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err_unregister_hook; } - table->use++; - return 0; + err_unregister_hook: + nft_use_dec_restore(&table->use); +err_use: nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -2636,10 +2728,17 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, chain = nft_chain_lookup(net, table, attr, genmask); } if (IS_ERR(chain)) { + if (PTR_ERR(chain) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN) + return 0; + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(chain); } + if (nft_chain_binding(chain)) + return -EOPNOTSUPP; + if (info->nlh->nlmsg_flags & NLM_F_NONREC && chain->use > 0) return -EBUSY; @@ -2768,7 +2867,7 @@ static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { }; static int nf_tables_fill_expr_info(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name)) goto nla_put_failure; @@ -2778,7 +2877,7 @@ static int nf_tables_fill_expr_info(struct sk_buff *skb, NFTA_EXPR_DATA); if (data == NULL) goto nla_put_failure; - if (expr->ops->dump(skb, expr) < 0) + if (expr->ops->dump(skb, expr, reset) < 0) goto nla_put_failure; nla_nest_end(skb, data); } @@ -2790,14 +2889,14 @@ static int nf_tables_fill_expr_info(struct sk_buff *skb, }; int nft_expr_dump(struct sk_buff *skb, unsigned int attr, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; - if (nf_tables_fill_expr_info(skb, expr) < 0) + if (nf_tables_fill_expr_info(skb, expr, reset) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -3006,7 +3105,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct nft_rule *rule, u64 handle) + const struct nft_rule *rule, u64 handle, + bool reset) { struct nlmsghdr *nlh; const struct nft_expr *expr, *next; @@ -3039,7 +3139,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, if (list == NULL) goto nla_put_failure; nft_rule_for_each_expr(expr, next, rule) { - if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0) + if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, reset) < 0) goto nla_put_failure; } nla_nest_end(skb, list); @@ -3090,7 +3190,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, - ctx->chain, rule, handle); + ctx->chain, rule, handle, false); if (err < 0) { kfree_skb(skb); goto err; @@ -3111,7 +3211,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, unsigned int *idx, struct netlink_callback *cb, const struct nft_table *table, - const struct nft_chain *chain) + const struct nft_chain *chain, + bool reset) { struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; @@ -3138,7 +3239,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, handle) < 0) + table, chain, rule, handle, reset) < 0) return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -3161,6 +3262,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; + bool reset = false; + + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + reset = true; rcu_read_lock(); nft_net = nft_pernet(net); @@ -3185,14 +3290,15 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (!nft_is_active(net, chain)) continue; __nf_tables_dump_rules(skb, &idx, - cb, table, chain); + cb, table, chain, reset); break; } goto done; } list_for_each_entry_rcu(chain, &table->chains, list) { - if (__nf_tables_dump_rules(skb, &idx, cb, table, chain)) + if (__nf_tables_dump_rules(skb, &idx, + cb, table, chain, reset)) goto done; } @@ -3263,6 +3369,7 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_table *table; struct sk_buff *skb2; + bool reset = false; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { @@ -3299,9 +3406,12 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, if (!skb2) return -ENOMEM; + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + reset = true; + err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, - family, table, chain, rule, 0); + family, table, chain, rule, 0, reset); if (err < 0) goto err_fill_rule_info; @@ -3415,12 +3525,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, return 0; } -struct nft_set_elem_catchall { - struct list_head list; - struct rcu_head rcu; - void *elem; -}; - int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); @@ -3447,6 +3551,26 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, const struct nft_chain *chain, const struct nlattr *nla); +/* nft <= 1.0.6 appends rules to anon chains after they have been bound */ +static bool nft_rule_work_around_old_version(const struct nfnl_info *info, + struct nft_chain *chain) +{ + /* bound (anonymous) chain is already used */ + if (nft_is_active(info->net, chain)) + return false; + + /* nft never asks to replace rules here */ + if (info->nlh->nlmsg_flags & (NLM_F_REPLACE | NLM_F_EXCL)) + return false; + + /* nft and it only ever appends. */ + if ((info->nlh->nlmsg_flags & NLM_F_APPEND) == 0) + return false; + + pr_warn_once("enabling workaround for nftables 1.0.6 and older\n"); + return true; +} + #define NFT_RULE_MAXEXPRS 128 static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, @@ -3460,6 +3584,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, struct nft_expr_info *expr_info = NULL; u8 family = info->nfmsg->nfgen_family; struct nft_flow_rule *flow = NULL; + bool add_after_bind = false; struct net *net = info->net; struct nft_userdata *udata; struct nft_table *table; @@ -3499,8 +3624,12 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return -EINVAL; } - if (nft_chain_is_bound(chain)) - return -EOPNOTSUPP; + if (nft_chain_is_bound(chain)) { + if (!nft_rule_work_around_old_version(info, chain)) + return -EOPNOTSUPP; + + add_after_bind = true; + } if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); @@ -3524,9 +3653,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return -EINVAL; handle = nf_tables_alloc_handle(table); - if (chain->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); old_rule = __nft_rule_lookup(chain, pos_handle); @@ -3620,7 +3746,17 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } } + if (!nft_use_inc(&chain->use)) { + err = -EMFILE; + goto err_release_rule; + } + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { + if (nft_chain_binding(chain)) { + err = -EOPNOTSUPP; + goto err_destroy_flow_rule; + } + err = nft_delrule(&ctx, old_rule); if (err < 0) goto err_destroy_flow_rule; @@ -3638,6 +3774,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, goto err_destroy_flow_rule; } + if (add_after_bind) + nft_trans_rule_bound(trans) = true; + if (info->nlh->nlmsg_flags & NLM_F_APPEND) { if (old_rule) list_add_rcu(&rule->list, &old_rule->list); @@ -3651,7 +3790,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } } kvfree(expr_info); - chain->use++; if (flow) nft_trans_flow_rule(trans) = flow; @@ -3662,6 +3800,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return 0; err_destroy_flow_rule: + nft_use_dec_restore(&chain->use); if (flow) nft_flow_rule_destroy(flow); err_release_rule: @@ -3689,12 +3828,10 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { - struct nft_rule *rule = nft_trans_rule(trans); - if (trans->msg_type == NFT_MSG_NEWRULE && trans->ctx.chain == chain && id == nft_trans_rule_id(trans)) - return rule; + return nft_trans_rule(trans); } return ERR_PTR(-ENOENT); } @@ -3723,10 +3860,14 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) { + if (PTR_ERR(chain) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE) + return 0; + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) return -EOPNOTSUPP; } @@ -3736,6 +3877,10 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_RULE_HANDLE]) { rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { + if (PTR_ERR(rule) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE) + return 0; + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); } @@ -3756,7 +3901,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (nft_chain_is_bound(chain)) + if (nft_chain_binding(chain)) continue; ctx.chain = chain; @@ -4137,7 +4282,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (set->num_exprs == 1) { nest = nla_nest_start_noflag(skb, NFTA_SET_EXPR); - if (nf_tables_fill_expr_info(skb, set->exprs[0]) < 0) + if (nf_tables_fill_expr_info(skb, set->exprs[0], false) < 0) goto nla_put_failure; nla_nest_end(skb, nest); @@ -4148,7 +4293,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, for (i = 0; i < set->num_exprs; i++) { if (nft_expr_dump(skb, NFTA_LIST_ELEM, - set->exprs[i]) < 0) + set->exprs[i], false) < 0) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4588,6 +4733,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4596,6 +4744,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -4642,6 +4794,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); if (err < 0) return err; @@ -4678,9 +4833,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, alloc_size = sizeof(*set) + size + udlen; if (alloc_size < size || alloc_size > INT_MAX) return -ENOMEM; + + if (!nft_use_inc(&table->use)) + return -EMFILE; + set = kvzalloc(alloc_size, GFP_KERNEL); - if (!set) - return -ENOMEM; + if (!set) { + err = -ENOMEM; + goto err_alloc; + } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); if (!name) { @@ -4701,6 +4862,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&set->bindings); INIT_LIST_HEAD(&set->catchall_list); + refcount_set(&set->refs, 1); set->table = table; write_pnet(&set->net, net); set->ops = ops; @@ -4731,24 +4893,28 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); - table->use++; + return 0; err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); err_set_destroy: - ops->destroy(set); + ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: kvfree(set); +err_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -4759,11 +4925,19 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); - nft_set_elem_destroy(set, catchall->elem, true); + nf_tables_set_elem_destroy(ctx, set, catchall->elem); kfree_rcu(catchall, rcu); } } +static void nft_set_put(struct nft_set *set) +{ + if (refcount_dec_and_test(&set->refs)) { + kfree(set->name); + kvfree(set); + } +} + static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { int i; @@ -4774,10 +4948,9 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(ctx, set->exprs[i]); - set->ops->destroy(set); + set->ops->destroy(ctx, set); nft_set_catchall_destroy(ctx, set); - kfree(set->name); - kvfree(set); + nft_set_put(set); } static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info, @@ -4811,6 +4984,10 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info, } if (IS_ERR(set)) { + if (PTR_ERR(set) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSET) + return 0; + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(set); } @@ -4883,9 +5060,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (set->use == UINT_MAX) - return -EOVERFLOW; - if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -4913,10 +5087,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, return iter.err; } bind: + if (!nft_use_inc(&set->use)) + return -EMFILE; + binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); - set->use++; return 0; } @@ -4929,18 +5105,69 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_catchall_activate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_activate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_activate(ctx, set); +} + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (nft_set_is_anonymous(set)) + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + nft_clear(ctx->net, set); + } - set->use++; + nft_use_inc_restore(&set->use); } EXPORT_SYMBOL_GPL(nf_tables_activate_set); @@ -4956,17 +5183,24 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, else list_del_rcu(&binding->list); - set->use--; + nft_use_dec(&set->use); break; case NFT_TRANS_PREPARE: - if (nft_set_is_anonymous(set)) - nft_deactivate_next(ctx->net, set); + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); - set->use--; + nft_deactivate_next(ctx->net, set); + } + nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - set->use--; + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + + nft_use_dec(&set->use); fallthrough; default: nf_tables_unbind_set(ctx, set, binding, @@ -5047,7 +5281,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_set_elem_expr_dump(struct sk_buff *skb, const struct nft_set *set, - const struct nft_set_ext *ext) + const struct nft_set_ext *ext, + bool reset) { struct nft_set_elem_expr *elem_expr; u32 size, num_exprs = 0; @@ -5060,7 +5295,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb, if (num_exprs == 1) { expr = nft_setelem_expr_at(elem_expr, 0); - if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr) < 0) + if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr, reset) < 0) return -1; return 0; @@ -5071,7 +5306,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb, nft_setelem_expr_foreach(expr, elem_expr, size) { expr = nft_setelem_expr_at(elem_expr, size); - if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0) + if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, reset) < 0) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -5084,11 +5319,13 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb, static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + bool reset) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; + u64 timeout = 0; nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) @@ -5111,7 +5348,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && - nft_set_elem_expr_dump(skb, set, ext)) + nft_set_elem_expr_dump(skb, set, ext, reset)) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && @@ -5124,11 +5361,15 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && - nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { + timeout = *nft_set_ext_timeout(ext); + if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + nf_jiffies64_to_msecs(timeout), + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; + } else if (set->flags & NFT_SET_TIMEOUT) { + timeout = READ_ONCE(set->timeout); + } if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { u64 expires, now = get_jiffies_64(); @@ -5143,6 +5384,9 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; + + if (reset) + *nft_set_ext_expiration(ext) = now + timeout; } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { @@ -5166,6 +5410,7 @@ struct nft_set_dump_args { const struct netlink_callback *cb; struct nft_set_iter iter; struct sk_buff *skb; + bool reset; }; static int nf_tables_dump_setelem(const struct nft_ctx *ctx, @@ -5173,10 +5418,14 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); struct nft_set_dump_args *args; + if (nft_set_elem_expired(ext)) + return 0; + args = container_of(iter, struct nft_set_dump_args, iter); - return nf_tables_fill_setelem(args->skb, set, elem); + return nf_tables_fill_setelem(args->skb, set, elem, args->reset); } struct nft_set_dump_ctx { @@ -5185,7 +5434,7 @@ struct nft_set_dump_ctx { }; static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, - const struct nft_set *set) + const struct nft_set *set, bool reset) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); @@ -5200,7 +5449,7 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, continue; elem.priv = catchall->elem; - ret = nf_tables_fill_setelem(skb, set, &elem); + ret = nf_tables_fill_setelem(skb, set, &elem, reset); break; } @@ -5218,6 +5467,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) bool set_found = false; struct nlmsghdr *nlh; struct nlattr *nest; + bool reset = false; u32 portid, seq; int event; @@ -5265,8 +5515,12 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; + args.cb = cb; args.skb = skb; + args.reset = reset; args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; @@ -5275,7 +5529,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) set->ops->walk(&dump_ctx->ctx, set, &args.iter); if (!args.iter.err && args.iter.count == cb->args[0]) - args.iter.err = nft_set_catchall_dump(net, skb, set); + args.iter.err = nft_set_catchall_dump(net, skb, set, reset); rcu_read_unlock(); nla_nest_end(skb, nest); @@ -5313,7 +5567,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + bool reset) { struct nlmsghdr *nlh; struct nlattr *nest; @@ -5334,7 +5589,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - err = nf_tables_fill_setelem(skb, set, elem); + err = nf_tables_fill_setelem(skb, set, elem, reset); if (err < 0) goto nla_put_failure; @@ -5370,19 +5625,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set, static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *key, struct nlattr *attr) { - struct nft_data_desc desc; - int err; - - err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr); - if (err < 0) - return err; - - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { - nft_data_release(key, desc.type); - return -EINVAL; - } + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = NFT_DATA_VALUE_MAXLEN, + .len = set->klen, + }; - return 0; + return nft_data_init(ctx, key, &desc, attr); } static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, @@ -5391,26 +5640,18 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nlattr *attr) { u32 dtype; - int err; - - desc->flags = NFT_DATA_DESC_SETELEM; - - err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); - if (err < 0) - return err; if (set->dtype == NFT_DATA_VERDICT) dtype = NFT_DATA_VERDICT; else dtype = NFT_DATA_VALUE; - if (dtype != desc->type || - set->dlen != desc->len) { - nft_data_release(data, desc->type); - return -EINVAL; - } + desc->type = dtype; + desc->size = NFT_DATA_VALUE_MAXLEN; + desc->len = set->dlen; + desc->flags = NFT_DATA_DESC_SETELEM; - return 0; + return nft_data_init(ctx, data, desc, attr); } static void *nft_setelem_catchall_get(const struct net *net, @@ -5454,7 +5695,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set, } static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr) + const struct nlattr *attr, bool reset) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_elem elem; @@ -5498,7 +5739,8 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, - NFT_MSG_NEWSETELEM, 0, set, &elem); + NFT_MSG_NEWSETELEM, 0, set, &elem, + reset); if (err < 0) goto err_fill_setelem; @@ -5522,10 +5764,11 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct nft_set *set; struct nlattr *attr; struct nft_ctx ctx; + bool reset = false; int rem, err = 0; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, - genmask, NETLINK_CB(skb).portid); + genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); return PTR_ERR(table); @@ -5556,10 +5799,15 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&ctx, set, attr); - if (err < 0) + err = nft_get_set_elem(&ctx, set, attr, reset); + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } } return err; @@ -5588,7 +5836,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, - set, elem); + set, elem, false); if (err < 0) { kfree_skb(skb); goto err; @@ -5642,27 +5890,58 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, return ERR_PTR(err); } -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, - const u32 *data, u64 timeout, u64 expiration, gfp_t gfp) +static int nft_set_ext_check(const struct nft_set_ext_tmpl *tmpl, u8 id, u32 len) +{ + len += nft_set_ext_types[id].len; + if (len > tmpl->ext_len[id] || + len > U8_MAX) + return -1; + + return 0; +} + +static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id, + void *to, const void *from, u32 len) +{ + if (nft_set_ext_check(tmpl, id, len) < 0) + return -1; + + memcpy(to, from, len); + + return 0; +} + +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp) { struct nft_set_ext *ext; void *elem; elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); if (elem == NULL) - return NULL; + return ERR_PTR(-ENOMEM); ext = nft_set_elem_ext(set, elem); nft_set_ext_init(ext, tmpl); - if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY)) - memcpy(nft_set_ext_key(ext), key, set->klen); - if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) - memcpy(nft_set_ext_key_end(ext), key_end, set->klen); - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - memcpy(nft_set_ext_data(ext), data, set->dlen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) && + nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY, + nft_set_ext_key(ext), key, set->klen) < 0) + goto err_ext_check; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END) && + nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY_END, + nft_set_ext_key_end(ext), key_end, set->klen) < 0) + goto err_ext_check; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_memcpy(tmpl, NFT_SET_EXT_DATA, + nft_set_ext_data(ext), data, set->dlen) < 0) + goto err_ext_check; + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration; if (expiration == 0) @@ -5672,6 +5951,11 @@ void *nft_set_elem_init(const struct nft_set *set, *nft_set_ext_timeout(ext) = timeout; return elem; + +err_ext_check: + kfree(elem); + + return ERR_PTR(-EINVAL); } static void __nft_set_elem_expr_destroy(const struct nft_ctx *ctx, @@ -5695,10 +5979,12 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, __nft_set_elem_expr_destroy(ctx, expr); } -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +/* Drop references and destroy. Called from gc, dynset and abort path. */ +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_ctx ctx = { .net = read_pnet(&set->net), .family = set->table->family, @@ -5709,25 +5995,26 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); - if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; - kfree(elem); + nft_use_dec(&(*nft_set_ext_obj(ext))->use); + + kfree(elem_priv); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_setelem_data_deactivate() already deals - * with the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); - kfree(elem); + kfree(elem_priv); } int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, @@ -5759,18 +6046,29 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, } static int nft_set_elem_expr_setup(struct nft_ctx *ctx, + const struct nft_set_ext_tmpl *tmpl, const struct nft_set_ext *ext, struct nft_expr *expr_array[], u32 num_exprs) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); + u32 len = sizeof(struct nft_set_elem_expr); struct nft_expr *expr; int i, err; - for (i = 0; i < num_exprs; i++) { - expr = nft_setelem_expr_at(elem_expr, elem_expr->size); - err = nft_expr_clone(expr, expr_array[i]); - if (err < 0) + if (num_exprs == 0) + return 0; + + for (i = 0; i < num_exprs; i++) + len += expr_array[i]->ops->size; + + if (nft_set_ext_check(tmpl, NFT_SET_EXT_EXPRESSIONS, len) < 0) + return -EINVAL; + + for (i = 0; i < num_exprs; i++) { + expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + err = nft_expr_clone(expr, expr_array[i]); + if (err < 0) goto err_elem_expr_setup; elem_expr->size += expr_array[i]->ops->size; @@ -5799,7 +6097,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (nft_set_elem_active(ext, genmask) && - !nft_set_elem_expired(ext)) + !nft_set_elem_expired(ext) && + !nft_set_elem_is_dead(ext)) return ext; } @@ -5807,29 +6106,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, } EXPORT_SYMBOL_GPL(nft_set_catchall_lookup); -void *nft_set_catchall_gc(const struct nft_set *set) -{ - struct nft_set_elem_catchall *catchall, *next; - struct nft_set_ext *ext; - void *elem = NULL; - - list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { - ext = nft_set_elem_ext(set, catchall->elem); - - if (!nft_set_elem_expired(ext) || - nft_set_elem_mark_busy(ext)) - continue; - - elem = catchall->elem; - list_del_rcu(&catchall->list); - kfree_rcu(catchall, rcu); - break; - } - - return elem; -} -EXPORT_SYMBOL_GPL(nft_set_catchall_gc); - static int nft_setelem_catchall_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, @@ -5891,7 +6167,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set, if (nft_setelem_is_catchall(set, elem)) { nft_set_elem_change_active(net, set, ext); - nft_set_elem_clear_busy(ext); } else { set->ops->activate(net, set, elem); } @@ -5906,8 +6181,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net, list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_is_active(net, ext) || - nft_set_elem_mark_busy(ext)) + if (!nft_is_active(net, ext)) continue; kfree(elem->priv); @@ -5950,6 +6224,12 @@ static int nft_setelem_deactivate(const struct net *net, return ret; } +static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) +{ + list_del_rcu(&catchall->list); + kfree_rcu(catchall, rcu); +} + static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -5958,8 +6238,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem->priv) { - list_del_rcu(&catchall->list); - kfree_rcu(catchall, rcu); + nft_setelem_catchall_destroy(catchall); break; } } @@ -6026,7 +6305,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) + if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) || + (!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY])) return -EINVAL; if (flags != 0) { @@ -6075,7 +6355,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, &timeout); if (err) return err; - } else if (set->flags & NFT_SET_TIMEOUT) { + } else if (set->flags & NFT_SET_TIMEOUT && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { timeout = READ_ONCE(set->timeout); } @@ -6141,7 +6422,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EOPNOTSUPP; goto err_set_elem_expr; } - } else if (set->num_exprs > 0) { + } else if (set->num_exprs > 0 && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { err = nft_set_elem_expr_clone(ctx, set, expr_array); if (err < 0) goto err_set_elem_expr_clone; @@ -6199,8 +6481,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); + obj = NULL; + goto err_parse_key_end; + } + + if (!nft_use_inc(&obj->use)) { + err = -EMFILE; + obj = NULL; goto err_parse_key_end; } + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); if (err < 0) goto err_parse_key_end; @@ -6257,36 +6547,41 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } } - err = -ENOMEM; elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, elem.key_end.val.data, elem.data.val.data, timeout, expiration, GFP_KERNEL); - if (elem.priv == NULL) + if (IS_ERR(elem.priv)) { + err = PTR_ERR(elem.priv); goto err_parse_data; + } ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; + + if (obj) + *nft_set_ext_obj(ext) = obj; + if (ulen > 0) { + if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) { + err = -EINVAL; + goto err_elem_free; + } udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { - *nft_set_ext_obj(ext) = obj; - obj->use++; - } - err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs); + err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs); if (err < 0) - goto err_elem_expr; + goto err_elem_free; trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) { err = -ENOMEM; - goto err_elem_expr; + goto err_elem_free; } - ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; + ext->genmask = nft_genmask_cur(ctx->net); err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags); if (err) { @@ -6329,15 +6624,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_setelem_remove(ctx->net, set, &elem); err_element_clash: kfree(trans); -err_elem_expr: - if (obj) - obj->use--; - +err_elem_free: nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&elem.data.val, desc.type); err_parse_key_end: + if (obj) + nft_use_dec_restore(&obj->use); + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); @@ -6378,15 +6673,18 @@ static int nf_tables_newsetelem(struct sk_buff *skb, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); return err; + } } if (nft_net->validate_state == NFT_VALIDATE_DO) @@ -6415,7 +6713,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; - chain->use++; + nft_use_inc_restore(&chain->use); break; } } @@ -6430,19 +6728,19 @@ static void nft_setelem_data_activate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use++; + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } -static void nft_setelem_data_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, @@ -6505,8 +6803,10 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL); - if (elem.priv == NULL) + if (IS_ERR(elem.priv)) { + err = PTR_ERR(elem.priv); goto fail_elem_key_end; + } ext = nft_set_elem_ext(set, elem.priv); if (flags) @@ -6543,17 +6843,13 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, struct nft_set_elem *elem) { struct nft_trans *trans; - int err; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, sizeof(struct nft_trans_elem), GFP_ATOMIC); if (!trans) return -ENOMEM; - if (!set->ops->flush(ctx->net, set, elem->priv)) { - err = -ENOENT; - goto err1; - } + set->ops->flush(ctx->net, set, elem->priv); set->ndeact++; nft_setelem_data_deactivate(ctx->net, set, elem); @@ -6562,9 +6858,6 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, nft_trans_commit_list_add_tail(ctx->net, trans); return 0; -err1: - kfree(trans); - return err; } static int __nft_set_catchall_flush(const struct nft_ctx *ctx, @@ -6597,14 +6890,14 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask) || - nft_set_elem_mark_busy(ext)) + if (!nft_set_elem_active(ext, genmask)) continue; elem.priv = catchall->elem; ret = __nft_set_catchall_flush(ctx, set, &elem); if (ret < 0) break; + nft_set_elem_change_active(ctx->net, set, ext); } return ret; @@ -6648,7 +6941,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb, set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -6658,35 +6955,18 @@ static int nf_tables_delsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); - if (err < 0) + if (err == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM) + continue; + + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } } return err; } -void nft_set_gc_batch_release(struct rcu_head *rcu) -{ - struct nft_set_gc_batch *gcb; - unsigned int i; - - gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); - for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); - kfree(gcb); -} - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp) -{ - struct nft_set_gc_batch *gcb; - - gcb = kzalloc(sizeof(*gcb), gfp); - if (gcb == NULL) - return gcb; - gcb->head.set = set; - return gcb; -} - /* * Stateful objects */ @@ -6972,9 +7252,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + type = nft_obj_type_get(net, objtype); - if (IS_ERR(type)) - return PTR_ERR(type); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err_type; + } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { @@ -7008,7 +7293,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, goto err_obj_ht; list_add_tail_rcu(&obj->list, &table->objects); - table->use++; + return 0; err_obj_ht: /* queued in transaction log */ @@ -7024,6 +7309,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, kfree(obj); err_init: module_put(type->owner); +err_type: + nft_use_dec_restore(&table->use); + return err; } @@ -7300,6 +7588,10 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, } if (IS_ERR(obj)) { + if (PTR_ERR(obj) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYOBJ) + return 0; + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(obj); } @@ -7414,7 +7706,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - flowtable->use--; + nft_use_dec(&flowtable->use); fallthrough; default: return; @@ -7762,9 +8054,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); - if (!flowtable) - return -ENOMEM; + if (!flowtable) { + err = -ENOMEM; + goto flowtable_alloc; + } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); @@ -7819,7 +8116,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb, goto err5; list_add_tail_rcu(&flowtable->list, &table->flowtables); - table->use++; return 0; err5: @@ -7836,6 +8132,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, kfree(flowtable->name); err1: kfree(flowtable); +flowtable_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -7931,6 +8230,10 @@ static int nf_tables_delflowtable(struct sk_buff *skb, } if (IS_ERR(flowtable)) { + if (PTR_ERR(flowtable) == -ENOENT && + NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYFLOWTABLE) + return 0; + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(flowtable); } @@ -8340,6 +8643,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, + [NFT_MSG_DESTROYTABLE] = { + .call = nf_tables_deltable, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, [NFT_MSG_NEWCHAIN] = { .call = nf_tables_newchain, .type = NFNL_CB_BATCH, @@ -8358,6 +8667,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, + [NFT_MSG_DESTROYCHAIN] = { + .call = nf_tables_delchain, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, [NFT_MSG_NEWRULE] = { .call = nf_tables_newrule, .type = NFNL_CB_BATCH, @@ -8370,12 +8685,24 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, + [NFT_MSG_GETRULE_RESET] = { + .call = nf_tables_getrule, + .type = NFNL_CB_RCU, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, [NFT_MSG_DELRULE] = { .call = nf_tables_delrule, .type = NFNL_CB_BATCH, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, + [NFT_MSG_DESTROYRULE] = { + .call = nf_tables_delrule, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, [NFT_MSG_NEWSET] = { .call = nf_tables_newset, .type = NFNL_CB_BATCH, @@ -8394,6 +8721,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, + [NFT_MSG_DESTROYSET] = { + .call = nf_tables_delset, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, [NFT_MSG_NEWSETELEM] = { .call = nf_tables_newsetelem, .type = NFNL_CB_BATCH, @@ -8406,12 +8739,24 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, + [NFT_MSG_GETSETELEM_RESET] = { + .call = nf_tables_getsetelem, + .type = NFNL_CB_RCU, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, [NFT_MSG_DELSETELEM] = { .call = nf_tables_delsetelem, .type = NFNL_CB_BATCH, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, + [NFT_MSG_DESTROYSETELEM] = { + .call = nf_tables_delsetelem, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, [NFT_MSG_GETGEN] = { .call = nf_tables_getgen, .type = NFNL_CB_RCU, @@ -8434,6 +8779,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, + [NFT_MSG_DESTROYOBJ] = { + .call = nf_tables_delobj, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, [NFT_MSG_GETOBJ_RESET] = { .call = nf_tables_getobj, .type = NFNL_CB_RCU, @@ -8458,6 +8809,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_FLOWTABLE_MAX, .policy = nft_flowtable_policy, }, + [NFT_MSG_DESTROYFLOWTABLE] = { + .call = nf_tables_delflowtable, + .type = NFNL_CB_BATCH, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, }; static int nf_tables_validate(struct net *net) @@ -8476,6 +8833,8 @@ static int nf_tables_validate(struct net *net) if (nft_table_validate(net, table) < 0) return -EAGAIN; } + + nft_validate_state_update(net, NFT_VALIDATE_SKIP); break; } @@ -8551,6 +8910,7 @@ static void nft_commit_release(struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_DELTABLE: + case NFT_MSG_DESTROYTABLE: nf_tables_table_destroy(&trans->ctx); break; case NFT_MSG_NEWCHAIN: @@ -8558,23 +8918,29 @@ static void nft_commit_release(struct nft_trans *trans) kfree(nft_trans_chain_name(trans)); break; case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: + case NFT_MSG_DESTROYSET: nft_set_destroy(&trans->ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: + case NFT_MSG_DESTROYSETELEM: nf_tables_set_elem_destroy(&trans->ctx, nft_trans_elem_set(trans), nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: + case NFT_MSG_DESTROYOBJ: nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: + case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else @@ -8603,7 +8969,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -8746,6 +9112,251 @@ void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } +static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, + struct nft_trans_gc *trans) +{ + void **priv = trans->priv; + unsigned int i; + + for (i = 0; i < trans->count; i++) { + struct nft_set_elem elem = { + .priv = priv[i], + }; + + nft_setelem_data_deactivate(ctx->net, trans->set, &elem); + nft_setelem_remove(ctx->net, trans->set, &elem); + } +} + +void nft_trans_gc_destroy(struct nft_trans_gc *trans) +{ + nft_set_put(trans->set); + put_net(trans->net); + kfree(trans); +} + +static void nft_trans_gc_trans_free(struct rcu_head *rcu) +{ + struct nft_set_elem elem = {}; + struct nft_trans_gc *trans; + struct nft_ctx ctx = {}; + unsigned int i; + + trans = container_of(rcu, struct nft_trans_gc, rcu); + ctx.net = read_pnet(&trans->set->net); + + for (i = 0; i < trans->count; i++) { + elem.priv = trans->priv[i]; + if (!nft_setelem_is_catchall(trans->set, &elem)) + atomic_dec(&trans->set->nelems); + + nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + } + + nft_trans_gc_destroy(trans); +} + +static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) +{ + struct nftables_pernet *nft_net; + struct nft_ctx ctx = {}; + + nft_net = nft_pernet(trans->net); + + mutex_lock(&nft_net->commit_mutex); + + /* Check for race with transaction, otherwise this batch refers to + * stale objects that might not be there anymore. Skip transaction if + * set has been destroyed from control plane transaction in case gc + * worker loses race. + */ + if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { + mutex_unlock(&nft_net->commit_mutex); + return false; + } + + ctx.net = trans->net; + ctx.table = trans->set->table; + + nft_trans_gc_setelem_remove(&ctx, trans); + mutex_unlock(&nft_net->commit_mutex); + + return true; +} + +static void nft_trans_gc_work(struct work_struct *work) +{ + struct nft_trans_gc *trans, *next; + LIST_HEAD(trans_gc_list); + + spin_lock(&nf_tables_gc_list_lock); + list_splice_init(&nf_tables_gc_list, &trans_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); + if (!nft_trans_gc_work_done(trans)) { + nft_trans_gc_destroy(trans); + continue; + } + call_rcu(&trans->rcu, nft_trans_gc_trans_free); + } +} + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp) +{ + struct net *net = read_pnet(&set->net); + struct nft_trans_gc *trans; + + trans = kzalloc(sizeof(*trans), gfp); + if (!trans) + return NULL; + + trans->net = maybe_get_net(net); + if (!trans->net) { + kfree(trans); + return NULL; + } + + refcount_inc(&set->refs); + trans->set = set; + trans->seq = gc_seq; + + return trans; +} + +void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv) +{ + trans->priv[trans->count++] = priv; +} + +static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) +{ + spin_lock(&nf_tables_gc_list_lock); + list_add_tail(&trans->list, &nf_tables_gc_list); + spin_unlock(&nf_tables_gc_list_lock); + + schedule_work(&trans_gc_work); +} + +static int nft_trans_gc_space(struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp) +{ + struct nft_set *set; + + if (nft_trans_gc_space(gc)) + return gc; + + set = gc->set; + nft_trans_gc_queue_work(gc); + + return nft_trans_gc_alloc(set, gc_seq, gfp); +} + +void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) +{ + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + nft_trans_gc_queue_work(trans); +} + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) +{ + struct nft_set *set; + + if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net))) + return NULL; + + if (nft_trans_gc_space(gc)) + return gc; + + set = gc->set; + call_rcu(&gc->rcu, nft_trans_gc_trans_free); + + return nft_trans_gc_alloc(set, 0, gfp); +} + +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) +{ + WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net)); + + if (trans->count == 0) { + nft_trans_gc_destroy(trans); + return; + } + + call_rcu(&trans->rcu, nft_trans_gc_trans_free); +} + +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq) +{ + struct nft_set_elem_catchall *catchall; + const struct nft_set *set = gc->set; + struct nft_set_ext *ext; + + list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + + if (!nft_set_elem_expired(ext)) + continue; + if (nft_set_elem_is_dead(ext)) + goto dead_elem; + + nft_set_elem_dead(ext); +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + return NULL; + + nft_trans_gc_elem_add(gc, catchall->elem); + } + + return gc; +} + +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) +{ + struct nft_set_elem_catchall *catchall, *next; + const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)); + + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + + if (!nft_set_elem_expired(ext)) + continue; + + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); + if (!gc) + return NULL; + + elem_priv = catchall->elem; + + memset(&elem, 0, sizeof(elem)); + elem.priv = catchall->elem; + + nft_setelem_data_deactivate(gc->net, gc->set, &elem); + nft_setelem_catchall_destroy(catchall); + nft_trans_gc_elem_add(gc, elem_priv); + } + + return gc; +} + static void nf_tables_module_autoload_cleanup(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -8890,14 +9501,45 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + +static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) +{ + unsigned int gc_seq; + + /* Bump gc counter, it becomes odd, this is the busy mark. */ + gc_seq = READ_ONCE(nft_net->gc_seq); + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); + + return gc_seq; +} + +static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) +{ + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + unsigned int base_seq, gc_seq; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; - unsigned int base_seq; LIST_HEAD(adl); int err; @@ -8906,6 +9548,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans)) && + !nft_trans_chain_bound(trans)) { + pr_warn_once("nftables ruleset with unbound chain\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; @@ -8953,6 +9616,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) WRITE_ONCE(nft_net->base_seq, base_seq); + gc_seq = nft_gc_seq_begin(nft_net); + /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); @@ -8977,8 +9642,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: + case NFT_MSG_DESTROYTABLE: list_del_rcu(&trans->ctx.table->list); - nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); + nf_tables_table_notify(&trans->ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { @@ -8993,8 +9659,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } break; case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: nft_chain_del(trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); + nf_tables_chain_notify(&trans->ctx, trans->msg_type); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, trans->ctx.chain); @@ -9010,10 +9677,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), - NFT_MSG_DELRULE); + trans->msg_type); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); @@ -9034,16 +9702,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + nft_use_dec(&trans->ctx.table->use); } nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + case NFT_MSG_DESTROYSET: + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), - NFT_MSG_DELSET, GFP_KERNEL); + trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; @@ -9052,19 +9722,30 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: + case NFT_MSG_DESTROYSETELEM: te = (struct nft_trans_elem *)trans->data; nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_DELSETELEM); + trans->msg_type); nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) { atomic_dec(&te->set->nelems); te->set->ndeact--; } + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -9081,9 +9762,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } break; case NFT_MSG_DELOBJ: + case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), - NFT_MSG_DELOBJ); + trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { @@ -9105,11 +9787,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELFLOWTABLE: + case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), - NFT_MSG_DELFLOWTABLE); + trans->msg_type); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { @@ -9117,7 +9800,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable(trans)->hook_list, - NFT_MSG_DELFLOWTABLE); + trans->msg_type); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); } @@ -9125,9 +9808,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); + + nft_gc_seq_end(nft_net, gc_seq); nf_tables_commit_release(net); return 0; @@ -9181,15 +9868,31 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -9213,6 +9916,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELTABLE: + case NFT_MSG_DESTROYTABLE: nft_clear(trans->ctx.net, trans->ctx.table); nft_trans_destroy(trans); break; @@ -9226,7 +9930,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -9234,7 +9938,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use++; + case NFT_MSG_DESTROYCHAIN: + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; @@ -9243,7 +9948,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - trans->ctx.chain->use--; + nft_use_dec_restore(&trans->ctx.chain->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), @@ -9252,7 +9957,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: - trans->ctx.chain->use++; + case NFT_MSG_DESTROYRULE: + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) @@ -9265,7 +9971,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -9273,8 +9979,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: - trans->ctx.table->use++; + case NFT_MSG_DESTROYSET: + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -9286,8 +9996,15 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: + case NFT_MSG_DESTROYSETELEM: te = (struct nft_trans_elem *)trans->data; nft_setelem_data_activate(net, te->set, &te->elem); @@ -9295,6 +10012,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) if (!nft_setelem_is_catchall(te->set, &te->elem)) te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -9302,12 +10024,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: - trans->ctx.table->use++; + case NFT_MSG_DESTROYOBJ: + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; @@ -9316,18 +10039,19 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); } break; case NFT_MSG_DELFLOWTABLE: + case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); @@ -9335,32 +10059,39 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - - return 0; -} - -static void nf_tables_cleanup(struct net *net) -{ - nft_validate_state_update(net, NFT_VALIDATE_SKIP); + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); - int ret = __nf_tables_abort(net, action); + unsigned int gc_seq; + int ret; + + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); + + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); mutex_unlock(&nft_net->commit_mutex); @@ -9389,7 +10120,6 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, - .cleanup = nf_tables_cleanup, .valid_genid = nf_tables_valid_genid, .owner = THIS_MODULE, }; @@ -9739,6 +10469,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { @@ -9775,8 +10508,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (desc->flags & NFT_DATA_DESC_SETELEM && chain->flags & NFT_CHAIN_BINDING) return -EINVAL; + if (!nft_use_inc(&chain->use)) + return -EMFILE; - chain->use++; data->verdict.chain = chain; break; default: @@ -9784,7 +10518,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, } desc->len = sizeof(data->verdict); - desc->type = NFT_DATA_VERDICT; + return 0; } @@ -9796,7 +10530,7 @@ static void nft_verdict_uninit(const struct nft_data *data) case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; - chain->use--; + nft_use_dec(&chain->use); break; } } @@ -9827,20 +10561,25 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) } static int nft_value_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, - struct nft_data_desc *desc, const struct nlattr *nla) + struct nft_data *data, struct nft_data_desc *desc, + const struct nlattr *nla) { unsigned int len; len = nla_len(nla); if (len == 0) return -EINVAL; - if (len > size) + if (len > desc->size) return -EOVERFLOW; + if (desc->len) { + if (len != desc->len) + return -EINVAL; + } else { + desc->len = len; + } nla_memcpy(data->data, nla, len); - desc->type = NFT_DATA_VALUE; - desc->len = len; + return 0; } @@ -9860,7 +10599,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * * @ctx: context of the expression using the data * @data: destination struct nft_data - * @size: maximum data length * @desc: data description * @nla: netlink attribute containing data * @@ -9870,24 +10608,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * The caller can indicate that it only wants to accept data of type * NFT_DATA_VALUE by passing NULL for the ctx argument. */ -int nft_data_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; + if (WARN_ON_ONCE(!desc->size)) + return -EINVAL; + err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); if (err < 0) return err; - if (tb[NFTA_DATA_VALUE]) - return nft_value_init(ctx, data, size, desc, - tb[NFTA_DATA_VALUE]); - if (tb[NFTA_DATA_VERDICT] && ctx != NULL) - return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); - return -EINVAL; + if (tb[NFTA_DATA_VALUE]) { + if (desc->type != NFT_DATA_VALUE) + return -EINVAL; + + err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) { + if (desc->type != NFT_DATA_VERDICT) + return -EINVAL; + + err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + } else { + err = -EINVAL; + } + + return err; } EXPORT_SYMBOL_GPL(nft_data_init); @@ -9950,11 +10699,11 @@ int __nft_release_basechain(struct nft_ctx *ctx) nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } nft_chain_del(ctx->chain); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nf_tables_chain_destroy(ctx); return 0; @@ -10001,32 +10750,38 @@ static void __nft_release_table(struct net *net, struct nft_table *table) ctx.family = table->family; ctx.table = table; list_for_each_entry(chain, &table->chains, list) { + if (nft_chain_binding(chain)) + continue; + ctx.chain = chain; list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); - chain->use--; + nft_use_dec(&chain->use); nf_tables_rule_release(&ctx, rule); } } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { list_del(&flowtable->list); - table->use--; + nft_use_dec(&table->use); nf_tables_flowtable_destroy(flowtable); } list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); - table->use--; + nft_use_dec(&table->use); + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { nft_obj_del(obj); - table->use--; + nft_use_dec(&table->use); nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { ctx.chain = chain; nft_chain_del(chain); - table->use--; + nft_use_dec(&table->use); nf_tables_chain_destroy(&ctx); } nf_tables_table_destroy(&ctx); @@ -10056,6 +10811,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, struct net *net = n->net; unsigned int deleted; bool restart = false; + unsigned int gc_seq; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; @@ -10063,8 +10819,11 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, nft_net = nft_pernet(net); deleted = 0; mutex_lock(&nft_net->commit_mutex); + + gc_seq = nft_gc_seq_begin(nft_net); + if (!list_empty(&nf_tables_destroy_list)) - rcu_barrier(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -10085,6 +10844,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, if (restart) goto again; } + nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; @@ -10100,10 +10861,12 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); nft_net->base_seq = 1; + nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; @@ -10121,22 +10884,37 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); + unsigned int gc_seq; mutex_lock(&nft_net->commit_mutex); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + + gc_seq = nft_gc_seq_begin(nft_net); + + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); + __nft_release_tables(net); + + nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&nft_net->commit_mutex); WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); } +static void nf_tables_exit_batch(struct list_head *net_exit_list) +{ + flush_work(&trans_gc_work); +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .exit_batch = nf_tables_exit_batch, .id = &nf_tables_net_id, .size = sizeof(struct nftables_pernet), }; @@ -10208,6 +10986,7 @@ static void __exit nf_tables_module_exit(void) nft_chain_filter_fini(); nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 82976e4df95f9..5c001be6825e3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -532,7 +532,8 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { + if (err == -ENOMEM || + nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. @@ -589,8 +590,6 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, goto replay_abort; } } - if (ss->cleanup) - ss->cleanup(net); nfnl_err_deliver(&err_list, oskb); kfree_skb(skb); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 47b0dba95054f..5bb2c61b6782e 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -93,7 +93,16 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { static int nft_bitwise_init_bool(struct nft_bitwise *priv, const struct nlattr *const tb[]) { - struct nft_data_desc mask, xor; + struct nft_data_desc mask = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->mask), + .len = priv->len, + }; + struct nft_data_desc xor = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->xor), + .len = priv->len, + }; int err; if (tb[NFTA_BITWISE_DATA]) @@ -103,36 +112,30 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, !tb[NFTA_BITWISE_XOR]) return -EINVAL; - err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask, - tb[NFTA_BITWISE_MASK]); + err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) { - err = -EINVAL; - goto err1; - } - err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor, - tb[NFTA_BITWISE_XOR]); + err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]); if (err < 0) - goto err1; - if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) { - err = -EINVAL; - goto err2; - } + goto err_xor_err; return 0; -err2: - nft_data_release(&priv->xor, xor.type); -err1: + +err_xor_err: nft_data_release(&priv->mask, mask.type); + return err; } static int nft_bitwise_init_shift(struct nft_bitwise *priv, const struct nlattr *const tb[]) { - struct nft_data_desc d; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data), + .len = sizeof(u32), + }; int err; if (tb[NFTA_BITWISE_MASK] || @@ -142,13 +145,12 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, if (!tb[NFTA_BITWISE_DATA]) return -EINVAL; - err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &d, - tb[NFTA_BITWISE_DATA]); + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]); if (err < 0) return err; - if (d.type != NFT_DATA_VALUE || d.len != sizeof(u32) || - priv->data.data[0] >= BITS_PER_TYPE(u32)) { - nft_data_release(&priv->data, d.type); + + if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { + nft_data_release(&priv->data, desc.type); return -EINVAL; } @@ -230,7 +232,8 @@ static int nft_bitwise_dump_shift(struct sk_buff *skb, return 0; } -static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_bitwise_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_bitwise *priv = nft_expr_priv(expr); int err = 0; @@ -290,22 +293,21 @@ static const struct nft_expr_ops nft_bitwise_ops = { static int nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out) { - struct nft_data_desc desc; struct nft_data data; - int err = 0; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(data), + .len = sizeof(u32), + }; + int err; - err = nft_data_init(NULL, &data, sizeof(data), &desc, tb); + err = nft_data_init(NULL, &data, &desc, tb); if (err < 0) return err; - if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) { - err = -EINVAL; - goto err; - } *out = data.data[0]; -err: - nft_data_release(&data, desc.type); - return err; + + return 0; } static int nft_bitwise_fast_init(const struct nft_ctx *ctx, @@ -344,7 +346,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, } static int -nft_bitwise_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +nft_bitwise_fast_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); struct nft_data data; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 7b0b8fecb2205..73d6bfbdd1d7e 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -147,7 +147,8 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, priv->len); } -static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_byteorder_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_byteorder *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index c3563f0be2692..680fe557686e4 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -344,6 +344,12 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; } + /* UNREGISTER events are also happening on netns exit. + * + * Although nf_tables core releases all tables/chains, only this event + * handler provides guarantee that hook->ops.dev is still accessible, + * so we cannot skip exiting net namespaces. + */ __nft_release_basechain(ctx); } @@ -362,9 +368,6 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; - if (!check_net(ctx.net)) - return NOTIFY_DONE; - nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 47b6d05f1ae69..5e625f5120bed 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -73,20 +73,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data), + }; int err; - err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; - if (desc.type != NFT_DATA_VALUE) { - err = -EINVAL; - nft_data_release(&priv->data, desc.type); - return err; - } - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -96,7 +92,8 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return 0; } -static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_cmp_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); @@ -201,12 +198,14 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; struct nft_data data; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(data), + }; int err; - err = nft_data_init(NULL, &data, sizeof(data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; @@ -242,7 +241,8 @@ static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, return __nft_cmp_offload(ctx, flow, &cmp); } -static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_cmp_fast_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; @@ -275,8 +275,11 @@ const struct nft_expr_ops nft_cmp_fast_ops = { static const struct nft_expr_ops * nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { - struct nft_data_desc desc; struct nft_data data; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(data), + }; enum nft_cmp_ops op; int err; @@ -298,21 +301,14 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return ERR_PTR(-EINVAL); } - err = nft_data_init(NULL, &data, sizeof(data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return ERR_PTR(err); - if (desc.type != NFT_DATA_VALUE) - goto err1; - if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ)) return &nft_cmp_fast_ops; return &nft_cmp_ops; -err1: - nft_data_release(&data, desc.type); - return ERR_PTR(-EINVAL); } struct nft_expr_type nft_cmp_type __read_mostly = { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index d354cefa783ca..e3c17b5d12d66 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -326,7 +326,8 @@ static int nft_extension_dump_info(struct sk_buff *skb, int attr, return 0; } -static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_target_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); @@ -574,12 +575,14 @@ static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, return -1; } -static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_match_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { return __nft_match_dump(skb, expr, nft_expr_priv(expr)); } -static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e) +static int nft_match_large_dump(struct sk_buff *skb, + const struct nft_expr *e, bool reset) { struct nft_xt_match_priv *priv = nft_expr_priv(e); diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 7d0761fad37ef..2fe5ae339cfa8 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -171,7 +171,8 @@ static void nft_connlimit_eval(const struct nft_expr *expr, nft_connlimit_do_eval(priv, regs, pkt, NULL); } -static int nft_connlimit_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_connlimit_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_connlimit *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 8edd3b3c173d7..fc2b892a48ea1 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -201,11 +201,12 @@ static void nft_counter_eval(const struct nft_expr *expr, nft_counter_do_eval(priv, regs, pkt); } -static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_counter_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - return nft_counter_do_dump(skb, priv, false); + return nft_counter_do_dump(skb, priv, reset); } static int nft_counter_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 9c7472af9e4a1..0378691a01d91 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -638,7 +638,8 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, nf_ct_netns_put(ctx->net, ctx->family); } -static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_ct_get_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_ct *priv = nft_expr_priv(expr); @@ -677,7 +678,8 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } -static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_ct_set_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_ct *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 5b5c607fbf83f..2007700bef8e6 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -44,7 +44,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, sizeof(int)); } -static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_dup_netdev_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_dup_netdev *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 29c7ae8789e95..12a03c25629c1 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,33 +44,34 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, return 0; } -static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, - struct nft_regs *regs) +static struct nft_elem_priv *nft_dynset_new(struct nft_set *set, + const struct nft_expr *expr, + struct nft_regs *regs) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set_ext *ext; + void *elem_priv; u64 timeout; - void *elem; if (!atomic_add_unless(&set->nelems, 1, set->size)) return NULL; timeout = priv->timeout ? : set->timeout; - elem = nft_set_elem_init(set, &priv->tmpl, - ®s->data[priv->sreg_key], NULL, - ®s->data[priv->sreg_data], - timeout, 0, GFP_ATOMIC); - if (elem == NULL) + elem_priv = nft_set_elem_init(set, &priv->tmpl, + ®s->data[priv->sreg_key], NULL, + ®s->data[priv->sreg_data], + timeout, 0, GFP_ATOMIC); + if (IS_ERR(elem_priv)) goto err1; - ext = nft_set_elem_ext(set, elem); + ext = nft_set_elem_ext(set, elem_priv); if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0) goto err2; - return elem; + return elem_priv; err2: - nft_set_elem_destroy(set, elem, false); + nft_set_elem_destroy(set, elem_priv, false); err1: if (set->size) atomic_dec(&set->nelems); @@ -191,6 +192,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_OBJECT) + return -EOPNOTSUPP; + if (set->ops->update == NULL) return -EOPNOTSUPP; @@ -357,7 +361,8 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx, nf_tables_destroy_set(ctx, priv->set); } -static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_dynset_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_dynset *priv = nft_expr_priv(expr); u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0; @@ -379,7 +384,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->set->num_exprs == 0) { if (priv->num_exprs == 1) { if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, - priv->expr_array[0])) + priv->expr_array[0], reset)) goto nla_put_failure; } else if (priv->num_exprs > 1) { struct nlattr *nest; @@ -390,7 +395,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) for (i = 0; i < priv->num_exprs; i++) { if (nft_expr_dump(skb, NFTA_LIST_ELEM, - priv->expr_array[i])) + priv->expr_array[i], reset)) goto nla_put_failure; } nla_nest_end(skb, nest); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 3609680831a14..7b432f17ab434 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -503,7 +503,8 @@ static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr * return -1; } -static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_exthdr_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); @@ -513,7 +514,8 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) return nft_exthdr_dump_common(skb, priv); } -static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_exthdr_dump_set(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index b10ce732b337c..b125efa18db8c 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -114,7 +114,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, } EXPORT_SYMBOL_GPL(nft_fib_init); -int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_fib *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index aac6db8680d47..9a05fca9c48b7 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -381,8 +381,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, if (IS_ERR(flowtable)) return PTR_ERR(flowtable); + if (!nft_use_inc(&flowtable->use)) + return -EMFILE; + priv->flowtable = flowtable; - flowtable->use++; return nf_ct_netns_get(ctx->net, ctx->family); } @@ -401,7 +403,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx, { struct nft_flow_offload *priv = nft_expr_priv(expr); - priv->flowtable->use++; + nft_use_inc_restore(&priv->flowtable->use); } static void nft_flow_offload_destroy(const struct nft_ctx *ctx, @@ -410,7 +412,8 @@ static void nft_flow_offload_destroy(const struct nft_ctx *ctx, nf_ct_netns_put(ctx->net, ctx->family); } -static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_flow_offload_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_flow_offload *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 08e7a289738e0..a534d060ce1b6 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -56,7 +56,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, sizeof(int)); } -static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_fwd_netdev_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_fwd_netdev *priv = nft_expr_priv(expr); @@ -186,7 +187,8 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, addr_len); } -static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_fwd_neigh_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_fwd_neigh *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index f829f5289e162..4fc99c80b28e2 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -139,7 +139,7 @@ static int nft_symhash_init(const struct nft_ctx *ctx, } static int nft_jhash_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_jhash *priv = nft_expr_priv(expr); @@ -166,7 +166,7 @@ static int nft_jhash_dump(struct sk_buff *skb, } static int nft_symhash_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_symhash *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3042b32310fae..5e64c4eb5e96e 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -29,20 +29,36 @@ static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, }; +static enum nft_data_types nft_reg_to_type(const struct nlattr *nla) +{ + enum nft_data_types type; + u8 reg; + + reg = ntohl(nla_get_be32(nla)); + if (reg == NFT_REG_VERDICT) + type = NFT_DATA_VERDICT; + else + type = NFT_DATA_VALUE; + + return type; +} + static int nft_immediate_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_immediate_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; + struct nft_data_desc desc = { + .size = sizeof(priv->data), + }; int err; if (tb[NFTA_IMMEDIATE_DREG] == NULL || tb[NFTA_IMMEDIATE_DATA] == NULL) return -EINVAL; - err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc, - tb[NFTA_IMMEDIATE_DATA]); + desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]); + err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; @@ -109,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); } +static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx, + struct nft_chain *chain, + enum nft_trans_phase phase) +{ + struct nft_ctx chain_ctx; + struct nft_rule *rule; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_deactivate(&chain_ctx, rule, phase); +} + static void nft_immediate_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); const struct nft_data *data = &priv->data; - struct nft_ctx chain_ctx; struct nft_chain *chain; - struct nft_rule *rule; if (priv->dreg == NFT_REG_VERDICT) { switch (data->verdict.code) { @@ -127,23 +155,20 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, if (!nft_chain_binding(chain)) break; - chain_ctx = *ctx; - chain_ctx.chain = chain; - - list_for_each_entry(rule, &chain->rules, list) - nft_rule_expr_deactivate(&chain_ctx, rule, phase); - switch (phase) { case NFT_TRANS_PREPARE_ERROR: nf_tables_unbind_chain(ctx, chain); - fallthrough; + nft_deactivate_next(ctx->net, chain); + break; case NFT_TRANS_PREPARE: + nft_immediate_chain_deactivate(ctx, chain, phase); nft_deactivate_next(ctx->net, chain); break; default: + nft_immediate_chain_deactivate(ctx, chain, phase); nft_chain_del(chain); chain->bound = false; - chain->table->use--; + nft_use_dec(&chain->table->use); break; } break; @@ -182,7 +207,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, * let the transaction records release this chain and its rules. */ if (chain->bound) { - chain->use--; + nft_use_dec(&chain->use); break; } @@ -190,9 +215,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, chain_ctx = *ctx; chain_ctx.chain = chain; - chain->use--; + nft_use_dec(&chain->use); list_for_each_entry_safe(rule, n, &chain->rules, list) { - chain->use--; + nft_use_dec(&chain->use); list_del(&rule->list); nf_tables_rule_destroy(&chain_ctx, rule); } @@ -203,7 +228,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, } } -static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_immediate_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 304e33cbed9b4..7935f4849a444 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -54,7 +54,8 @@ static void nft_last_eval(const struct nft_expr *expr, WRITE_ONCE(priv->last_set, 1); } -static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_last_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_last_priv *priv = nft_expr_priv(expr); unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index c626dc10df78e..6675014fa080d 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -168,7 +168,8 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx, return 0; } -static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_limit_pkts_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_limit_pkts *priv = nft_expr_priv(expr); @@ -205,7 +206,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx, } static int nft_limit_bytes_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_limit *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 54f6c2035e84d..84e3744118773 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -241,7 +241,8 @@ static void nft_log_destroy(const struct nft_ctx *ctx, nf_logger_put(ctx->family, li->type); } -static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_log_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_log *priv = nft_expr_priv(expr); const struct nf_loginfo *li = &priv->loginfo; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9d18c5428d53c..5ba799a7722d2 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -19,6 +19,7 @@ struct nft_lookup { struct nft_set *set; u8 sreg; u8 dreg; + bool dreg_set; bool invert; struct nft_set_binding binding; }; @@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr, } if (ext) { - if (set->flags & NFT_SET_MAP) + if (priv->dreg_set) nft_data_copy(®s->data[priv->dreg], nft_set_ext_data(ext), set->dlen); @@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (flags & ~NFT_LOOKUP_F_INV) return -EINVAL; - if (flags & NFT_LOOKUP_F_INV) { - if (set->flags & NFT_SET_MAP) - return -EINVAL; + if (flags & NFT_LOOKUP_F_INV) priv->invert = true; - } } if (tb[NFTA_LOOKUP_DREG] != NULL) { @@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx, set->dlen); if (err < 0) return err; - } else if (set->flags & NFT_SET_MAP) - return -EINVAL; + priv->dreg_set = true; + } else if (set->flags & NFT_SET_MAP) { + /* Map given, but user asks for lookup only (i.e. to + * ignore value assoicated with key). + * + * This makes no sense for anonymous maps since they are + * scoped to the rule, but for named sets this can be useful. + */ + if (set->flags & NFT_SET_ANONYMOUS) + return -EINVAL; + } priv->binding.flags = set->flags & NFT_SET_MAP; @@ -178,7 +185,8 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx, nf_tables_destroy_set(ctx, priv->set); } -static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_lookup_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_lookup *priv = nft_expr_priv(expr); u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0; @@ -187,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg)) goto nla_put_failure; - if (priv->set->flags & NFT_SET_MAP) + if (priv->dreg_set) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9953e80537536..667ab4ec09914 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - u32 plen = sizeof_field(struct nf_nat_range, min_addr.all); + u32 plen = sizeof_field(struct nf_nat_range, min_proto.all); struct nft_masq *priv = nft_expr_priv(expr); int err; @@ -73,7 +73,8 @@ static int nft_masq_init(const struct nft_ctx *ctx, return nf_ct_netns_get(ctx->net, ctx->family); } -static int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_masq_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_masq *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2f8686b711cf4..7fb92f2683b26 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -667,7 +667,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, EXPORT_SYMBOL_GPL(nft_meta_set_init); int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -682,7 +682,8 @@ int nft_meta_get_dump(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nft_meta_get_dump); -int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_meta *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index db8f9116eeb43..296667e254201 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->flags |= NF_NAT_RANGE_MAP_IPS; } - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], &priv->sreg_proto_min, plen); @@ -255,7 +255,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return nf_ct_netns_get(ctx->net, family); } -static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_nat_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_nat *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 4e43214e88def..f3391583abe44 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -89,7 +89,8 @@ static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, return -1; } -static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_ng_inc_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_ng_inc *priv = nft_expr_priv(expr); @@ -137,7 +138,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, NULL, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_ng_random_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_ng_random *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 3ff91bcaa5f24..10850266221a6 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -41,13 +41,16 @@ static int nft_objref_init(const struct nft_ctx *ctx, if (IS_ERR(obj)) return -ENOENT; + if (!nft_use_inc(&obj->use)) + return -EMFILE; + nft_objref_priv(expr) = obj; - obj->use++; return 0; } -static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_objref_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_object *obj = nft_objref_priv(expr); @@ -71,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx, if (phase == NFT_TRANS_COMMIT) return; - obj->use--; + nft_use_dec(&obj->use); } static void nft_objref_activate(const struct nft_ctx *ctx, @@ -79,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx, { struct nft_object *obj = nft_objref_priv(expr); - obj->use++; + nft_use_inc_restore(&obj->use); } static struct nft_expr_type nft_objref_type; @@ -155,7 +158,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, return 0; } -static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_objref_map_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_objref_map *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 720dc9fba6d4f..12b5dedd006e8 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -92,7 +92,8 @@ static int nft_osf_init(const struct nft_ctx *ctx, return 0; } -static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_osf_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_osf *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 92422bc720a88..a5a89c238c3ba 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -150,7 +150,8 @@ static int nft_payload_init(const struct nft_ctx *ctx, priv->len); } -static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_payload_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_payload *priv = nft_expr_priv(expr); @@ -708,7 +709,8 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, priv->len); } -static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_payload_set_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_payload_set *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 9ba1de51ac070..6758d9d72ca05 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -127,7 +127,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, return 0; } -static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_queue_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_queue *priv = nft_expr_priv(expr); @@ -143,7 +144,8 @@ static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) } static int -nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr) +nft_queue_sreg_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_queue *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index c4d1389f7185a..133807253498b 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -198,11 +198,12 @@ static int nft_quota_init(const struct nft_ctx *ctx, return nft_quota_do_init(tb, priv); } -static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_quota_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_quota *priv = nft_expr_priv(expr); - return nft_quota_do_dump(skb, priv, false); + return nft_quota_do_dump(skb, priv, reset); } static struct nft_expr_type nft_quota_type; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index e4a1c44d7f513..f8258d5202297 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -51,7 +51,14 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr const struct nlattr * const tb[]) { struct nft_range_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc_from, desc_to; + struct nft_data_desc desc_from = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data_from), + }; + struct nft_data_desc desc_to = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data_to), + }; int err; u32 op; @@ -61,26 +68,16 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr !tb[NFTA_RANGE_TO_DATA]) return -EINVAL; - err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), - &desc_from, tb[NFTA_RANGE_FROM_DATA]); + err = nft_data_init(NULL, &priv->data_from, &desc_from, + tb[NFTA_RANGE_FROM_DATA]); if (err < 0) return err; - if (desc_from.type != NFT_DATA_VALUE) { - err = -EINVAL; - goto err1; - } - - err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), - &desc_to, tb[NFTA_RANGE_TO_DATA]); + err = nft_data_init(NULL, &priv->data_to, &desc_to, + tb[NFTA_RANGE_TO_DATA]); if (err < 0) goto err1; - if (desc_to.type != NFT_DATA_VALUE) { - err = -EINVAL; - goto err2; - } - if (desc_from.len != desc_to.len) { err = -EINVAL; goto err2; @@ -114,7 +111,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr return err; } -static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_range_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_range_expr *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index ba09890dddb50..a42f3620dd3e1 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, unsigned int plen; int err; - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); @@ -75,7 +75,8 @@ static int nft_redir_init(const struct nft_ctx *ctx, return nf_ct_netns_get(ctx->net, ctx->family); } -static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_redir_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_redir *priv = nft_expr_priv(expr); @@ -232,7 +233,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = { .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, - .maxattr = NFTA_MASQ_MAX, + .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 927ff8459bd90..f2addc844dd2d 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -69,7 +69,8 @@ int nft_reject_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_reject_init); -int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +int nft_reject_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { const struct nft_reject *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index bcd01a63e38f1..63a6069aa02b9 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -146,7 +146,7 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, } static int nft_rt_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_rt *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index e7ae5914971e7..30213bcfce648 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -13,6 +13,7 @@ #include struct nft_bitmap_elem { + struct nft_elem_priv priv; struct list_head head; struct nft_set_ext ext; }; @@ -104,8 +105,9 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, return NULL; } -static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_bitmap_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { const struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -116,7 +118,7 @@ static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, !nft_set_elem_active(&be->ext, genmask)) continue; - return be; + return &be->priv; } return ERR_PTR(-ENOENT); } @@ -125,8 +127,8 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -148,8 +150,8 @@ static void nft_bitmap_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem->priv); struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -163,8 +165,8 @@ static void nft_bitmap_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem->priv); struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -174,28 +176,27 @@ static void nft_bitmap_activate(const struct net *net, nft_set_elem_change_active(net, set, &be->ext); } -static bool nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *_be) +static void nft_bitmap_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); - struct nft_bitmap_elem *be = _be; u32 idx, off; nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - - return true; } -static void *nft_bitmap_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -209,7 +210,7 @@ static void *nft_bitmap_deactivate(const struct net *net, priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - return be; + return &be->priv; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -226,7 +227,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&be->ext, iter->genmask)) goto cont; - elem.priv = be; + elem.priv = &be->priv; iter->err = iter->fn(ctx, set, iter, &elem); @@ -265,19 +266,22 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, &be->priv); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 76de6c8d98655..0691565caa81b 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -27,6 +27,7 @@ struct nft_rhash { }; struct nft_rhash_elem { + struct nft_elem_priv priv; struct rhash_head node; struct nft_set_ext ext; }; @@ -59,6 +60,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; + if (nft_set_elem_is_dead(&he->ext)) + return 1; if (nft_set_elem_expired(&he->ext)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) @@ -93,8 +96,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, return !!he; } -static void *nft_rhash_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_rhash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; @@ -106,13 +110,14 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set, he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) - return he; + return &he->priv; return ERR_PTR(-ENOENT); } static bool nft_rhash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *regs), const struct nft_expr *expr, @@ -121,6 +126,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he, *prev; + struct nft_elem_priv *elem_priv; struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, @@ -131,10 +137,11 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, if (he != NULL) goto out; - he = new(set, expr, regs); - if (he == NULL) + elem_priv = new(set, expr, regs); + if (!elem_priv) goto err1; + he = nft_elem_priv_cast(elem_priv); prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if (IS_ERR(prev)) @@ -142,7 +149,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { - nft_set_elem_destroy(set, he, true); + nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); he = prev; } @@ -152,7 +159,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, return true; err2: - nft_set_elem_destroy(set, he, true); + nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); err1: return false; @@ -162,8 +169,8 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he = elem->priv; struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, @@ -185,28 +192,23 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, static void nft_rhash_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rhash_elem *he = elem->priv; + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &he->ext); - nft_set_elem_clear_busy(&he->ext); } -static bool nft_rhash_flush(const struct net *net, - const struct nft_set *set, void *priv) +static void nft_rhash_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_rhash_elem *he = priv; + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) { - nft_set_elem_change_active(net, set, &he->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &he->ext); } -static void *nft_rhash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_rhash_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; @@ -218,21 +220,20 @@ static void *nft_rhash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); - if (he != NULL && - !nft_rhash_flush(net, set, he)) - he = NULL; + if (he) + nft_set_elem_change_active(net, set, &he->ext); rcu_read_unlock(); - return he; + return &he->priv; } static void nft_rhash_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he = elem->priv; rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } @@ -252,7 +253,9 @@ static bool nft_rhash_delete(const struct nft_set *set, if (he == NULL) return false; - return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0; + nft_set_elem_dead(&he->ext); + + return true; } static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, @@ -278,12 +281,10 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&he->ext)) - goto cont; if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = he; + elem.priv = &he->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -314,25 +315,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, static void nft_rhash_gc(struct work_struct *work) { + struct nftables_pernet *nft_net; struct nft_set *set; struct nft_rhash_elem *he; struct nft_rhash *priv; - struct nft_set_gc_batch *gcb = NULL; struct rhashtable_iter hti; + struct nft_trans_gc *gc; + struct net *net; + u32 gc_seq; priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + + if (nft_set_gc_is_pending(set)) + goto done; + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) - break; - continue; + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; } + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + + if (nft_set_elem_is_dead(&he->ext)) + goto dead_elem; + if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) && nft_rhash_expr_needs_gc_run(set, &he->ext)) goto needs_gc_run; @@ -340,26 +364,26 @@ static void nft_rhash_gc(struct work_struct *work) if (!nft_set_elem_expired(&he->ext)) continue; needs_gc_run: - if (nft_set_elem_mark_busy(&he->ext)) - continue; + nft_set_elem_dead(&he->ext); +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb == NULL) - break; - rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, he); + nft_trans_gc_elem_add(gc, he); } + + gc = nft_trans_gc_catchall_async(gc, gc_seq); + +try_later: + /* catchall list iteration requires rcu read side lock. */ rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); - he = nft_set_catchall_gc(set); - if (he) { - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb) - nft_set_gc_batch_add(gcb, he); - } - nft_set_gc_batch_complete(gcb); + if (gc) + nft_trans_gc_queue_async_done(gc); + +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } @@ -386,6 +410,8 @@ static int nft_rhash_init(const struct nft_set *set, struct rhashtable_params params = nft_rhash_params; int err; + BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0); + params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; params.key_len = set->klen; @@ -394,25 +420,37 @@ static int nft_rhash_init(const struct nft_set *set, return err; INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); - if (set->flags & NFT_SET_TIMEOUT) + if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL)) nft_rhash_gc_init(set); return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + struct nft_rhash_elem *he = ptr; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); - rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -445,6 +483,7 @@ struct nft_hash { }; struct nft_hash_elem { + struct nft_elem_priv priv; struct hlist_node node; struct nft_set_ext ext; }; @@ -470,8 +509,9 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set, return false; } -static void *nft_hash_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_hash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -483,7 +523,7 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set, hlist_for_each_entry_rcu(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && nft_set_elem_active(&he->ext, genmask)) - return he; + return &he->priv; } return ERR_PTR(-ENOENT); } @@ -533,7 +573,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { - struct nft_hash_elem *this = elem->priv, *he; + struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 hash; @@ -554,26 +594,26 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, static void nft_hash_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_hash_flush(const struct net *net, - const struct nft_set *set, void *priv) +static void nft_hash_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); - return true; } -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_hash_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *this = elem->priv, *he; u8 genmask = nft_genmask_next(net); u32 hash; @@ -583,7 +623,7 @@ static void *nft_hash_deactivate(const struct net *net, set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); - return he; + return &he->priv; } } return NULL; @@ -593,7 +633,7 @@ static void nft_hash_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem->priv); hlist_del_rcu(&he->node); } @@ -613,7 +653,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = he; + elem.priv = &he->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -643,7 +683,8 @@ static int nft_hash_init(const struct nft_set *set, return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -653,7 +694,7 @@ static void nft_hash_destroy(const struct nft_set *set) for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, &he->priv); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 11e24f56bf805..c4da1a1c1a1bf 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -566,8 +566,9 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext) || - (genmask && + if (nft_set_elem_expired(&f->mt[b].e->ext)) + goto next_match; + if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) goto next_match; @@ -598,11 +599,18 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, * @elem: nftables API element representation containing key data * @flags: Unused */ -static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { - return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + static struct nft_pipapo_elem *e; + + e = pipapo_get(net, set, (const u8 *)elem->key.val.data, + nft_genmask_cur(net)); + if (IS_ERR(e)) + return ERR_CAST(e); + + return &e->priv; } /** @@ -901,12 +909,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret, bit_offset = 0; + int rule = f->rules, group, ret, bit_offset = 0; - ret = pipapo_resize(f, f->rules - 1, f->rules); + ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) return ret; + f->rules++; + for (group = 0; group < f->groups; group++) { int i, v; u8 mask; @@ -1051,7 +1061,9 @@ static int pipapo_expand(struct nft_pipapo_field *f, step++; if (step >= len) { if (!masks) { - pipapo_insert(f, base, 0); + err = pipapo_insert(f, base, 0); + if (err < 0) + return err; masks = 1; } goto out; @@ -1157,10 +1169,10 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *start = (const u8 *)elem->key.val.data, *end; - struct nft_pipapo_elem *e = elem->priv, *dup; struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); + struct nft_pipapo_elem *e, *dup; struct nft_pipapo_field *f; const u8 *start_p, *end_p; int i, bsize_max, err = 0; @@ -1234,6 +1246,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else ret = pipapo_expand(f, start, end, f->groups * f->bb); + if (ret < 0) + return ret; + if (f->bsize > bsize_max) bsize_max = f->bsize; @@ -1255,6 +1270,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, put_cpu_ptr(m->scratch); } + e = nft_elem_priv_cast(elem->priv); *ext2 = &e->ext; pipapo_map(m, rulemap, e); @@ -1529,16 +1545,34 @@ static void pipapo_drop(struct nft_pipapo_match *m, } } +static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, + struct nft_pipapo_elem *e) + +{ + struct nft_set_elem elem = { + .priv = &e->priv, + }; + + nft_setelem_data_deactivate(net, set, &elem); +} + /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @set: nftables API set representation + * @_set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) +static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) { + struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; + struct nft_trans_gc *gc; + + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (!gc) + return; while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; @@ -1562,13 +1596,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) f--; i--; e = f->mt[rulemap[i].to].e; - if (nft_set_elem_expired(&e->ext) && - !nft_set_elem_mark_busy(&e->ext)) { + + /* synchronous gc never fails, there is no need to set on + * NFT_SET_ELEM_DEAD_BIT. + */ + if (nft_set_elem_expired(&e->ext)) { priv->dirty = true; - pipapo_drop(m, rulemap); - rcu_barrier(); - nft_set_elem_destroy(set, e, true); + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (!gc) + return; + + nft_pipapo_gc_deactivate(net, set, e); + pipapo_drop(m, rulemap); + nft_trans_gc_elem_add(gc, e); /* And check again current first rule, which is now the * first we haven't checked. @@ -1578,11 +1619,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m) } } - e = nft_set_catchall_gc(set); - if (e) - nft_set_elem_destroy(set, e, true); - - priv->last_gc = jiffies; + gc = nft_trans_gc_catchall_sync(gc); + if (gc) { + nft_trans_gc_queue_sync_done(gc); + priv->last_gc = jiffies; + } } /** @@ -1600,17 +1641,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m) } } -/** - * pipapo_reclaim_match - RCU callback to free fields from old matching data - * @rcu: RCU head - */ -static void pipapo_reclaim_match(struct rcu_head *rcu) +static void pipapo_free_match(struct nft_pipapo_match *m) { - struct nft_pipapo_match *m; int i; - m = container_of(rcu, struct nft_pipapo_match, rcu); - for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); @@ -1625,7 +1659,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) } /** - * pipapo_commit() - Replace lookup data with current working copy + * pipapo_reclaim_match - RCU callback to free fields from old matching data + * @rcu: RCU head + */ +static void pipapo_reclaim_match(struct rcu_head *rcu) +{ + struct nft_pipapo_match *m; + + m = container_of(rcu, struct nft_pipapo_match, rcu); + pipapo_free_match(m); +} + +/** + * nft_pipapo_commit() - Replace lookup data with current working copy * @set: nftables API set representation * * While at it, check if we should perform garbage collection on the working @@ -1635,7 +1681,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1660,6 +1706,37 @@ static void pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) +{ +#ifdef CONFIG_PROVE_LOCKING + const struct net *net = read_pnet(&set->net); + + return lockdep_is_held(&nft_pernet(net)->commit_mutex); +#else + return true; +#endif +} + +static void nft_pipapo_abort(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *new_clone, *m; + + if (!priv->dirty) + return; + + m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); + + new_clone = pipapo_clone(m); + if (IS_ERR(new_clone)) + return; + + priv->dirty = false; + + pipapo_free_match(priv->clone); + priv->clone = new_clone; +} + /** * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace @@ -1667,8 +1744,7 @@ static void pipapo_commit(const struct nft_set *set) * @elem: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently - * in use for lookups, and not directly inserted into current lookup data, so - * we'll take care of that by calling pipapo_commit() here. Both + * in use for lookups, and not directly inserted into current lookup data. Both * nft_pipapo_insert() and nft_pipapo_activate() are called once for each * element, hence we can't purpose either one as a real commit operation. */ @@ -1676,16 +1752,9 @@ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_pipapo_elem *e; - - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0); - if (IS_ERR(e)) - return; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &e->ext); - nft_set_elem_clear_busy(&e->ext); - - pipapo_commit(set); } /** @@ -1723,9 +1792,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, * * Return: deactivated element if found, NULL otherwise. */ -static void *nft_pipapo_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -1750,13 +1819,12 @@ static void *nft_pipapo_deactivate(const struct net *net, * * Return: true if element was found and deactivated. */ -static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, - void *elem) +static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = elem; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); - return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext), - &e->ext); + nft_set_elem_change_active(net, set, &e->ext); } /** @@ -1891,16 +1959,13 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; - struct nft_pipapo_elem *e = elem->priv; int rules_f0, first_rule = 0; + struct nft_pipapo_elem *e; const u8 *data; + e = nft_elem_priv_cast(elem->priv); data = (const u8 *)nft_set_ext_key(&e->ext); - e = pipapo_get(net, set, data, 0); - if (IS_ERR(e)) - return; - while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *match_start, *match_end; @@ -1935,7 +2000,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, if (i == m->field_count) { priv->dirty = true; pipapo_drop(m, rulemap); - pipapo_commit(set); return; } @@ -1957,12 +2021,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); struct nft_pipapo_match *m; struct nft_pipapo_field *f; int i, r; rcu_read_lock(); - m = rcu_dereference(priv->match); + if (iter->genmask == nft_genmask_cur(net)) + m = rcu_dereference(priv->match); + else + m = priv->clone; if (unlikely(!m)) goto out; @@ -1985,10 +2053,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&e->ext, iter->genmask)) goto cont; - if (nft_set_elem_expired(&e->ext)) - goto cont; - - elem.priv = e; + elem.priv = &e->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -2062,6 +2127,8 @@ static int nft_pipapo_init(const struct nft_set *set, struct nft_pipapo_field *f; int err, i, field_count; + BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0); + field_count = desc->field_count ? : 1; if (field_count > NFT_PIPAPO_MAX_FIELDS) @@ -2135,10 +2202,12 @@ static int nft_pipapo_init(const struct nft_set *set, /** * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @ctx: context * @set: nftables API set representation * @m: matching data pointing to key mapping array */ -static void nft_set_pipapo_match_destroy(const struct nft_set *set, +static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; @@ -2155,15 +2224,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set, e = f->mt[r].e; - nft_set_elem_destroy(set, e, true); + nf_tables_set_elem_destroy(ctx, set, &e->priv); } } /** * nft_pipapo_destroy() - Free private data for set and all committed elements + * @ctx: context * @set: nftables API set representation */ -static void nft_pipapo_destroy(const struct nft_set *set) +static void nft_pipapo_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; @@ -2173,7 +2244,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2190,7 +2261,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) m = priv->clone; if (priv->dirty) - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); @@ -2238,6 +2309,8 @@ const struct nft_set_type nft_set_pipapo_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; @@ -2260,6 +2333,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583eb..2a64b416a2306 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -170,10 +170,12 @@ struct nft_pipapo_elem; /** * struct nft_pipapo_elem - API-facing representation of single set element + * @priv: element placeholder * @ext: nftables API extensions */ struct nft_pipapo_elem { - struct nft_set_ext ext; + struct nft_elem_priv priv; + struct nft_set_ext ext; }; int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 7325bee7d1442..7567a96b3304a 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -23,6 +23,7 @@ struct nft_rbtree { }; struct nft_rbtree_elem { + struct nft_elem_priv priv; struct rb_node node; struct nft_set_ext ext; }; @@ -38,10 +39,18 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) return !nft_rbtree_interval_end(rbe); } -static bool nft_rbtree_equal(const struct nft_set *set, const void *this, - const struct nft_rbtree_elem *interval) +static int nft_rbtree_cmp(const struct nft_set *set, + const struct nft_rbtree_elem *e1, + const struct nft_rbtree_elem *e2) { - return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; + return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), + set->klen); +} + +static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) +{ + return nft_set_elem_expired(&rbe->ext) || + nft_set_elem_is_dead(&rbe->ext); } static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, @@ -52,7 +61,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; - const void *this; int d; parent = rcu_dereference_raw(priv->root.rb_node); @@ -62,12 +70,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set rbe = rb_entry(parent, struct nft_rbtree_elem, node); - this = nft_set_ext_key(&rbe->ext); - d = memcmp(this, key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = rcu_dereference_raw(parent->rb_left); if (interval && - nft_rbtree_equal(set, this, interval) && + !nft_rbtree_cmp(set, rbe, interval) && nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; @@ -80,7 +87,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set continue; } - if (nft_set_elem_expired(&rbe->ext)) + if (nft_rbtree_elem_expired(rbe)) return false; if (nft_rbtree_interval_end(rbe)) { @@ -98,7 +105,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && - !nft_set_elem_expired(&interval->ext) && + !nft_rbtree_elem_expired(interval) && nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; @@ -191,8 +198,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, return false; } -static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_rbtree_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); @@ -203,166 +211,276 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); if (ret || !read_seqcount_retry(&priv->count, seq)) - return rbe; + return &rbe->priv; read_lock_bh(&priv->lock); seq = read_seqcount_begin(&priv->count); ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); - if (!ret) - rbe = ERR_PTR(-ENOENT); read_unlock_bh(&priv->lock); - return rbe; + if (!ret) + return ERR_PTR(-ENOENT); + + return &rbe->priv; +} + +static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + struct nft_set_elem elem = { + .priv = &rbe->priv, + }; + + nft_setelem_data_deactivate(net, set, &elem); + rb_erase(&rbe->node, &priv->root); +} + +static const struct nft_rbtree_elem * +nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); + struct net *net = read_pnet(&set->net); + struct nft_rbtree_elem *rbe_prev; + struct nft_trans_gc *gc; + + gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); + if (!gc) + return ERR_PTR(-ENOMEM); + + /* search for end interval coming before this element. + * end intervals don't carry a timeout extension, they + * are coupled with the interval start element. + */ + while (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + if (nft_rbtree_interval_end(rbe_prev) && + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) + break; + + prev = rb_prev(prev); + } + + rbe_prev = NULL; + if (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + nft_rbtree_gc_remove(net, set, priv, rbe_prev); + + /* There is always room in this trans gc for this element, + * memory allocation never actually happens, hence, the warning + * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, + * this is synchronous gc which never fails. + */ + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (WARN_ON_ONCE(!gc)) + return ERR_PTR(-ENOMEM); + + nft_trans_gc_elem_add(gc, rbe_prev); + } + + nft_rbtree_gc_remove(net, set, priv, rbe); + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + if (WARN_ON_ONCE(!gc)) + return ERR_PTR(-ENOMEM); + + nft_trans_gc_elem_add(gc, rbe); + + nft_trans_gc_queue_sync_done(gc); + + return rbe_prev; +} + +static bool nft_rbtree_update_first(const struct nft_set *set, + struct nft_rbtree_elem *rbe, + struct rb_node *first) +{ + struct nft_rbtree_elem *first_elem; + + first_elem = rb_entry(first, struct nft_rbtree_elem, node); + /* this element is closest to where the new element is to be inserted: + * update the first element for the node list path. + */ + if (nft_rbtree_cmp(set, rbe, first_elem) < 0) + return true; + + return false; } static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) { - bool overlap = false, dup_end_left = false, dup_end_right = false; + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; + struct rb_node *node, *next, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); + u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); - struct nft_rbtree_elem *rbe; - struct rb_node *parent, **p; int d; - /* Detect overlaps as we descend the tree. Set the flag in these cases: - * - * a1. _ _ __>| ?_ _ __| (insert end before existing end) - * a2. _ _ ___| ?_ _ _>| (insert end after existing end) - * a3. _ _ ___? >|_ _ __| (insert start before existing end) - * - * and clear it later on, as we eventually reach the points indicated by - * '?' above, in the cases described below. We'll always meet these - * later, locally, due to tree ordering, and overlaps for the intervals - * that are the closest together are always evaluated last. - * - * b1. _ _ __>| !_ _ __| (insert end before existing start) - * b2. _ _ ___| !_ _ _>| (insert end after existing start) - * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf) - * '--' no nodes falling in this range - * b4. >|_ _ ! (insert start before existing start) - * - * Case a3. resolves to b3.: - * - if the inserted start element is the leftmost, because the '0' - * element in the tree serves as end element - * - otherwise, if an existing end is found immediately to the left. If - * there are existing nodes in between, we need to further descend the - * tree before we can conclude the new start isn't causing an overlap - * - * or to b4., which, preceded by a3., means we already traversed one or - * more existing intervals entirely, from the right. - * - * For a new, rightmost pair of elements, we'll hit cases b3. and b2., - * in that order. - * - * The flag is also cleared in two special cases: - * - * b5. |__ _ _!|<_ _ _ (insert start right before existing end) - * b6. |__ _ >|!__ _ _ (insert end right after existing start) - * - * which always happen as last step and imply that no further - * overlapping is possible. - * - * Another special case comes from the fact that start elements matching - * an already existing start element are allowed: insertion is not - * performed but we return -EEXIST in that case, and the error will be - * cleared by the caller if NLM_F_EXCL is not present in the request. - * This way, request for insertion of an exact overlap isn't reported as - * error to userspace if not desired. - * - * However, if the existing start matches a pre-existing start, but the - * end element doesn't match the corresponding pre-existing end element, - * we need to report a partial overlap. This is a local condition that - * can be noticed without need for a tracking flag, by checking for a - * local duplicated end for a corresponding start, from left and right, - * separately. + /* Descend the tree to search for an existing element greater than the + * key value to insert that is greater than the new element. This is the + * first element to walk the ordered elements to find possible overlap. */ - parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), - nft_set_ext_key(&new->ext), - set->klen); + d = nft_rbtree_cmp(set, rbe, new); + if (d < 0) { p = &parent->rb_left; - - if (nft_rbtree_interval_start(new)) { - if (nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext) && !*p) - overlap = false; - } else { - if (dup_end_left && !*p) - return -ENOTEMPTY; - - overlap = nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, - genmask) && - !nft_set_elem_expired(&rbe->ext); - - if (overlap) { - dup_end_right = true; - continue; - } - } } else if (d > 0) { - p = &parent->rb_right; + if (!first || + nft_rbtree_update_first(set, rbe, first)) + first = &rbe->node; - if (nft_rbtree_interval_end(new)) { - if (dup_end_right && !*p) - return -ENOTEMPTY; - - overlap = nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, - genmask) && - !nft_set_elem_expired(&rbe->ext); - - if (overlap) { - dup_end_left = true; - continue; - } - } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { - overlap = nft_rbtree_interval_end(rbe); - } + p = &parent->rb_right; } else { - if (nft_rbtree_interval_end(rbe) && - nft_rbtree_interval_start(new)) { + if (nft_rbtree_interval_end(rbe)) p = &parent->rb_left; - - if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) - overlap = false; - } else if (nft_rbtree_interval_start(rbe) && - nft_rbtree_interval_end(new)) { + else p = &parent->rb_right; + } + } + + if (!first) + first = rb_first(&priv->root); + + /* Detect overlap by going through the list of valid tree nodes. + * Values stored in the tree are in reversed order, starting from + * highest to lowest value. + */ + for (node = first; node != NULL; node = next) { + next = rb_next(node); + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* perform garbage collection to avoid bogus overlap reports + * but skip new elements in this transaction. + */ + if (nft_set_elem_expired(&rbe->ext) && + nft_set_elem_active(&rbe->ext, cur_genmask)) { + const struct nft_rbtree_elem *removed_end; + + removed_end = nft_rbtree_gc_elem(set, priv, rbe); + if (IS_ERR(removed_end)) + return PTR_ERR(removed_end); + + if (removed_end == rbe_le || removed_end == rbe_ge) + return -EAGAIN; + + continue; + } + + d = nft_rbtree_cmp(set, rbe, new); + if (d == 0) { + /* Matching end element: no need to look for an + * overlapping greater or equal element. + */ + if (nft_rbtree_interval_end(rbe)) { + rbe_le = rbe; + break; + } + + /* first element that is greater or equal to key value. */ + if (!rbe_ge) { + rbe_ge = rbe; + continue; + } - if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) - overlap = false; - } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { - *ext = &rbe->ext; - return -EEXIST; - } else { - overlap = false; - if (nft_rbtree_interval_end(rbe)) - p = &parent->rb_left; - else - p = &parent->rb_right; + /* this is a closer more or equal element, update it. */ + if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { + rbe_ge = rbe; + continue; } + + /* element is equal to key value, make sure flags are + * the same, an existing more or equal start element + * must not be replaced by more or equal end element. + */ + if ((nft_rbtree_interval_start(new) && + nft_rbtree_interval_start(rbe_ge)) || + (nft_rbtree_interval_end(new) && + nft_rbtree_interval_end(rbe_ge))) { + rbe_ge = rbe; + continue; + } + } else if (d > 0) { + /* annotate element greater than the new element. */ + rbe_ge = rbe; + continue; + } else if (d < 0) { + /* annotate element less than the new element. */ + rbe_le = rbe; + break; } + } + + /* - new start element matching existing start element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && + nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { + *ext = &rbe_ge->ext; + return -EEXIST; + } - dup_end_left = dup_end_right = false; + /* - new end element matching existing end element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && + nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { + *ext = &rbe_le->ext; + return -EEXIST; } - if (overlap) + /* - new start element with existing closest, less or equal key value + * being a start element: partial overlap, reported as -ENOTEMPTY. + * Anonymous sets allow for two consecutive start element since they + * are constant, skip them to avoid bogus overlap reports. + */ + if (!nft_set_is_anonymous(set) && rbe_le && + nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) + return -ENOTEMPTY; + + /* - new end element with existing closest, less or equal key value + * being a end element: partial overlap, reported as -ENOTEMPTY. + */ + if (rbe_le && + nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; + /* - new end element with existing closest, greater or equal key value + * being an end element: partial overlap, reported as -ENOTEMPTY + */ + if (rbe_ge && + nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + + /* Accepted element: pick insertion point depending on key value */ + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_rbtree_cmp(set, rbe, new); + + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; @@ -372,15 +490,22 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->priv; int err; - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); - err = __nft_rbtree_insert(net, set, rbe, ext); - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + do { + if (fatal_signal_pending(current)) + return -EINTR; + + cond_resched(); + + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + err = __nft_rbtree_insert(net, set, rbe, ext); + write_seqcount_end(&priv->count); + write_unlock_bh(&priv->lock); + } while (err == -EAGAIN); return err; } @@ -389,8 +514,8 @@ static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->priv; write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); @@ -403,32 +528,27 @@ static void nft_rbtree_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe = elem->priv; + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &rbe->ext); - nft_set_elem_clear_busy(&rbe->ext); } -static bool nft_rbtree_flush(const struct net *net, - const struct nft_set *set, void *priv) +static void nft_rbtree_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_rbtree_elem *rbe = priv; + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); - if (!nft_set_elem_mark_busy(&rbe->ext) || - !nft_is_active(net, &rbe->ext)) { - nft_set_elem_change_active(net, set, &rbe->ext); - return true; - } - return false; + nft_set_elem_change_active(net, set, &rbe->ext); } -static void *nft_rbtree_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; - struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_next(net); int d; @@ -454,8 +574,8 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_left; continue; } - nft_rbtree_flush(net, set, rbe); - return rbe; + nft_rbtree_flush(net, set, &rbe->priv); + return &rbe->priv; } } return NULL; @@ -476,12 +596,10 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (nft_set_elem_expired(&rbe->ext)) - goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; - elem.priv = rbe; + elem.priv = &rbe->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -496,64 +614,83 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, static void nft_rbtree_gc(struct work_struct *work) { - struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; - struct nft_set_gc_batch *gcb = NULL; + struct nft_rbtree_elem *rbe, *rbe_end = NULL; + struct nftables_pernet *nft_net; struct nft_rbtree *priv; + struct nft_trans_gc *gc; struct rb_node *node; struct nft_set *set; + unsigned int gc_seq; + struct net *net; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); + if (nft_set_gc_is_pending(set)) + goto done; + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; + + read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + + /* Ruleset has been updated, try later. */ + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { + nft_trans_gc_destroy(gc); + gc = NULL; + goto try_later; + } + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (nft_set_elem_is_dead(&rbe->ext)) + goto dead_elem; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + nft_set_elem_dead(&rbe->ext); + + if (!rbe_end) continue; - if (rbe_prev) { - rb_erase(&rbe_prev->node, &priv->root); - rbe_prev = NULL; - } - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (!gcb) - break; + nft_set_elem_dead(&rbe_end->ext); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - rbe_prev = rbe; + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; - if (rbe_end) { - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe_end); - rb_erase(&rbe_end->node, &priv->root); - rbe_end = NULL; - } - node = rb_next(node); - if (!node) - break; - } - if (rbe_prev) - rb_erase(&rbe_prev->node, &priv->root); - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + nft_trans_gc_elem_add(gc, rbe_end); + rbe_end = NULL; +dead_elem: + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + if (!gc) + goto try_later; - rbe = nft_set_catchall_gc(set); - if (rbe) { - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb) - nft_set_gc_batch_add(gcb, rbe); + nft_trans_gc_elem_add(gc, rbe); } - nft_set_gc_batch_complete(gcb); + gc = nft_trans_gc_catchall_async(gc, gc_seq); + +try_later: + read_unlock_bh(&priv->lock); + + if (gc) + nft_trans_gc_queue_async_done(gc); +done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } @@ -570,6 +707,8 @@ static int nft_rbtree_init(const struct nft_set *set, { struct nft_rbtree *priv = nft_set_priv(set); + BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0); + rwlock_init(&priv->lock); seqcount_rwlock_init(&priv->count, &priv->lock); priv->root = RB_ROOT; @@ -582,7 +721,8 @@ static int nft_rbtree_init(const struct nft_set *set, return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -593,7 +733,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, &rbe->priv); } } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 9ad9cc0d1d27c..dac1797e98e85 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -196,7 +196,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, } static int nft_socket_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_socket *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 1133e06f3c40e..bf7268908154a 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -272,7 +272,8 @@ static void nft_synproxy_destroy(const struct nft_ctx *ctx, nft_synproxy_do_destroy(ctx); } -static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_synproxy_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset) { struct nft_synproxy *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 9fea90ed79d44..4909f940f5cc5 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -294,7 +294,7 @@ static void nft_tproxy_destroy(const struct nft_ctx *ctx, } static int nft_tproxy_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_tproxy *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 2ee50996da8cc..a5b6ba4779d1f 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -106,7 +106,7 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, } static int nft_tunnel_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_tunnel *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index cbbbc4ecad3ae..f823007f9f70f 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -210,7 +210,7 @@ static void nft_xfrm_get_eval(const struct nft_expr *expr, } static int nft_xfrm_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) + const struct nft_expr *expr, bool reset) { const struct nft_xfrm *priv = nft_expr_priv(expr);