blob: 4080ed6a072bf7c4bb414e19a38f9a663557198a [file] [log] [blame]
/*
* Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
#include <linux/vmalloc.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#define NFT_HASH_MIN_SIZE 4UL
struct nft_hash {
struct nft_hash_table __rcu *tbl;
};
struct nft_hash_table {
unsigned int size;
struct nft_hash_elem __rcu *buckets[];
};
struct nft_hash_elem {
struct nft_hash_elem __rcu *next;
struct nft_data key;
struct nft_data data[];
};
#define nft_hash_for_each_entry(i, head) \
for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
#define nft_hash_for_each_entry_rcu(i, head) \
for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
static u32 nft_hash_rnd __read_mostly;
static bool nft_hash_rnd_initted __read_mostly;
static unsigned int nft_hash_data(const struct nft_data *data,
unsigned int hsize, unsigned int len)
{
unsigned int h;
h = jhash(data->data, len, nft_hash_rnd);
return h & (hsize - 1);
}
static bool nft_hash_lookup(const struct nft_set *set,
const struct nft_data *key,
struct nft_data *data)
{
const struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
const struct nft_hash_elem *he;
unsigned int h;
h = nft_hash_data(key, tbl->size, set->klen);
nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
if (nft_data_cmp(&he->key, key, set->klen))
continue;
if (set->flags & NFT_SET_MAP)
nft_data_copy(data, he->data);
return true;
}
return false;
}
static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
kvfree(tbl);
}
static unsigned int nft_hash_tbl_size(unsigned int nelem)
{
return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
}
static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
{
struct nft_hash_table *tbl;
size_t size;
size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
if (tbl == NULL)
tbl = vzalloc(size);
if (tbl == NULL)
return NULL;
tbl->size = nbuckets;
return tbl;
}
static void nft_hash_chain_unzip(const struct nft_set *set,
const struct nft_hash_table *ntbl,
struct nft_hash_table *tbl, unsigned int n)
{
struct nft_hash_elem *he, *last, *next;
unsigned int h;
he = nft_dereference(tbl->buckets[n]);
if (he == NULL)
return;
h = nft_hash_data(&he->key, ntbl->size, set->klen);
/* Find last element of first chain hashing to bucket h */
last = he;
nft_hash_for_each_entry(he, he->next) {
if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
break;
last = he;
}
/* Unlink first chain from the old table */
RCU_INIT_POINTER(tbl->buckets[n], last->next);
/* If end of chain reached, done */
if (he == NULL)
return;
/* Find first element of second chain hashing to bucket h */
next = NULL;
nft_hash_for_each_entry(he, he->next) {
if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
continue;
next = he;
break;
}
/* Link the two chains */
RCU_INIT_POINTER(last->next, next);
}
static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
{
struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
struct nft_hash_elem *he;
unsigned int i, h;
bool complete;
ntbl = nft_hash_tbl_alloc(tbl->size * 2);
if (ntbl == NULL)
return -ENOMEM;
/* Link new table's buckets to first element in the old table
* hashing to the new bucket.
*/
for (i = 0; i < ntbl->size; i++) {
h = i < tbl->size ? i : i - tbl->size;
nft_hash_for_each_entry(he, tbl->buckets[h]) {
if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
continue;
RCU_INIT_POINTER(ntbl->buckets[i], he);
break;
}
}
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
/* Unzip interleaved hash chains */
do {
/* Wait for readers to use new table/unzipped chains */
synchronize_rcu();
complete = true;
for (i = 0; i < tbl->size; i++) {
nft_hash_chain_unzip(set, ntbl, tbl, i);
if (tbl->buckets[i] != NULL)
complete = false;
}
} while (!complete);
nft_hash_tbl_free(tbl);
return 0;
}
static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
{
struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
struct nft_hash_elem __rcu **pprev;
unsigned int i;
ntbl = nft_hash_tbl_alloc(tbl->size / 2);
if (ntbl == NULL)
return -ENOMEM;
for (i = 0; i < ntbl->size; i++) {
ntbl->buckets[i] = tbl->buckets[i];
for (pprev = &ntbl->buckets[i]; *pprev != NULL;
pprev = &nft_dereference(*pprev)->next)
;
RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
}
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
synchronize_rcu();
nft_hash_tbl_free(tbl);
return 0;
}
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem *he;
unsigned int size, h;
if (elem->flags != 0)
return -EINVAL;
size = sizeof(*he);
if (set->flags & NFT_SET_MAP)
size += sizeof(he->data[0]);
he = kzalloc(size, GFP_KERNEL);
if (he == NULL)
return -ENOMEM;
nft_data_copy(&he->key, &elem->key);
if (set->flags & NFT_SET_MAP)
nft_data_copy(he->data, &elem->data);
h = nft_hash_data(&he->key, tbl->size, set->klen);
RCU_INIT_POINTER(he->next, tbl->buckets[h]);
rcu_assign_pointer(tbl->buckets[h], he);
/* Expand table when exceeding 75% load */
if (set->nelems + 1 > tbl->size / 4 * 3)
nft_hash_tbl_expand(set, priv);
return 0;
}
static void nft_hash_elem_destroy(const struct nft_set *set,
struct nft_hash_elem *he)
{
nft_data_uninit(&he->key, NFT_DATA_VALUE);
if (set->flags & NFT_SET_MAP)
nft_data_uninit(he->data, set->dtype);
kfree(he);
}
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem *he, __rcu **pprev;
pprev = elem->cookie;
he = nft_dereference((*pprev));
RCU_INIT_POINTER(*pprev, he->next);
synchronize_rcu();
kfree(he);
/* Shrink table beneath 30% load */
if (set->nelems - 1 < tbl->size * 3 / 10 &&
tbl->size > NFT_HASH_MIN_SIZE)
nft_hash_tbl_shrink(set, priv);
}
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
const struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem __rcu * const *pprev;
struct nft_hash_elem *he;
unsigned int h;
h = nft_hash_data(&elem->key, tbl->size, set->klen);
pprev = &tbl->buckets[h];
nft_hash_for_each_entry(he, tbl->buckets[h]) {
if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
pprev = &he->next;
continue;
}
elem->cookie = (void *)pprev;
elem->flags = 0;
if (set->flags & NFT_SET_MAP)
nft_data_copy(&elem->data, he->data);
return 0;
}
return -ENOENT;
}
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
const struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
const struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
for (i = 0; i < tbl->size; i++) {
nft_hash_for_each_entry(he, tbl->buckets[i]) {
if (iter->count < iter->skip)
goto cont;
memcpy(&elem.key, &he->key, sizeof(elem.key));
if (set->flags & NFT_SET_MAP)
memcpy(&elem.data, he->data, sizeof(elem.data));
elem.flags = 0;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0)
return;
cont:
iter->count++;
}
}
}
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
return sizeof(struct nft_hash);
}
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_table *tbl;
unsigned int size;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
size = NFT_HASH_MIN_SIZE;
if (desc->size)
size = nft_hash_tbl_size(desc->size);
tbl = nft_hash_tbl_alloc(size);
if (tbl == NULL)
return -ENOMEM;
RCU_INIT_POINTER(priv->tbl, tbl);
return 0;
}
static void nft_hash_destroy(const struct nft_set *set)
{
const struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem *he, *next;
unsigned int i;
for (i = 0; i < tbl->size; i++) {
for (he = nft_dereference(tbl->buckets[i]); he != NULL;
he = next) {
next = nft_dereference(he->next);
nft_hash_elem_destroy(set, he);
}
}
kfree(tbl);
}
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est)
{
unsigned int esize;
esize = sizeof(struct nft_hash_elem);
if (features & NFT_SET_MAP)
esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
if (desc->size) {
est->size = sizeof(struct nft_hash) +
nft_hash_tbl_size(desc->size) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
} else {
/* Resizing happens when the load drops below 30% or goes
* above 75%. The average of 52.5% load (approximated by 50%)
* is used for the size estimation of the hash buckets,
* meaning we calculate two buckets per element.
*/
est->size = esize + 2 * sizeof(struct nft_hash_elem *);
}
est->class = NFT_SET_CLASS_O_1;
return true;
}
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.get = nft_hash_get,
.insert = nft_hash_insert,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
.features = NFT_SET_MAP,
.owner = THIS_MODULE,
};
static int __init nft_hash_module_init(void)
{
return nft_register_set(&nft_hash_ops);
}
static void __exit nft_hash_module_exit(void)
{
nft_unregister_set(&nft_hash_ops);
}
module_init(nft_hash_module_init);
module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_SET();