Commit 8d5d885

netoptimizer authored and davem330 committed
xdp: rhashtable with allocator ID to pointer mapping
Use the IDA infrastructure for getting a cyclic increasing ID number, which is used for keeping track of each registered allocator per RX-queue xdp_rxq_info. Instead of using the IDR infrastructure, which uses a radix tree, use a dynamic rhashtable to create the ID-to-pointer lookup table, because this is faster.

The problem being solved here is that the xdp_rxq_info pointer (stored in xdp_buff) cannot be used directly, as its guaranteed lifetime is too short. The info is needed on a (potentially) remote CPU during DMA-TX completion time. The xdp_mem_info is stored in the xdp_frame when it gets converted from an xdp_buff, which is sufficient for the simple page-refcnt based recycle schemes.

For more advanced allocators there is a need to store a pointer to the registered allocator. Thus, there is a need to guard the lifetime or validity of the allocator pointer, which is done through this rhashtable ID-to-pointer map. The removal and validity of the allocator and helper struct xdp_mem_allocator is guarded by RCU. The allocator will be created by the driver, and registered with xdp_rxq_info_reg_mem_model().

It is up for debate who is responsible for freeing the allocator pointer or invoking the allocator destructor function. In any case, this must happen via RCU freeing.

V4: Per req of Jason Wang
- Use xdp_rxq_info_reg_mem_model() in all drivers implementing XDP_REDIRECT, even though it's not strictly necessary when allocator==NULL for type MEM_TYPE_PAGE_SHARED (given it's zero).

V6: Per req of Alex Duyck
- Introduce the rhashtable_lookup() call in a later patch

V8: Address sparse "should be static" warnings (from kbuild test robot)

Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
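For context, the registration pattern this commit adds to every XDP_REDIRECT-capable driver (see the ixgbe, tun, and virtio_net diffs below) is a minimal sketch like the following; the rxq variable and the err_out unwind label are hypothetical stand-ins for each driver's own setup code:

	/* After xdp_rxq_info_reg() has succeeded during RX-ring setup,
	 * register the memory model.  With allocator == NULL and
	 * MEM_TYPE_PAGE_SHARED, mem.id stays 0 and no rhashtable entry
	 * is created -- registration is kept for API uniformity (V4).
	 */
	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
					 MEM_TYPE_PAGE_SHARED, NULL);
	if (err) {
		xdp_rxq_info_unreg(&rxq->xdp_rxq);
		goto err_out;	/* hypothetical error label */
	}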
1 parent 84f5e3f commit 8d5d885

File tree: 5 files changed, +241 −18 lines

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 8 additions & 1 deletion
@@ -6370,7 +6370,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 	struct device *dev = rx_ring->dev;
 	int orig_node = dev_to_node(dev);
 	int ring_node = -1;
-	int size;
+	int size, err;
 
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 
@@ -6407,6 +6407,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 			     rx_ring->queue_index) < 0)
 		goto err;
 
+	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
+					 MEM_TYPE_PAGE_SHARED, NULL);
+	if (err) {
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+		goto err;
+	}
+
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;

drivers/net/tun.c

Lines changed: 6 additions & 0 deletions
@@ -854,6 +854,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
 				       tun->dev, tfile->queue_index);
 		if (err < 0)
 			goto out;
+		err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err < 0) {
+			xdp_rxq_info_unreg(&tfile->xdp_rxq);
+			goto out;
+		}
 		err = 0;
 	}
 

drivers/net/virtio_net.c

Lines changed: 7 additions & 0 deletions
@@ -1305,6 +1305,13 @@ static int virtnet_open(struct net_device *dev)
 		if (err < 0)
 			return err;
 
+		err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err < 0) {
+			xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
+			return err;
+		}
+
 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
 	}

include/net/xdp.h

Lines changed: 2 additions & 12 deletions
@@ -41,6 +41,7 @@ enum xdp_mem_type {
 
 struct xdp_mem_info {
 	u32 type; /* enum xdp_mem_type, but known size type */
+	u32 id;
 };
 
 struct xdp_rxq_info {
@@ -99,18 +100,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	return xdp_frame;
 }
 
-static inline
-void xdp_return_frame(void *data, struct xdp_mem_info *mem)
-{
-	if (mem->type == MEM_TYPE_PAGE_SHARED)
-		page_frag_free(data);
-
-	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
-		struct page *page = virt_to_page(data); /* Assumes order0 page*/
-
-		put_page(page);
-	}
-}
+void xdp_return_frame(void *data, struct xdp_mem_info *mem);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
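The header change above adds the id field to xdp_mem_info and un-inlines xdp_return_frame(), which now lives in net/core/xdp.c. Since the xdp_mem_info travels inside the xdp_frame (per the commit message), a caller at DMA-TX completion time needs only the frame itself; a minimal sketch, with a hypothetical completion helper name:

	/* Possibly running on a remote CPU, after the originating
	 * xdp_rxq_info may already be unregistered: only the
	 * xdp_mem_info copied into the frame by convert_to_xdp_frame()
	 * is consulted.
	 */
	static void my_xdp_tx_complete(struct xdp_frame *xdpf)
	{
		xdp_return_frame(xdpf->data, &xdpf->mem);
	}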

net/core/xdp.c

Lines changed: 218 additions & 5 deletions
@@ -5,6 +5,9 @@
  */
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/rhashtable.h>
 
 #include <net/xdp.h>
 
@@ -13,6 +16,99 @@
 #define REG_STATE_UNREGISTERED 0x2
 #define REG_STATE_UNUSED 0x3
 
+static DEFINE_IDA(mem_id_pool);
+static DEFINE_MUTEX(mem_id_lock);
+#define MEM_ID_MAX 0xFFFE
+#define MEM_ID_MIN 1
+static int mem_id_next = MEM_ID_MIN;
+
+static bool mem_id_init; /* false */
+static struct rhashtable *mem_id_ht;
+
+struct xdp_mem_allocator {
+	struct xdp_mem_info mem;
+	void *allocator;
+	struct rhash_head node;
+	struct rcu_head rcu;
+};
+
+static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
+{
+	const u32 *k = data;
+	const u32 key = *k;
+
+	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+		     != sizeof(u32));
+
+	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
+	return key << RHT_HASH_RESERVED_SPACE;
+}
+
+static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
+			  const void *ptr)
+{
+	const struct xdp_mem_allocator *xa = ptr;
+	u32 mem_id = *(u32 *)arg->key;
+
+	return xa->mem.id != mem_id;
+}
+
+static const struct rhashtable_params mem_id_rht_params = {
+	.nelem_hint = 64,
+	.head_offset = offsetof(struct xdp_mem_allocator, node),
+	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
+	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+	.max_size = MEM_ID_MAX,
+	.min_size = 8,
+	.automatic_shrinking = true,
+	.hashfn    = xdp_mem_id_hashfn,
+	.obj_cmpfn = xdp_mem_id_cmp,
+};
+
+static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
+{
+	struct xdp_mem_allocator *xa;
+
+	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
+
+	/* Allow this ID to be reused */
+	ida_simple_remove(&mem_id_pool, xa->mem.id);
+
+	/* TODO: Depending on allocator type/pointer free resources */
+
+	/* Poison memory */
+	xa->mem.id = 0xFFFF;
+	xa->mem.type = 0xF0F0;
+	xa->allocator = (void *)0xDEAD9001;
+
+	kfree(xa);
+}
+
+static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+	struct xdp_mem_allocator *xa;
+	int id = xdp_rxq->mem.id;
+	int err;
+
+	if (id == 0)
+		return;
+
+	mutex_lock(&mem_id_lock);
+
+	xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+	if (!xa) {
+		mutex_unlock(&mem_id_lock);
+		return;
+	}
+
+	err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
+	WARN_ON(err);
+
+	call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+
+	mutex_unlock(&mem_id_lock);
+}
+
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 {
 	/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -21,8 +117,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 
 	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
 
+	__xdp_rxq_info_unreg_mem_model(xdp_rxq);
+
 	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
 	xdp_rxq->dev = NULL;
+
+	/* Reset mem info to defaults */
+	xdp_rxq->mem.id = 0;
+	xdp_rxq->mem.type = 0;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
 
@@ -72,20 +174,131 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
 
+static int __mem_id_init_hash_table(void)
+{
+	struct rhashtable *rht;
+	int ret;
+
+	if (unlikely(mem_id_init))
+		return 0;
+
+	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
+	if (!rht)
+		return -ENOMEM;
+
+	ret = rhashtable_init(rht, &mem_id_rht_params);
+	if (ret < 0) {
+		kfree(rht);
+		return ret;
+	}
+	mem_id_ht = rht;
+	smp_mb(); /* mutex lock should provide enough pairing */
+	mem_id_init = true;
+
+	return 0;
+}
+
+/* Allocate a cyclic ID that maps to allocator pointer.
+ * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
+ *
+ * Caller must lock mem_id_lock.
+ */
+static int __mem_id_cyclic_get(gfp_t gfp)
+{
+	int retries = 1;
+	int id;
+
+again:
+	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+	if (id < 0) {
+		if (id == -ENOSPC) {
+			/* Cyclic allocator, reset next id */
+			if (retries--) {
+				mem_id_next = MEM_ID_MIN;
+				goto again;
+			}
+		}
+		return id; /* errno */
+	}
+	mem_id_next = id + 1;
+
+	return id;
+}
+
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum xdp_mem_type type, void *allocator)
 {
+	struct xdp_mem_allocator *xdp_alloc;
+	gfp_t gfp = GFP_KERNEL;
+	int id, errno, ret;
+	void *ptr;
+
+	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+		WARN(1, "Missing register, driver bug");
+		return -EFAULT;
+	}
+
 	if (type >= MEM_TYPE_MAX)
 		return -EINVAL;
 
 	xdp_rxq->mem.type = type;
 
-	if (allocator)
-		return -EOPNOTSUPP;
+	if (!allocator)
+		return 0;
+
+	/* Delay init of rhashtable to save memory if feature isn't used */
+	if (!mem_id_init) {
+		mutex_lock(&mem_id_lock);
+		ret = __mem_id_init_hash_table();
+		mutex_unlock(&mem_id_lock);
+		if (ret < 0) {
+			WARN_ON(1);
+			return ret;
+		}
+	}
+
+	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
+	if (!xdp_alloc)
+		return -ENOMEM;
+
+	mutex_lock(&mem_id_lock);
+	id = __mem_id_cyclic_get(gfp);
+	if (id < 0) {
+		errno = id;
+		goto err;
+	}
+	xdp_rxq->mem.id = id;
+	xdp_alloc->mem  = xdp_rxq->mem;
+	xdp_alloc->allocator = allocator;
+
+	/* Insert allocator into ID lookup table */
+	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
+	if (IS_ERR(ptr)) {
+		errno = PTR_ERR(ptr);
+		goto err;
+	}
+
+	mutex_unlock(&mem_id_lock);
 
-	/* TODO: Allocate an ID that maps to allocator pointer
-	 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
-	 */
 	return 0;
+err:
+	mutex_unlock(&mem_id_lock);
+	kfree(xdp_alloc);
+	return errno;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+
+void xdp_return_frame(void *data, struct xdp_mem_info *mem)
+{
+	if (mem->type == MEM_TYPE_PAGE_SHARED) {
+		page_frag_free(data);
+		return;
+	}
+
+	if (mem->type == MEM_TYPE_PAGE_ORDER0) {
+		struct page *page = virt_to_page(data); /* Assumes order0 page*/
+
+		put_page(page);
+	}
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame);
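Per the V6 note, this commit only populates mem_id_ht; the rhashtable_lookup() fast-path consumer arrives in a later patch. A hedged sketch of how a mem.id would be resolved back to its registered allocator, where use_allocator() is a hypothetical consumer:

	struct xdp_mem_allocator *xa;

	/* The RCU read-side section pairs with the call_rcu() based
	 * freeing in __xdp_mem_allocator_rcu_free(), guarding the
	 * validity of xa for the duration of the critical section.
	 */
	rcu_read_lock();
	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	if (xa)
		use_allocator(xa->allocator);	/* hypothetical */
	rcu_read_unlock();

This is also why xdp_mem_id_hashfn() can use the ID directly as the hash value: the cyclic IDs are unique and sequential, so after the masking done in rht_bucket_index() they spread evenly across buckets.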
