Skip to content

Commit eaa6bcb

Browse files
haoluo1022 authored and Alexei Starovoitov committed
bpf: Introduce bpf_per_cpu_ptr()
Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the kernel except that it may return NULL. This happens when the cpu parameter is out of range. So the caller must check the returned value. Signed-off-by: Hao Luo <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Andrii Nakryiko <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 2c2f6ab commit eaa6bcb

File tree

8 files changed

+132
-13
lines changed

8 files changed

+132
-13
lines changed

include/linux/bpf.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ enum bpf_arg_type {
293293
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
294294
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
295295
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
296+
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
296297
__BPF_ARG_TYPE_MAX,
297298
};
298299

@@ -307,6 +308,7 @@ enum bpf_return_type {
307308
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
308309
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
309310
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
311+
RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
310312
};
311313

312314
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +407,7 @@ enum bpf_reg_type {
405407
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
406408
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
407409
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
410+
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
408411
};
409412

410413
/* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1831,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
18281831
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
18291832
extern const struct bpf_func_proto bpf_copy_from_user_proto;
18301833
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
1834+
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
18311835

18321836
const struct bpf_func_proto *bpf_tracing_func_proto(
18331837
enum bpf_func_id func_id, const struct bpf_prog *prog);

include/linux/btf.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
110110
i < btf_type_vlen(struct_type); \
111111
i++, member++)
112112

113+
#define for_each_vsi(i, datasec_type, member) \
114+
for (i = 0, member = btf_type_var_secinfo(datasec_type); \
115+
i < btf_type_vlen(datasec_type); \
116+
i++, member++)
117+
113118
static inline bool btf_type_is_ptr(const struct btf_type *t)
114119
{
115120
return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -194,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
194199
return (const struct btf_member *)(t + 1);
195200
}
196201

202+
static inline const struct btf_var_secinfo *btf_type_var_secinfo(
203+
const struct btf_type *t)
204+
{
205+
return (const struct btf_var_secinfo *)(t + 1);
206+
}
207+
197208
#ifdef CONFIG_BPF_SYSCALL
198209
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
199210
const char *btf_name_by_offset(const struct btf *btf, u32 offset);

include/uapi/linux/bpf.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3686,6 +3686,23 @@ union bpf_attr {
36863686
* Return
36873687
* The helper returns **TC_ACT_REDIRECT** on success or
36883688
* **TC_ACT_SHOT** on error.
3689+
*
3690+
* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
3691+
* Description
3692+
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
3693+
* pointer to the percpu kernel variable on *cpu*. A ksym is an
3694+
* extern variable decorated with '__ksym'. For ksym, there is a
3695+
* global var (either static or global) defined of the same name
3696+
* in the kernel. The ksym is percpu if the global var is percpu.
3697+
* The returned pointer points to the global percpu var on *cpu*.
3698+
*
3699+
* bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the
3700+
* kernel, except that bpf_per_cpu_ptr() may return NULL. This
3701+
* happens if *cpu* is larger than or equal to nr_cpu_ids. The caller of
3702+
* bpf_per_cpu_ptr() must check the returned value.
3703+
* Return
3704+
* A pointer pointing to the kernel percpu variable on *cpu*, or
3705+
* NULL, if *cpu* is invalid.
36893706
*/
36903707
#define __BPF_FUNC_MAPPER(FN) \
36913708
FN(unspec), \
@@ -3841,6 +3858,7 @@ union bpf_attr {
38413858
FN(seq_printf_btf), \
38423859
FN(skb_cgroup_classid), \
38433860
FN(redirect_neigh), \
3861+
FN(bpf_per_cpu_ptr), \
38443862
/* */
38453863

38463864
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

kernel/bpf/btf.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,6 @@
188188
i < btf_type_vlen(struct_type); \
189189
i++, member++)
190190

191-
#define for_each_vsi(i, struct_type, member) \
192-
for (i = 0, member = btf_type_var_secinfo(struct_type); \
193-
i < btf_type_vlen(struct_type); \
194-
i++, member++)
195-
196191
#define for_each_vsi_from(i, from, struct_type, member) \
197192
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
198193
i < btf_type_vlen(struct_type); \
@@ -598,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
598593
return (const struct btf_var *)(t + 1);
599594
}
600595

601-
static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
602-
{
603-
return (const struct btf_var_secinfo *)(t + 1);
604-
}
605-
606596
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
607597
{
608598
return kind_ops[BTF_INFO_KIND(t->info)];

kernel/bpf/helpers.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,22 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
623623
.arg3_type = ARG_ANYTHING,
624624
};
625625

626+
BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
627+
{
628+
if (cpu >= nr_cpu_ids)
629+
return (unsigned long)NULL;
630+
631+
return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
632+
}
633+
634+
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
635+
.func = bpf_per_cpu_ptr,
636+
.gpl_only = false,
637+
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
638+
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
639+
.arg2_type = ARG_ANYTHING,
640+
};
641+
626642
const struct bpf_func_proto bpf_get_current_task_proto __weak;
627643
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
628644
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -689,6 +705,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
689705
return &bpf_snprintf_btf_proto;
690706
case BPF_FUNC_jiffies64:
691707
return &bpf_jiffies64_proto;
708+
case BPF_FUNC_bpf_per_cpu_ptr:
709+
return &bpf_per_cpu_ptr_proto;
692710
default:
693711
break;
694712
}

kernel/bpf/verifier.c

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
238238
u64 msize_max_value;
239239
int ref_obj_id;
240240
int func_id;
241+
u32 btf_id;
242+
u32 ret_btf_id;
241243
};
242244

243245
struct btf *btf_vmlinux;
@@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
517519
[PTR_TO_XDP_SOCK] = "xdp_sock",
518520
[PTR_TO_BTF_ID] = "ptr_",
519521
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
522+
[PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
520523
[PTR_TO_MEM] = "mem",
521524
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
522525
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
@@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
583586
/* reg->off should be 0 for SCALAR_VALUE */
584587
verbose(env, "%lld", reg->var_off.value + reg->off);
585588
} else {
586-
if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
589+
if (t == PTR_TO_BTF_ID ||
590+
t == PTR_TO_BTF_ID_OR_NULL ||
591+
t == PTR_TO_PERCPU_BTF_ID)
587592
verbose(env, "%s", kernel_type_name(reg->btf_id));
588593
verbose(env, "(id=%d", reg->id);
589594
if (reg_type_may_be_refcounted_or_null(t))
@@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
22042209
case PTR_TO_RDONLY_BUF_OR_NULL:
22052210
case PTR_TO_RDWR_BUF:
22062211
case PTR_TO_RDWR_BUF_OR_NULL:
2212+
case PTR_TO_PERCPU_BTF_ID:
22072213
return true;
22082214
default:
22092215
return false;
@@ -4017,6 +4023,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
40174023
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
40184024
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
40194025
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
4026+
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
40204027

40214028
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
40224029
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4042,6 +4049,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
40424049
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
40434050
[ARG_PTR_TO_INT] = &int_ptr_types,
40444051
[ARG_PTR_TO_LONG] = &int_ptr_types,
4052+
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
40454053
};
40464054

40474055
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4205,6 +4213,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
42054213
err = check_helper_mem_access(env, regno,
42064214
meta->map_ptr->value_size, false,
42074215
meta);
4216+
} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4217+
if (!reg->btf_id) {
4218+
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4219+
return -EACCES;
4220+
}
4221+
meta->ret_btf_id = reg->btf_id;
42084222
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
42094223
if (meta->func_id == BPF_FUNC_spin_lock) {
42104224
if (process_spin_lock(env, regno, true))
@@ -5114,6 +5128,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
51145128
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
51155129
regs[BPF_REG_0].id = ++env->id_gen;
51165130
regs[BPF_REG_0].mem_size = meta.mem_size;
5131+
} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) {
5132+
const struct btf_type *t;
5133+
5134+
mark_reg_known_zero(env, regs, BPF_REG_0);
5135+
t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5136+
if (!btf_type_is_struct(t)) {
5137+
u32 tsize;
5138+
const struct btf_type *ret;
5139+
const char *tname;
5140+
5141+
/* resolve the type size of ksym. */
5142+
ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5143+
if (IS_ERR(ret)) {
5144+
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5145+
verbose(env, "unable to resolve the size of type '%s': %ld\n",
5146+
tname, PTR_ERR(ret));
5147+
return -EINVAL;
5148+
}
5149+
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
5150+
regs[BPF_REG_0].mem_size = tsize;
5151+
} else {
5152+
regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
5153+
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5154+
}
51175155
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
51185156
int ret_btf_id;
51195157

@@ -7523,6 +7561,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
75237561
dst_reg->mem_size = aux->btf_var.mem_size;
75247562
break;
75257563
case PTR_TO_BTF_ID:
7564+
case PTR_TO_PERCPU_BTF_ID:
75267565
dst_reg->btf_id = aux->btf_var.btf_id;
75277566
break;
75287567
default:
@@ -9449,10 +9488,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
94499488
struct bpf_insn *insn,
94509489
struct bpf_insn_aux_data *aux)
94519490
{
9452-
u32 type, id = insn->imm;
9491+
u32 datasec_id, type, id = insn->imm;
9492+
const struct btf_var_secinfo *vsi;
9493+
const struct btf_type *datasec;
94539494
const struct btf_type *t;
94549495
const char *sym_name;
9496+
bool percpu = false;
94559497
u64 addr;
9498+
int i;
94569499

94579500
if (!btf_vmlinux) {
94589501
verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
@@ -9484,12 +9527,27 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
94849527
return -ENOENT;
94859528
}
94869529

9530+
datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
9531+
BTF_KIND_DATASEC);
9532+
if (datasec_id > 0) {
9533+
datasec = btf_type_by_id(btf_vmlinux, datasec_id);
9534+
for_each_vsi(i, datasec, vsi) {
9535+
if (vsi->type == id) {
9536+
percpu = true;
9537+
break;
9538+
}
9539+
}
9540+
}
9541+
94879542
insn[0].imm = (u32)addr;
94889543
insn[1].imm = addr >> 32;
94899544

94909545
type = t->type;
94919546
t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
9492-
if (!btf_type_is_struct(t)) {
9547+
if (percpu) {
9548+
aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
9549+
aux->btf_var.btf_id = type;
9550+
} else if (!btf_type_is_struct(t)) {
94939551
const struct btf_type *ret;
94949552
const char *tname;
94959553
u32 tsize;

kernel/trace/bpf_trace.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
13271327
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
13281328
case BPF_FUNC_snprintf_btf:
13291329
return &bpf_snprintf_btf_proto;
1330+
case BPF_FUNC_bpf_per_cpu_ptr:
1331+
return &bpf_per_cpu_ptr_proto;
13301332
default:
13311333
return NULL;
13321334
}

tools/include/uapi/linux/bpf.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3686,6 +3686,23 @@ union bpf_attr {
36863686
* Return
36873687
* The helper returns **TC_ACT_REDIRECT** on success or
36883688
* **TC_ACT_SHOT** on error.
3689+
*
3690+
* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
3691+
* Description
3692+
* Take a pointer to a percpu ksym, *percpu_ptr*, and return a
3693+
* pointer to the percpu kernel variable on *cpu*. A ksym is an
3694+
* extern variable decorated with '__ksym'. For ksym, there is a
3695+
* global var (either static or global) defined of the same name
3696+
* in the kernel. The ksym is percpu if the global var is percpu.
3697+
* The returned pointer points to the global percpu var on *cpu*.
3698+
*
3699+
* bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the
3700+
* kernel, except that bpf_per_cpu_ptr() may return NULL. This
3701+
* happens if *cpu* is larger than or equal to nr_cpu_ids. The caller of
3702+
* bpf_per_cpu_ptr() must check the returned value.
3703+
* Return
3704+
* A pointer pointing to the kernel percpu variable on *cpu*, or
3705+
* NULL, if *cpu* is invalid.
36893706
*/
36903707
#define __BPF_FUNC_MAPPER(FN) \
36913708
FN(unspec), \
@@ -3841,6 +3858,7 @@ union bpf_attr {
38413858
FN(seq_printf_btf), \
38423859
FN(skb_cgroup_classid), \
38433860
FN(redirect_neigh), \
3861+
FN(bpf_per_cpu_ptr), \
38443862
/* */
38453863

38463864
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

0 commit comments

Comments
 (0)