diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/core.c | 5 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 441 | 
2 files changed, 438 insertions, 8 deletions
| diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e4248fe79513..d781b077431f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -794,6 +794,11 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog)  {  } +bool __weak bpf_helper_changes_skb_data(void *func) +{ +	return false; +} +  /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call   * skb_copy_bits(), so provide a weak definition of it for NET-less config.   */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index afeb62808902..6338c61fc2a1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1,4 +1,5 @@  /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook   *   * This program is free software; you can redistribute it and/or   * modify it under the terms of version 2 of the GNU General Public @@ -136,13 +137,32 @@ enum bpf_reg_type {  	FRAME_PTR,		 /* reg == frame_pointer */  	PTR_TO_STACK,		 /* reg == frame_pointer + imm */  	CONST_IMM,		 /* constant integer value */ + +	/* PTR_TO_PACKET represents: +	 * skb->data +	 * skb->data + imm +	 * skb->data + (u16) var +	 * skb->data + (u16) var + imm +	 * if (range > 0) then [ptr, ptr + range - off) is safe to access +	 * if (id > 0) means that some 'var' was added +	 * if (off > 0) menas that 'imm' was added +	 */ +	PTR_TO_PACKET, +	PTR_TO_PACKET_END,	 /* skb->data + headlen */  };  struct reg_state {  	enum bpf_reg_type type;  	union { -		/* valid when type == CONST_IMM | PTR_TO_STACK */ -		long imm; +		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ +		s64 imm; + +		/* valid when type == PTR_TO_PACKET* */ +		struct { +			u32 id; +			u16 off; +			u16 range; +		};  		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |  		 *   PTR_TO_MAP_VALUE_OR_NULL @@ -247,6 +267,8 @@ static const char * const reg_type_str[] = {  	[FRAME_PTR]		= "fp",  	[PTR_TO_STACK]		= "fp",  	[CONST_IMM]		= "imm", +	[PTR_TO_PACKET]		= "pkt", +	[PTR_TO_PACKET_END]	= "pkt_end",  };  static void print_verifier_state(struct verifier_state *state) @@ -262,7 +284,12 @@ static void print_verifier_state(struct verifier_state *state)  			continue;  		verbose(" R%d=%s", i, reg_type_str[t]);  		if (t == CONST_IMM || t == PTR_TO_STACK) -			verbose("%ld", reg->imm); +			verbose("%lld", reg->imm); +		else if (t == PTR_TO_PACKET) +			verbose("(id=%d,off=%d,r=%d)", +				reg->id, reg->off, reg->range); +		else if (t == UNKNOWN_VALUE && reg->imm) +			verbose("%lld", reg->imm);  		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||  			 t == PTR_TO_MAP_VALUE_OR_NULL)  			verbose("(ks=%d,vs=%d)", @@ -548,6 +575,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)  	case PTR_TO_MAP_VALUE_OR_NULL:  	case PTR_TO_STACK:  	case PTR_TO_CTX: +	case PTR_TO_PACKET: +	case PTR_TO_PACKET_END:  	case FRAME_PTR:  	case CONST_PTR_TO_MAP:  		return true; @@ -647,6 +676,27 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,  	return 0;  } +#define MAX_PACKET_OFF 0xffff + +static int check_packet_access(struct verifier_env *env, u32 regno, int off, +			       int size) +{ +	struct reg_state *regs = env->cur_state.regs; +	struct reg_state *reg = ®s[regno]; +	int linear_size = (int) reg->range - (int) reg->off; + +	if (linear_size < 0 || linear_size >= MAX_PACKET_OFF) { +		verbose("verifier bug\n"); +		return -EFAULT; +	} +	if (off < 0 || off + size > linear_size) { +		verbose("invalid access to packet, off=%d size=%d, allowed=%d\n", +			off, size, linear_size); +		return -EACCES; +	} +	return 0; +} +  /* check access to 'struct bpf_context' fields */  static int check_ctx_access(struct verifier_env *env, int off, int size,  			    enum bpf_access_type t) @@ -677,6 +727,45 @@ static bool is_pointer_value(struct verifier_env *env, int regno)  	}  } +static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, +			       int off, int size) +{ +	if (reg->type != PTR_TO_PACKET) { +		if (off % size != 0) { +			verbose("misaligned access off %d size %d\n", off, size); +			return -EACCES; +		} else { +			return 0; +		} +	} + +	switch (env->prog->type) { +	case BPF_PROG_TYPE_SCHED_CLS: +	case BPF_PROG_TYPE_SCHED_ACT: +		break; +	default: +		verbose("verifier is misconfigured\n"); +		return -EACCES; +	} + +	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) +		/* misaligned access to packet is ok on x86,arm,arm64 */ +		return 0; + +	if (reg->id && size != 1) { +		verbose("Unknown packet alignment. Only byte-sized access allowed\n"); +		return -EACCES; +	} + +	/* skb->data is NET_IP_ALIGN-ed */ +	if ((NET_IP_ALIGN + reg->off + off) % size != 0) { +		verbose("misaligned packet access off %d+%d+%d size %d\n", +			NET_IP_ALIGN, reg->off, off, size); +		return -EACCES; +	} +	return 0; +} +  /* check whether memory at (regno + off) is accessible for t = (read | write)   * if t==write, value_regno is a register which value is stored into memory   * if t==read, value_regno is a register which will receive the value from memory @@ -698,10 +787,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,  	if (size < 0)  		return size; -	if (off % size != 0) { -		verbose("misaligned access off %d size %d\n", off, size); -		return -EACCES; -	} +	err = check_ptr_alignment(env, reg, off, size); +	if (err) +		return err;  	if (reg->type == PTR_TO_MAP_VALUE) {  		if (t == BPF_WRITE && value_regno >= 0 && @@ -720,8 +808,16 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,  			return -EACCES;  		}  		err = check_ctx_access(env, off, size, t); -		if (!err && t == BPF_READ && value_regno >= 0) +		if (!err && t == BPF_READ && value_regno >= 0) {  			mark_reg_unknown_value(state->regs, value_regno); +			if (off == offsetof(struct __sk_buff, data) && +			    env->allow_ptr_leaks) +				/* note that reg.[id|off|range] == 0 */ +				state->regs[value_regno].type = PTR_TO_PACKET; +			else if (off == offsetof(struct __sk_buff, data_end) && +				 env->allow_ptr_leaks) +				state->regs[value_regno].type = PTR_TO_PACKET_END; +		}  	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {  		if (off >= 0 || off < -MAX_BPF_STACK) { @@ -739,11 +835,28 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,  		} else {  			err = check_stack_read(state, off, size, value_regno);  		} +	} else if (state->regs[regno].type == PTR_TO_PACKET) { +		if (t == BPF_WRITE) { +			verbose("cannot write into packet\n"); +			return -EACCES; +		} +		err = check_packet_access(env, regno, off, size); +		if (!err && t == BPF_READ && value_regno >= 0) +			mark_reg_unknown_value(state->regs, value_regno);  	} else {  		verbose("R%d invalid mem access '%s'\n",  			regno, reg_type_str[reg->type]);  		return -EACCES;  	} + +	if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks && +	    state->regs[value_regno].type == UNKNOWN_VALUE) { +		/* 1 or 2 byte load zero-extends, determine the number of +		 * zero upper bits. Not doing it fo 4 byte load, since +		 * such values cannot be added to ptr_to_packet anyway. +		 */ +		state->regs[value_regno].imm = 64 - size * 8; +	}  	return err;  } @@ -1001,6 +1114,29 @@ static int check_raw_mode(const struct bpf_func_proto *fn)  	return count > 1 ? -EINVAL : 0;  } +static void clear_all_pkt_pointers(struct verifier_env *env) +{ +	struct verifier_state *state = &env->cur_state; +	struct reg_state *regs = state->regs, *reg; +	int i; + +	for (i = 0; i < MAX_BPF_REG; i++) +		if (regs[i].type == PTR_TO_PACKET || +		    regs[i].type == PTR_TO_PACKET_END) +			mark_reg_unknown_value(regs, i); + +	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +		if (state->stack_slot_type[i] != STACK_SPILL) +			continue; +		reg = &state->spilled_regs[i / BPF_REG_SIZE]; +		if (reg->type != PTR_TO_PACKET && +		    reg->type != PTR_TO_PACKET_END) +			continue; +		reg->type = UNKNOWN_VALUE; +		reg->imm = 0; +	} +} +  static int check_call(struct verifier_env *env, int func_id)  {  	struct verifier_state *state = &env->cur_state; @@ -1008,6 +1144,7 @@ static int check_call(struct verifier_env *env, int func_id)  	struct reg_state *regs = state->regs;  	struct reg_state *reg;  	struct bpf_call_arg_meta meta; +	bool changes_data;  	int i, err;  	/* find function prototype */ @@ -1030,6 +1167,8 @@ static int check_call(struct verifier_env *env, int func_id)  		return -EINVAL;  	} +	changes_data = bpf_helper_changes_skb_data(fn->func); +  	memset(&meta, 0, sizeof(meta));  	/* We only support one arg being in raw mode at the moment, which @@ -1100,6 +1239,189 @@ static int check_call(struct verifier_env *env, int func_id)  	if (err)  		return err; +	if (changes_data) +		clear_all_pkt_pointers(env); +	return 0; +} + +static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +{ +	struct reg_state *regs = env->cur_state.regs; +	struct reg_state *dst_reg = ®s[insn->dst_reg]; +	struct reg_state *src_reg = ®s[insn->src_reg]; +	s32 imm; + +	if (BPF_SRC(insn->code) == BPF_K) { +		/* pkt_ptr += imm */ +		imm = insn->imm; + +add_imm: +		if (imm <= 0) { +			verbose("addition of negative constant to packet pointer is not allowed\n"); +			return -EACCES; +		} +		if (imm >= MAX_PACKET_OFF || +		    imm + dst_reg->off >= MAX_PACKET_OFF) { +			verbose("constant %d is too large to add to packet pointer\n", +				imm); +			return -EACCES; +		} +		/* a constant was added to pkt_ptr. +		 * Remember it while keeping the same 'id' +		 */ +		dst_reg->off += imm; +	} else { +		if (src_reg->type == CONST_IMM) { +			/* pkt_ptr += reg where reg is known constant */ +			imm = src_reg->imm; +			goto add_imm; +		} +		/* disallow pkt_ptr += reg +		 * if reg is not uknown_value with guaranteed zero upper bits +		 * otherwise pkt_ptr may overflow and addition will become +		 * subtraction which is not allowed +		 */ +		if (src_reg->type != UNKNOWN_VALUE) { +			verbose("cannot add '%s' to ptr_to_packet\n", +				reg_type_str[src_reg->type]); +			return -EACCES; +		} +		if (src_reg->imm < 48) { +			verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n", +				src_reg->imm); +			return -EACCES; +		} +		/* dst_reg stays as pkt_ptr type and since some positive +		 * integer value was added to the pointer, increment its 'id' +		 */ +		dst_reg->id++; + +		/* something was added to pkt_ptr, set range and off to zero */ +		dst_reg->off = 0; +		dst_reg->range = 0; +	} +	return 0; +} + +static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +{ +	struct reg_state *regs = env->cur_state.regs; +	struct reg_state *dst_reg = ®s[insn->dst_reg]; +	u8 opcode = BPF_OP(insn->code); +	s64 imm_log2; + +	/* for type == UNKNOWN_VALUE: +	 * imm > 0 -> number of zero upper bits +	 * imm == 0 -> don't track which is the same as all bits can be non-zero +	 */ + +	if (BPF_SRC(insn->code) == BPF_X) { +		struct reg_state *src_reg = ®s[insn->src_reg]; + +		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && +		    dst_reg->imm && opcode == BPF_ADD) { +			/* dreg += sreg +			 * where both have zero upper bits. Adding them +			 * can only result making one more bit non-zero +			 * in the larger value. +			 * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47) +			 *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47) +			 */ +			dst_reg->imm = min(dst_reg->imm, src_reg->imm); +			dst_reg->imm--; +			return 0; +		} +		if (src_reg->type == CONST_IMM && src_reg->imm > 0 && +		    dst_reg->imm && opcode == BPF_ADD) { +			/* dreg += sreg +			 * where dreg has zero upper bits and sreg is const. +			 * Adding them can only result making one more bit +			 * non-zero in the larger value. +			 */ +			imm_log2 = __ilog2_u64((long long)src_reg->imm); +			dst_reg->imm = min(dst_reg->imm, 63 - imm_log2); +			dst_reg->imm--; +			return 0; +		} +		/* all other cases non supported yet, just mark dst_reg */ +		dst_reg->imm = 0; +		return 0; +	} + +	/* sign extend 32-bit imm into 64-bit to make sure that +	 * negative values occupy bit 63. Note ilog2() would have +	 * been incorrect, since sizeof(insn->imm) == 4 +	 */ +	imm_log2 = __ilog2_u64((long long)insn->imm); + +	if (dst_reg->imm && opcode == BPF_LSH) { +		/* reg <<= imm +		 * if reg was a result of 2 byte load, then its imm == 48 +		 * which means that upper 48 bits are zero and shifting this reg +		 * left by 4 would mean that upper 44 bits are still zero +		 */ +		dst_reg->imm -= insn->imm; +	} else if (dst_reg->imm && opcode == BPF_MUL) { +		/* reg *= imm +		 * if multiplying by 14 subtract 4 +		 * This is conservative calculation of upper zero bits. +		 * It's not trying to special case insn->imm == 1 or 0 cases +		 */ +		dst_reg->imm -= imm_log2 + 1; +	} else if (opcode == BPF_AND) { +		/* reg &= imm */ +		dst_reg->imm = 63 - imm_log2; +	} else if (dst_reg->imm && opcode == BPF_ADD) { +		/* reg += imm */ +		dst_reg->imm = min(dst_reg->imm, 63 - imm_log2); +		dst_reg->imm--; +	} else if (opcode == BPF_RSH) { +		/* reg >>= imm +		 * which means that after right shift, upper bits will be zero +		 * note that verifier already checked that +		 * 0 <= imm < 64 for shift insn +		 */ +		dst_reg->imm += insn->imm; +		if (unlikely(dst_reg->imm > 64)) +			/* some dumb code did: +			 * r2 = *(u32 *)mem; +			 * r2 >>= 32; +			 * and all bits are zero now */ +			dst_reg->imm = 64; +	} else { +		/* all other alu ops, means that we don't know what will +		 * happen to the value, mark it with unknown number of zero bits +		 */ +		dst_reg->imm = 0; +	} + +	if (dst_reg->imm < 0) { +		/* all 64 bits of the register can contain non-zero bits +		 * and such value cannot be added to ptr_to_packet, since it +		 * may overflow, mark it as unknown to avoid further eval +		 */ +		dst_reg->imm = 0; +	} +	return 0; +} + +static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +{ +	struct reg_state *regs = env->cur_state.regs; +	struct reg_state *dst_reg = ®s[insn->dst_reg]; +	struct reg_state *src_reg = ®s[insn->src_reg]; +	u8 opcode = BPF_OP(insn->code); + +	/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. +	 * Don't care about overflow or negative values, just add them +	 */ +	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) +		dst_reg->imm += insn->imm; +	else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X && +		 src_reg->type == CONST_IMM) +		dst_reg->imm += src_reg->imm; +	else +		mark_reg_unknown_value(regs, insn->dst_reg);  	return 0;  } @@ -1245,6 +1567,21 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)  			dst_reg->type = PTR_TO_STACK;  			dst_reg->imm = insn->imm;  			return 0; +		} else if (opcode == BPF_ADD && +			   BPF_CLASS(insn->code) == BPF_ALU64 && +			   dst_reg->type == PTR_TO_PACKET) { +			/* ptr_to_packet += K|X */ +			return check_packet_ptr_add(env, insn); +		} else if (BPF_CLASS(insn->code) == BPF_ALU64 && +			   dst_reg->type == UNKNOWN_VALUE && +			   env->allow_ptr_leaks) { +			/* unknown += K|X */ +			return evaluate_reg_alu(env, insn); +		} else if (BPF_CLASS(insn->code) == BPF_ALU64 && +			   dst_reg->type == CONST_IMM && +			   env->allow_ptr_leaks) { +			/* reg_imm += K|X */ +			return evaluate_reg_imm_alu(env, insn);  		} else if (is_pointer_value(env, insn->dst_reg)) {  			verbose("R%d pointer arithmetic prohibited\n",  				insn->dst_reg); @@ -1263,6 +1600,34 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)  	return 0;  } +static void find_good_pkt_pointers(struct verifier_env *env, +				   struct reg_state *dst_reg) +{ +	struct verifier_state *state = &env->cur_state; +	struct reg_state *regs = state->regs, *reg; +	int i; +	/* r2 = r3; +	 * r2 += 8 +	 * if (r2 > pkt_end) goto somewhere +	 * r2 == dst_reg, pkt_end == src_reg, +	 * r2=pkt(id=n,off=8,r=0) +	 * r3=pkt(id=n,off=0,r=0) +	 * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) +	 * so that range of bytes [r3, r3 + 8) is safe to access +	 */ +	for (i = 0; i < MAX_BPF_REG; i++) +		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) +			regs[i].range = dst_reg->off; + +	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +		if (state->stack_slot_type[i] != STACK_SPILL) +			continue; +		reg = &state->spilled_regs[i / BPF_REG_SIZE]; +		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) +			reg->range = dst_reg->off; +	} +} +  static int check_cond_jmp_op(struct verifier_env *env,  			     struct bpf_insn *insn, int *insn_idx)  { @@ -1346,6 +1711,10 @@ static int check_cond_jmp_op(struct verifier_env *env,  			regs[insn->dst_reg].type = CONST_IMM;  			regs[insn->dst_reg].imm = 0;  		} +	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && +		   dst_reg->type == PTR_TO_PACKET && +		   regs[insn->src_reg].type == PTR_TO_PACKET_END) { +		find_good_pkt_pointers(env, dst_reg);  	} else if (is_pointer_value(env, insn->dst_reg)) {  		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);  		return -EACCES; @@ -1685,6 +2054,58 @@ err_free:  	return ret;  } +/* the following conditions reduce the number of explored insns + * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet + */ +static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +{ +	if (old->id != cur->id) +		return false; + +	/* old ptr_to_packet is more conservative, since it allows smaller +	 * range. Ex: +	 * old(off=0,r=10) is equal to cur(off=0,r=20), because +	 * old(off=0,r=10) means that with range=10 the verifier proceeded +	 * further and found no issues with the program. Now we're in the same +	 * spot with cur(off=0,r=20), so we're safe too, since anything further +	 * will only be looking at most 10 bytes after this pointer. +	 */ +	if (old->off == cur->off && old->range < cur->range) +		return true; + +	/* old(off=20,r=10) is equal to cur(off=22,re=22 or 5 or 0) +	 * since both cannot be used for packet access and safe(old) +	 * pointer has smaller off that could be used for further +	 * 'if (ptr > data_end)' check +	 * Ex: +	 * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean +	 * that we cannot access the packet. +	 * The safe range is: +	 * [ptr, ptr + range - off) +	 * so whenever off >=range, it means no safe bytes from this pointer. +	 * When comparing old->off <= cur->off, it means that older code +	 * went with smaller offset and that offset was later +	 * used to figure out the safe range after 'if (ptr > data_end)' check +	 * Say, 'old' state was explored like: +	 * ... R3(off=0, r=0) +	 * R4 = R3 + 20 +	 * ... now R4(off=20,r=0)  <-- here +	 * if (R4 > data_end) +	 * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access. +	 * ... the code further went all the way to bpf_exit. +	 * Now the 'cur' state at the mark 'here' has R4(off=30,r=0). +	 * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier +	 * goes further, such cur_R4 will give larger safe packet range after +	 * 'if (R4 > data_end)' and all further insn were already good with r=20, +	 * so they will be good with r=30 and we can prune the search. +	 */ +	if (old->off <= cur->off && +	    old->off >= old->range && cur->off >= cur->range) +		return true; + +	return false; +} +  /* compare two verifier states   *   * all states stored in state_list are known to be valid, since @@ -1727,6 +2148,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)  		    (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))  			continue; +		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && +		    compare_ptrs_to_packet(rold, rcur)) +			continue; +  		return false;  	} | 
