Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--	kernel/bpf/hashtab.c	84
1 files changed, 73 insertions, 11 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650d52fc..570eeca7bdfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };

+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -38,7 +45,10 @@ struct htab_elem {
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@ free_elems:
 	return err;
 }

+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+	       cost += (u64) htab->elem_size * num_possible_cpus();

 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}

+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}

 	return &htab->map;

+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }

 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)

 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)

 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;

 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}

 	memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;

-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
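
Not part of the diff above: a minimal userspace sketch of the behavior the new per-cpu extra_elems path enables, assuming the bpf_create_map() and bpf_map_update_elem() wrappers from the kernel's tools/lib/bpf. Once a preallocated BPF_MAP_TYPE_HASH map is full, inserting a new key still fails with E2BIG (the freelist is empty), but overwriting an existing key now succeeds because alloc_htab_elem() falls back to the per-cpu extra element instead of returning -E2BIG.

/* Illustrative only; error handling trimmed. */
#include <assert.h>
#include <linux/bpf.h>
#include "bpf.h"	/* tools/lib/bpf syscall wrappers */

int main(void)
{
	long key, value = 1;
	int fd;

	/* default (preallocated) hash map with room for two elements */
	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
			    2 /* max_entries */, 0 /* map_flags */);
	assert(fd >= 0);

	/* fill the map up to max_entries */
	for (key = 0; key < 2; key++)
		assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);

	/* a brand-new key does not fit: the freelist is exhausted (E2BIG) */
	key = 2;
	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) != 0);

	/* overwriting an existing key succeeds via the per-cpu extra element */
	key = 1;
	value = 2;
	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
	return 0;
}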
