diff options
| -rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 2 |
| -rw-r--r-- | Documentation/trace/debugging.rst | 2 |
| -rw-r--r-- | kernel/trace/ring_buffer.c | 5 |
| -rw-r--r-- | kernel/trace/trace.c | 66 |
| -rw-r--r-- | kernel/trace/trace.h | 1 | 
5 files changed, 50 insertions, 26 deletions
| diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f35d5b8c296..f5af86b3c4a2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7288,6 +7288,8 @@  			This is just one of many ways that can clear memory. Make sure your system  			keeps the content of memory across reboots before relying on this option. +			NB: Both the mapped address and size must be page aligned for the architecture. +  			See also Documentation/trace/debugging.rst diff --git a/Documentation/trace/debugging.rst b/Documentation/trace/debugging.rst index 54fb16239d70..d54bc500af80 100644 --- a/Documentation/trace/debugging.rst +++ b/Documentation/trace/debugging.rst @@ -136,6 +136,8 @@ kernel, so only the same kernel is guaranteed to work if the mapping is  preserved. Switching to a different kernel version may find a different  layout and mark the buffer as invalid. +NB: Both the mapped address and size must be page aligned for the architecture. 
+  Using trace_printk() in the boot instance  -----------------------------------------  By default, the content of trace_printk() goes into the top level tracing diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d8d7b28e2c2f..c0f877d39a24 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6016,7 +6016,7 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)  	meta->read = cpu_buffer->read;  	/* Some archs do not have data cache coherency between kernel and user-space */ -	flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page)); +	flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE);  }  static void @@ -7319,7 +7319,8 @@ consume:  out:  	/* Some archs do not have data cache coherency between kernel and user-space */ -	flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page)); +	flush_kernel_vmap_range(cpu_buffer->reader_page->page, +				buffer->subbuf_size + BUF_PAGE_HDR_SIZE);  	rb_update_meta_page(cpu_buffer); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d058b519d502..b581e388a9d9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -50,6 +50,7 @@  #include <linux/irq_work.h>  #include <linux/workqueue.h>  #include <linux/sort.h> +#include <linux/io.h> /* vmap_page_range() */  #include <asm/setup.h> /* COMMAND_LINE_SIZE */ @@ -8500,6 +8501,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)  	struct trace_iterator *iter = &info->iter;  	int ret = 0; +	/* A memmap'ed buffer is not supported for user space mmap */ +	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP) +		return -ENODEV; +  	/* Currently the boot mapped buffer is not supported for mmap */  	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)  		return -ENODEV; @@ -9609,9 +9614,6 @@ static void free_trace_buffers(struct trace_array *tr)  #ifdef CONFIG_TRACER_MAX_TRACE  	free_trace_buffer(&tr->max_buffer);  #endif - -	if (tr->range_addr_start) -		vunmap((void 
*)tr->range_addr_start);  }  static void init_trace_flags_index(struct trace_array *tr) @@ -9804,29 +9806,27 @@ static int instance_mkdir(const char *name)  	return ret;  } -static u64 map_pages(u64 start, u64 size) +static u64 map_pages(unsigned long start, unsigned long size)  { -	struct page **pages; -	phys_addr_t page_start; -	unsigned int page_count; -	unsigned int i; -	void *vaddr; - -	page_count = DIV_ROUND_UP(size, PAGE_SIZE); +	unsigned long vmap_start, vmap_end; +	struct vm_struct *area; +	int ret; -	page_start = start; -	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); -	if (!pages) +	area = get_vm_area(size, VM_IOREMAP); +	if (!area)  		return 0; -	for (i = 0; i < page_count; i++) { -		phys_addr_t addr = page_start + i * PAGE_SIZE; -		pages[i] = pfn_to_page(addr >> PAGE_SHIFT); +	vmap_start = (unsigned long) area->addr; +	vmap_end = vmap_start + size; + +	ret = vmap_page_range(vmap_start, vmap_end, +			      start, pgprot_nx(PAGE_KERNEL)); +	if (ret < 0) { +		free_vm_area(area); +		return 0;  	} -	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); -	kfree(pages); -	return (u64)(unsigned long)vaddr; +	return (u64)vmap_start;  }  /** @@ -10705,6 +10705,7 @@ static inline void do_allocate_snapshot(const char *name) { }  __init static void enable_instances(void)  {  	struct trace_array *tr; +	bool memmap_area = false;  	char *curr_str;  	char *name;  	char *str; @@ -10773,6 +10774,7 @@ __init static void enable_instances(void)  					name);  				continue;  			} +			memmap_area = true;  		} else if (tok) {  			if (!reserve_mem_find_by_name(tok, &start, &size)) {  				start = 0; @@ -10783,7 +10785,20 @@ __init static void enable_instances(void)  		}  		if (start) { -			addr = map_pages(start, size); +			/* Start and size must be page aligned */ +			if (start & ~PAGE_MASK) { +				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); +				continue; +			} +			if (size & ~PAGE_MASK) { +				pr_warn("Tracing: mapping size 
%pa is not page aligned\n", &size); +				continue; +			} + +			if (memmap_area) +				addr = map_pages(start, size); +			else +				addr = (unsigned long)phys_to_virt(start);  			if (addr) {  				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",  					name, &start, (unsigned long)size); @@ -10810,10 +10825,13 @@ __init static void enable_instances(void)  			update_printk_trace(tr);  		/* -		 * If start is set, then this is a mapped buffer, and -		 * cannot be deleted by user space, so keep the reference -		 * to it. +		 * memmap'd buffers can not be freed.  		 */ +		if (memmap_area) { +			tr->flags |= TRACE_ARRAY_FL_MEMMAP; +			tr->ref++; +		} +  		if (start) {  			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;  			tr->range_name = no_free_ptr(rname); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f4f859d69bcf..79be1995db44 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -447,6 +447,7 @@ enum {  	TRACE_ARRAY_FL_BOOT		= BIT(1),  	TRACE_ARRAY_FL_LAST_BOOT	= BIT(2),  	TRACE_ARRAY_FL_MOD_INIT		= BIT(3), +	TRACE_ARRAY_FL_MEMMAP		= BIT(4),  };  #ifdef CONFIG_MODULES | 
