Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/aperture_64.c                59
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c                 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c              1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c        1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c    22
-rw-r--r--  arch/x86/kernel/uprobes.c                   551
6 files changed, 401 insertions(+), 235 deletions(-)
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9fa8aa051f54..76164e173a24 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -10,6 +10,8 @@
  *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  */
+#define pr_fmt(fmt) "AGP: " fmt
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -75,14 +77,13 @@ static u32 __init allocate_aperture(void)
 	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
 				      aper_size, aper_size);
 	if (!addr) {
-		printk(KERN_ERR
-			"Cannot allocate aperture memory hole (%lx,%uK)\n",
-				addr, aper_size>>10);
+		pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n",
+		       addr, addr + aper_size - 1, aper_size >> 10);
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
-			aper_size >> 10, addr);
+	pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n",
+		addr, addr + aper_size - 1, aper_size >> 10);
 	register_nosave_region(addr >> PAGE_SHIFT,
 			       (addr+aper_size) >> PAGE_SHIFT);
@@ -126,10 +127,11 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 	u64 aper;
 	u32 old_order;
 
-	printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
+	pr_info("pci 0000:%02x:%02x:%02x: AGP bridge\n", bus, slot, func);
 	apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
 	if (apsizereg == 0xffffffff) {
-		printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
+		pr_err("pci 0000:%02x:%02x.%d: APSIZE unreadable\n",
+		       bus, slot, func);
 		return 0;
 	}
 
@@ -153,16 +155,18 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 	 * On some sick chips, APSIZE is 0. It means it wants 4G
 	 * so let double check that order, and lets trust AMD NB settings:
 	 */
-	printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
-			aper, 32 << old_order);
+	pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (old size %uMB)\n",
+		bus, slot, func, aper, aper + (32ULL << (old_order + 20)) - 1,
+		32 << old_order);
 
 	if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
-		printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
-				32 << *order, apsizereg);
+		pr_info("pci 0000:%02x:%02x.%d: AGP aperture size %uMB (APSIZE %#x) is not right, using settings from NB\n",
+			bus, slot, func, 32 << *order, apsizereg);
 		*order = old_order;
 	}
 
-	printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
-			aper, 32 << *order, apsizereg);
+	pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (%uMB, APSIZE %#x)\n",
+		bus, slot, func, aper, aper + (32ULL << (*order + 20)) - 1,
+		32 << *order, apsizereg);
 
 	if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
 		return 0;
@@ -218,7 +222,7 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 			}
 		}
 	}
-	printk(KERN_INFO "No AGP bridge found\n");
+	pr_info("No AGP bridge found\n");
 
 	return 0;
 }
@@ -310,7 +314,8 @@ void __init early_gart_iommu_check(void)
 		if (e820_any_mapped(aper_base, aper_base + aper_size,
 				    E820_RAM)) {
 			/* reserve it, so we can reuse it in second kernel */
-			printk(KERN_INFO "update e820 for GART\n");
+			pr_info("e820: reserve [mem %#010Lx-%#010Lx] for GART\n",
+				aper_base, aper_base + aper_size - 1);
 			e820_add_region(aper_base, aper_size, E820_RESERVED);
 			update_e820();
 		}
@@ -354,7 +359,7 @@ int __init gart_iommu_hole_init(void)
 	    !early_pci_allowed())
 		return -ENODEV;
 
-	printk(KERN_INFO  "Checking aperture...\n");
+	pr_info("Checking aperture...\n");
 	if (!fallback_aper_force)
 		agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
 
@@ -395,8 +400,9 @@ int __init gart_iommu_hole_init(void)
 			aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
 			aper_base <<= 25;
 
-			printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
-					node, aper_base, aper_size >> 20);
+			pr_info("Node %d: aperture [bus addr %#010Lx-%#010Lx] (%uMB)\n",
+				node, aper_base, aper_base + aper_size - 1,
+				aper_size >> 20);
 
 			node++;
 			if (!aperture_valid(aper_base, aper_size, 64<<20)) {
@@ -407,9 +413,9 @@ int __init gart_iommu_hole_init(void)
 					if (!no_iommu &&
 					    max_pfn > MAX_DMA32_PFN &&
 					    !printed_gart_size_msg) {
-						printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
-						printk(KERN_ERR "please increase GART size in your BIOS setup\n");
-						printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
+						pr_err("you are using iommu with agp, but GART size is less than 64MB\n");
+						pr_err("please increase GART size in your BIOS setup\n");
+						pr_err("if BIOS doesn't have that option, contact your HW vendor!\n");
 						printed_gart_size_msg = 1;
 					}
 				} else {
@@ -446,13 +452,10 @@ out:
 		   force_iommu ||
 		   valid_agp ||
 		   fallback_aper_force) {
-		printk(KERN_INFO
-			"Your BIOS doesn't leave a aperture memory hole\n");
-		printk(KERN_INFO
-			"Please enable the IOMMU option in the BIOS setup\n");
-		printk(KERN_INFO
-			"This costs you %d MB of RAM\n",
-				32 << fallback_aper_order);
+		pr_info("Your BIOS doesn't leave a aperture memory hole\n");
+		pr_info("Please enable the IOMMU option in the BIOS setup\n");
+		pr_info("This costs you %dMB of RAM\n",
+			32 << fallback_aper_order);
 
 		aper_order = fallback_aper_order;
 		aper_alloc = allocate_aperture();
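For readers unfamiliar with the pattern used throughout the aperture_64.c hunks above: pr_err()/pr_info() expand to printk() with the file-local pr_fmt() applied to the format string, so the single "#define pr_fmt(fmt)" at the top of the file prefixes every message in the file with "AGP: ". A minimal user-space sketch of the same mechanism (the fprintf/printf stand-ins are illustrative only; the real macros live in include/linux/printk.h):

#include <stdio.h>

/* Must be defined before the first use, as in aperture_64.c. */
#define pr_fmt(fmt) "AGP: " fmt

/* Simplified stand-ins for the kernel's pr_err()/pr_info(). */
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	unsigned long addr = 0xa0000000UL;
	unsigned int aper_size = 64 << 20;	/* 64MB, hypothetical value */

	/* Prints "AGP: Mapping aperture over RAM [mem 0xa0000000-0xa3ffffff] (65536KB)" */
	pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n",
		addr, addr + aper_size - 1, aper_size >> 10);
	return 0;
}

Because pr_fmt() is applied at expansion time, every conversion in the file picks up the prefix without touching the individual call sites, which is exactly what the patch exploits.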
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index a698d7165c96..eab67047dec3 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -57,7 +57,7 @@ void arch_trigger_all_cpu_backtrace(void)
 	}
 
 	clear_bit(0, &backtrace_flag);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static int __kprobes
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ae407f7226c8..89f3b7c1af20 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n,
 
 	return sched.state.unassigned;
 }
+EXPORT_SYMBOL_GPL(perf_assign_events);
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index aa333d966886..adb02aa62af5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ae96cfa5eddd..980970cb744d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 {
 	union perf_mem_data_src dse;
+	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
 
 	dse.val = 0;
 	dse.mem_op = PERF_MEM_OP_STORE;
 	dse.mem_lvl = PERF_MEM_LVL_NA;
+
+	/*
+	 * L1 info only valid for following events:
+	 *
+	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
+	 * MEM_UOPS_RETIRED.LOCK_STORES
+	 * MEM_UOPS_RETIRED.SPLIT_STORES
+	 * MEM_UOPS_RETIRED.ALL_STORES
+	 */
+	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+		return dse.mem_lvl;
+
 	if (status & 1)
-		dse.mem_lvl = PERF_MEM_LVL_L1;
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+	else
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
 	/* Nothing else supported. Sorry. */
 	return dse.val;
 }
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				data.data_src.val = load_latency_data(pebs->dse);
 			else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 				data.data_src.val =
-					precise_store_data_hsw(pebs->dse);
+					precise_store_data_hsw(event, pebs->dse);
 			else
 				data.data_src.val = precise_store_data(pebs->dse);
 		}
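A note on the magic constants in the precise_store_data_hsw() hunk above: in an Intel PMU raw encoding, bits 7:0 of event->hw.config hold the event select and bits 15:8 the unit mask, so 0x12d0/0x22d0/0x42d0/0x82d0 are all event 0xd0 (MEM_UOPS_RETIRED) with the store umasks named in the added comment. A small sketch that decodes them (the field macros are hypothetical helpers, not the kernel's):

#include <stdio.h>

/* Event-select (bits 7:0) and umask (bits 15:8) fields of a raw
 * Intel perf event encoding, i.e. the part kept by INTEL_ARCH_EVENT_MASK. */
#define EVENTSEL(cfg)	((cfg) & 0xff)
#define UMASK(cfg)	(((cfg) >> 8) & 0xff)

int main(void)
{
	/* The four MEM_UOPS_RETIRED store events accepted by the patch. */
	unsigned int cfgs[] = { 0x12d0, 0x22d0, 0x42d0, 0x82d0 };

	for (int i = 0; i < 4; i++)
		printf("cfg %#06x: event %#04x umask %#04x\n",
		       cfgs[i], EVENTSEL(cfgs[i]), UMASK(cfgs[i]));
	return 0;
}

Any other encoding leaves mem_lvl at PERF_MEM_LVL_NA, since the PEBS status bit only carries L1 hit/miss information for those four events.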
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2ed845928b5f..ace22916ade3 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -53,7 +53,7 @@
 #define OPCODE1(insn)		((insn)->opcode.bytes[0])
 #define OPCODE2(insn)		((insn)->opcode.bytes[1])
 #define OPCODE3(insn)		((insn)->opcode.bytes[2])
-#define MODRM_REG(insn)		X86_MODRM_REG(insn->modrm.value)
+#define MODRM_REG(insn)		X86_MODRM_REG((insn)->modrm.value)
 
 #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
 	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
@@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
 	return -ENOTSUPP;
 }
 
-/*
- * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
- * annotate arch_uprobe->fixups accordingly.  To start with,
- * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
- */
-static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
-{
-	bool fix_ip = true, fix_call = false;	/* defaults */
-	int reg;
-
-	insn_get_opcode(insn);	/* should be a nop */
-
-	switch (OPCODE1(insn)) {
-	case 0x9d:
-		/* popf */
-		auprobe->fixups |= UPROBE_FIX_SETF;
-		break;
-	case 0xc3:		/* ret/lret */
-	case 0xcb:
-	case 0xc2:
-	case 0xca:
-		/* ip is correct */
-		fix_ip = false;
-		break;
-	case 0xe8:		/* call relative - Fix return addr */
-		fix_call = true;
-		break;
-	case 0x9a:		/* call absolute - Fix return addr, not ip */
-		fix_call = true;
-		fix_ip = false;
-		break;
-	case 0xff:
-		insn_get_modrm(insn);
-		reg = MODRM_REG(insn);
-		if (reg == 2 || reg == 3) {
-			/* call or lcall, indirect */
-			/* Fix return addr; ip is correct. */
-			fix_call = true;
-			fix_ip = false;
-		} else if (reg == 4 || reg == 5) {
-			/* jmp or ljmp, indirect */
-			/* ip is correct. */
-			fix_ip = false;
-		}
-		break;
-	case 0xea:		/* jmp absolute -- ip is correct */
-		fix_ip = false;
-		break;
-	default:
-		break;
-	}
-	if (fix_ip)
-		auprobe->fixups |= UPROBE_FIX_IP;
-	if (fix_call)
-		auprobe->fixups |= UPROBE_FIX_CALL;
-}
-
 #ifdef CONFIG_X86_64
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
@@ -310,15 +253,11 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
  *  - The displacement is always 4 bytes.
  */
 static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
 
-	if (mm->context.ia32_compat)
-		return;
-
-	auprobe->rip_rela_target_address = 0x0;
 	if (!insn_rip_relative(insn))
 		return;
 
@@ -372,7 +311,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins
 		cursor++;
 		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
 	}
-	return;
+}
+
+/*
+ * If we're emulating a rip-relative instruction, save the contents
+ * of the scratch register and store the target address in that register.
+ */
+static void
+pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+				struct arch_uprobe_task *autask)
+{
+	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
+		autask->saved_scratch_register = regs->ax;
+		regs->ax = current->utask->vaddr;
+		regs->ax += auprobe->rip_rela_target_address;
+	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
+		autask->saved_scratch_register = regs->cx;
+		regs->cx = current->utask->vaddr;
+		regs->cx += auprobe->rip_rela_target_address;
+	}
+}
+
+static void
+handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+{
+	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+		struct arch_uprobe_task *autask;
+
+		autask = &current->utask->autask;
+		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
+			regs->ax = autask->saved_scratch_register;
+		else
+			regs->cx = autask->saved_scratch_register;
+
+		/*
+		 * The original instruction includes a displacement, and so
+		 * is 4 bytes longer than what we've just single-stepped.
+		 * Caller may need to apply other fixups to handle stuff
+		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
+		 */
+		if (correction)
+			*correction += 4;
+	}
 }
 
 static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
@@ -401,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	return validate_insn_64bits(auprobe, insn);
 }
 #else /* 32-bit: */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+/*
+ * No RIP-relative addressing on 32-bit
+ */
+static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+{
+}
+static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+				struct arch_uprobe_task *autask)
+{
+}
+static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
+					long *correction)
 {
-	/* No RIP-relative addressing on 32-bit */
 }
 static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
 {
@@ -412,141 +402,311 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
 }
 #endif /* CONFIG_X86_64 */
 
-/**
- * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
- * @mm: the probed address space.
- * @arch_uprobe: the probepoint information.
- * @addr: virtual address at which to install the probepoint
- * Return 0 on success or a -ve number on error.
+struct uprobe_xol_ops {
+	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
+	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
+	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
+};
+
+static inline int sizeof_long(void)
+{
+	return is_ia32_task() ? 4 : 8;
+}
+
+static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+	return 0;
+}
+
+/*
+ * Adjust the return address pushed by a call insn executed out of line.
  */
-int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
+static int adjust_ret_addr(unsigned long sp, long correction)
 {
-	int ret;
-	struct insn insn;
+	int rasize = sizeof_long();
+	long ra;
 
-	auprobe->fixups = 0;
-	ret = validate_insn_bits(auprobe, mm, &insn);
-	if (ret != 0)
-		return ret;
+	if (copy_from_user(&ra, (void __user *)sp, rasize))
+		return -EFAULT;
 
-	handle_riprel_insn(auprobe, mm, &insn);
-	prepare_fixups(auprobe, &insn);
+	ra += correction;
+	if (copy_to_user((void __user *)sp, &ra, rasize))
+		return -EFAULT;
 
 	return 0;
 }
 
-#ifdef CONFIG_X86_64
-/*
- * If we're emulating a rip-relative instruction, save the contents
- * of the scratch register and store the target address in that register.
- */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-		autask->saved_scratch_register = regs->ax;
-		regs->ax = current->utask->vaddr;
-		regs->ax += auprobe->rip_rela_target_address;
-	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-		autask->saved_scratch_register = regs->cx;
-		regs->cx = current->utask->vaddr;
-		regs->cx += auprobe->rip_rela_target_address;
+	struct uprobe_task *utask = current->utask;
+	long correction = (long)(utask->vaddr - utask->xol_vaddr);
+
+	handle_riprel_post_xol(auprobe, regs, &correction);
+	if (auprobe->fixups & UPROBE_FIX_IP)
+		regs->ip += correction;
+
+	if (auprobe->fixups & UPROBE_FIX_CALL) {
+		if (adjust_ret_addr(regs->sp, correction)) {
+			regs->sp += sizeof_long();
+			return -ERESTART;
+		}
 	}
+
+	return 0;
 }
-#else
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+
+static struct uprobe_xol_ops default_xol_ops = {
+	.pre_xol  = default_pre_xol_op,
+	.post_xol = default_post_xol_op,
+};
+
+static bool branch_is_call(struct arch_uprobe *auprobe)
 {
-	/* No RIP-relative addressing on 32-bit */
+	return auprobe->branch.opc1 == 0xe8;
 }
-#endif
 
-/*
- * arch_uprobe_pre_xol - prepare to execute out of line.
- * @auprobe: the probepoint information.
- * @regs: reflects the saved user state of current task.
- */
-int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
-{
-	struct arch_uprobe_task *autask;
+#define CASE_COND					\
+	COND(70, 71, XF(OF))				\
+	COND(72, 73, XF(CF))				\
+	COND(74, 75, XF(ZF))				\
+	COND(78, 79, XF(SF))				\
+	COND(7a, 7b, XF(PF))				\
+	COND(76, 77, XF(CF) || XF(ZF))			\
+	COND(7c, 7d, XF(SF) != XF(OF))			\
+	COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF))
 
-	autask = &current->utask->autask;
-	autask->saved_trap_nr = current->thread.trap_nr;
-	current->thread.trap_nr = UPROBE_TRAP_NR;
-	regs->ip = current->utask->xol_vaddr;
-	pre_xol_rip_insn(auprobe, regs, autask);
+#define COND(op_y, op_n, expr)				\
+	case 0x ## op_y: DO((expr) != 0)		\
+	case 0x ## op_n: DO((expr) == 0)
 
-	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
-	regs->flags |= X86_EFLAGS_TF;
-	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
-		set_task_blockstep(current, false);
+#define XF(xf)	(!!(flags & X86_EFLAGS_ ## xf))
 
-	return 0;
+static bool is_cond_jmp_opcode(u8 opcode)
+{
+	switch (opcode) {
+	#define DO(expr)	\
+		return true;
+	CASE_COND
+	#undef	DO
+
+	default:
+		return false;
+	}
 }
 
-/*
- * This function is called by arch_uprobe_post_xol() to adjust the return
- * address pushed by a call instruction executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	int rasize, ncopied;
-	long ra = 0;
+	unsigned long flags = regs->flags;
 
-	if (is_ia32_task())
-		rasize = 4;
-	else
-		rasize = 8;
+	switch (auprobe->branch.opc1) {
+	#define DO(expr)	\
+		return expr;
+	CASE_COND
+	#undef	DO
 
-	ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
-	if (unlikely(ncopied))
-		return -EFAULT;
+	default:	/* not a conditional jmp */
+		return true;
+	}
+}
 
-	ra += correction;
-	ncopied = copy_to_user((void __user *)sp, &ra, rasize);
-	if (unlikely(ncopied))
-		return -EFAULT;
+#undef	XF
+#undef	COND
+#undef	CASE_COND
 
-	return 0;
+static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	unsigned long new_ip = regs->ip += auprobe->branch.ilen;
+	unsigned long offs = (long)auprobe->branch.offs;
+
+	if (branch_is_call(auprobe)) {
+		unsigned long new_sp = regs->sp - sizeof_long();
+		/*
+		 * If it fails we execute this (mangled, see the comment in
+		 * branch_clear_offset) insn out-of-line. In the likely case
+		 * this should trigger the trap, and the probed application
+		 * should die or restart the same insn after it handles the
+		 * signal, arch_uprobe_post_xol() won't be even called.
+		 *
+		 * But there is corner case, see the comment in ->post_xol().
+		 */
+		if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+			return false;
+		regs->sp = new_sp;
+	} else if (!check_jmp_cond(auprobe, regs)) {
+		offs = 0;
+	}
+
+	regs->ip = new_ip + offs;
+	return true;
 }
 
-#ifdef CONFIG_X86_64
-static bool is_riprel_insn(struct arch_uprobe *auprobe)
+static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0);
+	BUG_ON(!branch_is_call(auprobe));
+	/*
+	 * We can only get here if branch_emulate_op() failed to push the ret
+	 * address _and_ another thread expanded our stack before the (mangled)
+	 * "call" insn was executed out-of-line. Just restore ->sp and restart.
+	 * We could also restore ->ip and try to call branch_emulate_op() again.
+	 */
+	regs->sp += sizeof_long();
+	return -ERESTART;
 }
 
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
 {
-	if (is_riprel_insn(auprobe)) {
-		struct arch_uprobe_task *autask;
+	/*
+	 * Turn this insn into "call 1f; 1:", this is what we will execute
+	 * out-of-line if ->emulate() fails. We only need this to generate
+	 * a trap, so that the probed task receives the correct signal with
+	 * the properly filled siginfo.
+	 *
+	 * But see the comment in ->post_xol(), in the unlikely case it can
+	 * succeed. So we need to ensure that the new ->ip can not fall into
+	 * the non-canonical area and trigger #GP.
+	 *
+	 * We could turn it into (say) "pushf", but then we would need to
+	 * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte
+	 * of ->insn[] for set_orig_insn().
+	 */
+	memset(auprobe->insn + insn_offset_immediate(insn),
+		0, insn->immediate.nbytes);
+}
 
-		autask = &current->utask->autask;
-		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-			regs->ax = autask->saved_scratch_register;
-		else
-			regs->cx = autask->saved_scratch_register;
+static struct uprobe_xol_ops branch_xol_ops = {
+	.emulate  = branch_emulate_op,
+	.post_xol = branch_post_xol_op,
+};
+
+/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
+static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+{
+	u8 opc1 = OPCODE1(insn);
+
+	/* has the side-effect of processing the entire instruction */
+	insn_get_length(insn);
+	if (WARN_ON_ONCE(!insn_complete(insn)))
+		return -ENOEXEC;
+
+	switch (opc1) {
+	case 0xeb:	/* jmp 8 */
+	case 0xe9:	/* jmp 32 */
+	case 0x90:	/* prefix* + nop; same as jmp with .offs = 0 */
+		break;
+
+	case 0xe8:	/* call relative */
+		branch_clear_offset(auprobe, insn);
+		break;
+	case 0x0f:
+		if (insn->opcode.nbytes != 2)
+			return -ENOSYS;
 		/*
-		 * The original instruction includes a displacement, and so
-		 * is 4 bytes longer than what we've just single-stepped.
-		 * Fall through to handle stuff like "jmpq *...(%rip)" and
-		 * "callq *...(%rip)".
+		 * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches
+		 * OPCODE1() of the "short" jmp which checks the same condition.
 		 */
-		if (correction)
-			*correction += 4;
+		opc1 = OPCODE2(insn) - 0x10;
+	default:
+		if (!is_cond_jmp_opcode(opc1))
+			return -ENOSYS;
 	}
+
+	auprobe->branch.opc1 = opc1;
+	auprobe->branch.ilen = insn->length;
+	auprobe->branch.offs = insn->immediate.value;
+
+	auprobe->ops = &branch_xol_ops;
+	return 0;
 }
-#else
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+
+/**
+ * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+ * @mm: the probed address space.
+ * @arch_uprobe: the probepoint information.
+ * @addr: virtual address at which to install the probepoint
+ * Return 0 on success or a -ve number on error.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
+{
+	struct insn insn;
+	bool fix_ip = true, fix_call = false;
+	int ret;
+
+	ret = validate_insn_bits(auprobe, mm, &insn);
+	if (ret)
+		return ret;
+
+	ret = branch_setup_xol_ops(auprobe, &insn);
+	if (ret != -ENOSYS)
+		return ret;
+
+	/*
+	 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
+	 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
+	 * is either zero or it reflects rip-related fixups.
+	 */
+	switch (OPCODE1(&insn)) {
+	case 0x9d:		/* popf */
+		auprobe->fixups |= UPROBE_FIX_SETF;
+		break;
+	case 0xc3:		/* ret or lret -- ip is correct */
+	case 0xcb:
+	case 0xc2:
+	case 0xca:
+		fix_ip = false;
+		break;
+	case 0x9a:		/* call absolute - Fix return addr, not ip */
+		fix_call = true;
+		fix_ip = false;
+		break;
+	case 0xea:		/* jmp absolute -- ip is correct */
+		fix_ip = false;
+		break;
+	case 0xff:
+		insn_get_modrm(&insn);
+		switch (MODRM_REG(&insn)) {
+		case 2: case 3:			/* call or lcall, indirect */
+			fix_call = true;
+		case 4: case 5:			/* jmp or ljmp, indirect */
+			fix_ip = false;
+		}
+		/* fall through */
+	default:
+		handle_riprel_insn(auprobe, &insn);
+	}
+
+	if (fix_ip)
+		auprobe->fixups |= UPROBE_FIX_IP;
+	if (fix_call)
+		auprobe->fixups |= UPROBE_FIX_CALL;
+
+	auprobe->ops = &default_xol_ops;
+	return 0;
+}
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @auprobe: the probepoint information.
+ * @regs: reflects the saved user state of current task.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	/* No RIP-relative addressing on 32-bit */
+	struct uprobe_task *utask = current->utask;
+
+	regs->ip = utask->xol_vaddr;
+	utask->autask.saved_trap_nr = current->thread.trap_nr;
+	current->thread.trap_nr = UPROBE_TRAP_NR;
+
+	utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF);
+	regs->flags |= X86_EFLAGS_TF;
+	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
+		set_task_blockstep(current, false);
+
+	if (auprobe->ops->pre_xol)
+		return auprobe->ops->pre_xol(auprobe, regs);
+	return 0;
 }
-#endif
 
 /*
  * If xol insn itself traps and generates a signal(Say,
@@ -592,22 +752,25 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
  */
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	struct uprobe_task *utask;
-	long correction;
-	int result = 0;
+	struct uprobe_task *utask = current->utask;
 
 	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
-	utask = current->utask;
-	current->thread.trap_nr = utask->autask.saved_trap_nr;
-	correction = (long)(utask->vaddr - utask->xol_vaddr);
-	handle_riprel_post_xol(auprobe, regs, &correction);
-	if (auprobe->fixups & UPROBE_FIX_IP)
-		regs->ip += correction;
-
-	if (auprobe->fixups & UPROBE_FIX_CALL)
-		result = adjust_ret_addr(regs->sp, correction);
 
+	if (auprobe->ops->post_xol) {
+		int err = auprobe->ops->post_xol(auprobe, regs);
+		if (err) {
+			arch_uprobe_abort_xol(auprobe, regs);
+			/*
+			 * Restart the probed insn. ->post_xol() must ensure
+			 * this is really possible if it returns -ERESTART.
+			 */
+			if (err == -ERESTART)
+				return 0;
+			return err;
+		}
+	}
+
+	current->thread.trap_nr = utask->autask.saved_trap_nr;
 	/*
 	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
 	 * so we can get an extra SIGTRAP if we do not clear TF. We need
@@ -618,7 +781,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	return result;
+	return 0;
 }
 
 /* callback routine for handling exceptions. */
@@ -652,8 +815,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
 
 /*
  * This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, so reset the instruction pointer to its
- * probed address.
+ * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
+ * Reset the instruction pointer to its probed address for the potential
+ * restart or for post mortem analysis.
  */
 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
@@ -668,25 +832,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		regs->flags &= ~X86_EFLAGS_TF;
 }
 
-/*
- * Skip these instructions as per the currently known x86 ISA.
- * rep=0x66*; nop=0x90
- */
 static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	int i;
-
-	for (i = 0; i < MAX_UINSN_BYTES; i++) {
-		if (auprobe->insn[i] == 0x66)
-			continue;
-
-		if (auprobe->insn[i] == 0x90) {
-			regs->ip += i + 1;
-			return true;
-		}
-
-		break;
-	}
+	if (auprobe->ops->emulate)
+		return auprobe->ops->emulate(auprobe, regs);
 	return false;
 }
 
@@ -701,23 +850,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 unsigned long
 arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
 {
-	int rasize, ncopied;
+	int rasize = sizeof_long(), nleft;
 	unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
 
-	rasize = is_ia32_task() ? 4 : 8;
-	ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize);
-	if (unlikely(ncopied))
+	if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
 		return -1;
 
 	/* check whether address has been already hijacked */
 	if (orig_ret_vaddr == trampoline_vaddr)
 		return orig_ret_vaddr;
 
-	ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
-	if (likely(!ncopied))
+	nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
+	if (likely(!nleft))
 		return orig_ret_vaddr;
 
-	if (ncopied != rasize) {
+	if (nleft != rasize) {
 		pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
 			"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
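The heart of the uprobes.c rework above is the uprobe_xol_ops table: arch_uprobe_analyze_insn() now selects branch_xol_ops (jmp/jcc/call handled by pure emulation) or default_xol_ops (single-step out of line, then fix up ip and the pushed return address), and the generic entry points dispatch through auprobe->ops, checking each optional hook for NULL. A stripped-down user-space sketch of that dispatch shape (the stub types and the two-byte jmp are placeholders, not the kernel's definitions):

#include <stdbool.h>
#include <stdio.h>

struct pt_regs { unsigned long ip; };
struct arch_uprobe;

/* Same shape as the patch's uprobe_xol_ops: optional hooks,
 * checked for NULL before each call. */
struct uprobe_xol_ops {
	bool (*emulate)(struct arch_uprobe *, struct pt_regs *);
	int  (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
	int  (*post_xol)(struct arch_uprobe *, struct pt_regs *);
};

struct arch_uprobe { const struct uprobe_xol_ops *ops; };

static bool branch_emulate(struct arch_uprobe *u, struct pt_regs *r)
{
	r->ip += 2;		/* pretend we emulated a short jmp */
	return true;
}

static const struct uprobe_xol_ops branch_ops  = { .emulate = branch_emulate };
static const struct uprobe_xol_ops default_ops = { 0 };	/* no emulation */

/* Mirrors __skip_sstep(): emulate if the ops say we can,
 * otherwise fall back to out-of-line single-stepping. */
static bool skip_sstep(struct arch_uprobe *u, struct pt_regs *r)
{
	if (u->ops->emulate)
		return u->ops->emulate(u, r);
	return false;
}

int main(void)
{
	struct pt_regs regs = { .ip = 0x1000 };
	struct arch_uprobe up = { .ops = &branch_ops };

	printf("emulated: %d, new ip: %#lx\n", skip_sstep(&up, &regs), regs.ip);
	up.ops = &default_ops;
	printf("emulated: %d (must single-step)\n", skip_sstep(&up, &regs));
	return 0;
}

The payoff is visible in arch_uprobe_skip_sstep()'s helper: emulated branches never need the XOL slot, the trap-frame fixups, or the -ERESTART recovery path at all.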

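Likewise, the CASE_COND/COND/XF macros added in check_jmp_cond() encode each x86 Jcc pair as one EFLAGS expression: COND(op_y, op_n, expr) covers the taken opcode 0xop_y and its complement 0xop_n, and XF() tests a single flag bit. A macro-free sketch of the same test for two of the pairs (flag bit values per the x86 EFLAGS layout):

#include <stdbool.h>
#include <stdio.h>

/* x86 EFLAGS bits used by the patch's XF() macro. */
#define X86_EFLAGS_CF	0x0001
#define X86_EFLAGS_ZF	0x0040
#define X86_EFLAGS_SF	0x0080
#define X86_EFLAGS_OF	0x0800

/* Open-coded version of check_jmp_cond() for two Jcc pairs:
 * 0x74/0x75 = jz/jnz (ZF), 0x7c/0x7d = jl/jge (SF != OF). */
static bool jcc_taken(unsigned char opc1, unsigned long flags)
{
	bool zf = flags & X86_EFLAGS_ZF;
	bool sf = flags & X86_EFLAGS_SF;
	bool of = flags & X86_EFLAGS_OF;

	switch (opc1) {
	case 0x74: return zf;		/* jz  */
	case 0x75: return !zf;		/* jnz */
	case 0x7c: return sf != of;	/* jl  */
	case 0x7d: return sf == of;	/* jge */
	default:   return true;		/* unconditional, as in the patch */
	}
}

int main(void)
{
	printf("jz with ZF set:    %d\n", jcc_taken(0x74, X86_EFLAGS_ZF));
	printf("jl with SF, no OF: %d\n", jcc_taken(0x7c, X86_EFLAGS_SF));
	return 0;
}

When the condition is false, branch_emulate_op() simply sets offs to 0, so the emulated jump falls through to the next instruction, exactly as the hardware would.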