| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
 | // SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-pages.h"
#include "trace.h"
void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;
	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_fl5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}
	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}
/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
	if (start == 0 && end == ULONG_MAX) {
		cache_tag_flush_all(domain);
		return;
	}
	/*
	 * The mm_types defines vm_end as the first byte after the end address,
	 * different from IOMMU subsystem using the last address of an address
	 * range.
	 */
	cache_tag_flush_range(domain, start, end - 1, 0);
}
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
	struct dev_pasid_info *dev_pasid;
	struct device_domain_info *info;
	unsigned long flags;
	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	spin_lock_irqsave(&domain->lock, flags);
	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
		info = dev_iommu_priv_get(dev_pasid->dev);
		intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
					    dev_pasid->pasid, true);
	}
	spin_unlock_irqrestore(&domain->lock, flags);
}
static void intel_mm_free_notifier(struct mmu_notifier *mn)
{
	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
	kfree(domain->qi_batch);
	kfree(domain);
}
static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
	.free_notifier = intel_mm_free_notifier,
};
static int intel_iommu_sva_supported(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu;
	if (!info || dmar_disabled)
		return -EINVAL;
	iommu = info->iommu;
	if (!iommu)
		return -EINVAL;
	if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
		return -ENODEV;
	if (!info->pasid_enabled || !info->ats_enabled)
		return -EINVAL;
	/*
	 * Devices having device-specific I/O fault handling should not
	 * support PCI/PRI. The IOMMU side has no means to check the
	 * capability of device-specific IOPF.  Therefore, IOMMU can only
	 * default that if the device driver enables SVA on a non-PRI
	 * device, it will handle IOPF in its own way.
	 */
	if (!info->pri_supported)
		return 0;
	/* Devices supporting PRI should have it enabled. */
	if (!info->pri_enabled)
		return -EINVAL;
	return 0;
}
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
				   struct device *dev, ioasid_t pasid,
				   struct iommu_domain *old)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct mm_struct *mm = domain->mm;
	struct dev_pasid_info *dev_pasid;
	unsigned long sflags;
	int ret = 0;
	ret = intel_iommu_sva_supported(dev);
	if (ret)
		return ret;
	dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
	if (IS_ERR(dev_pasid))
		return PTR_ERR(dev_pasid);
	ret = iopf_for_domain_replace(domain, old, dev);
	if (ret)
		goto out_remove_dev_pasid;
	/* Setup the pasid table: */
	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	ret = __domain_setup_first_level(iommu, dev, pasid,
					 FLPT_DEFAULT_DID, __pa(mm->pgd),
					 sflags, old);
	if (ret)
		goto out_unwind_iopf;
	domain_remove_dev_pasid(old, dev, pasid);
	return 0;
out_unwind_iopf:
	iopf_for_domain_replace(old, domain, dev);
out_remove_dev_pasid:
	domain_remove_dev_pasid(domain, dev, pasid);
	return ret;
}
static void intel_svm_domain_free(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	/* dmar_domain free is deferred to the mmu free_notifier callback. */
	mmu_notifier_put(&dmar_domain->notifier);
}
static const struct iommu_domain_ops intel_svm_domain_ops = {
	.set_dev_pasid		= intel_svm_set_dev_pasid,
	.free			= intel_svm_domain_free
};
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
					    struct mm_struct *mm)
{
	struct dmar_domain *domain;
	int ret;
	ret = intel_iommu_sva_supported(dev);
	if (ret)
		return ERR_PTR(ret);
	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return ERR_PTR(-ENOMEM);
	domain->domain.ops = &intel_svm_domain_ops;
	INIT_LIST_HEAD(&domain->dev_pasids);
	INIT_LIST_HEAD(&domain->cache_tags);
	spin_lock_init(&domain->cache_lock);
	spin_lock_init(&domain->lock);
	domain->notifier.ops = &intel_mmuops;
	ret = mmu_notifier_register(&domain->notifier, mm);
	if (ret) {
		kfree(domain);
		return ERR_PTR(ret);
	}
	return &domain->domain;
}
 |