diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-01 11:26:46 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-01 11:26:46 -0700 | 
| commit | 477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6 (patch) | |
| tree | 1897dd1de49e1ea24897163533e2d8ead5dad0ad /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |
| parent | 835d31d319d9c8c4eb6cac074643360ba0ecab10 (diff) | |
| parent | 8f0284f190e6a0aa09015090568c03f18288231a (diff) | |
Merge tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
 "Highlights:
   - i915 has seen a lot of refactoring and uAPI cleanups due to a
     change in the upstream direction going forward.
     This has all been audited with known userspace, but there may be
     some pitfalls that were missed.
   - i915 now uses common TTM to enable discrete memory on DG1/2 GPUs
   - i915 enables Jasper and Elkhart Lake by default and has preliminary
     XeHP/DG2 support
   - amdgpu adds support for Cyan Skillfish
   - lots of implicit fencing rules documented and fixed up in drivers
   - msm now uses the core scheduler
   - the irq midlayer has been removed for non-legacy drivers
   - the sysfb code now works on more than x86.
  Otherwise the usual smattering of stuff everywhere, panels, bridges,
  refactorings.
  Detailed summary:
  core:
   - extract i915 eDP backlight into core
   - DP aux bus support
   - drm_device.irq_enabled removed
   - port drivers to native irq interfaces
   - export gem shadow plane handling for vgem
   - print proper driver name in framebuffer registration
   - driver fixes for implicit fencing rules
   - ARM fixed rate compression modifier added
   - updated fb damage handling
   - rmfb ioctl logging/docs
   - drop drm_gem_object_put_locked
   - define DRM_FORMAT_MAX_PLANES
   - add gem fb vmap/vunmap helpers
   - add lockdep_assert(once) helpers
   - mark drm irq midlayer as legacy
   - use offset adjusted bo mapping conversion
  vgaarb:
   - cleanups
  fbdev:
   - extend efifb handling to all arches
   - div by 0 fixes for multiple drivers
  udmabuf:
   - add hugepage mapping support
  dma-buf:
   - non-dynamic exporter fixups
   - document implicit fencing rules
  amdgpu:
   - Initial Cyan Skillfish support
   - switch virtual DCE over to vkms based atomic
   - VCN/JPEG power down fixes
   - NAVI PCIE link handling fixes
   - AMD HDMI freesync fixes
   - Yellow Carp + Beige Goby fixes
   - Clockgating/S0ix/SMU/EEPROM fixes
   - embed hw fence in job
   - rework dma-resv handling
   - ensure eviction to system ram
  amdkfd:
   - uapi: SVM address range query added
   - sysfs leak fix
   - GPUVM TLB optimizations
   - vmfault/migration counters
  i915:
   - Enable JSL and EHL by default
   - preliminary XeHP/DG2 support
   - remove all CNL support (never shipped)
   - move to TTM for discrete memory support
   - allow mixed object mmap handling
   - GEM uAPI spring cleaning
       - add I915_MMAP_OBJECT_FIXED
       - reinstate ADL-P mmap ioctls
       - drop a bunch of unused by userspace features
       - disable and remove GPU relocations
   - revert some i915 misfeatures
   - major refactoring of GuC for Gen11+
   - execbuffer object locking separate step
   - reject caching/set-domain on discrete
   - Enable pipe DMC loading on XE-LPD and ADL-P
   - add PSF GV point support
   - Refactor and fix DDI buffer translations
   - Clean up FBC CFB allocation code
   - Finish INTEL_GEN() and friends macro conversions
  nouveau:
   - add eDP backlight support
   - implicit fence fix
  msm:
   - a680/7c3 support
   - drm/scheduler conversion
  panfrost:
   - rework GPU reset
  virtio:
   - fix fencing for planes
  ast:
   - add detect support
  bochs:
   - move to tiny GPU driver
  vc4:
   - use hotplug irqs
   - HDMI codec support
  vmwgfx:
   - use internal vmware device headers
  ingenic:
   - demidlayering irq
  rcar-du:
   - shutdown fixes
   - convert to bridge connector helpers
  zynqmp-dpsub:
   - misc fixes
  mgag200:
   - convert PLL handling to atomic
  mediatek:
   - MT8133 AAL support
   - gem mmap object support
   - MT8167 support
  etnaviv:
   - NXP Layerscape LS1028A SoC support
   - GEM mmap cleanups
  tegra:
   - new user API
  exynos:
   - missing unlock fix
   - build warning fix
   - use refcount_t"
* tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm: (1318 commits)
  drm/amd/display: Move AllowDRAMSelfRefreshOrDRAMClockChangeInVblank to bounding box
  drm/amd/display: Remove duplicate dml init
  drm/amd/display: Update bounding box states (v2)
  drm/amd/display: Update number of DCN3 clock states
  drm/amdgpu: disable GFX CGCG in aldebaran
  drm/amdgpu: Clear RAS interrupt status on aldebaran
  drm/amdgpu: Add support for RAS XGMI err query
  drm/amdkfd: Account for SH/SE count when setting up cu masks.
  drm/amdgpu: rename amdgpu_bo_get_preferred_pin_domain
  drm/amdgpu: drop redundant cancel_delayed_work_sync call
  drm/amdgpu: add missing cleanups for more ASICs on UVD/VCE suspend
  drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE on suspend
  drm/amdkfd: map SVM range with correct access permission
  drm/amdkfd: check access permisson to restore retry fault
  drm/amdgpu: Update RAS XGMI Error Query
  drm/amdgpu: Add driver infrastructure for MCA RAS
  drm/amd/display: Add Logging for HDMI color depth information
  drm/amd/amdgpu: consolidate PSP TA init shared buf functions
  drm/amd/amdgpu: add name field back to ras_common_if
  drm/amdgpu: Fix build with missing pm_suspend_target_state module export
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 102 | 
1 files changed, 56 insertions, 46 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3a55f08e00e1..38dade421d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -149,14 +149,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  			 * BOs to be evicted from VRAM  			 */  			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | -							 AMDGPU_GEM_DOMAIN_GTT); +							AMDGPU_GEM_DOMAIN_GTT | +							AMDGPU_GEM_DOMAIN_CPU);  			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;  			abo->placements[0].lpfn = 0;  			abo->placement.busy_placement = &abo->placements[1];  			abo->placement.num_busy_placement = 1;  		} else {  			/* Move to GTT memory */ -			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); +			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | +							AMDGPU_GEM_DOMAIN_CPU);  		}  		break;  	case TTM_PL_TT: @@ -521,7 +523,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,  			hop->fpfn = 0;  			hop->lpfn = 0;  			hop->mem_type = TTM_PL_TT; -			hop->flags = 0; +			hop->flags = TTM_PL_FLAG_TEMPORARY;  			return -EMULTIHOP;  		} @@ -1121,7 +1123,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,  	struct amdgpu_ttm_tt *gtt = (void *)ttm;  	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ -	if (gtt && gtt->userptr) { +	if (gtt->userptr) {  		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);  		if (!ttm->sg)  			return -ENOMEM; @@ -1146,7 +1148,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,  	struct amdgpu_ttm_tt *gtt = (void *)ttm;  	struct amdgpu_device *adev; -	if (gtt && gtt->userptr) { +	if (gtt->userptr) {  		amdgpu_ttm_tt_set_user_pages(ttm, NULL);  		kfree(ttm->sg);  		ttm->sg = NULL; @@ -1394,6 +1396,41 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,  	return ttm_bo_eviction_valuable(bo, place);  } +static void 
amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, +				      void *buf, size_t size, bool write) +{ +	while (size) { +		uint64_t aligned_pos = ALIGN_DOWN(pos, 4); +		uint64_t bytes = 4 - (pos & 0x3); +		uint32_t shift = (pos & 0x3) * 8; +		uint32_t mask = 0xffffffff << shift; +		uint32_t value = 0; + +		if (size < bytes) { +			mask &= 0xffffffff >> (bytes - size) * 8; +			bytes = size; +		} + +		if (mask != 0xffffffff) { +			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false); +			if (write) { +				value &= ~mask; +				value |= (*(uint32_t *)buf << shift) & mask; +				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true); +			} else { +				value = (value & mask) >> shift; +				memcpy(buf, &value, bytes); +			} +		} else { +			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write); +		} + +		pos += bytes; +		buf += bytes; +		size -= bytes; +	} +} +  /**   * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.   * @@ -1413,8 +1450,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);  	struct amdgpu_res_cursor cursor; -	unsigned long flags; -	uint32_t value = 0;  	int ret = 0;  	if (bo->resource->mem_type != TTM_PL_VRAM) @@ -1422,41 +1457,21 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,  	amdgpu_res_first(bo->resource, offset, len, &cursor);  	while (cursor.remaining) { -		uint64_t aligned_pos = cursor.start & ~(uint64_t)3; -		uint64_t bytes = 4 - (cursor.start & 3); -		uint32_t shift = (cursor.start & 3) * 8; -		uint32_t mask = 0xffffffff << shift; - -		if (cursor.size < bytes) { -			mask &= 0xffffffff >> (bytes - cursor.size) * 8; -			bytes = cursor.size; -		} - -		if (mask != 0xffffffff) { -			spin_lock_irqsave(&adev->mmio_idx_lock, flags); -			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); -			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); 
-			value = RREG32_NO_KIQ(mmMM_DATA); -			if (write) { -				value &= ~mask; -				value |= (*(uint32_t *)buf << shift) & mask; -				WREG32_NO_KIQ(mmMM_DATA, value); -			} -			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); -			if (!write) { -				value = (value & mask) >> shift; -				memcpy(buf, &value, bytes); -			} -		} else { -			bytes = cursor.size & ~0x3ULL; -			amdgpu_device_vram_access(adev, cursor.start, -						  (uint32_t *)buf, bytes, -						  write); +		size_t count, size = cursor.size; +		loff_t pos = cursor.start; + +		count = amdgpu_device_aper_access(adev, pos, buf, size, write); +		size -= count; +		if (size) { +			/* using MM to access rest vram and handle un-aligned address */ +			pos += count; +			buf += count; +			amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);  		} -		ret += bytes; -		buf = (uint8_t *)buf + bytes; -		amdgpu_res_next(&cursor, bytes); +		ret += cursor.size; +		buf += cursor.size; +		amdgpu_res_next(&cursor, cursor.size);  	}  	return ret; @@ -2146,7 +2161,6 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,  		return -ENXIO;  	while (size) { -		unsigned long flags;  		uint32_t value;  		if (*pos >= adev->gmc.mc_vram_size) @@ -2156,11 +2170,7 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,  		if (r)  			return r; -		spin_lock_irqsave(&adev->mmio_idx_lock, flags); -		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); -		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); -		WREG32_NO_KIQ(mmMM_DATA, value); -		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); +		amdgpu_device_mm_access(adev, *pos, &value, 4, true);  		result += 4;  		buf += 4; | 
