diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 664 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 19 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 26 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 179 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 118 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 191 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 9 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 45 | 
13 files changed, 641 insertions, 638 deletions
| diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d7cd5fa313ff..df75863393fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2069,7 +2069,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {  };  static const uint32_t cwsr_trap_gfx10_hex[] = { -	0xbf820001, 0xbf820220, +	0xbf820001, 0xbf820221,  	0xb0804004, 0xb978f802,  	0x8a78ff78, 0x00020006,  	0xb97bf803, 0x876eff78, @@ -2118,391 +2118,391 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {  	0xbf900004, 0xbf8cc07f,  	0x877aff7f, 0x04000000,  	0x8f7a857a, 0x886d7a6d, -	0xbefa037e, 0x877bff7f, -	0x0000ffff, 0xbefe03c1, -	0xbeff03c1, 0xdc5f8000, -	0x007a0000, 0x7e000280, -	0xbefe037a, 0xbeff037b, -	0xb97b02dc, 0x8f7b997b, -	0xb97a3a05, 0x807a817a, -	0xbf0d997b, 0xbf850002, -	0x8f7a897a, 0xbf820001, -	0x8f7a8a7a, 0xb97b1e06, -	0x8f7b8a7b, 0x807a7b7a, +	0x7e008200, 0xbefa037e,  	0x877bff7f, 0x0000ffff, -	0x807aff7a, 0x00000200, -	0x807a7e7a, 0x827b807b, -	0xd7610000, 0x00010870, -	0xd7610000, 0x00010a71, -	0xd7610000, 0x00010c72, -	0xd7610000, 0x00010e73, -	0xd7610000, 0x00011074, -	0xd7610000, 0x00011275, -	0xd7610000, 0x00011476, -	0xd7610000, 0x00011677, -	0xd7610000, 0x00011a79, -	0xd7610000, 0x00011c7e, -	0xd7610000, 0x00011e7f, -	0xbefe03ff, 0x00003fff, -	0xbeff0380, 0xdc5f8040, -	0x007a0000, 0xd760007a, -	0x00011d00, 0xd760007b, -	0x00011f00, 0xbefe037a, -	0xbeff037b, 0xbef4037e, -	0x8775ff7f, 0x0000ffff, -	0x8875ff75, 0x00040000, -	0xbef60380, 0xbef703ff, -	0x10807fac, 0xbef1037c, -	0xbef00380, 0xb97302dc, -	0x8f739973, 0xbefe03c1, -	0x907c9973, 0x877c817c, -	0xbf06817c, 0xbf850002, -	0xbeff0380, 0xbf820002, -	0xbeff03c1, 0xbf820009, +	0xbefe03c1, 0xbeff03c1, +	0xdc5f8000, 0x007a0000, +	0x7e000280, 0xbefe037a, +	0xbeff037b, 0xb97b02dc, +	0x8f7b997b, 0xb97a3a05, +	0x807a817a, 0xbf0d997b, +	0xbf850002, 0x8f7a897a, +	0xbf820001, 0x8f7a8a7a, +	0xb97b1e06, 0x8f7b8a7b, +	0x807a7b7a, 0x877bff7f, +	0x0000ffff, 0x807aff7a, +	0x00000200, 0x807a7e7a, +	0x827b807b, 0xd7610000, +	0x00010870, 0xd7610000, +	0x00010a71, 0xd7610000, +	0x00010c72, 0xd7610000, +	0x00010e73, 0xd7610000, +	0x00011074, 0xd7610000, +	0x00011275, 0xd7610000, +	0x00011476, 0xd7610000, +	0x00011677, 0xd7610000, +	0x00011a79, 0xd7610000, +	0x00011c7e, 0xd7610000, +	0x00011e7f, 0xbefe03ff, +	0x00003fff, 0xbeff0380, +	0xdc5f8040, 0x007a0000, +	0xd760007a, 0x00011d00, +	0xd760007b, 0x00011f00, +	0xbefe037a, 0xbeff037b, +	0xbef4037e, 0x8775ff7f, +	0x0000ffff, 0x8875ff75, +	0x00040000, 0xbef60380, +	0xbef703ff, 0x10807fac, +	0xbef1037c, 0xbef00380, +	0xb97302dc, 0x8f739973, +	0xbefe03c1, 0x907c9973, +	0x877c817c, 0xbf06817c, +	0xbf850002, 0xbeff0380, +	0xbf820002, 0xbeff03c1, +	0xbf820009, 0xbef603ff, +	0x01000000, 0xe0704080, +	0x705d0100, 0xe0704100, +	0x705d0200, 0xe0704180, +	0x705d0300, 0xbf820008,  	0xbef603ff, 0x01000000, -	0xe0704080, 0x705d0100, -	0xe0704100, 0x705d0200, -	0xe0704180, 0x705d0300, -	0xbf820008, 0xbef603ff, -	0x01000000, 0xe0704100, -	0x705d0100, 0xe0704200, -	0x705d0200, 0xe0704300, -	0x705d0300, 0xb9703a05, -	0x80708170, 0xbf0d9973, -	0xbf850002, 0x8f708970, -	0xbf820001, 0x8f708a70, -	0xb97a1e06, 0x8f7a8a7a, -	0x80707a70, 0x8070ff70, -	0x00000200, 0xbef603ff, -	0x01000000, 0x7e000280, -	0x7e020280, 0x7e040280, -	0xbefc0380, 0xd7610002, -	0x0000f871, 0x807c817c, -	0xd7610002, 0x0000f86c, -	0x807c817c, 0x8a7aff6d, -	0x80000000, 0xd7610002, -	0x0000f87a, 0x807c817c, -	0xd7610002, 0x0000f86e, -	0x807c817c, 0xd7610002, -	0x0000f86f, 0x807c817c, -	0xd7610002, 0x0000f878, -	0x807c817c, 0xb97af803, -	0xd7610002, 0x0000f87a, -	0x807c817c, 0xd7610002, -	0x0000f87b, 0x807c817c, -	0xb971f801, 0xd7610002, -	0x0000f871, 0x807c817c, -	0xb971f814, 0xd7610002, -	0x0000f871, 0x807c817c, -	0xb971f815, 0xd7610002, -	0x0000f871, 0x807c817c, -	0xbefe03ff, 0x0000ffff, -	0xbeff0380, 0xe0704000, -	0x705d0200, 0xbefe03c1, +	0xe0704100, 0x705d0100, +	0xe0704200, 0x705d0200, +	0xe0704300, 0x705d0300,  	0xb9703a05, 0x80708170,  	0xbf0d9973, 0xbf850002,  	0x8f708970, 0xbf820001,  	0x8f708a70, 0xb97a1e06,  	0x8f7a8a7a, 0x80707a70, +	0x8070ff70, 0x00000200,  	0xbef603ff, 0x01000000, -	0xbef90380, 0xbefc0380, -	0xbf800000, 0xbe802f00, -	0xbe822f02, 0xbe842f04, -	0xbe862f06, 0xbe882f08, -	0xbe8a2f0a, 0xbe8c2f0c, -	0xbe8e2f0e, 0xd7610002, -	0x0000f200, 0x80798179, -	0xd7610002, 0x0000f201, +	0x7e000280, 0x7e020280, +	0x7e040280, 0xbefc0380, +	0xd7610002, 0x0000f871, +	0x807c817c, 0xd7610002, +	0x0000f86c, 0x807c817c, +	0x8a7aff6d, 0x80000000, +	0xd7610002, 0x0000f87a, +	0x807c817c, 0xd7610002, +	0x0000f86e, 0x807c817c, +	0xd7610002, 0x0000f86f, +	0x807c817c, 0xd7610002, +	0x0000f878, 0x807c817c, +	0xb97af803, 0xd7610002, +	0x0000f87a, 0x807c817c, +	0xd7610002, 0x0000f87b, +	0x807c817c, 0xb971f801, +	0xd7610002, 0x0000f871, +	0x807c817c, 0xb971f814, +	0xd7610002, 0x0000f871, +	0x807c817c, 0xb971f815, +	0xd7610002, 0x0000f871, +	0x807c817c, 0xbefe03ff, +	0x0000ffff, 0xbeff0380, +	0xe0704000, 0x705d0200, +	0xbefe03c1, 0xb9703a05, +	0x80708170, 0xbf0d9973, +	0xbf850002, 0x8f708970, +	0xbf820001, 0x8f708a70, +	0xb97a1e06, 0x8f7a8a7a, +	0x80707a70, 0xbef603ff, +	0x01000000, 0xbef90380, +	0xbefc0380, 0xbf800000, +	0xbe802f00, 0xbe822f02, +	0xbe842f04, 0xbe862f06, +	0xbe882f08, 0xbe8a2f0a, +	0xbe8c2f0c, 0xbe8e2f0e, +	0xd7610002, 0x0000f200,  	0x80798179, 0xd7610002, -	0x0000f202, 0x80798179, -	0xd7610002, 0x0000f203, +	0x0000f201, 0x80798179, +	0xd7610002, 0x0000f202,  	0x80798179, 0xd7610002, -	0x0000f204, 0x80798179, -	0xd7610002, 0x0000f205, +	0x0000f203, 0x80798179, +	0xd7610002, 0x0000f204,  	0x80798179, 0xd7610002, -	0x0000f206, 0x80798179, -	0xd7610002, 0x0000f207, +	0x0000f205, 0x80798179, +	0xd7610002, 0x0000f206,  	0x80798179, 0xd7610002, -	0x0000f208, 0x80798179, -	0xd7610002, 0x0000f209, +	0x0000f207, 0x80798179, +	0xd7610002, 0x0000f208,  	0x80798179, 0xd7610002, -	0x0000f20a, 0x80798179, -	0xd7610002, 0x0000f20b, +	0x0000f209, 0x80798179, +	0xd7610002, 0x0000f20a,  	0x80798179, 0xd7610002, -	0x0000f20c, 0x80798179, -	0xd7610002, 0x0000f20d, +	0x0000f20b, 0x80798179, +	0xd7610002, 0x0000f20c,  	0x80798179, 0xd7610002, -	0x0000f20e, 0x80798179, -	0xd7610002, 0x0000f20f, -	0x80798179, 0xbf06a079, -	0xbf840006, 0xe0704000, -	0x705d0200, 0x8070ff70, -	0x00000080, 0xbef90380, -	0x7e040280, 0x807c907c, -	0xbf0aff7c, 0x00000060, -	0xbf85ffbc, 0xbe802f00, -	0xbe822f02, 0xbe842f04, -	0xbe862f06, 0xbe882f08, -	0xbe8a2f0a, 0xd7610002, -	0x0000f200, 0x80798179, -	0xd7610002, 0x0000f201, +	0x0000f20d, 0x80798179, +	0xd7610002, 0x0000f20e,  	0x80798179, 0xd7610002, -	0x0000f202, 0x80798179, -	0xd7610002, 0x0000f203, +	0x0000f20f, 0x80798179, +	0xbf06a079, 0xbf840006, +	0xe0704000, 0x705d0200, +	0x8070ff70, 0x00000080, +	0xbef90380, 0x7e040280, +	0x807c907c, 0xbf0aff7c, +	0x00000060, 0xbf85ffbc, +	0xbe802f00, 0xbe822f02, +	0xbe842f04, 0xbe862f06, +	0xbe882f08, 0xbe8a2f0a, +	0xd7610002, 0x0000f200,  	0x80798179, 0xd7610002, -	0x0000f204, 0x80798179, -	0xd7610002, 0x0000f205, +	0x0000f201, 0x80798179, +	0xd7610002, 0x0000f202,  	0x80798179, 0xd7610002, -	0x0000f206, 0x80798179, -	0xd7610002, 0x0000f207, +	0x0000f203, 0x80798179, +	0xd7610002, 0x0000f204,  	0x80798179, 0xd7610002, -	0x0000f208, 0x80798179, -	0xd7610002, 0x0000f209, +	0x0000f205, 0x80798179, +	0xd7610002, 0x0000f206,  	0x80798179, 0xd7610002, -	0x0000f20a, 0x80798179, -	0xd7610002, 0x0000f20b, -	0x80798179, 0xe0704000, -	0x705d0200, 0xbefe03c1, -	0x907c9973, 0x877c817c, -	0xbf06817c, 0xbf850002, -	0xbeff0380, 0xbf820001, -	0xbeff03c1, 0xb97b4306, -	0x877bc17b, 0xbf840044, -	0xbf8a0000, 0x877aff6d, -	0x80000000, 0xbf840040, -	0x8f7b867b, 0x8f7b827b, -	0xbef6037b, 0xb9703a05, -	0x80708170, 0xbf0d9973, -	0xbf850002, 0x8f708970, -	0xbf820001, 0x8f708a70, -	0xb97a1e06, 0x8f7a8a7a, -	0x80707a70, 0x8070ff70, -	0x00000200, 0x8070ff70, -	0x00000080, 0xbef603ff, -	0x01000000, 0xd7650000, -	0x000100c1, 0xd7660000, -	0x000200c1, 0x16000084, -	0x907c9973, 0x877c817c, -	0xbf06817c, 0xbefc0380, -	0xbf850012, 0xbe8303ff, -	0x00000080, 0xbf800000, -	0xbf800000, 0xbf800000, -	0xd8d80000, 0x01000000, -	0xbf8c0000, 0xe0704000, -	0x705d0100, 0x807c037c, -	0x80700370, 0xd5250000, -	0x0001ff00, 0x00000080, -	0xbf0a7b7c, 0xbf85fff4, -	0xbf820011, 0xbe8303ff, -	0x00000100, 0xbf800000, -	0xbf800000, 0xbf800000, -	0xd8d80000, 0x01000000, -	0xbf8c0000, 0xe0704000, -	0x705d0100, 0x807c037c, -	0x80700370, 0xd5250000, -	0x0001ff00, 0x00000100, -	0xbf0a7b7c, 0xbf85fff4, +	0x0000f207, 0x80798179, +	0xd7610002, 0x0000f208, +	0x80798179, 0xd7610002, +	0x0000f209, 0x80798179, +	0xd7610002, 0x0000f20a, +	0x80798179, 0xd7610002, +	0x0000f20b, 0x80798179, +	0xe0704000, 0x705d0200,  	0xbefe03c1, 0x907c9973,  	0x877c817c, 0xbf06817c, -	0xbf850004, 0xbef003ff, -	0x00000200, 0xbeff0380, -	0xbf820003, 0xbef003ff, -	0x00000400, 0xbeff03c1, -	0xb97b3a05, 0x807b817b, -	0x8f7b827b, 0x907c9973, +	0xbf850002, 0xbeff0380, +	0xbf820001, 0xbeff03c1, +	0xb97b4306, 0x877bc17b, +	0xbf840044, 0xbf8a0000, +	0x877aff6d, 0x80000000, +	0xbf840040, 0x8f7b867b, +	0x8f7b827b, 0xbef6037b, +	0xb9703a05, 0x80708170, +	0xbf0d9973, 0xbf850002, +	0x8f708970, 0xbf820001, +	0x8f708a70, 0xb97a1e06, +	0x8f7a8a7a, 0x80707a70, +	0x8070ff70, 0x00000200, +	0x8070ff70, 0x00000080, +	0xbef603ff, 0x01000000, +	0xd7650000, 0x000100c1, +	0xd7660000, 0x000200c1, +	0x16000084, 0x907c9973,  	0x877c817c, 0xbf06817c, -	0xbf850017, 0xbef603ff, -	0x01000000, 0xbefc0384, -	0xbf0a7b7c, 0xbf840037, -	0x7e008700, 0x7e028701, -	0x7e048702, 0x7e068703, -	0xe0704000, 0x705d0000, -	0xe0704080, 0x705d0100, -	0xe0704100, 0x705d0200, -	0xe0704180, 0x705d0300, -	0x807c847c, 0x8070ff70, -	0x00000200, 0xbf0a7b7c, -	0xbf85ffef, 0xbf820025, +	0xbefc0380, 0xbf850012, +	0xbe8303ff, 0x00000080, +	0xbf800000, 0xbf800000, +	0xbf800000, 0xd8d80000, +	0x01000000, 0xbf8c0000, +	0xe0704000, 0x705d0100, +	0x807c037c, 0x80700370, +	0xd5250000, 0x0001ff00, +	0x00000080, 0xbf0a7b7c, +	0xbf85fff4, 0xbf820011, +	0xbe8303ff, 0x00000100, +	0xbf800000, 0xbf800000, +	0xbf800000, 0xd8d80000, +	0x01000000, 0xbf8c0000, +	0xe0704000, 0x705d0100, +	0x807c037c, 0x80700370, +	0xd5250000, 0x0001ff00, +	0x00000100, 0xbf0a7b7c, +	0xbf85fff4, 0xbefe03c1, +	0x907c9973, 0x877c817c, +	0xbf06817c, 0xbf850004, +	0xbef003ff, 0x00000200, +	0xbeff0380, 0xbf820003, +	0xbef003ff, 0x00000400, +	0xbeff03c1, 0xb97b3a05, +	0x807b817b, 0x8f7b827b, +	0x907c9973, 0x877c817c, +	0xbf06817c, 0xbf850017,  	0xbef603ff, 0x01000000,  	0xbefc0384, 0xbf0a7b7c, -	0xbf840011, 0x7e008700, +	0xbf840037, 0x7e008700,  	0x7e028701, 0x7e048702,  	0x7e068703, 0xe0704000, -	0x705d0000, 0xe0704100, -	0x705d0100, 0xe0704200, -	0x705d0200, 0xe0704300, +	0x705d0000, 0xe0704080, +	0x705d0100, 0xe0704100, +	0x705d0200, 0xe0704180,  	0x705d0300, 0x807c847c, -	0x8070ff70, 0x00000400, +	0x8070ff70, 0x00000200,  	0xbf0a7b7c, 0xbf85ffef, -	0xb97b1e06, 0x877bc17b, -	0xbf84000c, 0x8f7b837b, -	0x807b7c7b, 0xbefe03c1, -	0xbeff0380, 0x7e008700, +	0xbf820025, 0xbef603ff, +	0x01000000, 0xbefc0384, +	0xbf0a7b7c, 0xbf840011, +	0x7e008700, 0x7e028701, +	0x7e048702, 0x7e068703,  	0xe0704000, 0x705d0000, -	0x807c817c, 0x8070ff70, -	0x00000080, 0xbf0a7b7c, -	0xbf85fff8, 0xbf82013b, -	0xbef4037e, 0x8775ff7f, -	0x0000ffff, 0x8875ff75, -	0x00040000, 0xbef60380, -	0xbef703ff, 0x10807fac, -	0xb97202dc, 0x8f729972, -	0x876eff7f, 0x04000000, -	0xbf840034, 0xbefe03c1, -	0x907c9972, 0x877c817c, -	0xbf06817c, 0xbf850002, -	0xbeff0380, 0xbf820001, -	0xbeff03c1, 0xb96f4306, -	0x876fc16f, 0xbf840029, -	0x8f6f866f, 0x8f6f826f, -	0xbef6036f, 0xb9783a05, -	0x80788178, 0xbf0d9972, -	0xbf850002, 0x8f788978, -	0xbf820001, 0x8f788a78, -	0xb96e1e06, 0x8f6e8a6e, -	0x80786e78, 0x8078ff78, -	0x00000200, 0x8078ff78, -	0x00000080, 0xbef603ff, -	0x01000000, 0x907c9972, -	0x877c817c, 0xbf06817c, -	0xbefc0380, 0xbf850009, -	0xe0310000, 0x781d0000, -	0x807cff7c, 0x00000080, -	0x8078ff78, 0x00000080, -	0xbf0a6f7c, 0xbf85fff8, -	0xbf820008, 0xe0310000, -	0x781d0000, 0x807cff7c, -	0x00000100, 0x8078ff78, -	0x00000100, 0xbf0a6f7c, -	0xbf85fff8, 0xbef80380, +	0xe0704100, 0x705d0100, +	0xe0704200, 0x705d0200, +	0xe0704300, 0x705d0300, +	0x807c847c, 0x8070ff70, +	0x00000400, 0xbf0a7b7c, +	0xbf85ffef, 0xb97b1e06, +	0x877bc17b, 0xbf84000c, +	0x8f7b837b, 0x807b7c7b, +	0xbefe03c1, 0xbeff0380, +	0x7e008700, 0xe0704000, +	0x705d0000, 0x807c817c, +	0x8070ff70, 0x00000080, +	0xbf0a7b7c, 0xbf85fff8, +	0xbf82013b, 0xbef4037e, +	0x8775ff7f, 0x0000ffff, +	0x8875ff75, 0x00040000, +	0xbef60380, 0xbef703ff, +	0x10807fac, 0xb97202dc, +	0x8f729972, 0x876eff7f, +	0x04000000, 0xbf840034,  	0xbefe03c1, 0x907c9972,  	0x877c817c, 0xbf06817c,  	0xbf850002, 0xbeff0380,  	0xbf820001, 0xbeff03c1, -	0xb96f3a05, 0x806f816f, -	0x8f6f826f, 0x907c9972, -	0x877c817c, 0xbf06817c, -	0xbf850024, 0xbef603ff, -	0x01000000, 0xbeee0378, +	0xb96f4306, 0x876fc16f, +	0xbf840029, 0x8f6f866f, +	0x8f6f826f, 0xbef6036f, +	0xb9783a05, 0x80788178, +	0xbf0d9972, 0xbf850002, +	0x8f788978, 0xbf820001, +	0x8f788a78, 0xb96e1e06, +	0x8f6e8a6e, 0x80786e78,  	0x8078ff78, 0x00000200, -	0xbefc0384, 0xbf0a6f7c, -	0xbf840050, 0xe0304000, -	0x785d0000, 0xe0304080, -	0x785d0100, 0xe0304100, -	0x785d0200, 0xe0304180, -	0x785d0300, 0xbf8c3f70, -	0x7e008500, 0x7e028501, -	0x7e048502, 0x7e068503, -	0x807c847c, 0x8078ff78, -	0x00000200, 0xbf0a6f7c, -	0xbf85ffee, 0xe0304000, -	0x6e5d0000, 0xe0304080, -	0x6e5d0100, 0xe0304100, -	0x6e5d0200, 0xe0304180, -	0x6e5d0300, 0xbf8c3f70, -	0xbf820034, 0xbef603ff, -	0x01000000, 0xbeee0378, -	0x8078ff78, 0x00000400, -	0xbefc0384, 0xbf0a6f7c, -	0xbf840012, 0xe0304000, -	0x785d0000, 0xe0304100, -	0x785d0100, 0xe0304200, -	0x785d0200, 0xe0304300, -	0x785d0300, 0xbf8c3f70, -	0x7e008500, 0x7e028501, -	0x7e048502, 0x7e068503, -	0x807c847c, 0x8078ff78, -	0x00000400, 0xbf0a6f7c, -	0xbf85ffee, 0xb96f1e06, -	0x876fc16f, 0xbf84000e, -	0x8f6f836f, 0x806f7c6f, -	0xbefe03c1, 0xbeff0380, +	0x8078ff78, 0x00000080, +	0xbef603ff, 0x01000000, +	0x907c9972, 0x877c817c, +	0xbf06817c, 0xbefc0380, +	0xbf850009, 0xe0310000, +	0x781d0000, 0x807cff7c, +	0x00000080, 0x8078ff78, +	0x00000080, 0xbf0a6f7c, +	0xbf85fff8, 0xbf820008, +	0xe0310000, 0x781d0000, +	0x807cff7c, 0x00000100, +	0x8078ff78, 0x00000100, +	0xbf0a6f7c, 0xbf85fff8, +	0xbef80380, 0xbefe03c1, +	0x907c9972, 0x877c817c, +	0xbf06817c, 0xbf850002, +	0xbeff0380, 0xbf820001, +	0xbeff03c1, 0xb96f3a05, +	0x806f816f, 0x8f6f826f, +	0x907c9972, 0x877c817c, +	0xbf06817c, 0xbf850024, +	0xbef603ff, 0x01000000, +	0xbeee0378, 0x8078ff78, +	0x00000200, 0xbefc0384, +	0xbf0a6f7c, 0xbf840050,  	0xe0304000, 0x785d0000, +	0xe0304080, 0x785d0100, +	0xe0304100, 0x785d0200, +	0xe0304180, 0x785d0300,  	0xbf8c3f70, 0x7e008500, -	0x807c817c, 0x8078ff78, -	0x00000080, 0xbf0a6f7c, -	0xbf85fff7, 0xbeff03c1, +	0x7e028501, 0x7e048502, +	0x7e068503, 0x807c847c, +	0x8078ff78, 0x00000200, +	0xbf0a6f7c, 0xbf85ffee,  	0xe0304000, 0x6e5d0000, -	0xe0304100, 0x6e5d0100, -	0xe0304200, 0x6e5d0200, -	0xe0304300, 0x6e5d0300, -	0xbf8c3f70, 0xb9783a05, -	0x80788178, 0xbf0d9972, -	0xbf850002, 0x8f788978, -	0xbf820001, 0x8f788a78, -	0xb96e1e06, 0x8f6e8a6e, -	0x80786e78, 0x8078ff78, -	0x00000200, 0x80f8ff78, -	0x00000050, 0xbef603ff, -	0x01000000, 0xbefc03ff, -	0x0000006c, 0x80f89078, -	0xf429003a, 0xf0000000, -	0xbf8cc07f, 0x80fc847c, -	0xbf800000, 0xbe803100, -	0xbe823102, 0x80f8a078, -	0xf42d003a, 0xf0000000, -	0xbf8cc07f, 0x80fc887c, -	0xbf800000, 0xbe803100, -	0xbe823102, 0xbe843104, -	0xbe863106, 0x80f8c078, -	0xf431003a, 0xf0000000, -	0xbf8cc07f, 0x80fc907c, -	0xbf800000, 0xbe803100, -	0xbe823102, 0xbe843104, -	0xbe863106, 0xbe883108, -	0xbe8a310a, 0xbe8c310c, -	0xbe8e310e, 0xbf06807c, -	0xbf84fff0, 0xba80f801, -	0x00000000, 0xbf8a0000, +	0xe0304080, 0x6e5d0100, +	0xe0304100, 0x6e5d0200, +	0xe0304180, 0x6e5d0300, +	0xbf8c3f70, 0xbf820034, +	0xbef603ff, 0x01000000, +	0xbeee0378, 0x8078ff78, +	0x00000400, 0xbefc0384, +	0xbf0a6f7c, 0xbf840012, +	0xe0304000, 0x785d0000, +	0xe0304100, 0x785d0100, +	0xe0304200, 0x785d0200, +	0xe0304300, 0x785d0300, +	0xbf8c3f70, 0x7e008500, +	0x7e028501, 0x7e048502, +	0x7e068503, 0x807c847c, +	0x8078ff78, 0x00000400, +	0xbf0a6f7c, 0xbf85ffee, +	0xb96f1e06, 0x876fc16f, +	0xbf84000e, 0x8f6f836f, +	0x806f7c6f, 0xbefe03c1, +	0xbeff0380, 0xe0304000, +	0x785d0000, 0xbf8c3f70, +	0x7e008500, 0x807c817c, +	0x8078ff78, 0x00000080, +	0xbf0a6f7c, 0xbf85fff7, +	0xbeff03c1, 0xe0304000, +	0x6e5d0000, 0xe0304100, +	0x6e5d0100, 0xe0304200, +	0x6e5d0200, 0xe0304300, +	0x6e5d0300, 0xbf8c3f70,  	0xb9783a05, 0x80788178,  	0xbf0d9972, 0xbf850002,  	0x8f788978, 0xbf820001,  	0x8f788a78, 0xb96e1e06,  	0x8f6e8a6e, 0x80786e78,  	0x8078ff78, 0x00000200, +	0x80f8ff78, 0x00000050,  	0xbef603ff, 0x01000000, -	0xf4211bfa, 0xf0000000, -	0x80788478, 0xf4211b3a, +	0xbefc03ff, 0x0000006c, +	0x80f89078, 0xf429003a, +	0xf0000000, 0xbf8cc07f, +	0x80fc847c, 0xbf800000, +	0xbe803100, 0xbe823102, +	0x80f8a078, 0xf42d003a, +	0xf0000000, 0xbf8cc07f, +	0x80fc887c, 0xbf800000, +	0xbe803100, 0xbe823102, +	0xbe843104, 0xbe863106, +	0x80f8c078, 0xf431003a, +	0xf0000000, 0xbf8cc07f, +	0x80fc907c, 0xbf800000, +	0xbe803100, 0xbe823102, +	0xbe843104, 0xbe863106, +	0xbe883108, 0xbe8a310a, +	0xbe8c310c, 0xbe8e310e, +	0xbf06807c, 0xbf84fff0, +	0xba80f801, 0x00000000, +	0xbf8a0000, 0xb9783a05, +	0x80788178, 0xbf0d9972, +	0xbf850002, 0x8f788978, +	0xbf820001, 0x8f788a78, +	0xb96e1e06, 0x8f6e8a6e, +	0x80786e78, 0x8078ff78, +	0x00000200, 0xbef603ff, +	0x01000000, 0xf4211bfa,  	0xf0000000, 0x80788478, -	0xf4211b7a, 0xf0000000, -	0x80788478, 0xf4211c3a, +	0xf4211b3a, 0xf0000000, +	0x80788478, 0xf4211b7a,  	0xf0000000, 0x80788478, -	0xf4211c7a, 0xf0000000, -	0x80788478, 0xf4211eba, +	0xf4211c3a, 0xf0000000, +	0x80788478, 0xf4211c7a,  	0xf0000000, 0x80788478, -	0xf4211efa, 0xf0000000, -	0x80788478, 0xf4211e7a, +	0xf4211eba, 0xf0000000, +	0x80788478, 0xf4211efa,  	0xf0000000, 0x80788478, -	0xf4211cfa, 0xf0000000, -	0x80788478, 0xf4211bba, +	0xf4211e7a, 0xf0000000, +	0x80788478, 0xf4211cfa,  	0xf0000000, 0x80788478, -	0xbf8cc07f, 0xb9eef814,  	0xf4211bba, 0xf0000000,  	0x80788478, 0xbf8cc07f, -	0xb9eef815, 0xbefc036f, -	0xbefe0370, 0xbeff0371, -	0x876f7bff, 0x000003ff, -	0xb9ef4803, 0x876f7bff, -	0xfffff800, 0x906f8b6f, -	0xb9efa2c3, 0xb9f3f801, -	0xb96e3a05, 0x806e816e, -	0xbf0d9972, 0xbf850002, -	0x8f6e896e, 0xbf820001, -	0x8f6e8a6e, 0xb96f1e06, -	0x8f6f8a6f, 0x806e6f6e, -	0x806eff6e, 0x00000200, -	0x806e746e, 0x826f8075, -	0x876fff6f, 0x0000ffff, -	0xf4091c37, 0xfa000050, -	0xf4091d37, 0xfa000060, -	0xf4011e77, 0xfa000074, -	0xbf8cc07f, 0x876dff6d, -	0x0000ffff, 0x87fe7e7e, -	0x87ea6a6a, 0xb9faf802, -	0xbe80226c, 0xbf810000, +	0xb9eef814, 0xf4211bba, +	0xf0000000, 0x80788478, +	0xbf8cc07f, 0xb9eef815, +	0xbefc036f, 0xbefe0370, +	0xbeff0371, 0x876f7bff, +	0x000003ff, 0xb9ef4803, +	0x876f7bff, 0xfffff800, +	0x906f8b6f, 0xb9efa2c3, +	0xb9f3f801, 0xb96e3a05, +	0x806e816e, 0xbf0d9972, +	0xbf850002, 0x8f6e896e, +	0xbf820001, 0x8f6e8a6e, +	0xb96f1e06, 0x8f6f8a6f, +	0x806e6f6e, 0x806eff6e, +	0x00000200, 0x806e746e, +	0x826f8075, 0x876fff6f, +	0x0000ffff, 0xf4091c37, +	0xfa000050, 0xf4091d37, +	0xfa000060, 0xf4011e77, +	0xfa000074, 0xbf8cc07f, +	0x876dff6d, 0x0000ffff, +	0x87fe7e7e, 0x87ea6a6a, +	0xb9faf802, 0xbe80226c, +	0xbf810000, 0xbf9f0000,  	0xbf9f0000, 0xbf9f0000,  	0xbf9f0000, 0xbf9f0000, -	0xbf9f0000, 0x00000000,  };  static const uint32_t cwsr_trap_gfx11_hex[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index fdab64624422..e0140df0b0ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -369,6 +369,12 @@ L_SLEEP:  	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp  #if NO_SQC_STORE +#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID +	// gfx10: If there was a VALU exception, the exception state must be +	// cleared before executing the VALU instructions below. +	v_clrexcp +#endif +  	// Trap temporaries must be saved via VGPR but all VGPRs are in use.  	// There is no ttmp space to hold the resource constant for VGPR save.  	// Save v0 by itself since it requires only two SGPRs. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f6d4748c1980..ce4c52ec34d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1564,16 +1564,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,  {  	struct kfd_ioctl_import_dmabuf_args *args = data;  	struct kfd_process_device *pdd; -	struct dma_buf *dmabuf;  	int idr_handle;  	uint64_t size;  	void *mem;  	int r; -	dmabuf = dma_buf_get(args->dmabuf_fd); -	if (IS_ERR(dmabuf)) -		return PTR_ERR(dmabuf); -  	mutex_lock(&p->mutex);  	pdd = kfd_process_device_data_by_id(p, args->gpu_id);  	if (!pdd) { @@ -1587,10 +1582,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,  		goto err_unlock;  	} -	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf, -					      args->va_addr, pdd->drm_priv, -					      (struct kgd_mem **)&mem, &size, -					      NULL); +	r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd, +						 args->va_addr, pdd->drm_priv, +						 (struct kgd_mem **)&mem, &size, +						 NULL);  	if (r)  		goto err_unlock; @@ -1601,7 +1596,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,  	}  	mutex_unlock(&p->mutex); -	dma_buf_put(dmabuf);  	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1612,7 +1606,6 @@ err_free:  					       pdd->drm_priv, NULL);  err_unlock:  	mutex_unlock(&p->mutex); -	dma_buf_put(dmabuf);  	return r;  } @@ -1855,8 +1848,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)  	return num_of_bos;  } -static int criu_get_prime_handle(struct kgd_mem *mem, int flags, -				      u32 *shared_fd) +static int criu_get_prime_handle(struct kgd_mem *mem, +				 int flags, u32 *shared_fd)  {  	struct dma_buf *dmabuf;  	int ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 0f58be65132f..739721254a5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -880,6 +880,10 @@ static int copy_signaled_event_data(uint32_t num_events,  				dst = &data[i].memory_exception_data;  				src = &event->memory_exception_data;  				size = sizeof(struct kfd_hsa_memory_exception_data); +			} else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) { +				dst = &data[i].memory_exception_data; +				src = &event->hw_exception_data; +				size = sizeof(struct kfd_hsa_hw_exception_data);  			} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&  				waiter->event_age_enabled) {  				dst = &data[i].signal_event_data.last_event_age; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 62b205dac63a..6604a3f99c5e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -330,12 +330,6 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)  	pdd->gpuvm_limit =  		pdd->dev->kfd->shared_resources.gpuvm_size - 1; -	/* dGPUs: the reserved space for kernel -	 * before SVM -	 */ -	pdd->qpd.cwsr_base = SVM_CWSR_BASE; -	pdd->qpd.ib_base = SVM_IB_BASE; -  	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();  	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);  } @@ -345,18 +339,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)  	pdd->lds_base = MAKE_LDS_APP_BASE_V9();  	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); -	pdd->gpuvm_base = PAGE_SIZE; +        /* Raven needs SVM to support graphic handle, etc. Leave the small +         * reserved space before SVM on Raven as well, even though we don't +         * have to. +         * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they +         * are used in Thunk to reserve SVM. +         */ +        pdd->gpuvm_base = SVM_USER_BASE;  	pdd->gpuvm_limit =  		pdd->dev->kfd->shared_resources.gpuvm_size - 1;  	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();  	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); - -	/* -	 * Place TBA/TMA on opposite side of VM hole to prevent -	 * stray faults from triggering SVM on these pages. -	 */ -	pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;  }  int kfd_init_apertures(struct kfd_process *process) @@ -413,6 +407,12 @@ int kfd_init_apertures(struct kfd_process *process)  					return -EINVAL;  				}  			} + +                        /* dGPUs: the reserved space for kernel +                         * before SVM +                         */ +                        pdd->qpd.cwsr_base = SVM_CWSR_BASE; +                        pdd->qpd.ib_base = SVM_IB_BASE;  		}  		dev_dbg(kfd_device, "node id %u\n", id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..d630100b9e91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)  	put_page(page);  } -static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) -{ -	unsigned long cpages = 0; -	unsigned long i; - -	for (i = 0; i < migrate->npages; i++) { -		if (migrate->src[i] & MIGRATE_PFN_VALID && -		    migrate->src[i] & MIGRATE_PFN_MIGRATE) -			cpages++; -	} -	return cpages; -} -  static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)  {  	unsigned long upages = 0; @@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,  	struct dma_fence *mfence = NULL;  	struct migrate_vma migrate = { 0 };  	unsigned long cpages = 0; +	unsigned long mpages = 0;  	dma_addr_t *scratch;  	void *buf;  	int r = -ENOMEM; @@ -442,20 +430,21 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,  		goto out_free;  	}  	if (cpages != npages) -		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", +		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",  			 cpages, npages);  	else -		pr_debug("0x%lx pages migrated\n", cpages); +		pr_debug("0x%lx pages collected\n", cpages);  	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);  	migrate_vma_pages(&migrate); -	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", -		svm_migrate_successful_pages(&migrate), cpages, migrate.npages); -  	svm_migrate_copy_done(adev, mfence);  	migrate_vma_finalize(&migrate); +	mpages = cpages - svm_migrate_unsuccessful_pages(&migrate); +	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", +			 mpages, cpages, migrate.npages); +  	kfd_smi_event_migration_end(node, p->lead_thread->pid,  				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,  				    0, node->id, trigger); @@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,  out_free:  	kvfree(buf);  out: -	if (!r && cpages) { +	if (!r && mpages) {  		pdd = svm_range_get_pdd_by_node(prange, node);  		if (pdd) -			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); +			WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); -		return cpages; +		return mpages;  	}  	return r;  } @@ -479,6 +468,8 @@ out:   * svm_migrate_ram_to_vram - migrate svm range from system to device   * @prange: range structure   * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate   * @mm: the process mm structure   * @trigger: reason of migration   * @@ -489,19 +480,20 @@ out:   */  static int  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, +			unsigned long start_mgr, unsigned long last_mgr,  			struct mm_struct *mm, uint32_t trigger)  {  	unsigned long addr, start, end;  	struct vm_area_struct *vma;  	uint64_t ttm_res_offset;  	struct kfd_node *node; -	unsigned long cpages = 0; +	unsigned long mpages = 0;  	long r = 0; -	if (prange->actual_loc == best_loc) { -		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", -			 prange->svms, prange->start, prange->last, best_loc); -		return 0; +	if (start_mgr < prange->start || last_mgr > prange->last) { +		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +			 start_mgr, last_mgr, prange->start, prange->last); +		return -EFAULT;  	}  	node = svm_range_get_node_by_id(prange, best_loc); @@ -510,18 +502,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,  		return -ENODEV;  	} -	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, -		 prange->start, prange->last, best_loc); +	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", +		prange->svms, start_mgr, last_mgr, prange->start, prange->last, +		best_loc); -	start = prange->start << PAGE_SHIFT; -	end = (prange->last + 1) << PAGE_SHIFT; +	start = start_mgr << PAGE_SHIFT; +	end = (last_mgr + 1) << PAGE_SHIFT;  	r = svm_range_vram_node_new(node, prange, true);  	if (r) {  		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);  		return r;  	} -	ttm_res_offset = prange->offset << PAGE_SHIFT; +	ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;  	for (addr = start; addr < end;) {  		unsigned long next; @@ -536,16 +529,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,  			pr_debug("failed %ld to migrate\n", r);  			break;  		} else { -			cpages += r; +			mpages += r;  		}  		ttm_res_offset += next - addr;  		addr = next;  	} -	if (cpages) { +	if (mpages) {  		prange->actual_loc = best_loc; -		svm_range_dma_unmap(prange); -	} else { +		prange->vram_pages += mpages; +	} else if (!prange->actual_loc) { +		/* if no page migrated and all pages from prange are at +		 * sys ram drop svm_bo got from svm_range_vram_node_new +		 */  		svm_range_vram_node_free(prange);  	} @@ -663,9 +659,8 @@ out_oom:   * Context: Process context, caller hold mmap read lock, prange->migrate_mutex   *   * Return: - *   0 - success with all pages migrated   *   negative values - indicate error - *   positive values - partial migration, number of pages not migrated + *   positive values or zero - number of pages got migrated   */  static long  svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, @@ -676,6 +671,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,  	uint64_t npages = (end - start) >> PAGE_SHIFT;  	unsigned long upages = npages;  	unsigned long cpages = 0; +	unsigned long mpages = 0;  	struct amdgpu_device *adev = node->adev;  	struct kfd_process_device *pdd;  	struct dma_fence *mfence = NULL; @@ -725,10 +721,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,  		goto out_free;  	}  	if (cpages != npages) -		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", +		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",  			 cpages, npages);  	else -		pr_debug("0x%lx pages migrated\n", cpages); +		pr_debug("0x%lx pages collected\n", cpages);  	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,  				    scratch, npages); @@ -751,17 +747,21 @@ out_free:  	kvfree(buf);  out:  	if (!r && cpages) { +		mpages = cpages - upages;  		pdd = svm_range_get_pdd_by_node(prange, node);  		if (pdd) -			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); +			WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);  	} -	return r ? r : upages; + +	return r ? r : mpages;  }  /**   * svm_migrate_vram_to_ram - migrate svm range from device to system   * @prange: range structure   * @mm: process mm, use current->mm if NULL + * @start_mgr: start page need be migrated to sys ram + * @last_mgr: last page need be migrated to sys ram   * @trigger: reason of migration   * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback   * @@ -771,6 +771,7 @@ out:   * 0 - OK, otherwise error code   */  int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, +			    unsigned long start_mgr, unsigned long last_mgr,  			    uint32_t trigger, struct page *fault_page)  {  	struct kfd_node *node; @@ -778,26 +779,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,  	unsigned long addr;  	unsigned long start;  	unsigned long end; -	unsigned long upages = 0; +	unsigned long mpages = 0;  	long r = 0; +	/* this pragne has no any vram page to migrate to sys ram */  	if (!prange->actual_loc) {  		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",  			 prange->start, prange->last);  		return 0;  	} +	if (start_mgr < prange->start || last_mgr > prange->last) { +		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", +			 start_mgr, last_mgr, prange->start, prange->last); +		return -EFAULT; +	} +  	node = svm_range_get_node_by_id(prange, prange->actual_loc);  	if (!node) {  		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);  		return -ENODEV;  	}  	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n", -		 prange->svms, prange, prange->start, prange->last, +		 prange->svms, prange, start_mgr, last_mgr,  		 prange->actual_loc); -	start = prange->start << PAGE_SHIFT; -	end = (prange->last + 1) << PAGE_SHIFT; +	start = start_mgr << PAGE_SHIFT; +	end = (last_mgr + 1) << PAGE_SHIFT;  	for (addr = start; addr < end;) {  		unsigned long next; @@ -816,14 +824,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,  			pr_debug("failed %ld to migrate prange %p\n", r, prange);  			break;  		} else { -			upages += r; +			mpages += r;  		}  		addr = next;  	} -	if (r >= 0 && !upages) { -		svm_range_vram_node_free(prange); -		prange->actual_loc = 0; +	if (r >= 0) { +		prange->vram_pages -= mpages; + +		/* prange does not have vram page set its actual_loc to system +		 * and drop its svm_bo ref +		 */ +		if (prange->vram_pages == 0 && prange->ttm_res) { +			prange->actual_loc = 0; +			svm_range_vram_node_free(prange); +		}  	}  	return r < 0 ? r : 0; @@ -833,17 +848,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,   * svm_migrate_vram_to_vram - migrate svm range from device to device   * @prange: range structure   * @best_loc: the device to migrate to + * @start: start page need be migrated to sys ram + * @last: last page need be migrated to sys ram   * @mm: process mm, use current->mm if NULL   * @trigger: reason of migration   *   * Context: Process context, caller hold mmap read lock, svms lock, prange lock   * + * migrate all vram pages in prange to sys ram, then migrate + * [start, last] pages from sys ram to gpu node best_loc. + *   * Return:   * 0 - OK, otherwise error code   */  static int  svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, -			 struct mm_struct *mm, uint32_t trigger) +			unsigned long start, unsigned long last, +			struct mm_struct *mm, uint32_t trigger)  {  	int r, retries = 3; @@ -855,7 +876,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,  	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);  	do { -		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL); +		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last, +					    trigger, NULL);  		if (r)  			return r;  	} while (prange->actual_loc && --retries); @@ -863,17 +885,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,  	if (prange->actual_loc)  		return -EDEADLK; -	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); +	return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);  }  int  svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, +		    unsigned long start, unsigned long last,  		    struct mm_struct *mm, uint32_t trigger)  { -	if  (!prange->actual_loc) -		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); +	if  (!prange->actual_loc || prange->actual_loc == best_loc) +		return svm_migrate_ram_to_vram(prange, best_loc, start, last, +					       mm, trigger); +  	else -		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger); +		return svm_migrate_vram_to_vram(prange, best_loc, start, last, +						mm, trigger);  } @@ -889,10 +915,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,   */  static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)  { +	unsigned long start, last, size;  	unsigned long addr = vmf->address;  	struct svm_range_bo *svm_bo; -	enum svm_work_list_ops op; -	struct svm_range *parent;  	struct svm_range *prange;  	struct kfd_process *p;  	struct mm_struct *mm; @@ -929,51 +954,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)  	mutex_lock(&p->svms.lock); -	prange = svm_range_from_addr(&p->svms, addr, &parent); +	prange = svm_range_from_addr(&p->svms, addr, NULL);  	if (!prange) {  		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);  		r = -EFAULT;  		goto out_unlock_svms;  	} -	mutex_lock(&parent->migrate_mutex); -	if (prange != parent) -		mutex_lock_nested(&prange->migrate_mutex, 1); +	mutex_lock(&prange->migrate_mutex);  	if (!prange->actual_loc)  		goto out_unlock_prange; -	svm_range_lock(parent); -	if (prange != parent) -		mutex_lock_nested(&prange->lock, 1); -	r = svm_range_split_by_granularity(p, mm, addr, parent, prange); -	if (prange != parent) -		mutex_unlock(&prange->lock); -	svm_range_unlock(parent); -	if (r) { -		pr_debug("failed %d to split range by granularity\n", r); -		goto out_unlock_prange; -	} +	/* Align migration range start and size to granularity size */ +	size = 1UL << prange->granularity; +	start = max(ALIGN_DOWN(addr, size), prange->start); +	last = min(ALIGN(addr + 1, size) - 1, prange->last); -	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, -				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, -				    vmf->page); +	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last, +				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);  	if (r)  		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", -			 r, prange->svms, prange, prange->start, prange->last); - -	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ -	if (p->xnack_enabled && parent == prange) -		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; -	else -		op = SVM_OP_UPDATE_RANGE_NOTIFIER; -	svm_range_add_list_work(&p->svms, parent, mm, op); -	schedule_deferred_list_work(&p->svms); +			r, prange->svms, prange, start, last);  out_unlock_prange: -	if (prange != parent) -		mutex_unlock(&prange->migrate_mutex); -	mutex_unlock(&parent->migrate_mutex); +	mutex_unlock(&prange->migrate_mutex);  out_unlock_svms:  	mutex_unlock(&p->svms.lock);  out_unref_process: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 487f26368164..2eebf67f9c2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {  };  int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc, +			unsigned long start, unsigned long last,  			struct mm_struct *mm, uint32_t trigger); +  int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, +			    unsigned long start, unsigned long last,  			    uint32_t trigger, struct page *fault_page); +  unsigned long  svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4c8e278a0d0c..745024b31340 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -748,7 +748,6 @@ struct kfd_process_device {  	/* VM context for GPUVM allocations */  	struct file *drm_file;  	void *drm_priv; -	atomic64_t tlb_seq;  	/* GPUVM allocations storage */  	struct idr alloc_idr; @@ -971,7 +970,7 @@ struct kfd_process {  	struct work_struct debug_event_workarea;  	/* Tracks debug per-vmid request for debug flags */ -	bool dbg_flags; +	u32 dbg_flags;  	atomic_t poison;  	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */ @@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);  void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); +static inline void kfd_flush_tlb(struct kfd_process_device *pdd, +				 enum TLB_FLUSH_TYPE type) +{ +	struct amdgpu_device *adev = pdd->dev->adev; +	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); + +	amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); +}  static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)  { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7a33e06f5c90..71df51fcc1b0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -664,7 +664,8 @@ int kfd_process_create_wq(void)  	if (!kfd_process_wq)  		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);  	if (!kfd_restore_wq) -		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0); +		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", +							 WQ_FREEZABLE);  	if (!kfd_process_wq || !kfd_restore_wq) {  		kfd_process_destroy_wq(); @@ -1642,6 +1643,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,  	struct amdgpu_fpriv *drv_priv;  	struct amdgpu_vm *avm;  	struct kfd_process *p; +	struct dma_fence *ef;  	struct kfd_node *dev;  	int ret; @@ -1661,13 +1663,13 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,  	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,  						     &p->kgd_process_info, -						     &p->ef); +						     &ef);  	if (ret) {  		pr_err("Failed to create process VM object\n");  		return ret;  	} +	RCU_INIT_POINTER(p->ef, ef);  	pdd->drm_priv = drm_file->private_data; -	atomic64_set(&pdd->tlb_seq, 0);  	ret = kfd_process_device_reserve_ib_mem(pdd);  	if (ret) @@ -1909,6 +1911,21 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,  	return -EINVAL;  } +static int signal_eviction_fence(struct kfd_process *p) +{ +	struct dma_fence *ef; +	int ret; + +	rcu_read_lock(); +	ef = dma_fence_get_rcu_safe(&p->ef); +	rcu_read_unlock(); + +	ret = dma_fence_signal(ef); +	dma_fence_put(ef); + +	return ret; +} +  static void evict_process_worker(struct work_struct *work)  {  	int ret; @@ -1921,31 +1938,46 @@ static void evict_process_worker(struct work_struct *work)  	 * lifetime of this thread, kfd_process p will be valid  	 */  	p = container_of(dwork, struct kfd_process, eviction_work); -	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno, -		  "Eviction fence mismatch\n"); - -	/* Narrow window of overlap between restore and evict work -	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos -	 * unreserves KFD BOs, it is possible to evicted again. But -	 * restore has few more steps of finish. So lets wait for any -	 * previous restore work to complete -	 */ -	flush_delayed_work(&p->restore_work);  	pr_debug("Started evicting pasid 0x%x\n", p->pasid);  	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);  	if (!ret) { -		dma_fence_signal(p->ef); -		dma_fence_put(p->ef); -		p->ef = NULL; -		queue_delayed_work(kfd_restore_wq, &p->restore_work, +		/* If another thread already signaled the eviction fence, +		 * they are responsible stopping the queues and scheduling +		 * the restore work. +		 */ +		if (!signal_eviction_fence(p)) +			queue_delayed_work(kfd_restore_wq, &p->restore_work,  				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); +		else +			kfd_process_restore_queues(p);  		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);  	} else  		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);  } +static int restore_process_helper(struct kfd_process *p) +{ +	int ret = 0; + +	/* VMs may not have been acquired yet during debugging. */ +	if (p->kgd_process_info) { +		ret = amdgpu_amdkfd_gpuvm_restore_process_bos( +			p->kgd_process_info, &p->ef); +		if (ret) +			return ret; +	} + +	ret = kfd_process_restore_queues(p); +	if (!ret) +		pr_debug("Finished restoring pasid 0x%x\n", p->pasid); +	else +		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); + +	return ret; +} +  static void restore_process_worker(struct work_struct *work)  {  	struct delayed_work *dwork; @@ -1971,24 +2003,15 @@ static void restore_process_worker(struct work_struct *work)  	 */  	p->last_restore_timestamp = get_jiffies_64(); -	/* VMs may not have been acquired yet during debugging. */ -	if (p->kgd_process_info) -		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, -							     &p->ef); + +	ret = restore_process_helper(p);  	if (ret) {  		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",  			 p->pasid, PROCESS_BACK_OFF_TIME_MS);  		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,  				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));  		WARN(!ret, "reschedule restore work failed\n"); -		return;  	} - -	ret = kfd_process_restore_queues(p); -	if (!ret) -		pr_debug("Finished restoring pasid 0x%x\n", p->pasid); -	else -		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);  }  void kfd_suspend_all_processes(void) @@ -1999,14 +2022,9 @@ void kfd_suspend_all_processes(void)  	WARN(debug_evictions, "Evicting all processes");  	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { -		cancel_delayed_work_sync(&p->eviction_work); -		flush_delayed_work(&p->restore_work); -  		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))  			pr_err("Failed to suspend process 0x%x\n", p->pasid); -		dma_fence_signal(p->ef); -		dma_fence_put(p->ef); -		p->ef = NULL; +		signal_eviction_fence(p);  	}  	srcu_read_unlock(&kfd_processes_srcu, idx);  } @@ -2018,7 +2036,7 @@ int kfd_resume_all_processes(void)  	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);  	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { -		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { +		if (restore_process_helper(p)) {  			pr_err("Restore process %d failed during resume\n",  			       p->pasid);  			ret = -EFAULT; @@ -2059,36 +2077,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,  			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);  } -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) -{ -	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); -	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); -	struct kfd_node *dev = pdd->dev; -	uint32_t xcc_mask = dev->xcc_mask; -	int xcc = 0; - -	/* -	 * It can be that we race and lose here, but that is extremely unlikely -	 * and the worst thing which could happen is that we flush the changes -	 * into the TLB once more which is harmless. -	 */ -	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq) -		return; - -	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { -		/* Nothing to flush until a VMID is assigned, which -		 * only happens when the first queue is created. -		 */ -		if (pdd->qpd.vmid) -			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, -							pdd->qpd.vmid); -	} else { -		for_each_inst(xcc, xcc_mask) -			amdgpu_amdkfd_flush_gpu_tlb_pasid( -				dev->adev, pdd->process->pasid, type, xcc); -	} -} -  /* assumes caller holds process lock. */  int kfd_process_drain_interrupts(struct kfd_process_device *pdd)  { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 77f493262e05..43eff221eae5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -87,6 +87,8 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)  		return;  	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); +	if (dev->kfd->shared_resources.enable_mes) +		amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);  	pdd->already_dequeued = true;  } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index a15bfb5223e8..ac84c4a2ca07 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -198,6 +198,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,  		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",  				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));  	} +  	return 0;  } @@ -349,6 +350,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,  	INIT_LIST_HEAD(&prange->child_list);  	atomic_set(&prange->invalid, 0);  	prange->validate_timestamp = 0; +	prange->vram_pages = 0;  	mutex_init(&prange->migrate_mutex);  	mutex_init(&prange->lock); @@ -395,6 +397,8 @@ static void svm_range_bo_release(struct kref *kref)  			 prange->start, prange->last);  		mutex_lock(&prange->lock);  		prange->svm_bo = NULL; +		/* prange should not hold vram page now */ +		WARN_ONCE(prange->actual_loc, "prange should not hold vram page");  		mutex_unlock(&prange->lock);  		spin_lock(&svm_bo->list_lock); @@ -878,14 +882,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms)  static void *  svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements, -		     uint64_t offset) +		     uint64_t offset, uint64_t *vram_pages)  { +	unsigned char *src = (unsigned char *)psrc + offset;  	unsigned char *dst; +	uint64_t i;  	dst = kvmalloc_array(num_elements, size, GFP_KERNEL);  	if (!dst)  		return NULL; -	memcpy(dst, (unsigned char *)psrc + offset, num_elements * size); + +	if (!vram_pages) { +		memcpy(dst, src, num_elements * size); +		return (void *)dst; +	} + +	*vram_pages = 0; +	for (i = 0; i < num_elements; i++) { +		dma_addr_t *temp; +		temp = (dma_addr_t *)dst + i; +		*temp = *((dma_addr_t *)src + i); +		if (*temp&SVM_RANGE_VRAM_DOMAIN) +			(*vram_pages)++; +	}  	return (void *)dst;  } @@ -899,7 +918,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)  		if (!src->dma_addr[i])  			continue;  		dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i], -					sizeof(*src->dma_addr[i]), src->npages, 0); +					sizeof(*src->dma_addr[i]), src->npages, 0, NULL);  		if (!dst->dma_addr[i])  			return -ENOMEM;  	} @@ -910,7 +929,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)  static int  svm_range_split_array(void *ppnew, void *ppold, size_t size,  		      uint64_t old_start, uint64_t old_n, -		      uint64_t new_start, uint64_t new_n) +		      uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)  {  	unsigned char *new, *old, *pold;  	uint64_t d; @@ -922,11 +941,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,  		return 0;  	d = (new_start - old_start) * size; -	new = svm_range_copy_array(pold, size, new_n, d); +	/* get dma addr array for new range and calculte its vram page number */ +	new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);  	if (!new)  		return -ENOMEM;  	d = (new_start == old_start) ? new_n * size : 0; -	old = svm_range_copy_array(pold, size, old_n, d); +	old = svm_range_copy_array(pold, size, old_n, d, NULL);  	if (!old) {  		kvfree(new);  		return -ENOMEM; @@ -948,10 +968,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,  	for (i = 0; i < MAX_GPU_INSTANCE; i++) {  		r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],  					  sizeof(*old->dma_addr[i]), old->start, -					  npages, new->start, new->npages); +					  npages, new->start, new->npages, +					  old->actual_loc ? &new->vram_pages : NULL);  		if (r)  			return r;  	} +	if (old->actual_loc) +		old->vram_pages -= new->vram_pages;  	return 0;  } @@ -1097,7 +1120,7 @@ static int  svm_range_split_tail(struct svm_range *prange, uint64_t new_last,  		     struct list_head *insert_list, struct list_head *remap_list)  { -	struct svm_range *tail; +	struct svm_range *tail = NULL;  	int r = svm_range_split(prange, prange->start, new_last, &tail);  	if (!r) { @@ -1112,7 +1135,7 @@ static int  svm_range_split_head(struct svm_range *prange, uint64_t new_start,  		     struct list_head *insert_list, struct list_head *remap_list)  { -	struct svm_range *head; +	struct svm_range *head = NULL;  	int r = svm_range_split(prange, new_start, prange->last, &head);  	if (!r) { @@ -1135,66 +1158,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,  	list_add_tail(&pchild->child_list, &prange->child_list);  } -/** - * svm_range_split_by_granularity - collect ranges within granularity boundary - * - * @p: the process with svms list - * @mm: mm structure - * @addr: the vm fault address in pages, to split the prange - * @parent: parent range if prange is from child list - * @prange: prange to split - * - * Trims @prange to be a single aligned block of prange->granularity if - * possible. The head and tail are added to the child_list in @parent. - * - * Context: caller must hold mmap_read_lock and prange->lock - * - * Return: - * 0 - OK, otherwise error code - */ -int -svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, -			       unsigned long addr, struct svm_range *parent, -			       struct svm_range *prange) -{ -	struct svm_range *head, *tail; -	unsigned long start, last, size; -	int r; - -	/* Align splited range start and size to granularity size, then a single -	 * PTE will be used for whole range, this reduces the number of PTE -	 * updated and the L1 TLB space used for translation. -	 */ -	size = 1UL << prange->granularity; -	start = ALIGN_DOWN(addr, size); -	last = ALIGN(addr + 1, size) - 1; - -	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", -		 prange->svms, prange->start, prange->last, start, last, size); - -	if (start > prange->start) { -		r = svm_range_split(prange, start, prange->last, &head); -		if (r) -			return r; -		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); -	} - -	if (last < prange->last) { -		r = svm_range_split(prange, prange->start, last, &tail); -		if (r) -			return r; -		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); -	} - -	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ -	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { -		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; -		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", -			 prange, prange->start, prange->last, -			 SVM_OP_ADD_RANGE_AND_MAP); -	} -	return 0; -}  static bool  svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)  { @@ -1529,7 +1492,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)  	uint32_t gpuidx;  	int r; -	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0); +	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);  	drm_exec_until_all_locked(&ctx->exec) {  		for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {  			pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); @@ -1614,6 +1577,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)   * 5. Release page table (and SVM BO) reservation   */  static int svm_range_validate_and_map(struct mm_struct *mm, +				      unsigned long map_start, unsigned long map_last,  				      struct svm_range *prange, int32_t gpuidx,  				      bool intr, bool wait, bool flush_tlb)  { @@ -1694,10 +1658,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,  		}  	} -	start = prange->start << PAGE_SHIFT; -	end = (prange->last + 1) << PAGE_SHIFT; +	start = map_start << PAGE_SHIFT; +	end = (map_last + 1) << PAGE_SHIFT;  	for (addr = start; !r && addr < end; ) {  		struct hmm_range *hmm_range; +		unsigned long map_start_vma; +		unsigned long map_last_vma;  		struct vm_area_struct *vma;  		unsigned long next = 0;  		unsigned long offset; @@ -1725,7 +1691,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,  		}  		if (!r) { -			offset = (addr - start) >> PAGE_SHIFT; +			offset = (addr >> PAGE_SHIFT) - prange->start;  			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,  					      hmm_range->hmm_pfns);  			if (r) @@ -1743,9 +1709,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,  			r = -EAGAIN;  		} -		if (!r) -			r = svm_range_map_to_gpus(prange, offset, npages, readonly, -						  ctx->bitmap, wait, flush_tlb); +		if (!r) { +			map_start_vma = max(map_start, prange->start + offset); +			map_last_vma = min(map_last, prange->start + offset + npages - 1); +			if (map_start_vma <= map_last_vma) { +				offset = map_start_vma - prange->start; +				npages = map_last_vma - map_start_vma + 1; +				r = svm_range_map_to_gpus(prange, offset, npages, readonly, +							  ctx->bitmap, wait, flush_tlb); +			} +		}  		if (!r && next == end)  			prange->mapped_to_gpu = true; @@ -1838,8 +1811,8 @@ static void svm_range_restore_work(struct work_struct *work)  		 */  		mutex_lock(&prange->migrate_mutex); -		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, -					       false, true, false); +		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, +					       MAX_GPU_INSTANCE, false, true, false);  		if (r)  			pr_debug("failed %d to map 0x%lx to gpus\n", r,  				 prange->start); @@ -1876,7 +1849,7 @@ out_reschedule:  	/* If validation failed, reschedule another attempt */  	if (evicted_ranges) {  		pr_debug("reschedule to restore svm range\n"); -		schedule_delayed_work(&svms->restore_work, +		queue_delayed_work(system_freezable_wq, &svms->restore_work,  			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));  		kfd_smi_event_queue_restore_rescheduled(mm); @@ -1952,7 +1925,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,  			pr_debug("failed to quiesce KFD\n");  		pr_debug("schedule to restore svm %p ranges\n", svms); -		schedule_delayed_work(&svms->restore_work, +		queue_delayed_work(system_freezable_wq, &svms->restore_work,  			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));  	} else {  		unsigned long s, l; @@ -2007,6 +1980,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)  	new->actual_loc = old->actual_loc;  	new->granularity = old->granularity;  	new->mapped_to_gpu = old->mapped_to_gpu; +	new->vram_pages = old->vram_pages;  	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);  	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -2914,6 +2888,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  			uint32_t vmid, uint32_t node_id,  			uint64_t addr, bool write_fault)  { +	unsigned long start, last, size;  	struct mm_struct *mm = NULL;  	struct svm_range_list *svms;  	struct svm_range *prange; @@ -3049,40 +3024,44 @@ retry_write_locked:  	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,  				       write_fault, timestamp); -	if (prange->actual_loc != best_loc) { +	/* Align migration range start and size to granularity size */ +	size = 1UL << prange->granularity; +	start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start); +	last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last); +	if (prange->actual_loc != 0 || best_loc != 0) {  		migration = true; +  		if (best_loc) { -			r = svm_migrate_to_vram(prange, best_loc, mm, -					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); +			r = svm_migrate_to_vram(prange, best_loc, start, last, +					mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);  			if (r) {  				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",  					 r, addr);  				/* Fallback to system memory if migration to  				 * VRAM failed  				 */ -				if (prange->actual_loc) -					r = svm_migrate_vram_to_ram(prange, mm, -					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, -					   NULL); +				if (prange->actual_loc && prange->actual_loc != best_loc) +					r = svm_migrate_vram_to_ram(prange, mm, start, last, +						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);  				else  					r = 0;  			}  		} else { -			r = svm_migrate_vram_to_ram(prange, mm, -					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, -					NULL); +			r = svm_migrate_vram_to_ram(prange, mm, start, last, +					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);  		}  		if (r) {  			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", -				 r, svms, prange->start, prange->last); +				 r, svms, start, last);  			goto out_unlock_range;  		}  	} -	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); +	r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false, +				       false, false);  	if (r)  		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", -			 r, svms, prange->start, prange->last); +			 r, svms, start, last);  	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,  				     migration); @@ -3428,18 +3407,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,  	*migrated = false;  	best_loc = svm_range_best_prefetch_location(prange); -	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || -	    best_loc == prange->actual_loc) +	/* when best_loc is a gpu node and same as prange->actual_loc +	 * we still need do migration as prange->actual_loc !=0 does +	 * not mean all pages in prange are vram. hmm migrate will pick +	 * up right pages during migration. +	 */ +	if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) || +	    (best_loc == 0 && prange->actual_loc == 0))  		return 0;  	if (!best_loc) { -		r = svm_migrate_vram_to_ram(prange, mm, +		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,  					KFD_MIGRATE_TRIGGER_PREFETCH, NULL);  		*migrated = !r;  		return r;  	} -	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH); +	r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last, +				mm, KFD_MIGRATE_TRIGGER_PREFETCH);  	*migrated = !r;  	return r; @@ -3494,7 +3479,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)  		mutex_lock(&prange->migrate_mutex);  		do { +			/* migrate all vram pages in this prange to sys ram +			 * after that prange->actual_loc should be zero +			 */  			r = svm_migrate_vram_to_ram(prange, mm, +					prange->start, prange->last,  					KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);  		} while (!r && prange->actual_loc && --retries); @@ -3618,8 +3607,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,  		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; -		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, -					       true, true, flush_tlb); +		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, +					       MAX_GPU_INSTANCE, true, true, flush_tlb);  		if (r)  			pr_debug("failed %d to map svm range\n", r); @@ -3633,8 +3622,8 @@ out_unlock_range:  		pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",  			 prange, prange->start, prange->last);  		mutex_lock(&prange->migrate_mutex); -		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, -					       true, true, prange->mapped_to_gpu); +		r = svm_range_validate_and_map(mm,  prange->start, prange->last, prange, +					       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);  		if (r)  			pr_debug("failed %d on remap svm range\n", r);  		mutex_unlock(&prange->migrate_mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index c528df1d0ba2..026863a0abcd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -78,6 +78,7 @@ struct svm_work_list_item {   * @update_list:link list node used to add to update_list   * @mapping:    bo_va mapping structure to create and update GPU page table   * @npages:     number of pages + * @vram_pages: vram pages number in this svm_range   * @dma_addr:   dma mapping address on each GPU for system memory physical page   * @ttm_res:    vram ttm resource map   * @offset:     range start offset within mm_nodes @@ -88,7 +89,9 @@ struct svm_work_list_item {   * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*   * @perferred_loc: perferred location, 0 for CPU, or GPU id   * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id - * @actual_loc: the actual location, 0 for CPU, or GPU id + * @actual_loc: this svm_range location. 0: all pages are from sys ram; + *              GPU id: this svm_range may include vram pages from GPU with + *              id actual_loc.   * @granularity:migration granularity, log2 num pages   * @invalid:    not 0 means cpu page table is invalidated   * @validate_timestamp: system timestamp when range is validated @@ -112,6 +115,7 @@ struct svm_range {  	struct list_head		list;  	struct list_head		update_list;  	uint64_t			npages; +	uint64_t			vram_pages;  	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];  	struct ttm_resource		*ttm_res;  	uint64_t			offset; @@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,  int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,  			    bool clear);  void svm_range_vram_node_free(struct svm_range *prange); -int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, -			       unsigned long addr, struct svm_range *parent, -			       struct svm_range *prange);  int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  			    uint32_t vmid, uint32_t node_id, uint64_t addr,  			    bool write_fault); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 057284bf50bb..e5f7c92eebcb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1342,10 +1342,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g  		num_cpu++;  	} +	if (list_empty(&kdev->io_link_props)) +		return -ENODATA; +  	gpu_link = list_first_entry(&kdev->io_link_props, -					struct kfd_iolink_properties, list); -	if (!gpu_link) -		return -ENOMEM; +				    struct kfd_iolink_properties, list);  	for (i = 0; i < num_cpu; i++) {  		/* CPU <--> GPU */ @@ -1423,15 +1424,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,  				peer->gpu->adev))  		return ret; +	if (list_empty(&kdev->io_link_props)) +		return -ENODATA; +  	iolink1 = list_first_entry(&kdev->io_link_props, -							struct kfd_iolink_properties, list); -	if (!iolink1) -		return -ENOMEM; +				   struct kfd_iolink_properties, list); + +	if (list_empty(&peer->io_link_props)) +		return -ENODATA;  	iolink2 = list_first_entry(&peer->io_link_props, -							struct kfd_iolink_properties, list); -	if (!iolink2) -		return -ENOMEM; +				   struct kfd_iolink_properties, list);  	props = kfd_alloc_struct(props);  	if (!props) @@ -1449,17 +1452,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,  		/* CPU->CPU  link*/  		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);  		if (cpu_dev) { -			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) -				if (iolink3->node_to == iolink2->node_to) -					break; - -			props->weight += iolink3->weight; -			props->min_latency += iolink3->min_latency; -			props->max_latency += iolink3->max_latency; -			props->min_bandwidth = min(props->min_bandwidth, -							iolink3->min_bandwidth); -			props->max_bandwidth = min(props->max_bandwidth, -							iolink3->max_bandwidth); +			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { +				if (iolink3->node_to != iolink2->node_to) +					continue; + +				props->weight += iolink3->weight; +				props->min_latency += iolink3->min_latency; +				props->max_latency += iolink3->max_latency; +				props->min_bandwidth = min(props->min_bandwidth, +							   iolink3->min_bandwidth); +				props->max_bandwidth = min(props->max_bandwidth, +							   iolink3->max_bandwidth); +				break; +			}  		} else {  			WARN(1, "CPU node not found");  		} | 
