summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLaurent Morichetti <laurent.morichetti@amd.com>2024-02-16 20:16:41 -0800
committerAlex Deucher <alexander.deucher@amd.com>2024-03-06 15:24:50 -0500
commit45bbf800c5f933de0002b26a44ff04f569247964 (patch)
tree43e497a9385c4ae80003d26141cb92706e045a28
parentf36e3f7260ac60ac8049e6ab1732fabeff334cf5 (diff)
downloadlinux-stable-45bbf800c5f933de0002b26a44ff04f569247964.tar.gz
linux-stable-45bbf800c5f933de0002b26a44ff04f569247964.tar.bz2
linux-stable-45bbf800c5f933de0002b26a44ff04f569247964.zip
drm/amdkfd: Use SQC when TCP would fail in gfx10.1 context save
Similarly to gfx9, gfx10.1 drops vector stores when an xnack error is raised. To work around this issue, use scalar stores instead of vector stores when trapsts.xnack_error == 1. Signed-off-by: Laurent Morichetti <laurent.morichetti@amd.com> Reviewed-by: Jay Cornwall <jay.cornwall@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h543
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm156
2 files changed, 530 insertions, 169 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 2e9b64edb8d2..5a0308d26b53 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -678,7 +678,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf8201f5,
+ 0xbf820001, 0xbf820394,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
@@ -769,13 +769,90 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820002, 0xbeff03c1,
- 0xbf82000b, 0xbef603ff,
- 0x01000000, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
- 0x705d0300, 0xbf82000a,
- 0xbef603ff, 0x01000000,
+ 0xbf820058, 0xbef603ff,
+ 0x01000000, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850049, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbf820060,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0xbf820057, 0xbef603ff,
+ 0x01000000, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850049, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbf820008,
0xe0704000, 0x705d0000,
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
@@ -855,9 +932,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
- 0xbf840044, 0xbf8a0000,
+ 0xbf840086, 0xbf8a0000,
0x877aff6d, 0x80000000,
- 0xbf840040, 0x8f7b867b,
+ 0xbf840082, 0x8f7b867b,
0x8f7b827b, 0xbef6037b,
0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
@@ -871,16 +948,49 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xd7660000, 0x000200c1,
0x16000084, 0x907c9973,
0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850012,
- 0xbe8303ff, 0x00000080,
+ 0xbefc0380, 0xbf850033,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf85001d,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0x807cff7c,
+ 0x00000080, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7c, 0xbf85ffe4,
+ 0xbf820044, 0xbe8303ff,
+ 0x00000080, 0xbf800000,
0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbf820032, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf85001d, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0x807cff7c, 0x00000100,
0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a7b7c,
- 0xbf85fff4, 0xbf820011,
+ 0x00000100, 0xbf0a7b7c,
+ 0xbf85ffe4, 0xbf820011,
0xbe8303ff, 0x00000100,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
@@ -898,10 +1008,52 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbeff03c1, 0xb97b3a05,
0x807b817b, 0x8f7b827b,
0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850017,
+ 0xbf06817c, 0xbf85006b,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf840037, 0x7e008700,
+ 0xbf8400fa, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850050, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf8200a6, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
0x705d0000, 0xe0704080,
@@ -910,9 +1062,51 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x705d0300, 0x807c847c,
0x8070ff70, 0x00000200,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820025, 0xbef603ff,
+ 0xbf820094, 0xbef603ff,
0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840011,
+ 0xbf0a7b7c, 0xbf840065,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf850050,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xbe840380, 0xd7600000,
+ 0x00000900, 0x80048104,
+ 0xd7600001, 0x00000900,
+ 0x80048104, 0xd7600002,
+ 0x00000900, 0x80048104,
+ 0xd7600003, 0x00000900,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000902, 0x80048104,
+ 0xd7600001, 0x00000902,
+ 0x80048104, 0xd7600002,
+ 0x00000902, 0x80048104,
+ 0xd7600003, 0x00000902,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000903, 0x80048104,
+ 0xd7600001, 0x00000903,
+ 0x80048104, 0xd7600002,
+ 0x00000903, 0x80048104,
+ 0xd7600003, 0x00000903,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffb1, 0xbf82003b,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
0xe0704000, 0x705d0000,
@@ -922,179 +1116,192 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf84000c,
+ 0x877bc17b, 0xbf840027,
0x8f7b837b, 0x807b7c7b,
0xbefe03c1, 0xbeff0380,
- 0x7e008700, 0xe0704000,
- 0x705d0000, 0x807c817c,
- 0x8070ff70, 0x00000080,
- 0xbf0a7b7c, 0xbf85fff8,
- 0xbf820144, 0xbef4037e,
- 0x8775ff7f, 0x0000ffff,
- 0x8875ff75, 0x00040000,
- 0xbef60380, 0xbef703ff,
- 0x10807fac, 0xb97202dc,
- 0x8f729972, 0x876eff7f,
- 0x04000000, 0xbf840034,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf850017,
+ 0x7e008700, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0x807c817c,
+ 0xbf0a7b7c, 0xbf85ffea,
+ 0xbf820008, 0x7e008700,
+ 0xe0704000, 0x705d0000,
+ 0x807c817c, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff8, 0xbf820144,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0xb97202dc, 0x8f729972,
+ 0x876eff7f, 0x04000000,
+ 0xbf840034, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f4306,
+ 0x876fc16f, 0xbf840029,
+ 0x8f6f866f, 0x8f6f826f,
+ 0xbef6036f, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbefc0380, 0xbf850009,
+ 0xe0310000, 0x781d0000,
+ 0x807cff7c, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7c, 0xbf85fff8,
+ 0xbf820008, 0xe0310000,
+ 0x781d0000, 0x807cff7c,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7c,
+ 0xbf85fff8, 0xbef80380,
0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
- 0xb96f4306, 0x876fc16f,
- 0xbf840029, 0x8f6f866f,
- 0x8f6f826f, 0xbef6036f,
- 0xb9783a05, 0x80788178,
- 0xbf0d9972, 0xbf850002,
- 0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb96e1e06,
- 0x8f6e8a6e, 0x80786e78,
+ 0xb96f3a05, 0x806f816f,
+ 0x8f6f826f, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850024, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
- 0x8078ff78, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbefc0380,
- 0xbf850009, 0xe0310000,
- 0x781d0000, 0x807cff7c,
- 0x00000080, 0x8078ff78,
- 0x00000080, 0xbf0a6f7c,
- 0xbf85fff8, 0xbf820008,
- 0xe0310000, 0x781d0000,
- 0x807cff7c, 0x00000100,
- 0x8078ff78, 0x00000100,
- 0xbf0a6f7c, 0xbf85fff8,
- 0xbef80380, 0xbefe03c1,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820001,
- 0xbeff03c1, 0xb96f3a05,
- 0x806f816f, 0x8f6f826f,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850024,
- 0xbef603ff, 0x01000000,
- 0xbeee0378, 0x8078ff78,
- 0x00000200, 0xbefc0384,
- 0xbf0a6f7c, 0xbf840050,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840050, 0xe0304000,
+ 0x785d0000, 0xe0304080,
+ 0x785d0100, 0xe0304100,
+ 0x785d0200, 0xe0304180,
+ 0x785d0300, 0xbf8c3f70,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85ffee, 0xe0304000,
+ 0x6e5d0000, 0xe0304080,
+ 0x6e5d0100, 0xe0304100,
+ 0x6e5d0200, 0xe0304180,
+ 0x6e5d0300, 0xbf8c3f70,
+ 0xbf820034, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840012, 0xe0304000,
+ 0x785d0000, 0xe0304100,
+ 0x785d0100, 0xe0304200,
+ 0x785d0200, 0xe0304300,
+ 0x785d0300, 0xbf8c3f70,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb96f1e06,
+ 0x876fc16f, 0xbf84000e,
+ 0x8f6f836f, 0x806f7c6f,
+ 0xbefe03c1, 0xbeff0380,
0xe0304000, 0x785d0000,
- 0xe0304080, 0x785d0100,
- 0xe0304100, 0x785d0200,
- 0xe0304180, 0x785d0300,
0xbf8c3f70, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807c847c,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85ffee,
+ 0x807c817c, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7c,
+ 0xbf85fff7, 0xbeff03c1,
0xe0304000, 0x6e5d0000,
- 0xe0304080, 0x6e5d0100,
- 0xe0304100, 0x6e5d0200,
- 0xe0304180, 0x6e5d0300,
- 0xbf8c3f70, 0xbf820034,
- 0xbef603ff, 0x01000000,
- 0xbeee0378, 0x8078ff78,
- 0x00000400, 0xbefc0384,
- 0xbf0a6f7c, 0xbf840012,
- 0xe0304000, 0x785d0000,
- 0xe0304100, 0x785d0100,
- 0xe0304200, 0x785d0200,
- 0xe0304300, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb96f1e06, 0x876fc16f,
- 0xbf84000e, 0x8f6f836f,
- 0x806f7c6f, 0xbefe03c1,
- 0xbeff0380, 0xe0304000,
- 0x785d0000, 0xbf8c3f70,
- 0x7e008500, 0x807c817c,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff7,
- 0xbeff03c1, 0xe0304000,
- 0x6e5d0000, 0xe0304100,
- 0x6e5d0100, 0xe0304200,
- 0x6e5d0200, 0xe0304300,
- 0x6e5d0300, 0xbf8c3f70,
+ 0xe0304100, 0x6e5d0100,
+ 0xe0304200, 0x6e5d0200,
+ 0xe0304300, 0x6e5d0300,
+ 0xbf8c3f70, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef603ff,
+ 0x01000000, 0xbefc03ff,
+ 0x0000006c, 0x80f89078,
+ 0xf429003a, 0xf0000000,
+ 0xbf8cc07f, 0x80fc847c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0x80f8a078,
+ 0xf42d003a, 0xf0000000,
+ 0xbf8cc07f, 0x80fc887c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0x80f8c078,
+ 0xf431003a, 0xf0000000,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0xbe883108,
+ 0xbe8a310a, 0xbe8c310c,
+ 0xbe8e310e, 0xbf06807c,
+ 0xbf84fff0, 0xba80f801,
+ 0x00000000, 0xbf8a0000,
0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
- 0xbefc03ff, 0x0000006c,
- 0x80f89078, 0xf429003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc847c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0x80f8a078, 0xf42d003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc887c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0x80f8c078, 0xf431003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0xbe883108, 0xbe8a310a,
- 0xbe8c310c, 0xbe8e310e,
- 0xbf06807c, 0xbf84fff0,
- 0xba80f801, 0x00000000,
- 0xbf8a0000, 0xb9783a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xf4211bfa,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
- 0xf4211b3a, 0xf0000000,
- 0x80788478, 0xf4211b7a,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
- 0xf4211c3a, 0xf0000000,
- 0x80788478, 0xf4211c7a,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
- 0xf4211eba, 0xf0000000,
- 0x80788478, 0xf4211efa,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
- 0xf4211e7a, 0xf0000000,
- 0x80788478, 0xf4211cfa,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef814, 0xf4211bba,
- 0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
- 0xbefc036f, 0xbefe0370,
- 0xbeff0371, 0x876f7bff,
- 0x000003ff, 0xb9ef4803,
- 0xb9f9f816, 0x876f7bff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0xb96e3a05, 0x806e816e,
- 0xbf0d9972, 0xbf850002,
- 0x8f6e896e, 0xbf820001,
- 0x8f6e8a6e, 0xb96f1e06,
- 0x8f6f8a6f, 0x806e6f6e,
- 0x806eff6e, 0x00000200,
- 0x806e746e, 0x826f8075,
- 0x876fff6f, 0x0000ffff,
- 0xf4091c37, 0xfa000050,
- 0xf4091d37, 0xfa000060,
- 0xf4011e77, 0xfa000074,
- 0xbf8cc07f, 0x906e8977,
- 0x876fff6e, 0x003f8000,
- 0x906e8677, 0x876eff6e,
- 0x02000000, 0x886e6f6e,
- 0xb9eef807, 0x876dff6d,
- 0x0000ffff, 0x87fe7e7e,
- 0x87ea6a6a, 0xb9faf802,
- 0xbe80226c, 0xbf9b0000,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0x876f7bff, 0x000003ff,
+ 0xb9ef4803, 0xb9f9f816,
+ 0x876f7bff, 0xfffff800,
+ 0x906f8b6f, 0xb9efa2c3,
+ 0xb9f3f801, 0xb96e3a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0xb96f1e06, 0x8f6f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8cc07f,
+ 0x906e8977, 0x876fff6e,
+ 0x003f8000, 0x906e8677,
+ 0x876eff6e, 0x02000000,
+ 0x886e6f6e, 0xb9eef807,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbe80226c,
+ 0xbf9b0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 7568ff3af978..e1aaa5ce0784 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -44,6 +44,7 @@
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
@@ -86,6 +87,7 @@ var SQ_WAVE_TRAPSTS_WAVE_START_MASK = 0x20000
var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x40000
var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x100000
#endif
+var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
@@ -475,6 +477,16 @@ L_SAVE_4VGPR_WAVE32:
// VGPR Allocated in 4-GPR granularity
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS32_WITH_TCP
+
+ write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+ s_branch L_SAVE_HWREG
+
+L_SAVE_FIRST_VGPRS32_WITH_TCP:
+#endif
+
#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
@@ -488,6 +500,16 @@ L_SAVE_4VGPR_WAVE64:
// VGPR Allocated in 4-GPR granularity
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS64_WITH_TCP
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+ s_branch L_SAVE_HWREG
+
+L_SAVE_FIRST_VGPRS64_WITH_TCP:
+#endif
+
#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
@@ -660,6 +682,26 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc1 L_SAVE_LDS_W64
L_SAVE_LDS_W32:
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W32
+
+L_SAVE_LDS_LOOP_SQC_W32:
+ ds_read_b32 v1, v0
+ s_waitcnt 0
+
+ write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 128 //every buffer_store_lds does 128 bytes
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP_W32:
+#endif
+
s_mov_b32 s3, 128
s_nop 0
s_nop 0
@@ -669,7 +711,7 @@ L_SAVE_LDS_LOOP_W32:
s_waitcnt 0
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
@@ -678,6 +720,26 @@ L_SAVE_LDS_LOOP_W32:
s_branch L_SAVE_LDS_DONE
L_SAVE_LDS_W64:
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W64
+
+L_SAVE_LDS_LOOP_SQC_W64:
+ ds_read_b32 v1, v0
+ s_waitcnt 0
+
+ write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W64 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP_W64:
+#endif
+
s_mov_b32 s3, 256
s_nop 0
s_nop 0
@@ -727,6 +789,25 @@ L_SAVE_VGPR_NORMAL:
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_VGPR_END
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP
+
+L_SAVE_VGPR_LOOP_SQC_W32:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W32
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_VGPR_W32_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
v_movrels_b32 v1, v1 //v1 = v[1+m0]
@@ -753,6 +834,25 @@ L_SAVE_VGPR_WAVE64:
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_SHARED_VGPR
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP
+
+L_SAVE_VGPR_LOOP_SQC_W64:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W64
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
v_movrels_b32 v1, v1 //v1 = v[1+m0]
@@ -780,6 +880,23 @@ L_SAVE_SHARED_VGPR:
s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0x00000000
+
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC:
+ v_movrels_b32 v0, v0
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 1
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_SHARED_VGPR_WAVE64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
@@ -1190,6 +1307,43 @@ function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1
end
+#if SAVE_AFTER_XNACK_ERROR
+function check_if_tcp_store_ok
+ // If TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp
+
+L_TCP_STORE_CHECK_DONE:
+end
+
+function write_vgpr_to_mem_with_sqc(vgpr, n_lanes, s_rsrc, s_mem_offset)
+ s_mov_b32 s4, 0
+
+L_WRITE_VGPR_LANE_LOOP:
+ for var lane = 0; lane < 4; ++lane
+ v_readlane_b32 s[lane], vgpr, s4
+ s_add_u32 s4, s4, 1
+ end
+
+ s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1
+
+ s_add_u32 s_mem_offset, s_mem_offset, 0x10
+ s_cmp_eq_u32 s4, n_lanes
+ s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP
+end
+
+function write_vgprs_to_mem_with_sqc_w32(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
+ for var vgpr = 0; vgpr < n_vgprs; ++vgpr
+ write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 32, s_rsrc, s_mem_offset)
+ end
+end
+
+function write_vgprs_to_mem_with_sqc_w64(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
+ for var vgpr = 0; vgpr < n_vgprs; ++vgpr
+ write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 64, s_rsrc, s_mem_offset)
+ end
+end
+#endif
function get_lds_size_bytes(s_lds_size_byte)
s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)