Skip to content

Commit d6fb34c

Browse files
committed
AMDGPU: Add gfx950 subtarget definitions
Mostly a stub, but adds some baseline tests and tests for removed instructions.
1 parent 694f769 commit d6fb34c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+5148
-4519
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,8 @@ Target Specific Changes
710710
AMDGPU Support
711711
^^^^^^^^^^^^^^
712712

713+
* Initial support for gfx950
714+
713715
X86 Support
714716
^^^^^^^^^^^
715717

clang/include/clang/Basic/Cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ enum class OffloadArch {
107107
GFX940,
108108
GFX941,
109109
GFX942,
110+
GFX950,
110111
GFX10_1_GENERIC,
111112
GFX1010,
112113
GFX1011,

clang/lib/Basic/Cuda.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ static const OffloadArchToStringMap arch_names[] = {
125125
GFX(940), // gfx940
126126
GFX(941), // gfx941
127127
GFX(942), // gfx942
128+
GFX(950), // gfx950
128129
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
129130
GFX(1010), // gfx1010
130131
GFX(1011), // gfx1011

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
209209
case OffloadArch::GFX940:
210210
case OffloadArch::GFX941:
211211
case OffloadArch::GFX942:
212+
case OffloadArch::GFX950:
212213
case OffloadArch::GFX10_1_GENERIC:
213214
case OffloadArch::GFX1010:
214215
case OffloadArch::GFX1011:

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
23052305
case OffloadArch::GFX940:
23062306
case OffloadArch::GFX941:
23072307
case OffloadArch::GFX942:
2308+
case OffloadArch::GFX950:
23082309
case OffloadArch::GFX10_1_GENERIC:
23092310
case OffloadArch::GFX1010:
23102311
case OffloadArch::GFX1011:

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
3333
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
3434
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
35+
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
3536
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
3637
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
3738
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -88,6 +89,7 @@
8889
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
8990
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
9091
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
92+
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
9193
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9294
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9395
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"

clang/test/Driver/amdgpu-macros.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
111111
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
112112
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
113+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9
113114
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
114115
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
115116
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10

clang/test/Driver/amdgpu-mcpu.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
9696
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
9797
// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
98+
// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s
9899
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
99100
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
100101
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -150,6 +151,7 @@
150151
// GFX940: "-target-cpu" "gfx940"
151152
// GFX941: "-target-cpu" "gfx941"
152153
// GFX942: "-target-cpu" "gfx942"
154+
// GFX950: "-target-cpu" "gfx950"
153155
// GFX1010: "-target-cpu" "gfx1010"
154156
// GFX1011: "-target-cpu" "gfx1011"
155157
// GFX1012: "-target-cpu" "gfx1012"

clang/test/Misc/target-invalid-cpu-note/amdgcn.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
// CHECK-SAME: {{^}}, gfx940
4949
// CHECK-SAME: {{^}}, gfx941
5050
// CHECK-SAME: {{^}}, gfx942
51+
// CHECK-SAME: {{^}}, gfx950
5152
// CHECK-SAME: {{^}}, gfx1010
5253
// CHECK-SAME: {{^}}, gfx1011
5354
// CHECK-SAME: {{^}}, gfx1012

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
// CHECK-SAME: {{^}}, gfx940
5555
// CHECK-SAME: {{^}}, gfx941
5656
// CHECK-SAME: {{^}}, gfx942
57+
// CHECK-SAME: {{^}}, gfx950
5758
// CHECK-SAME: {{^}}, gfx10-1-generic
5859
// CHECK-SAME: {{^}}, gfx1010
5960
// CHECK-SAME: {{^}}, gfx1011

llvm/docs/AMDGPUUsage.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
399399
work-item
400400
IDs
401401

402+
     ``gfx950``                  ``amdgcn``   dGPU  - sramecc         - Architected                   *TBA*
403+
                                                    - tgsplit           flat
404+
                                                    - xnack             scratch                       .. TODO::
405+
                                                    - kernarg preload - Packed
406+
                                                                        work-item                       Add product
407+
                                                                        IDs                             names.
408+
402409
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
403410
-----------------------------------------------------------------------------------------------------------------------
404411
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header:
21782185
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
21792186
*reserved* 0x04d Reserved.
21802187
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
2181-
*reserved* 0x04f Reserved.
2188+
``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
21822189
*reserved* 0x050 Reserved.
21832190
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
21842191
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,7 @@ enum : unsigned {
811811
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
812812
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
813813
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
814-
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
814+
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
815815
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
816816
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
817817
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,

llvm/include/llvm/TargetParser/TargetParser.h

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
8686
GK_GFX940 = 68,
8787
GK_GFX941 = 69,
8888
GK_GFX942 = 70,
89-
90-
GK_GFX1010 = 71,
91-
GK_GFX1011 = 72,
92-
GK_GFX1012 = 73,
93-
GK_GFX1013 = 74,
94-
GK_GFX1030 = 75,
95-
GK_GFX1031 = 76,
96-
GK_GFX1032 = 77,
97-
GK_GFX1033 = 78,
98-
GK_GFX1034 = 79,
99-
GK_GFX1035 = 80,
100-
GK_GFX1036 = 81,
89+
GK_GFX950 = 71,
90+
91+
GK_GFX1010 = 72,
92+
GK_GFX1011 = 73,
93+
GK_GFX1012 = 74,
94+
GK_GFX1013 = 75,
95+
GK_GFX1030 = 76,
96+
GK_GFX1031 = 77,
97+
GK_GFX1032 = 78,
98+
GK_GFX1033 = 79,
99+
GK_GFX1034 = 80,
100+
GK_GFX1035 = 81,
101+
GK_GFX1036 = 82,
101102

102103
GK_GFX1100 = 90,
103104
GK_GFX1101 = 91,

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
550550
return "gfx941";
551551
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
552552
return "gfx942";
553+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
554+
return "gfx950";
553555

554556
// AMDGCN GFX10.
555557
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:

llvm/lib/ObjectYAML/ELFYAML.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
609609
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
610610
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
611611
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
612+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
612613
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
613614
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
614615
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
360360
"Additional instructions for GFX940+"
361361
>;
362362

363+
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
364+
"GFX950Insts",
365+
"true",
366+
"Additional instructions for GFX950+"
367+
>;
368+
363369
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
364370
"GFX10Insts",
365371
"true",
@@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14701476
FeatureFlatBufferGlobalAtomicFaddF64Inst
14711477
]>;
14721478

1479+
def FeatureISAVersion9_5_Common : FeatureSet<
1480+
!listconcat(FeatureISAVersion9_4_Common.Features,
1481+
[FeatureFP8Insts,
1482+
FeatureFP8ConversionInsts,
1483+
FeatureCvtFP8VOP1Bug,
1484+
FeatureGFX950Insts
1485+
])>;
1486+
14731487
def FeatureISAVersion9_4_0 : FeatureSet<
14741488
!listconcat(FeatureISAVersion9_4_Common.Features,
14751489
[
@@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
15031517
!listconcat(FeatureISAVersion9_4_Common.Features,
15041518
[FeatureRequiresCOV6])>;
15051519

1520+
def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;
1521+
15061522
def FeatureISAVersion10_Common : FeatureSet<
15071523
[FeatureGFX10,
15081524
FeatureLDSBankCount32,

llvm/lib/Target/AMDGPU/GCNProcessors.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
204204
FeatureISAVersion9_4_2.Features
205205
>;
206206

207+
def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
208+
FeatureISAVersion9_5_0.Features
209+
>;
210+
207211
// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
208212
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
209213
FeatureISAVersion9_Generic.Features

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
106106
bool GFX9Insts = false;
107107
bool GFX90AInsts = false;
108108
bool GFX940Insts = false;
109+
bool GFX950Insts = false;
109110
bool GFX10Insts = false;
110111
bool GFX11Insts = false;
111112
bool GFX12Insts = false;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
9696
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
9797
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
9898
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
99+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
99100
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
100101
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
101102
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
182183
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
183184
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
184185
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
186+
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
185187
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
186188
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
187189
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
107107
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
108108
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
109109
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
110+
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
110111
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
111112
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
112113
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
262263
case GK_GFX940: return {9, 4, 0};
263264
case GK_GFX941: return {9, 4, 1};
264265
case GK_GFX942: return {9, 4, 2};
266+
case GK_GFX950: return {9, 5, 0};
265267
case GK_GFX1010: return {10, 1, 0};
266268
case GK_GFX1011: return {10, 1, 1};
267269
case GK_GFX1012: return {10, 1, 2};
@@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
361363
Features["wavefrontsize32"] = true;
362364
Features["wavefrontsize64"] = true;
363365
} else if (T.isAMDGCN()) {
364-
switch (parseArchAMDGCN(GPU)) {
366+
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
367+
switch (Kind) {
365368
case GK_GFX1201:
366369
case GK_GFX1200:
367370
case GK_GFX12_GENERIC:
@@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
466469
Features["s-memtime-inst"] = true;
467470
Features["gws"] = true;
468471
break;
472+
case GK_GFX950:
473+
Features["gfx950-insts"] = true;
474+
[[fallthrough]];
469475
case GK_GFX942:
470476
case GK_GFX941:
471477
case GK_GFX940:
472478
Features["fp8-insts"] = true;
473479
Features["fp8-conversion-insts"] = true;
474-
Features["xf32-insts"] = true;
480+
if (Kind != GK_GFX950)
481+
Features["xf32-insts"] = true;
475482
[[fallthrough]];
476483
case GK_GFX9_4_GENERIC:
477484
Features["gfx940-insts"] = true;

0 commit comments

Comments
 (0)