Skip to content

Commit 4eca1c0

Browse files
committed
[AArch64][FIX] f16 indexed patterns encoding restrictions.
1 parent c3730ad commit 4eca1c0

File tree

2 files changed: 9 additions and 9 deletions.

clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
121121
// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
122122
// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
123123
// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
124-
// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
124+
// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
125125
// COMMONIR: ret <4 x half> [[FMLA]]
126126
float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
127127
return vfma_laneq_f16(a, b, c, 7);
@@ -239,7 +239,7 @@ float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
239239
// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
240240
// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
241241
// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
242-
// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
242+
// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
243243
// COMMONIR: ret <4 x half> [[FMLA]]
244244
float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
245245
return vfms_laneq_f16(a, b, c, 7);

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8068,29 +8068,29 @@ multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
80688068
let Predicates = [HasNEON, HasFullFP16] in {
80698069
// Patterns for f16: DUPLANE, DUP scalar and vector_extract.
80708070
def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
8071-
(AArch64duplane16 (v8f16 V128:$Rm),
8071+
(AArch64duplane16 (v8f16 V128_lo:$Rm),
80728072
VectorIndexH:$idx))),
80738073
(!cast<Instruction>(INST # "v8i16_indexed")
8074-
V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexH:$idx)>;
8074+
V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>;
80758075
def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
80768076
(AArch64dup (f16 FPR16Op:$Rm)))),
80778077
(!cast<Instruction>(INST # "v8i16_indexed") V128:$Rd, V128:$Rn,
80788078
(SUBREG_TO_REG (i32 0), FPR16Op:$Rm, hsub), (i64 0))>;
80798079

80808080
def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn),
8081-
(AArch64duplane16 (v8f16 V128:$Rm),
8082-
VectorIndexS:$idx))),
8081+
(AArch64duplane16 (v8f16 V128_lo:$Rm),
8082+
VectorIndexH:$idx))),
80838083
(!cast<Instruction>(INST # "v4i16_indexed")
8084-
V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
8084+
V64:$Rd, V64:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>;
80858085
def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn),
80868086
(AArch64dup (f16 FPR16Op:$Rm)))),
80878087
(!cast<Instruction>(INST # "v4i16_indexed") V64:$Rd, V64:$Rn,
80888088
(SUBREG_TO_REG (i32 0), FPR16Op:$Rm, hsub), (i64 0))>;
80898089

80908090
def : Pat<(f16 (OpNode (f16 FPR16:$Rd), (f16 FPR16:$Rn),
8091-
(vector_extract (v8f16 V128:$Rm), VectorIndexH:$idx))),
8091+
(vector_extract (v8f16 V128_lo:$Rm), VectorIndexH:$idx))),
80928092
(!cast<Instruction>(INST # "v1i16_indexed") FPR16:$Rd, FPR16:$Rn,
8093-
V128:$Rm, VectorIndexH:$idx)>;
8093+
V128_lo:$Rm, VectorIndexH:$idx)>;
80948094
} // Predicates = [HasNEON, HasFullFP16]
80958095

80968096
// 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.

0 commit comments

Comments (0)