[LoongArch] Override LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (llvm#83656)

SixWeining · MingcongBai · commit a169693d2912 · 2024-03-08T01:07:55.000+08:00
This patch aims to solve Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparsion argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them. (cherry picked from commit 5f058aa)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4940,3 +4940,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
 
   return !isa<ConstantSDNode>(Y);
 }
+
+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
+  // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
+  return ISD::SIGN_EXTEND;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -206,6 +206,8 @@ class LoongArchTargetLowering : public TargetLowering {
     return ISD::SIGN_EXTEND;
   }
 
+  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+
   Register getRegisterByName(const char *RegName, LLT VT,
                              const MachineFunction &MF) const override;
   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -25,15 +25,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    andi $a5, $a5, 255
 ; LA64-NEXT:    sll.w $a5, $a5, $a3
 ; LA64-NEXT:    and $a6, $a2, $a4
-; LA64-NEXT:    or $a6, $a6, $a5
+; LA64-NEXT:    or $a5, $a6, $a5
+; LA64-NEXT:    addi.w $a6, $a2, 0
 ; LA64-NEXT:  .LBB0_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB0_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    bne $a5, $a2, .LBB0_5
+; LA64-NEXT:    ll.w $a2, $a0, 0
+; LA64-NEXT:    bne $a2, $a6, .LBB0_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_3 Depth=2
-; LA64-NEXT:    move $a7, $a6
+; LA64-NEXT:    move $a7, $a5
 ; LA64-NEXT:    sc.w $a7, $a0, 0
 ; LA64-NEXT:    beqz $a7, .LBB0_3
 ; LA64-NEXT:    b .LBB0_6
@@ -42,11 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB0_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT:    addi.w $a6, $a2, 0
-; LA64-NEXT:    move $a2, $a5
-; LA64-NEXT:    bne $a5, $a6, .LBB0_1
+; LA64-NEXT:    bne $a2, $a6, .LBB0_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a5, $a3
+; LA64-NEXT:    srl.w $a0, $a2, $a3
 ; LA64-NEXT:    ret
   %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
   ret i8 %result
@@ -77,15 +76,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    bstrpick.d $a5, $a5, 15, 0
 ; LA64-NEXT:    sll.w $a5, $a5, $a3
 ; LA64-NEXT:    and $a6, $a2, $a4
-; LA64-NEXT:    or $a6, $a6, $a5
+; LA64-NEXT:    or $a5, $a6, $a5
+; LA64-NEXT:    addi.w $a6, $a2, 0
 ; LA64-NEXT:  .LBB1_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB1_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    bne $a5, $a2, .LBB1_5
+; LA64-NEXT:    ll.w $a2, $a0, 0
+; LA64-NEXT:    bne $a2, $a6, .LBB1_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_3 Depth=2
-; LA64-NEXT:    move $a7, $a6
+; LA64-NEXT:    move $a7, $a5
 ; LA64-NEXT:    sc.w $a7, $a0, 0
 ; LA64-NEXT:    beqz $a7, .LBB1_3
 ; LA64-NEXT:    b .LBB1_6
@@ -94,11 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB1_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT:    addi.w $a6, $a2, 0
-; LA64-NEXT:    move $a2, $a5
-; LA64-NEXT:    bne $a5, $a6, .LBB1_1
+; LA64-NEXT:    bne $a2, $a6, .LBB1_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a5, $a3
+; LA64-NEXT:    srl.w $a0, $a2, $a3
 ; LA64-NEXT:    ret
   %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
   ret i16 %result
@@ -107,37 +105,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-LABEL: atomicrmw_uinc_wrap_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ld.w $a3, $a0, 0
-; LA64-NEXT:    addi.w $a2, $a1, 0
+; LA64-NEXT:    ld.w $a2, $a0, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB2_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB2_3 Depth 2
-; LA64-NEXT:    addi.w $a4, $a3, 0
-; LA64-NEXT:    sltu $a1, $a4, $a2
-; LA64-NEXT:    xori $a1, $a1, 1
-; LA64-NEXT:    addi.d $a5, $a3, 1
-; LA64-NEXT:    masknez $a5, $a5, $a1
+; LA64-NEXT:    addi.w $a3, $a2, 0
+; LA64-NEXT:    sltu $a4, $a3, $a1
+; LA64-NEXT:    xori $a4, $a4, 1
+; LA64-NEXT:    addi.d $a2, $a2, 1
+; LA64-NEXT:    masknez $a4, $a2, $a4
 ; LA64-NEXT:  .LBB2_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB2_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a1, $a0, 0
-; LA64-NEXT:    bne $a1, $a3, .LBB2_5
+; LA64-NEXT:    ll.w $a2, $a0, 0
+; LA64-NEXT:    bne $a2, $a3, .LBB2_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sc.w $a6, $a0, 0
-; LA64-NEXT:    beqz $a6, .LBB2_3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    sc.w $a5, $a0, 0
+; LA64-NEXT:    beqz $a5, .LBB2_3
 ; LA64-NEXT:    b .LBB2_6
 ; LA64-NEXT:  .LBB2_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_1 Depth=1
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB2_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT:    move $a3, $a1
-; LA64-NEXT:    bne $a1, $a4, .LBB2_1
+; LA64-NEXT:    bne $a2, $a3, .LBB2_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    move $a0, $a2
 ; LA64-NEXT:    ret
   %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
   ret i32 %result
@@ -209,15 +206,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    andi $a6, $a6, 255
 ; LA64-NEXT:    sll.w $a6, $a6, $a3
 ; LA64-NEXT:    and $a7, $a2, $a4
-; LA64-NEXT:    or $a7, $a7, $a6
+; LA64-NEXT:    or $a6, $a7, $a6
+; LA64-NEXT:    addi.w $a7, $a2, 0
 ; LA64-NEXT:  .LBB4_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB4_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a6, $a0, 0
-; LA64-NEXT:    bne $a6, $a2, .LBB4_5
+; LA64-NEXT:    ll.w $a2, $a0, 0
+; LA64-NEXT:    bne $a2, $a7, .LBB4_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_3 Depth=2
-; LA64-NEXT:    move $t0, $a7
+; LA64-NEXT:    move $t0, $a6
 ; LA64-NEXT:    sc.w $t0, $a0, 0
 ; LA64-NEXT:    beqz $t0, .LBB4_3
 ; LA64-NEXT:    b .LBB4_6
@@ -226,11 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB4_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT:    addi.w $a7, $a2, 0
-; LA64-NEXT:    move $a2, $a6
-; LA64-NEXT:    bne $a6, $a7, .LBB4_1
+; LA64-NEXT:    bne $a2, $a7, .LBB4_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a6, $a3
+; LA64-NEXT:    srl.w $a0, $a2, $a3
 ; LA64-NEXT:    ret
   %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
   ret i8 %result
@@ -266,15 +262,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    bstrpick.d $a6, $a6, 15, 0
 ; LA64-NEXT:    sll.w $a6, $a6, $a3
 ; LA64-NEXT:    and $a7, $a2, $a4
-; LA64-NEXT:    or $a7, $a7, $a6
+; LA64-NEXT:    or $a6, $a7, $a6
+; LA64-NEXT:    addi.w $a7, $a2, 0
 ; LA64-NEXT:  .LBB5_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB5_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a6, $a0, 0
-; LA64-NEXT:    bne $a6, $a2, .LBB5_5
+; LA64-NEXT:    ll.w $a2, $a0, 0
+; LA64-NEXT:    bne $a2, $a7, .LBB5_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_3 Depth=2
-; LA64-NEXT:    move $t0, $a7
+; LA64-NEXT:    move $t0, $a6
 ; LA64-NEXT:    sc.w $t0, $a0, 0
 ; LA64-NEXT:    beqz $t0, .LBB5_3
 ; LA64-NEXT:    b .LBB5_6
@@ -283,11 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB5_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT:    addi.w $a7, $a2, 0
-; LA64-NEXT:    move $a2, $a6
-; LA64-NEXT:    bne $a6, $a7, .LBB5_1
+; LA64-NEXT:    bne $a2, $a7, .LBB5_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a6, $a3
+; LA64-NEXT:    srl.w $a0, $a2, $a3
 ; LA64-NEXT:    ret
   %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
   ret i16 %result
@@ -296,40 +291,39 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-LABEL: atomicrmw_udec_wrap_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ld.w $a4, $a0, 0
+; LA64-NEXT:    ld.w $a2, $a0, 0
 ; LA64-NEXT:    addi.w $a3, $a1, 0
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB6_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB6_3 Depth 2
-; LA64-NEXT:    addi.w $a5, $a4, 0
-; LA64-NEXT:    sltu $a2, $a3, $a5
-; LA64-NEXT:    addi.d $a6, $a4, -1
-; LA64-NEXT:    masknez $a6, $a6, $a2
-; LA64-NEXT:    maskeqz $a2, $a1, $a2
-; LA64-NEXT:    or $a2, $a2, $a6
-; LA64-NEXT:    sltui $a6, $a5, 1
-; LA64-NEXT:    masknez $a2, $a2, $a6
-; LA64-NEXT:    maskeqz $a6, $a1, $a6
-; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    addi.w $a4, $a2, 0
+; LA64-NEXT:    sltu $a5, $a3, $a4
+; LA64-NEXT:    addi.d $a2, $a2, -1
+; LA64-NEXT:    masknez $a2, $a2, $a5
+; LA64-NEXT:    maskeqz $a5, $a1, $a5
+; LA64-NEXT:    or $a2, $a5, $a2
+; LA64-NEXT:    sltui $a5, $a4, 1
+; LA64-NEXT:    masknez $a2, $a2, $a5
+; LA64-NEXT:    maskeqz $a5, $a1, $a5
+; LA64-NEXT:    or $a5, $a5, $a2
 ; LA64-NEXT:  .LBB6_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB6_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
 ; LA64-NEXT:    ll.w $a2, $a0, 0
 ; LA64-NEXT:    bne $a2, $a4, .LBB6_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT:    move $a7, $a6
-; LA64-NEXT:    sc.w $a7, $a0, 0
-; LA64-NEXT:    beqz $a7, .LBB6_3
+; LA64-NEXT:    move $a6, $a5
+; LA64-NEXT:    sc.w $a6, $a0, 0
+; LA64-NEXT:    beqz $a6, .LBB6_3
 ; LA64-NEXT:    b .LBB6_6
 ; LA64-NEXT:  .LBB6_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_1 Depth=1
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB6_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT:    move $a4, $a2
-; LA64-NEXT:    bne $a2, $a5, .LBB6_1
+; LA64-NEXT:    bne $a2, $a4, .LBB6_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
 ; LA64-NEXT:    move $a0, $a2
 ; LA64-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -69,6 +69,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_acquire_acquire:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB2_3
@@ -172,6 +173,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_acquire_monotonic:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB6_3
@@ -279,9 +281,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
 define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $a1, 0
 ; LA64-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    bne $a3, $a1, .LBB10_3
+; LA64-NEXT:    ll.w $a1, $a0, 0
+; LA64-NEXT:    bne $a1, $a3, .LBB10_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.w $a4, $a0, 0
@@ -290,7 +293,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou
 ; LA64-NEXT:  .LBB10_3:
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB10_4:
-; LA64-NEXT:    move $a0, $a3
+; LA64-NEXT:    move $a0, $a1
 ; LA64-NEXT:    ret
   %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
   %res = extractvalue { i32, i1 } %tmp, 0
@@ -396,6 +399,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
 define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB14_3
@@ -407,8 +411,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw
 ; LA64-NEXT:  .LBB14_3:
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB14_4:
-; LA64-NEXT:    addi.w $a0, $a1, 0
-; LA64-NEXT:    xor $a0, $a3, $a0
+; LA64-NEXT:    xor $a0, $a3, $a1
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
@@ -506,6 +509,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
 define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB18_3
@@ -613,9 +617,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val)
 define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a3, $a1, 0
 ; LA64-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ll.w $a3, $a0, 0
-; LA64-NEXT:    bne $a3, $a1, .LBB22_3
+; LA64-NEXT:    ll.w $a1, $a0, 0
+; LA64-NEXT:    bne $a1, $a3, .LBB22_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.w $a4, $a0, 0
@@ -624,7 +629,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val)
 ; LA64-NEXT:  .LBB22_3:
 ; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB22_4:
-; LA64-NEXT:    move $a0, $a3
+; LA64-NEXT:    move $a0, $a1
 ; LA64-NEXT:    ret
   %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
   %res = extractvalue { i32, i1 } %tmp, 0
@@ -730,6 +735,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n
 define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB26_3
@@ -741,8 +747,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n
 ; LA64-NEXT:  .LBB26_3:
 ; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB26_4:
-; LA64-NEXT:    addi.w $a0, $a1, 0
-; LA64-NEXT:    xor $a0, $a3, $a0
+; LA64-NEXT:    xor $a0, $a3, $a1
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll

Original file line number	Diff line number	Diff line change
`@@ -4940,3 +4940,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {`
`4940`	`4940`
`4941`	`4941`	`return !isa<ConstantSDNode>(Y);`
`4942`	`4942`	`}`
	`4943`	`+`
	`4944`	`+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {`
	`4945`	`+ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.`
	`4946`	`+ return ISD::SIGN_EXTEND;`
	`4947`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -206,6 +206,8 @@ class LoongArchTargetLowering : public TargetLowering {`
`206`	`206`	`return ISD::SIGN_EXTEND;`
`207`	`207`	`}`
`208`	`208`
	`209`	`+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override;`
	`210`	`+`
`209`	`211`	`Register getRegisterByName(const char *RegName, LLT VT,`
`210`	`212`	`const MachineFunction &MF) const override;`
`211`	`213`	`bool mayBeEmittedAsTailCall(const CallInst *CI) const override;`