[AArch64] Optimization of repeated constant loads (#51483) #86249

Closed
wants to merge 2 commits

Conversation

ParkHanbum
Contributor

This change looks for cases where the same constant is loaded into both the upper and lower 32-bit halves of a 64-bit register. When such a case is found, it materializes only the lower 32-bit constant and replaces the 64-bit store with an instruction that stores that value to the upper and lower halves simultaneously.

For example:
  renamable $x8 = MOVZXi 49370, 0
  renamable $x8 = MOVKXi $x8, 320, 16
  renamable $x8 = MOVKXi $x8, 49370, 32
  renamable $x8 = MOVKXi $x8, 320, 48
  STRXui killed renamable $x8, killed renamable $x0, 0
becomes
  $w8 = MOVZWi 49370, 0
  $w8 = MOVKWi $w8, 320, 16
  STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
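As an illustration only (this C source is not taken from the PR and the function name is made up), a store like the following exercises the pattern: 49370 is 0xC0DA and 320 is 0x0140, so the 32-bit value 0x0140C0DA is repeated in both halves of the 64-bit constant.

  // Hypothetical reproducer, not from the PR: the 64-bit constant repeats
  // its lower 32-bit half, so the store can become mov+movk+stp.
  void store_repeated_constant(unsigned long long *p) {
    *p = 0x0140C0DA0140C0DAULL; // 0x0140C0DA in both the upper and lower half
  }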

@llvmbot
Member

llvmbot commented Mar 22, 2024

@llvm/pr-subscribers-backend-aarch64

Author: hanbeom (ParkHanbum)

Changes

This change looks for cases where the same constant is loaded into both the upper and lower 32-bit halves of a 64-bit register. When such a case is found, it materializes only the lower 32-bit constant and replaces the 64-bit store with an instruction that stores that value to the upper and lower halves simultaneously.

For example:
  renamable $x8 = MOVZXi 49370, 0
  renamable $x8 = MOVKXi $x8, 320, 16
  renamable $x8 = MOVKXi $x8, 49370, 32
  renamable $x8 = MOVKXi $x8, 320, 48
  STRXui killed renamable $x8, killed renamable $x0, 0
becomes
  $w8 = MOVZWi 49370, 0
  $w8 = MOVKWi $w8, 320, 16
  STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0


Full diff: https://github.com/llvm/llvm-project/pull/86249.diff

1 file affected:

  • (modified) llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (+148)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 926a89466255ca..0b7dc874f614fc 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -199,6 +199,13 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and merge a base register updates before or after a ld/st instruction.
   bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
 
+  // Finds and collapses loads of repeated constant values.
+  bool foldRepeatedConstantLoads(MachineBasicBlock::iterator &I,
+                                 unsigned Limit);
+  MachineBasicBlock::iterator tryToFoldRepeatedConstantLoads(
+      MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
+      int SuccIndex, int Accumulated);
+
   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -2250,6 +2257,126 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
   return E;
 }
 
+static unsigned getMovValueOrder(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return 0;
+  case AArch64::MOVNXi:
+  case AArch64::MOVZXi:
+    return 1;
+  case AArch64::MOVKXi:
+    return 2;
+  }
+
+  return 0;
+}
+
+MachineBasicBlock::iterator AArch64LoadStoreOpt::tryToFoldRepeatedConstantLoads(
+    MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
+    int SuccIndex, int Accumulated) {
+  MachineBasicBlock::iterator I = MI.getIterator();
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  MachineBasicBlock::iterator FirstMovI;
+  MachineBasicBlock *MBB = MI.getParent();
+  uint64_t Mask = 0xFFFFUL;
+  int Index = 0;
+
+  for (auto MI = MIs.begin(), E = MIs.end(); MI != E; ++MI, Index++) {
+    if (Index == SuccIndex - 1) {
+      FirstMovI = *MI;
+      break;
+    }
+    (*MI)->eraseFromParent();
+  }
+
+  Register DstRegW =
+      TRI->getSubReg(FirstMovI->getOperand(0).getReg(), AArch64::sub_32);
+  BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(), TII->get(AArch64::MOVZWi),
+          DstRegW)
+      .addImm(Accumulated & Mask)
+      .addImm(0);
+  BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(), TII->get(AArch64::MOVKWi),
+          DstRegW)
+      .addUse(DstRegW)
+      .addImm((Accumulated >> 16) & Mask)
+      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
+  FirstMovI->eraseFromParent();
+
+  Register BaseReg = getLdStRegOp(MI).getReg();
+  const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
+  DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
+  unsigned DstRegState = getRegState(MI.getOperand(0));
+  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
+      .addReg(DstRegW, DstRegState)
+      .addReg(DstRegW, DstRegState)
+      .addReg(MO.getReg(), getRegState(MO))
+      .add(AArch64InstrInfo::getLdStOffsetOp(MI))
+      .setMemRefs(MI.memoperands())
+      .setMIFlags(MI.getFlags());
+  I->eraseFromParent();
+
+  return NextI;
+}
+
+bool AArch64LoadStoreOpt::foldRepeatedConstantLoads(
+    MachineBasicBlock::iterator &I, unsigned Limit) {
+  MachineInstr &MI = *I;
+  if (MI.getOpcode() != AArch64::STRXui)
+    return false;
+
+  MachineBasicBlock::iterator MBBI = I;
+  MachineBasicBlock::iterator B = I->getParent()->begin();
+  if (MBBI == B)
+    return false;
+
+  Register BaseReg = getLdStRegOp(MI).getReg();
+  unsigned Count = 0, SuccIndex = 0;
+  uint64_t Accumulated = 0;
+  SmallVector<MachineBasicBlock::iterator> MIs;
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  do {
+    MBBI = prev_nodbg(MBBI, B);
+    MachineInstr &MI = *MBBI;
+    if (!MI.isTransient())
+      ++Count;
+    unsigned CurrOpc = MI.getOpcode();
+    unsigned ValueOrder = getMovValueOrder(CurrOpc);
+    if (!ValueOrder) {
+      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                        TRI);
+      continue;
+    }
+
+    MachineOperand Value = MI.getOperand(ValueOrder);
+    MachineOperand Shift = MI.getOperand(ValueOrder + 1);
+    if (!Value.isImm() || !Shift.isImm())
+      return false;
+
+    uint64_t IValue = Value.getImm();
+    uint64_t IShift = Shift.getImm();
+    uint64_t mask = 0xFFFFUL;
+    Accumulated -= (Accumulated & (mask << IShift));
+    Accumulated += (IValue << IShift);
+    MIs.push_back(MBBI);
+
+    if (Accumulated != 0 && (Accumulated >> 32) == (Accumulated & UINT_MAX))
+      SuccIndex = Count;
+    if (!ModifiedRegUnits.available(BaseReg) ||
+        !UsedRegUnits.available(BaseReg))
+      break;
+  } while (MBBI != B && Count < Limit);
+
+  if (SuccIndex) {
+    I = tryToFoldRepeatedConstantLoads(MI, MIs, SuccIndex, Accumulated);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
     MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
@@ -2512,6 +2639,27 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
       ++MBBI;
   }
 
+  // We have an opportunity to optimize the `STRXui` instruction, which loads
+  // the same 32-bit value into a register twice. The `STPXi` instruction allows
+  // us to load a 32-bit value only once.
+  // Considering :
+  // mov     x8, 49370
+  // movk    x8, 320, lsl #16
+  // movk    x8, 49370, lsl #32
+  // movk    x8, 320, lsl #48
+  // str     x8, [x0]
+  // Transform :
+  // mov     w8, 49370
+  // movk    w8, 320, lsl #16
+  // stp     w8, w8, [x0]
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    if (foldRepeatedConstantLoads(MBBI, UpdateLimit))
+      Modified = true;
+    else
+      ++MBBI;
+  }
+
   return Modified;
 }
 

@vfdff
Contributor

vfdff commented Mar 22, 2024

GCC already does this: https://gcc.godbolt.org/z/1a4o7Tvzz

@davemgreen
Collaborator

It looks like gcc will transform repeated constants to or x, x, lsl 32 too:
https://gcc.godbolt.org/z/h6MqTja1s

Is it worth doing that first? Then this could be simplified a little to just having to look at str(or(x, x, lsl 32)).

Can you add some testing too? Thanks

@ParkHanbum
Contributor Author

ParkHanbum commented Mar 22, 2024

@davemgreen sure, consider it done.

@davemgreen
Collaborator

Thanks! I think I would expect it to be an additional case in AArch64_IMM::expandMOVImm(..) that checks that the two halves are the same.
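As a rough sketch of that suggestion (the helper name and its exact placement relative to expandMOVImm are assumptions made for this example, not code from any patch):

  #include <cstdint>

  // Sketch only: detect a 64-bit immediate whose upper and lower 32-bit
  // halves are identical, so it can be built as mov+movk(+orr) instead of
  // mov+movk+movk+movk. The helper name is invented for illustration.
  static bool hasRepeatedHalves(uint64_t Imm) {
    return Imm != 0 && (Imm >> 32) == (Imm & 0xffffffffULL);
  }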

@ParkHanbum
Contributor Author

Thanks for the tip!

@ParkHanbum
Contributor Author

@davemgreen Do we support the same ORR format as GCC?
I get an error when I give ORR three registers as operands; I wonder if this is because that instruction format isn't supported.
https://gcc.godbolt.org/z/131YsEY6W

Could you advise which instruction I should use, please?

@davemgreen
Collaborator

Hi - I believe you will need an ORRXrs. The ORRXri takes an immediate for the second operand.

It's like one of these: https://gcc.godbolt.org/z/8aWhfWv4T (but in that case we might need to check that the top bits are known to be 0 when we convert it to a stp).
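As a side note on the semantics (a standalone sketch, not code from any patch): orr xd, xn, xn, lsl #32 only replicates the low half cleanly when the top 32 bits of the source are already zero, which is the caveat above about converting to an stp.

  #include <cstdint>

  // What "orr x, x, x, lsl #32" computes on a value X: if the top 32 bits of
  // X are already zero, the result repeats X's low half in both halves;
  // otherwise the old top bits survive the OR.
  static uint64_t orrWithSelfShifted32(uint64_t X) {
    return X | (X << 32);
  }
  // e.g. orrWithSelfShifted32(0x0140C0DA) == 0x0140C0DA0140C0DA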

@ParkHanbum
Contributor Author

@davemgreen :) thanks!

@efriedma-quic
Collaborator

> Is it worth doing that first? Then this could be simplified a little to just having to look at str(or(x, x, lsl 32)).

This handles some cases, but not all. Consider:

void f(long long *A) {
   *A = 0xC2000000C2000000;
}

long long f2() {
    return 0xC2000000C2000000;
}

You want mov+stp for the store... but you don't want to generate orr for the return. (Even for the cases where we generate three instructions, we're going to avoid "orr(x, x, lsl 32)" where we can because it's more expensive than movk.)

Maybe we should run this optimization before pseudo-expansion?

@ParkHanbum
Contributor Author

@efriedma-quic that works for me.
What I'm trying now is: if we find the ORR while walking the sequence backwards, then apply the bitwise OR once we finally match the "movn".

Like this:

    // ValueOrder == 1 means this is a MOVZXi.
    if (ValueOrder == 1 && DupBitSize) {
      Accumulated |= Accumulated << DupBitSize;
      DupBitSize = 0;
    }

But this is based on the assumption that the "ORR(x, x, lsl 32)" was generated during pseudo-expansion, so it needs verification from you experts.

@davemgreen
Collaborator

orr(x, x, lsl n) is usually as cheap as a movk. It's really just a lsl #n with an or on the end, and unlike add/sub it can be done in a single cycle. Unfortunately that might not apply to every CPU, but it should for everything modern enough.

You might be right that we might need both methods in the end though. I was hoping we could add the orr combine because it is useful on its own.

@efriedma-quic
Collaborator

> orr(x, x, lsl n) is usually as cheap as a movk.

On which processors? It looks like on Cortex-X2 and newer, "orr(x, x, lsl n)" has the same cost as movk, but looking through various sources, I can't find any other chip that does. Granted, it's probably still better than generating two MOVK.

@efriedma-quic
Collaborator

> But this is based on the assumption that the "ORR(x, x, lsl 32)" was generated during pseudo-expansion.

I think it's fine to assume this as a starting-point; we can try to refine it in a followup.

@ParkHanbum force-pushed the issue-b51483 branch 2 times, most recently from 7082034 to 83ee099, on March 31, 2024 18:43
@ParkHanbum
Contributor Author

ParkHanbum commented Apr 4, 2024

Can I ask for a code review?
I tested it on my M1 MacBook and it passed.

@davemgreen
Collaborator

Hello. It would be best if there was one patch that added support for or(x, x, lsl 32), so we can make sure that is tested and committed and doesn't cause any problems. The load/store optimizer change should then be an addition on top.

Whenever we change these MIR optimization passes there seems to be a high chance of subtle things going wrong.

@efriedma-quic
Collaborator

(FYI, GitHub doesn't generate a notification when you push a new commit to your branch.)

@ParkHanbum
Contributor Author

@davemgreen got it, I'll split this.
@efriedma-quic I didn't know that. What should I do to request a review?

@efriedma-quic
Collaborator

Just leave a comment on the pull request noting that you've updated the patch.

(Actually, I've seen GitHub generate notifications for new commits in some cases, but in this case it didn't; not sure what the difference is.)

@davemgreen
Collaborator

Can you make the first half of this into a separate PR? Thanks.

@ParkHanbum
Contributor Author

@davemgreen sorry, I misunderstood. The test case I created consists of only one MOV instruction. I'm not sure how to create a failure case here; could you please advise?

Collaborator

@davemgreen left a comment

Thanks, this is looking decent to me. I think it is probably worth adding .ll file tests to check that everything works together properly. Maybe into a test like movw-const.ll.

These are some of the ones I tried. It would be good if 0x0000555500005555 still produced a mov+movk, and the last one is probably failing because of the f's. I'm not sure if there are other patterns that would be worth testing too.

define i64 @test0x1234567812345678() {
; CHECK-LABEL: test0x1234567812345678:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #22136 // =0x5678
; CHECK-NEXT:    movk x0, #4660, lsl #16
; CHECK-NEXT:    orr x0, x0, x0, lsl #32
; CHECK-NEXT:    ret
  ret i64 u0x1234567812345678
}

define i64 @test0xff3456ffff3456ff() {
; CHECK-LABEL: test0xff3456ffff3456ff:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #22271 // =0x56ff
; CHECK-NEXT:    movk x0, #65332, lsl #16
; CHECK-NEXT:    orr x0, x0, x0, lsl #32
; CHECK-NEXT:    ret
  ret i64 u0xff3456ffff3456ff
}

define i64 @test0x00345600345600() {
; CHECK-LABEL: test0x00345600345600:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #22016 // =0x5600
; CHECK-NEXT:    movk x0, #52, lsl #16
; CHECK-NEXT:    movk x0, #13398, lsl #32
; CHECK-NEXT:    ret
  ret i64 u0x00345600345600
}

define i64 @test0x5555555555555555() {
; CHECK-LABEL: test0x5555555555555555:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #6148914691236517205 // =0x5555555555555555
; CHECK-NEXT:    ret
  ret i64 u0x5555555555555555
}

define i64 @test0x5055555550555555() {
; CHECK-LABEL: test0x5055555550555555:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #6148914691236517205 // =0x5555555555555555
; CHECK-NEXT:    and x0, x0, #0xf0fffffff0ffffff
; CHECK-NEXT:    ret
  ret i64 u0x5055555550555555
}

define i64 @test0x0000555555555555() {
; CHECK-LABEL: test0x0000555555555555:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #6148914691236517205 // =0x5555555555555555
; CHECK-NEXT:    movk x0, #0, lsl #48
; CHECK-NEXT:    ret
  ret i64 u0x0000555555555555
}

define i64 @test0x0000555500005555() {
; CHECK-LABEL: test0x0000555500005555:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #21845 // =0x5555
; CHECK-NEXT:    orr x0, x0, x0, lsl #32
; CHECK-NEXT:    ret
  ret i64 u0x0000555500005555
}

define i64 @testu0xffff5555ffff5555() {
; CHECK-LABEL: testu0xffff5555ffff5555:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT:    orr x0, x0, x0, lsl #32
; CHECK-NEXT:    ret
  ret i64 u0xffff5555ffff5555
}

I looked through the other uses of expandMOVImm and they all check Insn.size() == 1 or are only checking for the number of instructions.

  Insn.push_back(
      {Opc, Imm16, AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)});
  if (Imm != 0 && (Imm >> 32) == (Imm & UINT_MAX)) {
    Insn.push_back({BitSize == 32 ? AArch64::ORRWrs : AArch64::ORRXrs, 0, 32});
Collaborator

If BitSize == 32 is unreachable then it might be worth only adding AArch64::ORRXrs.

@ParkHanbum requested a review from davemgreen on May 15, 2024 07:19
Collaborator

@davemgreen left a comment

Thanks for splitting out the tests, they look good apart from the one with ff's where isNeg=true.

; CHECK-LABEL: testu0xffff5555ffff5555:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: orr x0, x0, x0, lsl #32
Collaborator

I think this one might be incorrect, because it won't OR in the ff's from the first instruction.

Contributor Author

Thank you very much for showing us how to write tests. I added two tests.

Collaborator

@davemgreen left a comment

Thanks - I gave this a test and it did OK. It came up a decent amount, and I couldn't see any problems. Other than changing the UINT_MAX this LGTM.


  // Now we have the Imm split into 16-bit chunks. If the high and low 32-bit
  // halves are the same, there is an opportunity to use fewer instructions.
  if (Insn.size() > 2 && (Imm >> 32) == (Imm & UINT_MAX)) {
Collaborator

I would use 0xffffffff instead of UINT_MAX. They are likely always the same value, but the ff's are more explicit about the size.

Contributor Author

got it!

[AArch64] Optimization of repeated constant loads (#51483)

This change looks for cases of symmetric constant loading.
`symmetric constant load` is when the upper 32 bits and lower 32 bits
of a 64-bit register load the same value.

When it finds this, it replaces it with an instruction that loads only
the lower 32 bits of the constant and stores it in the upper and lower
bits simultaneously.

For example:
  renamable $x8 = MOVZXi 49370, 0
  renamable $x8 = MOVKXi $x8, 320, 16
  renamable $x8 = MOVKXi $x8, 49370, 32
  renamable $x8 = MOVKXi $x8, 320, 48
becomes
  renamable $x8 = MOVZXi 49370, 0
  renamable $x8 = MOVKXi $x8, 320, 16
  renamable $x8 = ORRXrs $x8, $x8, 32
@davemgreen
Collaborator

Thanks, LGTM

@davemgreen
Collaborator

Can you update the commit message? Then let me know if I should hit submit.

davemgreen pushed a commit that referenced this pull request May 20, 2024
@davemgreen
Collaborator

Pushed in ce1a0d8.
