Skip to content

Commit f0dd12e

Browse files
committed
[x86] use zero-extending load of a byte outside of loops too (2nd try)
The first attempt missed changing test files for tools (update_llc_test_checks.py).

Original commit message:

This implements the main suggested change from issue #56498. Using the shorter (non-extending) instruction with only -Oz ("minsize") rather than -Os ("optsize") is left as a possible follow-up.

As noted in the bug report, the zero-extending load may have shorter latency/better throughput across a wide range of x86 micro-arches, and it avoids a potential false dependency. The cost is an extra instruction byte.

This could cause perf ups and downs from secondary effects, but I don't think it is possible to account for those in advance, and that will likely also depend on exact micro-arch. This does bring LLVM x86 codegen more in line with existing gcc codegen, so if problems are exposed they are more likely to occur for both compilers.

Differential Revision: https://reviews.llvm.org/D129775
1 parent 2d889a8 commit f0dd12e

File tree

211 files changed

+3834
-3292
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

211 files changed

+3834
-3292
lines changed

llvm/lib/Target/X86/X86FixupBWInsts.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
393393
switch (MI->getOpcode()) {
394394

395395
case X86::MOV8rm:
396-
// Only replace 8 bit loads with the zero extending versions if
397-
// in an inner most loop and not optimizing for size. This takes
398-
// an extra byte to encode, and provides limited performance upside.
399-
if (MachineLoop *ML = MLI->getLoopFor(&MBB))
400-
if (ML->begin() == ML->end() && !OptForSize)
401-
return tryReplaceLoad(X86::MOVZX32rm8, MI);
396+
// Replace 8-bit loads with the zero-extending version if not optimizing
397+
// for size. The extending op is cheaper across a wide range of uarch and
398+
// it avoids a potentially expensive partial register stall. It takes an
399+
// extra byte to encode, however, so don't do this when optimizing for size.
400+
if (!OptForSize)
401+
return tryReplaceLoad(X86::MOVZX32rm8, MI);
402402
break;
403403

404404
case X86::MOV16rm:

llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ target triple = "i686-unknown-unknown"
1111
define i32 @test5(i32 %B, i8 %C) {
1212
; CHECK-LABEL: test5:
1313
; CHECK: # %bb.0: # %entry
14-
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
14+
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1515
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
1616
; CHECK-NEXT: movl A, %eax
1717
; CHECK-NEXT: shldl %cl, %edx, %eax

llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define void @test() {
1010
; CHECK: # %bb.0:
1111
; CHECK-NEXT: movl A, %eax
1212
; CHECK-NEXT: movzwl 2(%eax), %eax
13-
; CHECK-NEXT: movb B, %cl
13+
; CHECK-NEXT: movzbl B, %ecx
1414
; CHECK-NEXT: movl C, %edx
1515
; CHECK-NEXT: andb $16, %cl
1616
; CHECK-NEXT: shll %cl, %edx

llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ define void @handle_vector_size_attribute() nounwind {
99
; CHECK-NEXT: cmpl $1, %eax
1010
; CHECK-NEXT: ja .LBB0_2
1111
; CHECK-NEXT: # %bb.1: # %bb77
12-
; CHECK-NEXT: movb 0, %al
13-
; CHECK-NEXT: movb 0, %al
12+
; CHECK-NEXT: movzbl 0, %eax
13+
; CHECK-NEXT: movzbl 0, %eax
1414
; CHECK-NEXT: xorl %eax, %eax
1515
; CHECK-NEXT: testb %al, %al
1616
; CHECK-NEXT: .LBB0_2: # %bb84

llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,11 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
6969
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %rbp
7070
; CHECK-NEXT: movq (%rbp), %rax
7171
; CHECK-NEXT: callq *216(%rax)
72-
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl
72+
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
7373
; CHECK-NEXT: testb %dl, %dl
7474
; CHECK-NEXT: je LBB0_11
7575
; CHECK-NEXT: ## %bb.7: ## %cond_false.i
76-
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl
76+
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
7777
; CHECK-NEXT: movzbl %bl, %ecx
7878
; CHECK-NEXT: movl %ecx, %eax
7979
; CHECK-NEXT: divb %dl
@@ -98,8 +98,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
9898
; CHECK-NEXT: LBB0_11: ## %cond_true.i
9999
; CHECK-NEXT: movl $4, %edi
100100
; CHECK-NEXT: callq _feraiseexcept
101-
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl
102-
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl
101+
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
102+
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
103103
; CHECK-NEXT: xorl %r14d, %r14d
104104
; CHECK-NEXT: testb %bl, %bl
105105
; CHECK-NEXT: je LBB0_14

llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
3131
; CHECK-NEXT: .cfi_offset %ebx, -12
3232
; CHECK-NEXT: .cfi_offset %ebp, -8
3333
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
34-
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
34+
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
3535
; CHECK-NEXT: testb $1, %bl
3636
; CHECK-NEXT: je LBB0_25
3737
; CHECK-NEXT: ## %bb.1: ## %bb116.i

llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define void @testit63_entry_2E_ce() nounwind {
1717
; CHECK-NEXT: movl %esp, %edi
1818
; CHECK-NEXT: movl $g1s63, %esi
1919
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
20-
; CHECK-NEXT: movb g1s63+62, %al
20+
; CHECK-NEXT: movzbl g1s63+62, %eax
2121
; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
2222
; CHECK-NEXT: movzwl g1s63+60, %eax
2323
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)

llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define i32 @func_44(i16 signext %p_46) nounwind {
1515
; SOURCE-SCHED-NEXT: xorl %ecx, %ecx
1616
; SOURCE-SCHED-NEXT: cmpl $2, %eax
1717
; SOURCE-SCHED-NEXT: setge %cl
18-
; SOURCE-SCHED-NEXT: movb g_73, %dl
18+
; SOURCE-SCHED-NEXT: movzbl g_73, %edx
1919
; SOURCE-SCHED-NEXT: xorl %eax, %eax
2020
; SOURCE-SCHED-NEXT: subb {{[0-9]+}}(%esp), %al
2121
; SOURCE-SCHED-NEXT: testb %dl, %dl

llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll

+36-9
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,44 @@
1-
; RUN: llc < %s -mcpu=core2 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 | FileCheck %s
23

34
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
4-
target triple = "x86_64-apple-darwin10.4"
55
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
66

77
define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
8+
; CHECK-LABEL: cli_magic_scandesc:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: subq $72, %rsp
11+
; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
12+
; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
13+
; CHECK-NEXT: movzbl (%rsp), %eax
14+
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
15+
; CHECK-NEXT: movq (%rdi), %rdx
16+
; CHECK-NEXT: movq 8(%rdi), %rsi
17+
; CHECK-NEXT: movq %rdx, (%rsp)
18+
; CHECK-NEXT: movq 24(%rdi), %rdx
19+
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
20+
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
21+
; CHECK-NEXT: movq 16(%rdi), %rdx
22+
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
23+
; CHECK-NEXT: movq 32(%rdi), %rdx
24+
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
25+
; CHECK-NEXT: movq 40(%rdi), %rdx
26+
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
27+
; CHECK-NEXT: movq 48(%rdi), %rdx
28+
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
29+
; CHECK-NEXT: movq 56(%rdi), %rdx
30+
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
31+
; CHECK-NEXT: movb %al, (%rsp)
32+
; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp)
33+
; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
34+
; CHECK-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
35+
; CHECK-NEXT: jne .LBB0_2
36+
; CHECK-NEXT: # %bb.1: # %entry
37+
; CHECK-NEXT: xorl %eax, %eax
38+
; CHECK-NEXT: addq $72, %rsp
39+
; CHECK-NEXT: retq
40+
; CHECK-NEXT: .LBB0_2: # %entry
41+
; CHECK-NEXT: callq __stack_chk_fail@PLT
842
entry:
943
%a = alloca [64 x i8]
1044
%c = getelementptr inbounds [64 x i8], ptr %a, i64 0, i32 30
@@ -15,10 +49,3 @@ entry:
1549
store i8 %e, ptr %c, align 8
1650
ret i32 0
1751
}
18-
19-
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
20-
; CHECK: movb (%rsp), [[R1:%.+]]
21-
; CHECK: movb 30(%rsp), [[R0:%.+]]
22-
; CHECK: movb [[R1]], (%rsp)
23-
; CHECK: movb [[R0]], 30(%rsp)
24-
; CHECK: callq ___stack_chk_fail

llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ define i8 @neg_type_mismatch(i32 %a1_wide_orig, i16 %a2_wide_orig, i32 %inc) nou
233233
define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind {
234234
; I386-NOCMOV-LABEL: negative_CopyFromReg:
235235
; I386-NOCMOV: # %bb.0:
236-
; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
236+
; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
237237
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
238238
; I386-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
239239
; I386-NOCMOV-NEXT: cmpb %cl, %al
@@ -255,7 +255,7 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw
255255
;
256256
; I686-NOCMOV-LABEL: negative_CopyFromReg:
257257
; I686-NOCMOV: # %bb.0:
258-
; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
258+
; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
259259
; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
260260
; I686-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
261261
; I686-NOCMOV-NEXT: cmpb %cl, %al
@@ -297,8 +297,8 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw
297297
define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
298298
; I386-NOCMOV-LABEL: negative_CopyFromRegs:
299299
; I386-NOCMOV: # %bb.0:
300-
; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
301-
; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
300+
; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
301+
; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
302302
; I386-NOCMOV-NEXT: cmpb %cl, %al
303303
; I386-NOCMOV-NEXT: jg .LBB4_2
304304
; I386-NOCMOV-NEXT: # %bb.1:
@@ -317,8 +317,8 @@ define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
317317
;
318318
; I686-NOCMOV-LABEL: negative_CopyFromRegs:
319319
; I686-NOCMOV: # %bb.0:
320-
; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
321-
; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
320+
; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
321+
; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
322322
; I686-NOCMOV-NEXT: cmpb %cl, %al
323323
; I686-NOCMOV-NEXT: jg .LBB4_2
324324
; I686-NOCMOV-NEXT: # %bb.1:

llvm/test/CodeGen/X86/GlobalISel/callingconv.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ define void @test_abi_exts_call(ptr %addr) {
324324
; X32-NEXT: .cfi_offset %esi, -12
325325
; X32-NEXT: .cfi_offset %ebx, -8
326326
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
327-
; X32-NEXT: movb (%eax), %bl
327+
; X32-NEXT: movzbl (%eax), %ebx
328328
; X32-NEXT: movzbl %bl, %esi
329329
; X32-NEXT: movl %esi, (%esp)
330330
; X32-NEXT: calll take_char
@@ -346,7 +346,7 @@ define void @test_abi_exts_call(ptr %addr) {
346346
; X64-NEXT: pushq %rbx
347347
; X64-NEXT: .cfi_def_cfa_offset 16
348348
; X64-NEXT: .cfi_offset %rbx, -16
349-
; X64-NEXT: movb (%rdi), %al
349+
; X64-NEXT: movzbl (%rdi), %eax
350350
; X64-NEXT: movzbl %al, %ebx
351351
; X64-NEXT: movl %ebx, %edi
352352
; X64-NEXT: callq take_char

llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define i1 @test_load_i1(ptr %p1) {
88
; CHECK-LABEL: test_load_i1:
99
; CHECK: # %bb.0:
1010
; CHECK-NEXT: movl 4(%esp), %eax
11-
; CHECK-NEXT: movb (%eax), %al
11+
; CHECK-NEXT: movzbl (%eax), %eax
1212
; CHECK-NEXT: retl
1313
%r = load i1, ptr %p1
1414
ret i1 %r
@@ -18,7 +18,7 @@ define i8 @test_load_i8(ptr %p1) {
1818
; CHECK-LABEL: test_load_i8:
1919
; CHECK: # %bb.0:
2020
; CHECK-NEXT: movl 4(%esp), %eax
21-
; CHECK-NEXT: movb (%eax), %al
21+
; CHECK-NEXT: movzbl (%eax), %eax
2222
; CHECK-NEXT: retl
2323
%r = load i8, ptr %p1
2424
ret i8 %r

llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
define i1 @test_load_i1(ptr %p1) {
66
; ALL-LABEL: test_load_i1:
77
; ALL: # %bb.0:
8-
; ALL-NEXT: movb (%rdi), %al
8+
; ALL-NEXT: movzbl (%rdi), %eax
99
; ALL-NEXT: retq
1010
%r = load i1, ptr %p1
1111
ret i1 %r
@@ -14,7 +14,7 @@ define i1 @test_load_i1(ptr %p1) {
1414
define i8 @test_load_i8(ptr %p1) {
1515
; ALL-LABEL: test_load_i8:
1616
; ALL: # %bb.0:
17-
; ALL-NEXT: movb (%rdi), %al
17+
; ALL-NEXT: movzbl (%rdi), %eax
1818
; ALL-NEXT: retq
1919
%r = load i8, ptr %p1
2020
ret i8 %r

llvm/test/CodeGen/X86/PR40322.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define void @_Z2ami(i32) #0 personality ptr @__gxx_personality_v0 {
1515
; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12
1616
; CHECK-MINGW-X86-NEXT: .cfi_offset %esi, -12
1717
; CHECK-MINGW-X86-NEXT: .cfi_offset %edi, -8
18-
; CHECK-MINGW-X86-NEXT: movb __ZGVZ2amiE2au, %al
18+
; CHECK-MINGW-X86-NEXT: movzbl __ZGVZ2amiE2au, %eax
1919
; CHECK-MINGW-X86-NEXT: testb %al, %al
2020
; CHECK-MINGW-X86-NEXT: jne LBB0_4
2121
; CHECK-MINGW-X86-NEXT: # %bb.1: # %init.check

llvm/test/CodeGen/X86/abs.ll

+13-13
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define i8 @test_i8(i8 %a) nounwind {
3535
;
3636
; X86-LABEL: test_i8:
3737
; X86: # %bb.0:
38-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
38+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
3939
; X86-NEXT: movl %eax, %ecx
4040
; X86-NEXT: sarb $7, %cl
4141
; X86-NEXT: xorb %cl, %al
@@ -530,13 +530,13 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
530530
; X86-NEXT: xorb %al, %bh
531531
; X86-NEXT: subb %al, %bh
532532
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
533-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
533+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
534534
; X86-NEXT: movl %ecx, %eax
535535
; X86-NEXT: sarb $7, %al
536536
; X86-NEXT: xorb %al, %cl
537537
; X86-NEXT: subb %al, %cl
538538
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
539-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
539+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
540540
; X86-NEXT: movl %ecx, %eax
541541
; X86-NEXT: sarb $7, %al
542542
; X86-NEXT: xorb %al, %cl
@@ -572,7 +572,7 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
572572
; X86-NEXT: sarb $7, %al
573573
; X86-NEXT: xorb %al, %cl
574574
; X86-NEXT: subb %al, %cl
575-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
575+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
576576
; X86-NEXT: movb %al, %ah
577577
; X86-NEXT: sarb $7, %ah
578578
; X86-NEXT: xorb %ah, %al
@@ -585,23 +585,23 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
585585
; X86-NEXT: movb %dh, 11(%esi)
586586
; X86-NEXT: movb %bl, 10(%esi)
587587
; X86-NEXT: movb %bh, 9(%esi)
588-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
588+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
589589
; X86-NEXT: movb %al, 8(%esi)
590-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
590+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
591591
; X86-NEXT: movb %al, 7(%esi)
592-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
592+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
593593
; X86-NEXT: movb %al, 6(%esi)
594-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
594+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
595595
; X86-NEXT: movb %al, 5(%esi)
596-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
596+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
597597
; X86-NEXT: movb %al, 4(%esi)
598-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
598+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
599599
; X86-NEXT: movb %al, 3(%esi)
600-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
600+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
601601
; X86-NEXT: movb %al, 2(%esi)
602-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
602+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
603603
; X86-NEXT: movb %al, 1(%esi)
604-
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
604+
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
605605
; X86-NEXT: movb %al, (%esi)
606606
; X86-NEXT: movl %esi, %eax
607607
; X86-NEXT: addl $12, %esp

llvm/test/CodeGen/X86/add-sub-bool.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
390390
; X86-NEXT: pushl %ebx
391391
; X86-NEXT: pushl %edi
392392
; X86-NEXT: pushl %esi
393-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
393+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
394394
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
395395
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
396396
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -529,7 +529,7 @@ define i32 @test_i32_sub_add_sext_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
529529
; X86-LABEL: test_i32_sub_add_sext_var:
530530
; X86: # %bb.0:
531531
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
532-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
532+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
533533
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
534534
; X86-NEXT: shll %cl, %edx
535535
; X86-NEXT: sarl $31, %edx

llvm/test/CodeGen/X86/and-load-fold.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
define i8 @foo(ptr %V) {
77
; CHECK-LABEL: foo:
88
; CHECK: # %bb.0:
9-
; CHECK-NEXT: movb 2(%rdi), %al
9+
; CHECK-NEXT: movzbl 2(%rdi), %eax
1010
; CHECK-NEXT: andb $95, %al
1111
; CHECK-NEXT: retq
1212
%V3i8 = load <3 x i8>, ptr %V, align 4

llvm/test/CodeGen/X86/and-sink.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
5151
; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
5252
; CHECK-NEXT: je .LBB1_5
5353
; CHECK-NEXT: # %bb.1: # %bb0.preheader
54-
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
54+
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
5555
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
5656
; CHECK-NEXT: .p2align 4, 0x90
5757
; CHECK-NEXT: .LBB1_2: # %bb0

llvm/test/CodeGen/X86/and-with-overflow.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
1010
; X86-LABEL: and_i8_ri:
1111
; X86: # %bb.0:
12-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
12+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1313
; X86-NEXT: movl %eax, %ecx
1414
; X86-NEXT: andb $-17, %cl
1515
; X86-NEXT: je .LBB0_2
@@ -35,8 +35,8 @@ define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
3535
define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
3636
; X86-LABEL: and_i8_rr:
3737
; X86: # %bb.0:
38-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
39-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
38+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
39+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
4040
; X86-NEXT: andb %al, %cl
4141
; X86-NEXT: je .LBB1_2
4242
; X86-NEXT: # %bb.1:

0 commit comments

Comments
 (0)