; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR

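; The three RUN lines exercise 32-bit (X86) and 64-bit (X64) lowering, plus a
; 64-bit configuration with APX extended GPRs (+egpr). The EGPR run passes
; --show-mc-encoding, so its CHECK lines also pin down the instruction
; encodings; the "EVEX TO VEX Compression" comments show the BMI2
; instructions being compressed back to their VEX forms when no extended
; registers are involved.
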
define i32 @bzhi32(i32 %x, i32 %y)   {
; X86-LABEL: bzhi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    bzhil %eax, %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32:
; X64:       # %bb.0:
; X64-NEXT:    addl %edi, %edi
; X64-NEXT:    bzhil %esi, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: bzhi32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    addl %edi, %edi # encoding: [0x01,0xff]
; EGPR-NEXT:    bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = add i32 %x, %x
  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

define i32 @bzhi32_load(ptr %x, i32 %y)   {
; X86-LABEL: bzhi32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bzhil %eax, (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_load:
; X64:       # %bb.0:
; X64-NEXT:    bzhil %esi, (%rdi), %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: bzhi32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    bzhil %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0x07]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = load i32, ptr %x
  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

; PR48768 - 'bzhi' sets SF and ZF from its result and clears the overflow flag, so we don't need a separate 'test'.
define i1 @bzhi32_overflow(i32 %x, i32 %y) {
; X86-LABEL: bzhi32_overflow:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setle %al
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_overflow:
; X64:       # %bb.0:
; X64-NEXT:    bzhil %esi, %edi, %eax
; X64-NEXT:    setle %al
; X64-NEXT:    retq
;
; EGPR-LABEL: bzhi32_overflow:
; EGPR:       # %bb.0:
; EGPR-NEXT:    bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    setle %al # encoding: [0x0f,0x9e,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
  %cmp = icmp slt i32 %tmp, 1
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bzhi.32(i32, i32)
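
; A minimal sketch of the assumed bzhi semantics (a hypothetical, unchecked
; function, not one of the autogenerated tests above): for a bit index %n
; below 32, bzhi(%x, %n) is equivalent to masking %x with ((1 << %n) - 1).
define i32 @bzhi32_mask_sketch(i32 %x, i32 %n) {
  %shl = shl i32 1, %n      ; 1 << n
  %mask = sub i32 %shl, 1   ; mask covering the low n bits
  %and = and i32 %x, %mask  ; clear bits n and above
  ret i32 %and
}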

define i32 @pdep32(i32 %x, i32 %y)   {
; X86-LABEL: pdep32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32:
; X64:       # %bb.0:
; X64-NEXT:    addl %esi, %esi
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    addl %esi, %esi # encoding: [0x01,0xf6]
; EGPR-NEXT:    pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %y1 = add i32 %y, %y
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pdep32_load(i32 %x, ptr %y)   {
; X86-LABEL: pdep32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pdepl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_load:
; X64:       # %bb.0:
; X64-NEXT:    pdepl (%rsi), %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pdepl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0x06]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %y1 = load i32, ptr %y
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pdep32_anyext(i16 %x)   {
; X86-LABEL: pdep32_anyext:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $-1431655766, %ecx # imm = 0xAAAAAAAA
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_anyext:
; X64:       # %bb.0:
; X64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT:    pdepl %eax, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_anyext:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl $-1431655766, %eax # encoding: [0xb8,0xaa,0xaa,0xaa,0xaa]
; EGPR-NEXT:    # imm = 0xAAAAAAAA
; EGPR-NEXT:    pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = sext i16 %x to i32
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
  ret i32 %tmp
}

define i32 @pdep32_demandedbits(i32 %x) {
; X86-LABEL: pdep32_demandedbits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits:
; X64:       # %bb.0:
; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT:    pdepl %eax, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_demandedbits:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
; EGPR-NEXT:    # imm = 0x55555555
; EGPR-NEXT:    pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
  %tmp2 = and i32 %tmp, 1431655765
  ret i32 %tmp2
}

define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
; X86-LABEL: pdep32_demandedbits2:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pdepl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    andl $128, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits2:
; X64:       # %bb.0:
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    andl $128, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_demandedbits2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
; EGPR-NEXT:    andl $128, %eax # encoding: [0x25,0x80,0x00,0x00,0x00]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = and i32 %x, 255
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
  %tmp3 = and i32 %tmp2, 128
  ret i32 %tmp3
}

define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
; X86-LABEL: pdep32_demandedbits_mask:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pdepl %eax, %ecx, %eax
; X86-NEXT:    andl $32768, %eax # imm = 0x8000
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits_mask:
; X64:       # %bb.0:
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    andl $32768, %eax # imm = 0x8000
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_demandedbits_mask:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
; EGPR-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; EGPR-NEXT:    # imm = 0x8000
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sext i16 %y to i32
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
  %tmp3 = and i32 %tmp2, 32768
  ret i32 %tmp3
}

define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
; X86-LABEL: pdep32_demandedbits_mask2:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pdepl %eax, %ecx, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits_mask2:
; X64:       # %bb.0:
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_demandedbits_mask2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
; EGPR-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sext i16 %y to i32
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
  %tmp3 = and i32 %tmp2, 65535
  ret i32 %tmp3
}

define i32 @pdep32_knownbits(i32 %x) {
; X86-LABEL: pdep32_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    imull %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT:    pdepl %eax, %edi, %eax
; X64-NEXT:    imull %eax, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_knownbits:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
; EGPR-NEXT:    # imm = 0x55555555
; EGPR-NEXT:    pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
; EGPR-NEXT:    imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
  %tmp2 = and i32 %tmp, 1431655765
  %tmp3 = mul i32 %tmp, %tmp2
  ret i32 %tmp3
}

define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
; X86-LABEL: pdep32_knownbits2:
; X86:       # %bb.0:
; X86-NEXT:    movl $-256, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pdepl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    imull %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_knownbits2:
; X64:       # %bb.0:
; X64-NEXT:    andl $-256, %edi
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    imull %eax, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pdep32_knownbits2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andl $-256, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0xff]
; EGPR-NEXT:    pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
; EGPR-NEXT:    imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = and i32 %x, -256
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
  %tmp3 = and i32 %tmp2, -256
  %tmp4 = mul i32 %tmp2, %tmp3
  ret i32 %tmp4
}

declare i32 @llvm.x86.bmi.pdep.32(i32, i32)
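
; Reference loop for the assumed pdep semantics: deposit the low bits of
; %src, one at a time, into the positions of the set bits of %mask, scanning
; the mask from low to high. For example, pdep(0b11, 0b1010) == 0b1010 and
; pdep(0b01, 0b1010) == 0b0010. This is a hypothetical, unchecked sketch of
; the semantics, not a pattern the backend matches to a pdep instruction.
define i32 @pdep32_reference_sketch(i32 %src, i32 %mask) {
entry:
  br label %loop

loop:
  %m = phi i32 [ %mask, %entry ], [ %m.next, %latch ]
  %s = phi i32 [ %src, %entry ], [ %s.next, %latch ]
  %r = phi i32 [ 0, %entry ], [ %r.next, %latch ]
  %done = icmp eq i32 %m, 0
  br i1 %done, label %exit, label %latch

latch:
  %negm = sub i32 0, %m
  %bit = and i32 %m, %negm          ; lowest set bit of the remaining mask
  %takebit = and i32 %s, 1          ; next source bit to deposit
  %take = icmp ne i32 %takebit, 0
  %r.or = or i32 %r, %bit
  %r.next = select i1 %take, i32 %r.or, i32 %r
  %s.next = lshr i32 %s, 1          ; consume one source bit
  %m.next = xor i32 %m, %bit        ; clear the processed mask bit
  br label %loop

exit:
  ret i32 %r
}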

define i32 @pext32(i32 %x, i32 %y)   {
; X86-LABEL: pext32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    pextl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pext32:
; X64:       # %bb.0:
; X64-NEXT:    addl %esi, %esi
; X64-NEXT:    pextl %esi, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pext32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    addl %esi, %esi # encoding: [0x01,0xf6]
; EGPR-NEXT:    pextl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %y1 = add i32 %y, %y
  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pext32_load(i32 %x, ptr %y)   {
; X86-LABEL: pext32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pextl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pext32_load:
; X64:       # %bb.0:
; X64-NEXT:    pextl (%rsi), %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pext32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pextl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0x06]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %y1 = load i32, ptr %y
  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pext32_knownbits(i32 %x)   {
; X86-LABEL: pext32_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    pextl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pext32_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT:    pextl %eax, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: pext32_knownbits:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
; EGPR-NEXT:    # imm = 0x55555555
; EGPR-NEXT:    pextl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
  %tmp2 = and i32 %tmp, 65535
  ret i32 %tmp2
}

declare i32 @llvm.x86.bmi.pext.32(i32, i32)
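
; Reference loop for the assumed pext semantics (the inverse of pdep): gather
; the bits of %src that sit under set bits of %mask into consecutive low bits
; of the result, e.g. pext(0b1010, 0b1010) == 0b11. This is a hypothetical,
; unchecked sketch of the semantics, not a pattern the backend matches to a
; pext instruction.
define i32 @pext32_reference_sketch(i32 %src, i32 %mask) {
entry:
  br label %loop

loop:
  %m = phi i32 [ %mask, %entry ], [ %m.next, %latch ]
  %r = phi i32 [ 0, %entry ], [ %r.next, %latch ]
  %o = phi i32 [ 0, %entry ], [ %o.next, %latch ]
  %done = icmp eq i32 %m, 0
  br i1 %done, label %exit, label %latch

latch:
  %negm = sub i32 0, %m
  %bit = and i32 %m, %negm          ; lowest set bit of the remaining mask
  %hit = and i32 %src, %bit         ; is the source bit under it set?
  %isset = icmp ne i32 %hit, 0
  %one = shl i32 1, %o
  %r.or = or i32 %r, %one
  %r.next = select i1 %isset, i32 %r.or, i32 %r
  %o.next = add i32 %o, 1           ; advance the output bit position
  %m.next = xor i32 %m, %bit        ; clear the processed mask bit
  br label %loop

exit:
  ret i32 %r
}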
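; In the two functions below, a 32x32->64 widening multiply is lowered to
; mulxl only on the 32-bit target, where a single 64-bit multiply is not
; legal; mulxl takes its second source implicitly in %edx, writes both
; product halves, and leaves EFLAGS untouched. On x86-64 the zero-extended
; operands fit a single 64-bit imulq, so the high half is recovered with a
; shift instead and no mulx is emitted.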
define i32 @mulx32(i32 %x, i32 %y, ptr %p)   {
; X86-LABEL: mulx32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addl %edx, %edx
; X86-NEXT:    addl %eax, %eax
; X86-NEXT:    mulxl %eax, %eax, %edx
; X86-NEXT:    movl %edx, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: mulx32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    addl %edi, %edi
; X64-NEXT:    leal (%rsi,%rsi), %eax
; X64-NEXT:    imulq %rdi, %rax
; X64-NEXT:    movq %rax, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    movl %ecx, (%rdx)
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; EGPR-LABEL: mulx32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    # kill: def $esi killed $esi def $rsi
; EGPR-NEXT:    # kill: def $edi killed $edi def $rdi
; EGPR-NEXT:    addl %edi, %edi # encoding: [0x01,0xff]
; EGPR-NEXT:    leal (%rsi,%rsi), %eax # encoding: [0x8d,0x04,0x36]
; EGPR-NEXT:    imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
; EGPR-NEXT:    movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
; EGPR-NEXT:    shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
; EGPR-NEXT:    movl %ecx, (%rdx) # encoding: [0x89,0x0a]
; EGPR-NEXT:    # kill: def $eax killed $eax killed $rax
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = add i32 %x, %x
  %y1 = add i32 %y, %y
  %x2 = zext i32 %x1 to i64
  %y2 = zext i32 %y1 to i64
  %r1 = mul i64 %x2, %y2
  %h1 = lshr i64 %r1, 32
  %h  = trunc i64 %h1 to i32
  %l  = trunc i64 %r1 to i32
  store i32 %h, ptr %p
  ret i32 %l
}

define i32 @mulx32_load(i32 %x, ptr %y, ptr %p)   {
; X86-LABEL: mulx32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addl %edx, %edx
; X86-NEXT:    mulxl (%eax), %eax, %edx
; X86-NEXT:    movl %edx, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: mulx32_load:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi), %eax
; X64-NEXT:    movl (%rsi), %ecx
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    movq %rax, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    movl %ecx, (%rdx)
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; EGPR-LABEL: mulx32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    # kill: def $edi killed $edi def $rdi
; EGPR-NEXT:    leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
; EGPR-NEXT:    movl (%rsi), %ecx # encoding: [0x8b,0x0e]
; EGPR-NEXT:    imulq %rcx, %rax # encoding: [0x48,0x0f,0xaf,0xc1]
; EGPR-NEXT:    movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
; EGPR-NEXT:    shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
; EGPR-NEXT:    movl %ecx, (%rdx) # encoding: [0x89,0x0a]
; EGPR-NEXT:    # kill: def $eax killed $eax killed $rax
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = add i32 %x, %x
  %y1 = load i32, ptr %y
  %x2 = zext i32 %x1 to i64
  %y2 = zext i32 %y1 to i64
  %r1 = mul i64 %x2, %y2
  %h1 = lshr i64 %r1, 32
  %h  = trunc i64 %h1 to i32
  %l  = trunc i64 %r1 to i32
  store i32 %h, ptr %p
  ret i32 %l
}