xref: /llvm-project/llvm/test/CodeGen/X86/xmulo.ll (revision e30a4fc3e20bf5d9cc2f5bfcb61b4eb0e686a193)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
3; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST
4; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
5; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64
6; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32
7
; Unsigned multiply-with-overflow of the constants 9 and 8. The expected output
; contains no multiply instruction: it loads the literal 72 and zeroes the
; register that carries the overflow bit.
8define {i64, i1} @t1() nounwind {
9; CHECK-LABEL: t1:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    movl $72, %eax
12; CHECK-NEXT:    xorl %edx, %edx
13; CHECK-NEXT:    retq
14;
15; WIN32-LABEL: t1:
16; WIN32:       # %bb.0:
17; WIN32-NEXT:    movl $72, %eax
18; WIN32-NEXT:    xorl %edx, %edx
19; WIN32-NEXT:    xorl %ecx, %ecx
20; WIN32-NEXT:    retl
21  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8)
22  ret {i64, i1} %1
23}
24
; Unsigned multiply-with-overflow of the constants 9 and 0. The expected output
; only zeroes the result and overflow registers; no multiply is emitted.
25define {i64, i1} @t2() nounwind {
26; CHECK-LABEL: t2:
27; CHECK:       # %bb.0:
28; CHECK-NEXT:    xorl %eax, %eax
29; CHECK-NEXT:    xorl %edx, %edx
30; CHECK-NEXT:    retq
31;
32; WIN32-LABEL: t2:
33; WIN32:       # %bb.0:
34; WIN32-NEXT:    xorl %eax, %eax
35; WIN32-NEXT:    xorl %edx, %edx
36; WIN32-NEXT:    xorl %ecx, %ecx
37; WIN32-NEXT:    retl
38  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0)
39  ret {i64, i1} %1
40}
41
; Unsigned multiply-with-overflow of the constants 9 and -1 (all bits set).
; The expected output materializes -9 as the result and sets the overflow
; byte to 1 with immediate moves; no multiply is emitted.
42define {i64, i1} @t3() nounwind {
43; CHECK-LABEL: t3:
44; CHECK:       # %bb.0:
45; CHECK-NEXT:    movq $-9, %rax
46; CHECK-NEXT:    movb $1, %dl
47; CHECK-NEXT:    retq
48;
49; WIN32-LABEL: t3:
50; WIN32:       # %bb.0:
51; WIN32-NEXT:    movl $-9, %eax
52; WIN32-NEXT:    movl $-1, %edx
53; WIN32-NEXT:    movb $1, %cl
54; WIN32-NEXT:    retl
55  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1)
56  ret {i64, i1} %1
57}
58
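59; SMULO - signed multiply-with-overflow (llvm.smul.with.overflow.*): each test stores the product through %res and returns the overflow bit.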
; Signed i8 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. All expectations use the one-operand imulb followed
; by seto; the fast-isel expectations additionally mask the flag with andb $1.
60define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) {
61; SDAG-LABEL: smuloi8:
62; SDAG:       # %bb.0:
63; SDAG-NEXT:    movl %edi, %eax
64; SDAG-NEXT:    # kill: def $al killed $al killed $eax
65; SDAG-NEXT:    imulb %sil
66; SDAG-NEXT:    seto %cl
67; SDAG-NEXT:    movb %al, (%rdx)
68; SDAG-NEXT:    movl %ecx, %eax
69; SDAG-NEXT:    retq
70;
71; FAST-LABEL: smuloi8:
72; FAST:       # %bb.0:
73; FAST-NEXT:    movl %edi, %eax
74; FAST-NEXT:    # kill: def $al killed $al killed $eax
75; FAST-NEXT:    imulb %sil
76; FAST-NEXT:    seto %cl
77; FAST-NEXT:    movb %al, (%rdx)
78; FAST-NEXT:    andb $1, %cl
79; FAST-NEXT:    movl %ecx, %eax
80; FAST-NEXT:    retq
81;
82; WIN64-LABEL: smuloi8:
83; WIN64:       # %bb.0:
84; WIN64-NEXT:    movl %ecx, %eax
85; WIN64-NEXT:    imulb %dl
86; WIN64-NEXT:    seto %cl
87; WIN64-NEXT:    movb %al, (%r8)
88; WIN64-NEXT:    movl %ecx, %eax
89; WIN64-NEXT:    retq
90;
91; WIN32-LABEL: smuloi8:
92; WIN32:       # %bb.0:
93; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
94; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
95; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
96; WIN32-NEXT:    seto %cl
97; WIN32-NEXT:    movb %al, (%edx)
98; WIN32-NEXT:    movl %ecx, %eax
99; WIN32-NEXT:    retl
100  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
101  %val = extractvalue {i8, i1} %t, 0
102  %obit = extractvalue {i8, i1} %t, 1
103  store i8 %val, ptr %res
104  ret i1 %obit
105}
106
; Signed i16 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. The expectations use two-operand imulw plus seto;
; the fast-isel expectations additionally mask the flag with andb $1.
107define zeroext i1 @smuloi16(i16 %v1, i16 %v2, ptr %res) {
108; SDAG-LABEL: smuloi16:
109; SDAG:       # %bb.0:
110; SDAG-NEXT:    imulw %si, %di
111; SDAG-NEXT:    seto %al
112; SDAG-NEXT:    movw %di, (%rdx)
113; SDAG-NEXT:    retq
114;
115; FAST-LABEL: smuloi16:
116; FAST:       # %bb.0:
117; FAST-NEXT:    imulw %si, %di
118; FAST-NEXT:    seto %al
119; FAST-NEXT:    movw %di, (%rdx)
120; FAST-NEXT:    andb $1, %al
121; FAST-NEXT:    retq
122;
123; WIN64-LABEL: smuloi16:
124; WIN64:       # %bb.0:
125; WIN64-NEXT:    imulw %dx, %cx
126; WIN64-NEXT:    seto %al
127; WIN64-NEXT:    movw %cx, (%r8)
128; WIN64-NEXT:    retq
129;
130; WIN32-LABEL: smuloi16:
131; WIN32:       # %bb.0:
132; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
133; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
134; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %dx
135; WIN32-NEXT:    seto %al
136; WIN32-NEXT:    movw %dx, (%ecx)
137; WIN32-NEXT:    retl
138  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
139  %val = extractvalue {i16, i1} %t, 0
140  %obit = extractvalue {i16, i1} %t, 1
141  store i16 %val, ptr %res
142  ret i1 %obit
143}
144
; Signed i32 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. The expectations use two-operand imull plus seto;
; the fast-isel expectations additionally mask the flag with andb $1.
145define zeroext i1 @smuloi32(i32 %v1, i32 %v2, ptr %res) {
146; SDAG-LABEL: smuloi32:
147; SDAG:       # %bb.0:
148; SDAG-NEXT:    imull %esi, %edi
149; SDAG-NEXT:    seto %al
150; SDAG-NEXT:    movl %edi, (%rdx)
151; SDAG-NEXT:    retq
152;
153; FAST-LABEL: smuloi32:
154; FAST:       # %bb.0:
155; FAST-NEXT:    imull %esi, %edi
156; FAST-NEXT:    seto %al
157; FAST-NEXT:    movl %edi, (%rdx)
158; FAST-NEXT:    andb $1, %al
159; FAST-NEXT:    retq
160;
161; WIN64-LABEL: smuloi32:
162; WIN64:       # %bb.0:
163; WIN64-NEXT:    imull %edx, %ecx
164; WIN64-NEXT:    seto %al
165; WIN64-NEXT:    movl %ecx, (%r8)
166; WIN64-NEXT:    retq
167;
168; WIN32-LABEL: smuloi32:
169; WIN32:       # %bb.0:
170; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
171; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
172; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %edx
173; WIN32-NEXT:    seto %al
174; WIN32-NEXT:    movl %edx, (%ecx)
175; WIN32-NEXT:    retl
176  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
177  %val = extractvalue {i32, i1} %t, 0
178  %obit = extractvalue {i32, i1} %t, 1
179  store i32 %val, ptr %res
180  ret i1 %obit
181}
182
; Signed i64 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. The 64-bit expectations are a single imulq plus
; seto. The i386 expectations are a long sequence of 32-bit mull/imull with
; adcl carry propagation; there the overflow bit comes from comparing the upper
; half of the wide product against the sign of the stored high word
; (sarl $31, xorl, orl, setne).
183define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
184; SDAG-LABEL: smuloi64:
185; SDAG:       # %bb.0:
186; SDAG-NEXT:    imulq %rsi, %rdi
187; SDAG-NEXT:    seto %al
188; SDAG-NEXT:    movq %rdi, (%rdx)
189; SDAG-NEXT:    retq
190;
191; FAST-LABEL: smuloi64:
192; FAST:       # %bb.0:
193; FAST-NEXT:    imulq %rsi, %rdi
194; FAST-NEXT:    seto %al
195; FAST-NEXT:    movq %rdi, (%rdx)
196; FAST-NEXT:    andb $1, %al
197; FAST-NEXT:    retq
198;
199; WIN64-LABEL: smuloi64:
200; WIN64:       # %bb.0:
201; WIN64-NEXT:    imulq %rdx, %rcx
202; WIN64-NEXT:    seto %al
203; WIN64-NEXT:    movq %rcx, (%r8)
204; WIN64-NEXT:    retq
205;
206; WIN32-LABEL: smuloi64:
207; WIN32:       # %bb.0:
208; WIN32-NEXT:    pushl %ebp
209; WIN32-NEXT:    pushl %ebx
210; WIN32-NEXT:    pushl %edi
211; WIN32-NEXT:    pushl %esi
212; WIN32-NEXT:    subl $8, %esp
213; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
214; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
215; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
216; WIN32-NEXT:    movl %edi, %esi
217; WIN32-NEXT:    sarl $31, %esi
218; WIN32-NEXT:    imull %ebx, %esi
219; WIN32-NEXT:    mull %ebx
220; WIN32-NEXT:    movl %edx, %ecx
221; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
222; WIN32-NEXT:    movl %edi, %eax
223; WIN32-NEXT:    mull %ebx
224; WIN32-NEXT:    movl %edx, %ebx
225; WIN32-NEXT:    movl %eax, %ebp
226; WIN32-NEXT:    addl %ecx, %ebp
227; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
228; WIN32-NEXT:    adcl %esi, %ebx
229; WIN32-NEXT:    movl %ebx, %edi
230; WIN32-NEXT:    sarl $31, %edi
231; WIN32-NEXT:    movl %ecx, %esi
232; WIN32-NEXT:    sarl $31, %esi
233; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
234; WIN32-NEXT:    imull %eax, %esi
235; WIN32-NEXT:    mull %ecx
236; WIN32-NEXT:    movl %edx, %ecx
237; WIN32-NEXT:    addl %ebp, %eax
238; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
239; WIN32-NEXT:    adcl %esi, %ecx
240; WIN32-NEXT:    movl %ecx, %ebp
241; WIN32-NEXT:    sarl $31, %ebp
242; WIN32-NEXT:    addl %ebx, %ecx
243; WIN32-NEXT:    adcl %edi, %ebp
244; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
245; WIN32-NEXT:    imull {{[0-9]+}}(%esp)
246; WIN32-NEXT:    addl %ecx, %eax
247; WIN32-NEXT:    adcl %ebp, %edx
248; WIN32-NEXT:    movl (%esp), %esi # 4-byte Reload
249; WIN32-NEXT:    movl %esi, %ecx
250; WIN32-NEXT:    sarl $31, %ecx
251; WIN32-NEXT:    xorl %ecx, %edx
252; WIN32-NEXT:    xorl %eax, %ecx
253; WIN32-NEXT:    orl %edx, %ecx
254; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
255; WIN32-NEXT:    movl %esi, 4(%eax)
256; WIN32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
257; WIN32-NEXT:    movl %ecx, (%eax)
258; WIN32-NEXT:    setne %al
259; WIN32-NEXT:    addl $8, %esp
260; WIN32-NEXT:    popl %esi
261; WIN32-NEXT:    popl %edi
262; WIN32-NEXT:    popl %ebx
263; WIN32-NEXT:    popl %ebp
264; WIN32-NEXT:    retl
265  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
266  %val = extractvalue {i64, i1} %t, 0
267  %obit = extractvalue {i64, i1} %t, 1
268  store i64 %val, ptr %res
269  ret i1 %obit
270}
271
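272; UMULO - unsigned multiply-with-overflow (llvm.umul.with.overflow.*): each test stores the product through %res and returns the overflow bit.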
; Unsigned i8 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. All expectations use the one-operand mulb followed
; by seto; the fast-isel expectations additionally mask the flag with andb $1.
273define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) {
274; SDAG-LABEL: umuloi8:
275; SDAG:       # %bb.0:
276; SDAG-NEXT:    movl %edi, %eax
277; SDAG-NEXT:    # kill: def $al killed $al killed $eax
278; SDAG-NEXT:    mulb %sil
279; SDAG-NEXT:    seto %cl
280; SDAG-NEXT:    movb %al, (%rdx)
281; SDAG-NEXT:    movl %ecx, %eax
282; SDAG-NEXT:    retq
283;
284; FAST-LABEL: umuloi8:
285; FAST:       # %bb.0:
286; FAST-NEXT:    movl %edi, %eax
287; FAST-NEXT:    # kill: def $al killed $al killed $eax
288; FAST-NEXT:    mulb %sil
289; FAST-NEXT:    seto %cl
290; FAST-NEXT:    movb %al, (%rdx)
291; FAST-NEXT:    andb $1, %cl
292; FAST-NEXT:    movl %ecx, %eax
293; FAST-NEXT:    retq
294;
295; WIN64-LABEL: umuloi8:
296; WIN64:       # %bb.0:
297; WIN64-NEXT:    movl %ecx, %eax
298; WIN64-NEXT:    mulb %dl
299; WIN64-NEXT:    seto %cl
300; WIN64-NEXT:    movb %al, (%r8)
301; WIN64-NEXT:    movl %ecx, %eax
302; WIN64-NEXT:    retq
303;
304; WIN32-LABEL: umuloi8:
305; WIN32:       # %bb.0:
306; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
307; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
308; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
309; WIN32-NEXT:    seto %cl
310; WIN32-NEXT:    movb %al, (%edx)
311; WIN32-NEXT:    movl %ecx, %eax
312; WIN32-NEXT:    retl
313  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
314  %val = extractvalue {i8, i1} %t, 0
315  %obit = extractvalue {i8, i1} %t, 1
316  store i8 %val, ptr %res
317  ret i1 %obit
318}
319
; Unsigned i16 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. The expectations use the one-operand mulw followed
; by seto. In the Linux x86-64 and i386 expectations the destination pointer is
; first copied to another register (rcx / esi) before the multiply.
320define zeroext i1 @umuloi16(i16 %v1, i16 %v2, ptr %res) {
321; SDAG-LABEL: umuloi16:
322; SDAG:       # %bb.0:
323; SDAG-NEXT:    movq %rdx, %rcx
324; SDAG-NEXT:    movl %edi, %eax
325; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
326; SDAG-NEXT:    mulw %si
327; SDAG-NEXT:    seto %dl
328; SDAG-NEXT:    movw %ax, (%rcx)
329; SDAG-NEXT:    movl %edx, %eax
330; SDAG-NEXT:    retq
331;
332; FAST-LABEL: umuloi16:
333; FAST:       # %bb.0:
334; FAST-NEXT:    movq %rdx, %rcx
335; FAST-NEXT:    movl %edi, %eax
336; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
337; FAST-NEXT:    mulw %si
338; FAST-NEXT:    seto %dl
339; FAST-NEXT:    movw %ax, (%rcx)
340; FAST-NEXT:    andb $1, %dl
341; FAST-NEXT:    movl %edx, %eax
342; FAST-NEXT:    retq
343;
344; WIN64-LABEL: umuloi16:
345; WIN64:       # %bb.0:
346; WIN64-NEXT:    movl %ecx, %eax
347; WIN64-NEXT:    mulw %dx
348; WIN64-NEXT:    seto %cl
349; WIN64-NEXT:    movw %ax, (%r8)
350; WIN64-NEXT:    movl %ecx, %eax
351; WIN64-NEXT:    retq
352;
353; WIN32-LABEL: umuloi16:
354; WIN32:       # %bb.0:
355; WIN32-NEXT:    pushl %esi
356; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
357; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
358; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
359; WIN32-NEXT:    seto %cl
360; WIN32-NEXT:    movw %ax, (%esi)
361; WIN32-NEXT:    movl %ecx, %eax
362; WIN32-NEXT:    popl %esi
363; WIN32-NEXT:    retl
364  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
365  %val = extractvalue {i16, i1} %t, 0
366  %obit = extractvalue {i16, i1} %t, 1
367  store i16 %val, ptr %res
368  ret i1 %obit
369}
370
; Unsigned i32 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. The expectations use the one-operand mull followed
; by seto; the fast-isel expectations additionally mask the flag with andb $1.
371define zeroext i1 @umuloi32(i32 %v1, i32 %v2, ptr %res) {
372; SDAG-LABEL: umuloi32:
373; SDAG:       # %bb.0:
374; SDAG-NEXT:    movq %rdx, %rcx
375; SDAG-NEXT:    movl %edi, %eax
376; SDAG-NEXT:    mull %esi
377; SDAG-NEXT:    seto %dl
378; SDAG-NEXT:    movl %eax, (%rcx)
379; SDAG-NEXT:    movl %edx, %eax
380; SDAG-NEXT:    retq
381;
382; FAST-LABEL: umuloi32:
383; FAST:       # %bb.0:
384; FAST-NEXT:    movq %rdx, %rcx
385; FAST-NEXT:    movl %edi, %eax
386; FAST-NEXT:    mull %esi
387; FAST-NEXT:    seto %dl
388; FAST-NEXT:    movl %eax, (%rcx)
389; FAST-NEXT:    andb $1, %dl
390; FAST-NEXT:    movl %edx, %eax
391; FAST-NEXT:    retq
392;
393; WIN64-LABEL: umuloi32:
394; WIN64:       # %bb.0:
395; WIN64-NEXT:    movl %ecx, %eax
396; WIN64-NEXT:    mull %edx
397; WIN64-NEXT:    seto %cl
398; WIN64-NEXT:    movl %eax, (%r8)
399; WIN64-NEXT:    movl %ecx, %eax
400; WIN64-NEXT:    retq
401;
402; WIN32-LABEL: umuloi32:
403; WIN32:       # %bb.0:
404; WIN32-NEXT:    pushl %esi
405; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
406; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
407; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
408; WIN32-NEXT:    seto %cl
409; WIN32-NEXT:    movl %eax, (%esi)
410; WIN32-NEXT:    movl %ecx, %eax
411; WIN32-NEXT:    popl %esi
412; WIN32-NEXT:    retl
413  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
414  %val = extractvalue {i32, i1} %t, 0
415  %obit = extractvalue {i32, i1} %t, 1
416  store i32 %val, ptr %res
417  ret i1 %obit
418}
419
; Unsigned i64 multiply-with-overflow: stores the product through %res and
; returns the overflow bit. The 64-bit expectations are a single mulq plus
; seto. The i386 expectations use three 32-bit mull instructions and OR
; together several overflow conditions: both tested 32-bit words non-zero
; (testl/setne/andb), the overflow flag of two of the multiplies (seto), and
; the carry out of the final addl (setb).
420define zeroext i1 @umuloi64(i64 %v1, i64 %v2, ptr %res) {
421; SDAG-LABEL: umuloi64:
422; SDAG:       # %bb.0:
423; SDAG-NEXT:    movq %rdx, %rcx
424; SDAG-NEXT:    movq %rdi, %rax
425; SDAG-NEXT:    mulq %rsi
426; SDAG-NEXT:    seto %dl
427; SDAG-NEXT:    movq %rax, (%rcx)
428; SDAG-NEXT:    movl %edx, %eax
429; SDAG-NEXT:    retq
430;
431; FAST-LABEL: umuloi64:
432; FAST:       # %bb.0:
433; FAST-NEXT:    movq %rdx, %rcx
434; FAST-NEXT:    movq %rdi, %rax
435; FAST-NEXT:    mulq %rsi
436; FAST-NEXT:    seto %dl
437; FAST-NEXT:    movq %rax, (%rcx)
438; FAST-NEXT:    andb $1, %dl
439; FAST-NEXT:    movl %edx, %eax
440; FAST-NEXT:    retq
441;
442; WIN64-LABEL: umuloi64:
443; WIN64:       # %bb.0:
444; WIN64-NEXT:    movq %rcx, %rax
445; WIN64-NEXT:    mulq %rdx
446; WIN64-NEXT:    seto %cl
447; WIN64-NEXT:    movq %rax, (%r8)
448; WIN64-NEXT:    movl %ecx, %eax
449; WIN64-NEXT:    retq
450;
451; WIN32-LABEL: umuloi64:
452; WIN32:       # %bb.0:
453; WIN32-NEXT:    pushl %ebp
454; WIN32-NEXT:    pushl %ebx
455; WIN32-NEXT:    pushl %edi
456; WIN32-NEXT:    pushl %esi
457; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
458; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
459; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
460; WIN32-NEXT:    testl %esi, %esi
461; WIN32-NEXT:    setne %dl
462; WIN32-NEXT:    testl %eax, %eax
463; WIN32-NEXT:    setne %cl
464; WIN32-NEXT:    andb %dl, %cl
465; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
466; WIN32-NEXT:    movl %eax, %edi
467; WIN32-NEXT:    seto %bl
468; WIN32-NEXT:    movl %esi, %eax
469; WIN32-NEXT:    mull %ebp
470; WIN32-NEXT:    seto %ch
471; WIN32-NEXT:    orb %bl, %ch
472; WIN32-NEXT:    orb %cl, %ch
473; WIN32-NEXT:    leal (%edi,%eax), %esi
474; WIN32-NEXT:    movl %ebp, %eax
475; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
476; WIN32-NEXT:    addl %esi, %edx
477; WIN32-NEXT:    setb %cl
478; WIN32-NEXT:    orb %ch, %cl
479; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
480; WIN32-NEXT:    movl %eax, (%esi)
481; WIN32-NEXT:    movl %edx, 4(%esi)
482; WIN32-NEXT:    movl %ecx, %eax
483; WIN32-NEXT:    popl %esi
484; WIN32-NEXT:    popl %edi
485; WIN32-NEXT:    popl %ebx
486; WIN32-NEXT:    popl %ebp
487; WIN32-NEXT:    retl
488  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
489  %val = extractvalue {i64, i1} %t, 0
490  %obit = extractvalue {i64, i1} %t, 1
491  store i64 %val, ptr %res
492  ret i1 %obit
493}
494
495;
496; Check the use of the overflow bit in combination with a select instruction.
497;
; Returns %v1 when the signed i32 multiply overflows and %v2 otherwise; the
; product itself is unused. The 64-bit expectations feed the overflow flag
; straight into cmovol, while the i386 expectations branch with jo instead.
498define i32 @smuloselecti32(i32 %v1, i32 %v2) {
499; LINUX-LABEL: smuloselecti32:
500; LINUX:       # %bb.0:
501; LINUX-NEXT:    movl %esi, %eax
502; LINUX-NEXT:    movl %edi, %ecx
503; LINUX-NEXT:    imull %esi, %ecx
504; LINUX-NEXT:    cmovol %edi, %eax
505; LINUX-NEXT:    retq
506;
507; WIN64-LABEL: smuloselecti32:
508; WIN64:       # %bb.0:
509; WIN64-NEXT:    movl %edx, %eax
510; WIN64-NEXT:    movl %ecx, %edx
511; WIN64-NEXT:    imull %eax, %edx
512; WIN64-NEXT:    cmovol %ecx, %eax
513; WIN64-NEXT:    retq
514;
515; WIN32-LABEL: smuloselecti32:
516; WIN32:       # %bb.0:
517; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
518; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
519; WIN32-NEXT:    movl %eax, %edx
520; WIN32-NEXT:    imull %ecx, %edx
521; WIN32-NEXT:    jo LBB11_2
522; WIN32-NEXT:  # %bb.1:
523; WIN32-NEXT:    movl %ecx, %eax
524; WIN32-NEXT:  LBB11_2:
525; WIN32-NEXT:    retl
526  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
527  %obit = extractvalue {i32, i1} %t, 1
528  %ret = select i1 %obit, i32 %v1, i32 %v2
529  ret i32 %ret
530}
531
; Returns %v1 when the signed i64 multiply overflows and %v2 otherwise; the
; product itself is unused. The 64-bit expectations are imulq followed by
; cmovoq. The i386 expectations expand the multiply into 32-bit mull/imull
; with adcl carry propagation, reduce the overflow condition with
; sarl/xorl/orl, and select the result with a jne branch.
532define i64 @smuloselecti64(i64 %v1, i64 %v2) {
533; LINUX-LABEL: smuloselecti64:
534; LINUX:       # %bb.0:
535; LINUX-NEXT:    movq %rsi, %rax
536; LINUX-NEXT:    movq %rdi, %rcx
537; LINUX-NEXT:    imulq %rsi, %rcx
538; LINUX-NEXT:    cmovoq %rdi, %rax
539; LINUX-NEXT:    retq
540;
541; WIN64-LABEL: smuloselecti64:
542; WIN64:       # %bb.0:
543; WIN64-NEXT:    movq %rdx, %rax
544; WIN64-NEXT:    movq %rcx, %rdx
545; WIN64-NEXT:    imulq %rax, %rdx
546; WIN64-NEXT:    cmovoq %rcx, %rax
547; WIN64-NEXT:    retq
548;
549; WIN32-LABEL: smuloselecti64:
550; WIN32:       # %bb.0:
551; WIN32-NEXT:    pushl %ebp
552; WIN32-NEXT:    pushl %ebx
553; WIN32-NEXT:    pushl %edi
554; WIN32-NEXT:    pushl %esi
555; WIN32-NEXT:    pushl %eax
556; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
557; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
558; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
559; WIN32-NEXT:    movl %ebx, %esi
560; WIN32-NEXT:    sarl $31, %esi
561; WIN32-NEXT:    imull %edi, %esi
562; WIN32-NEXT:    mull %edi
563; WIN32-NEXT:    movl %edx, %ecx
564; WIN32-NEXT:    movl %ebx, %eax
565; WIN32-NEXT:    mull %edi
566; WIN32-NEXT:    movl %edx, %ebx
567; WIN32-NEXT:    movl %eax, %ebp
568; WIN32-NEXT:    addl %ecx, %ebp
569; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
570; WIN32-NEXT:    adcl %esi, %ebx
571; WIN32-NEXT:    movl %ebx, %eax
572; WIN32-NEXT:    sarl $31, %eax
573; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
574; WIN32-NEXT:    movl %ecx, %esi
575; WIN32-NEXT:    sarl $31, %esi
576; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
577; WIN32-NEXT:    imull %eax, %esi
578; WIN32-NEXT:    mull %ecx
579; WIN32-NEXT:    movl %edx, %ecx
580; WIN32-NEXT:    movl %eax, %edi
581; WIN32-NEXT:    addl %ebp, %edi
582; WIN32-NEXT:    adcl %esi, %ecx
583; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
584; WIN32-NEXT:    movl %ecx, %ebp
585; WIN32-NEXT:    sarl $31, %ebp
586; WIN32-NEXT:    addl %ebx, %ecx
587; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
588; WIN32-NEXT:    adcl (%esp), %ebp # 4-byte Folded Reload
589; WIN32-NEXT:    movl %esi, %eax
590; WIN32-NEXT:    imull %ebx
591; WIN32-NEXT:    addl %ecx, %eax
592; WIN32-NEXT:    adcl %ebp, %edx
593; WIN32-NEXT:    sarl $31, %edi
594; WIN32-NEXT:    xorl %edi, %edx
595; WIN32-NEXT:    xorl %eax, %edi
596; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
597; WIN32-NEXT:    orl %edx, %edi
598; WIN32-NEXT:    jne LBB12_2
599; WIN32-NEXT:  # %bb.1:
600; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
601; WIN32-NEXT:    movl %ebx, %esi
602; WIN32-NEXT:  LBB12_2:
603; WIN32-NEXT:    movl %esi, %edx
604; WIN32-NEXT:    addl $4, %esp
605; WIN32-NEXT:    popl %esi
606; WIN32-NEXT:    popl %edi
607; WIN32-NEXT:    popl %ebx
608; WIN32-NEXT:    popl %ebp
609; WIN32-NEXT:    retl
610  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
611  %obit = extractvalue {i64, i1} %t, 1
612  %ret = select i1 %obit, i64 %v1, i64 %v2
613  ret i64 %ret
614}
615
; Returns %v1 when the unsigned i32 multiply overflows and %v2 otherwise; the
; product itself is unused. The 64-bit expectations are mull followed by
; cmovol, while the i386 expectations branch with jo instead.
616define i32 @umuloselecti32(i32 %v1, i32 %v2) {
617; LINUX-LABEL: umuloselecti32:
618; LINUX:       # %bb.0:
619; LINUX-NEXT:    movl %edi, %eax
620; LINUX-NEXT:    mull %esi
621; LINUX-NEXT:    cmovol %edi, %esi
622; LINUX-NEXT:    movl %esi, %eax
623; LINUX-NEXT:    retq
624;
625; WIN64-LABEL: umuloselecti32:
626; WIN64:       # %bb.0:
627; WIN64-NEXT:    movl %edx, %r8d
628; WIN64-NEXT:    movl %ecx, %eax
629; WIN64-NEXT:    mull %edx
630; WIN64-NEXT:    cmovol %ecx, %r8d
631; WIN64-NEXT:    movl %r8d, %eax
632; WIN64-NEXT:    retq
633;
634; WIN32-LABEL: umuloselecti32:
635; WIN32:       # %bb.0:
636; WIN32-NEXT:    pushl %esi
637; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
638; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
639; WIN32-NEXT:    movl %ecx, %eax
640; WIN32-NEXT:    mull %esi
641; WIN32-NEXT:    jo LBB13_2
642; WIN32-NEXT:  # %bb.1:
643; WIN32-NEXT:    movl %esi, %ecx
644; WIN32-NEXT:  LBB13_2:
645; WIN32-NEXT:    movl %ecx, %eax
646; WIN32-NEXT:    popl %esi
647; WIN32-NEXT:    retl
648  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
649  %obit = extractvalue {i32, i1} %t, 1
650  %ret = select i1 %obit, i32 %v1, i32 %v2
651  ret i32 %ret
652}
653
; Returns %v1 when the unsigned i64 multiply overflows and %v2 otherwise; the
; product itself is unused. The 64-bit expectations are mulq followed by
; cmovoq. The i386 expectations build the overflow condition from several
; partial flags (testl/setne/andb, seto, setb, combined with orb), spilling one
; of them to the stack, and then select the result with testb + jne.
654define i64 @umuloselecti64(i64 %v1, i64 %v2) {
655; LINUX-LABEL: umuloselecti64:
656; LINUX:       # %bb.0:
657; LINUX-NEXT:    movq %rdi, %rax
658; LINUX-NEXT:    mulq %rsi
659; LINUX-NEXT:    cmovoq %rdi, %rsi
660; LINUX-NEXT:    movq %rsi, %rax
661; LINUX-NEXT:    retq
662;
663; WIN64-LABEL: umuloselecti64:
664; WIN64:       # %bb.0:
665; WIN64-NEXT:    movq %rdx, %r8
666; WIN64-NEXT:    movq %rcx, %rax
667; WIN64-NEXT:    mulq %rdx
668; WIN64-NEXT:    cmovoq %rcx, %r8
669; WIN64-NEXT:    movq %r8, %rax
670; WIN64-NEXT:    retq
671;
672; WIN32-LABEL: umuloselecti64:
673; WIN32:       # %bb.0:
674; WIN32-NEXT:    pushl %ebp
675; WIN32-NEXT:    pushl %ebx
676; WIN32-NEXT:    pushl %edi
677; WIN32-NEXT:    pushl %esi
678; WIN32-NEXT:    pushl %eax
679; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
680; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
681; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
682; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
683; WIN32-NEXT:    testl %ebp, %ebp
684; WIN32-NEXT:    setne %al
685; WIN32-NEXT:    testl %esi, %esi
686; WIN32-NEXT:    setne %bl
687; WIN32-NEXT:    andb %al, %bl
688; WIN32-NEXT:    movl %esi, %eax
689; WIN32-NEXT:    mull %edi
690; WIN32-NEXT:    movl %edi, %edx
691; WIN32-NEXT:    movl %eax, %edi
692; WIN32-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
693; WIN32-NEXT:    movl %ebp, %eax
694; WIN32-NEXT:    movl %edx, %ebp
695; WIN32-NEXT:    mull %ecx
696; WIN32-NEXT:    seto %bh
697; WIN32-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
698; WIN32-NEXT:    orb %bl, %bh
699; WIN32-NEXT:    addl %eax, %edi
700; WIN32-NEXT:    movl %ecx, %eax
701; WIN32-NEXT:    mull %ebp
702; WIN32-NEXT:    addl %edi, %edx
703; WIN32-NEXT:    setb %al
704; WIN32-NEXT:    orb %bh, %al
705; WIN32-NEXT:    testb %al, %al
706; WIN32-NEXT:    jne LBB14_2
707; WIN32-NEXT:  # %bb.1:
708; WIN32-NEXT:    movl %ebp, %ecx
709; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
710; WIN32-NEXT:  LBB14_2:
711; WIN32-NEXT:    movl %ecx, %eax
712; WIN32-NEXT:    movl %esi, %edx
713; WIN32-NEXT:    addl $4, %esp
714; WIN32-NEXT:    popl %esi
715; WIN32-NEXT:    popl %edi
716; WIN32-NEXT:    popl %ebx
717; WIN32-NEXT:    popl %ebp
718; WIN32-NEXT:    retl
719  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
720  %obit = extractvalue {i64, i1} %t, 1
721  %ret = select i1 %obit, i64 %v1, i64 %v2
722  ret i64 %ret
723}
724
725;
726; Check the use of the overflow bit in combination with a branch instruction.
727;
; Branches on the overflow bit of a signed i8 multiply: returns false on
; overflow and true otherwise. %val is extracted but never used. The
; default-isel expectations branch with jo right after imulb; the fast-isel
; expectations materialize the flag with seto and branch via testb $1 + jne.
; NOTE(review): !prof !0 refers to metadata defined outside this view.
728define zeroext i1 @smulobri8(i8 %v1, i8 %v2) {
729; SDAG-LABEL: smulobri8:
730; SDAG:       # %bb.0:
731; SDAG-NEXT:    movl %edi, %eax
732; SDAG-NEXT:    # kill: def $al killed $al killed $eax
733; SDAG-NEXT:    imulb %sil
734; SDAG-NEXT:    jo .LBB15_1
735; SDAG-NEXT:  # %bb.2: # %continue
736; SDAG-NEXT:    movb $1, %al
737; SDAG-NEXT:    retq
738; SDAG-NEXT:  .LBB15_1: # %overflow
739; SDAG-NEXT:    xorl %eax, %eax
740; SDAG-NEXT:    retq
741;
742; FAST-LABEL: smulobri8:
743; FAST:       # %bb.0:
744; FAST-NEXT:    movl %edi, %eax
745; FAST-NEXT:    # kill: def $al killed $al killed $eax
746; FAST-NEXT:    imulb %sil
747; FAST-NEXT:    seto %al
748; FAST-NEXT:    testb $1, %al
749; FAST-NEXT:    jne .LBB15_1
750; FAST-NEXT:  # %bb.2: # %continue
751; FAST-NEXT:    movb $1, %al
752; FAST-NEXT:    andb $1, %al
753; FAST-NEXT:    retq
754; FAST-NEXT:  .LBB15_1: # %overflow
755; FAST-NEXT:    xorl %eax, %eax
756; FAST-NEXT:    andb $1, %al
757; FAST-NEXT:    # kill: def $al killed $al killed $eax
758; FAST-NEXT:    retq
759;
760; WIN64-LABEL: smulobri8:
761; WIN64:       # %bb.0:
762; WIN64-NEXT:    movl %ecx, %eax
763; WIN64-NEXT:    imulb %dl
764; WIN64-NEXT:    jo .LBB15_1
765; WIN64-NEXT:  # %bb.2: # %continue
766; WIN64-NEXT:    movb $1, %al
767; WIN64-NEXT:    retq
768; WIN64-NEXT:  .LBB15_1: # %overflow
769; WIN64-NEXT:    xorl %eax, %eax
770; WIN64-NEXT:    retq
771;
772; WIN32-LABEL: smulobri8:
773; WIN32:       # %bb.0:
774; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
775; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
776; WIN32-NEXT:    jo LBB15_1
777; WIN32-NEXT:  # %bb.2: # %continue
778; WIN32-NEXT:    movb $1, %al
779; WIN32-NEXT:    retl
780; WIN32-NEXT:  LBB15_1: # %overflow
781; WIN32-NEXT:    xorl %eax, %eax
782; WIN32-NEXT:    retl
783  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
784  %val = extractvalue {i8, i1} %t, 0
785  %obit = extractvalue {i8, i1} %t, 1
786  br i1 %obit, label %overflow, label %continue, !prof !0
787
788overflow:
789  ret i1 false
790
791continue:
792  ret i1 true
793}
794
; Branches on the overflow bit of a signed i16 multiply: returns false on
; overflow and true otherwise. %val is extracted but never used. The
; default-isel expectations branch with jo right after imulw; the fast-isel
; expectations materialize the flag with seto and branch via testb $1 + jne.
; NOTE(review): !prof !0 refers to metadata defined outside this view.
795define zeroext i1 @smulobri16(i16 %v1, i16 %v2) {
796; SDAG-LABEL: smulobri16:
797; SDAG:       # %bb.0:
798; SDAG-NEXT:    imulw %si, %di
799; SDAG-NEXT:    jo .LBB16_1
800; SDAG-NEXT:  # %bb.2: # %continue
801; SDAG-NEXT:    movb $1, %al
802; SDAG-NEXT:    retq
803; SDAG-NEXT:  .LBB16_1: # %overflow
804; SDAG-NEXT:    xorl %eax, %eax
805; SDAG-NEXT:    retq
806;
807; FAST-LABEL: smulobri16:
808; FAST:       # %bb.0:
809; FAST-NEXT:    imulw %si, %di
810; FAST-NEXT:    seto %al
811; FAST-NEXT:    testb $1, %al
812; FAST-NEXT:    jne .LBB16_1
813; FAST-NEXT:  # %bb.2: # %continue
814; FAST-NEXT:    movb $1, %al
815; FAST-NEXT:    andb $1, %al
816; FAST-NEXT:    retq
817; FAST-NEXT:  .LBB16_1: # %overflow
818; FAST-NEXT:    xorl %eax, %eax
819; FAST-NEXT:    andb $1, %al
820; FAST-NEXT:    # kill: def $al killed $al killed $eax
821; FAST-NEXT:    retq
822;
823; WIN64-LABEL: smulobri16:
824; WIN64:       # %bb.0:
825; WIN64-NEXT:    imulw %dx, %cx
826; WIN64-NEXT:    jo .LBB16_1
827; WIN64-NEXT:  # %bb.2: # %continue
828; WIN64-NEXT:    movb $1, %al
829; WIN64-NEXT:    retq
830; WIN64-NEXT:  .LBB16_1: # %overflow
831; WIN64-NEXT:    xorl %eax, %eax
832; WIN64-NEXT:    retq
833;
834; WIN32-LABEL: smulobri16:
835; WIN32:       # %bb.0:
836; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
837; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %ax
838; WIN32-NEXT:    jo LBB16_1
839; WIN32-NEXT:  # %bb.2: # %continue
840; WIN32-NEXT:    movb $1, %al
841; WIN32-NEXT:    retl
842; WIN32-NEXT:  LBB16_1: # %overflow
843; WIN32-NEXT:    xorl %eax, %eax
844; WIN32-NEXT:    retl
845  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
846  %val = extractvalue {i16, i1} %t, 0
847  %obit = extractvalue {i16, i1} %t, 1
848  br i1 %obit, label %overflow, label %continue, !prof !0
849
850overflow:
851  ret i1 false
852
853continue:
854  ret i1 true
855}
856
; Branches on the overflow bit of a signed i32 multiply: returns false on
; overflow and true otherwise. %val is extracted but never used. Every
; expectation, including fast-isel, branches with jo directly after imull; the
; fast-isel expectations only add andb $1 on the returned constant.
; NOTE(review): !prof !0 refers to metadata defined outside this view.
857define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
858; SDAG-LABEL: smulobri32:
859; SDAG:       # %bb.0:
860; SDAG-NEXT:    imull %esi, %edi
861; SDAG-NEXT:    jo .LBB17_1
862; SDAG-NEXT:  # %bb.2: # %continue
863; SDAG-NEXT:    movb $1, %al
864; SDAG-NEXT:    retq
865; SDAG-NEXT:  .LBB17_1: # %overflow
866; SDAG-NEXT:    xorl %eax, %eax
867; SDAG-NEXT:    retq
868;
869; FAST-LABEL: smulobri32:
870; FAST:       # %bb.0:
871; FAST-NEXT:    imull %esi, %edi
872; FAST-NEXT:    jo .LBB17_1
873; FAST-NEXT:  # %bb.2: # %continue
874; FAST-NEXT:    movb $1, %al
875; FAST-NEXT:    andb $1, %al
876; FAST-NEXT:    retq
877; FAST-NEXT:  .LBB17_1: # %overflow
878; FAST-NEXT:    xorl %eax, %eax
879; FAST-NEXT:    andb $1, %al
880; FAST-NEXT:    # kill: def $al killed $al killed $eax
881; FAST-NEXT:    retq
882;
883; WIN64-LABEL: smulobri32:
884; WIN64:       # %bb.0:
885; WIN64-NEXT:    imull %edx, %ecx
886; WIN64-NEXT:    jo .LBB17_1
887; WIN64-NEXT:  # %bb.2: # %continue
888; WIN64-NEXT:    movb $1, %al
889; WIN64-NEXT:    retq
890; WIN64-NEXT:  .LBB17_1: # %overflow
891; WIN64-NEXT:    xorl %eax, %eax
892; WIN64-NEXT:    retq
893;
894; WIN32-LABEL: smulobri32:
895; WIN32:       # %bb.0:
896; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
897; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %eax
898; WIN32-NEXT:    jo LBB17_1
899; WIN32-NEXT:  # %bb.2: # %continue
900; WIN32-NEXT:    movb $1, %al
901; WIN32-NEXT:    retl
902; WIN32-NEXT:  LBB17_1: # %overflow
903; WIN32-NEXT:    xorl %eax, %eax
904; WIN32-NEXT:    retl
905  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
906  %val = extractvalue {i32, i1} %t, 0
907  %obit = extractvalue {i32, i1} %t, 1
908  br i1 %obit, label %overflow, label %continue, !prof !0
909
910overflow:
911  ret i1 false
912
913continue:
914  ret i1 true
915}
916
; Branches on the overflow bit of a signed i64 multiply: returns false on
; overflow and true otherwise. %val is extracted but never used. The 64-bit
; expectations branch with jo directly after imulq. The i386 expectations
; expand the multiply into 32-bit mull/imull with adcl carry propagation,
; reduce the overflow condition with sarl/xorl/orl, and branch with jne; both
; return paths share one epilogue block (LBB18_2).
; NOTE(review): !prof !0 refers to metadata defined outside this view.
917define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
918; SDAG-LABEL: smulobri64:
919; SDAG:       # %bb.0:
920; SDAG-NEXT:    imulq %rsi, %rdi
921; SDAG-NEXT:    jo .LBB18_1
922; SDAG-NEXT:  # %bb.2: # %continue
923; SDAG-NEXT:    movb $1, %al
924; SDAG-NEXT:    retq
925; SDAG-NEXT:  .LBB18_1: # %overflow
926; SDAG-NEXT:    xorl %eax, %eax
927; SDAG-NEXT:    retq
928;
929; FAST-LABEL: smulobri64:
930; FAST:       # %bb.0:
931; FAST-NEXT:    imulq %rsi, %rdi
932; FAST-NEXT:    jo .LBB18_1
933; FAST-NEXT:  # %bb.2: # %continue
934; FAST-NEXT:    movb $1, %al
935; FAST-NEXT:    andb $1, %al
936; FAST-NEXT:    retq
937; FAST-NEXT:  .LBB18_1: # %overflow
938; FAST-NEXT:    xorl %eax, %eax
939; FAST-NEXT:    andb $1, %al
940; FAST-NEXT:    # kill: def $al killed $al killed $eax
941; FAST-NEXT:    retq
942;
943; WIN64-LABEL: smulobri64:
944; WIN64:       # %bb.0:
945; WIN64-NEXT:    imulq %rdx, %rcx
946; WIN64-NEXT:    jo .LBB18_1
947; WIN64-NEXT:  # %bb.2: # %continue
948; WIN64-NEXT:    movb $1, %al
949; WIN64-NEXT:    retq
950; WIN64-NEXT:  .LBB18_1: # %overflow
951; WIN64-NEXT:    xorl %eax, %eax
952; WIN64-NEXT:    retq
953;
954; WIN32-LABEL: smulobri64:
955; WIN32:       # %bb.0:
956; WIN32-NEXT:    pushl %ebp
957; WIN32-NEXT:    pushl %ebx
958; WIN32-NEXT:    pushl %edi
959; WIN32-NEXT:    pushl %esi
960; WIN32-NEXT:    pushl %eax
961; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
962; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
963; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
964; WIN32-NEXT:    movl %ebp, %ecx
965; WIN32-NEXT:    sarl $31, %ecx
966; WIN32-NEXT:    imull %edi, %ecx
967; WIN32-NEXT:    movl %esi, %eax
968; WIN32-NEXT:    mull %edi
969; WIN32-NEXT:    movl %edx, %ebx
970; WIN32-NEXT:    movl %ebp, %eax
971; WIN32-NEXT:    mull %edi
972; WIN32-NEXT:    movl %edx, %edi
973; WIN32-NEXT:    movl %eax, %ebp
974; WIN32-NEXT:    addl %ebx, %ebp
975; WIN32-NEXT:    adcl %ecx, %edi
976; WIN32-NEXT:    movl %edi, %eax
977; WIN32-NEXT:    sarl $31, %eax
978; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
979; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
980; WIN32-NEXT:    movl %edx, %ecx
981; WIN32-NEXT:    sarl $31, %ecx
982; WIN32-NEXT:    imull %esi, %ecx
983; WIN32-NEXT:    movl %esi, %eax
984; WIN32-NEXT:    mull %edx
985; WIN32-NEXT:    movl %edx, %ebx
986; WIN32-NEXT:    movl %eax, %esi
987; WIN32-NEXT:    addl %ebp, %esi
988; WIN32-NEXT:    adcl %ecx, %ebx
989; WIN32-NEXT:    movl %ebx, %ebp
990; WIN32-NEXT:    sarl $31, %ebp
991; WIN32-NEXT:    addl %edi, %ebx
992; WIN32-NEXT:    adcl (%esp), %ebp # 4-byte Folded Reload
993; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
994; WIN32-NEXT:    imull {{[0-9]+}}(%esp)
995; WIN32-NEXT:    addl %ebx, %eax
996; WIN32-NEXT:    adcl %ebp, %edx
997; WIN32-NEXT:    sarl $31, %esi
998; WIN32-NEXT:    xorl %esi, %edx
999; WIN32-NEXT:    xorl %eax, %esi
1000; WIN32-NEXT:    orl %edx, %esi
1001; WIN32-NEXT:    jne LBB18_1
1002; WIN32-NEXT:  # %bb.3: # %continue
1003; WIN32-NEXT:    movb $1, %al
1004; WIN32-NEXT:  LBB18_2: # %overflow
1005; WIN32-NEXT:    addl $4, %esp
1006; WIN32-NEXT:    popl %esi
1007; WIN32-NEXT:    popl %edi
1008; WIN32-NEXT:    popl %ebx
1009; WIN32-NEXT:    popl %ebp
1010; WIN32-NEXT:    retl
1011; WIN32-NEXT:  LBB18_1: # %overflow
1012; WIN32-NEXT:    xorl %eax, %eax
1013; WIN32-NEXT:    jmp LBB18_2
1014  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
1015  %val = extractvalue {i64, i1} %t, 0
1016  %obit = extractvalue {i64, i1} %t, 1
1017  br i1 %obit, label %overflow, label %continue, !prof !0
1018
1019overflow:
1020  ret i1 false
1021
1022continue:
1023  ret i1 true
1024}
1025
; umulobri8 - unsigned i8 multiply-with-overflow whose overflow bit drives a
; conditional branch. Returns false on the %overflow path and true on the
; %continue path; the product (%val) is extracted but not otherwise used.
1026define zeroext i1 @umulobri8(i8 %v1, i8 %v2) {
1027; SDAG-LABEL: umulobri8:
1028; SDAG:       # %bb.0:
1029; SDAG-NEXT:    movl %edi, %eax
1030; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1031; SDAG-NEXT:    mulb %sil
1032; SDAG-NEXT:    jo .LBB19_1
1033; SDAG-NEXT:  # %bb.2: # %continue
1034; SDAG-NEXT:    movb $1, %al
1035; SDAG-NEXT:    retq
1036; SDAG-NEXT:  .LBB19_1: # %overflow
1037; SDAG-NEXT:    xorl %eax, %eax
1038; SDAG-NEXT:    retq
1039;
1040; FAST-LABEL: umulobri8:
1041; FAST:       # %bb.0:
1042; FAST-NEXT:    movl %edi, %eax
1043; FAST-NEXT:    # kill: def $al killed $al killed $eax
1044; FAST-NEXT:    mulb %sil
1045; FAST-NEXT:    seto %al
1046; FAST-NEXT:    testb $1, %al
1047; FAST-NEXT:    jne .LBB19_1
1048; FAST-NEXT:  # %bb.2: # %continue
1049; FAST-NEXT:    movb $1, %al
1050; FAST-NEXT:    andb $1, %al
1051; FAST-NEXT:    retq
1052; FAST-NEXT:  .LBB19_1: # %overflow
1053; FAST-NEXT:    xorl %eax, %eax
1054; FAST-NEXT:    andb $1, %al
1055; FAST-NEXT:    # kill: def $al killed $al killed $eax
1056; FAST-NEXT:    retq
1057;
1058; WIN64-LABEL: umulobri8:
1059; WIN64:       # %bb.0:
1060; WIN64-NEXT:    movl %ecx, %eax
1061; WIN64-NEXT:    mulb %dl
1062; WIN64-NEXT:    jo .LBB19_1
1063; WIN64-NEXT:  # %bb.2: # %continue
1064; WIN64-NEXT:    movb $1, %al
1065; WIN64-NEXT:    retq
1066; WIN64-NEXT:  .LBB19_1: # %overflow
1067; WIN64-NEXT:    xorl %eax, %eax
1068; WIN64-NEXT:    retq
1069;
1070; WIN32-LABEL: umulobri8:
1071; WIN32:       # %bb.0:
1072; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1073; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
1074; WIN32-NEXT:    jo LBB19_1
1075; WIN32-NEXT:  # %bb.2: # %continue
1076; WIN32-NEXT:    movb $1, %al
1077; WIN32-NEXT:    retl
1078; WIN32-NEXT:  LBB19_1: # %overflow
1079; WIN32-NEXT:    xorl %eax, %eax
1080; WIN32-NEXT:    retl
1081  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1082  %val = extractvalue {i8, i1} %t, 0
1083  %obit = extractvalue {i8, i1} %t, 1
; Only the overflow bit steers control flow; the !0 metadata node attached
; via !prof is defined outside this function.
1084  br i1 %obit, label %overflow, label %continue, !prof !0
1085
1086overflow:
1087  ret i1 false
1088
1089continue:
1090  ret i1 true
1091}
1092
; umulobri16 - unsigned i16 multiply-with-overflow whose overflow bit drives a
; conditional branch. Returns false on the %overflow path and true on the
; %continue path; the product (%val) is extracted but not otherwise used.
1093define zeroext i1 @umulobri16(i16 %v1, i16 %v2) {
1094; SDAG-LABEL: umulobri16:
1095; SDAG:       # %bb.0:
1096; SDAG-NEXT:    movl %edi, %eax
1097; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
1098; SDAG-NEXT:    mulw %si
1099; SDAG-NEXT:    jo .LBB20_1
1100; SDAG-NEXT:  # %bb.2: # %continue
1101; SDAG-NEXT:    movb $1, %al
1102; SDAG-NEXT:    retq
1103; SDAG-NEXT:  .LBB20_1: # %overflow
1104; SDAG-NEXT:    xorl %eax, %eax
1105; SDAG-NEXT:    retq
1106;
1107; FAST-LABEL: umulobri16:
1108; FAST:       # %bb.0:
1109; FAST-NEXT:    movl %edi, %eax
1110; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
1111; FAST-NEXT:    mulw %si
1112; FAST-NEXT:    seto %al
1113; FAST-NEXT:    testb $1, %al
1114; FAST-NEXT:    jne .LBB20_1
1115; FAST-NEXT:  # %bb.2: # %continue
1116; FAST-NEXT:    movb $1, %al
1117; FAST-NEXT:    andb $1, %al
1118; FAST-NEXT:    retq
1119; FAST-NEXT:  .LBB20_1: # %overflow
1120; FAST-NEXT:    xorl %eax, %eax
1121; FAST-NEXT:    andb $1, %al
1122; FAST-NEXT:    # kill: def $al killed $al killed $eax
1123; FAST-NEXT:    retq
1124;
1125; WIN64-LABEL: umulobri16:
1126; WIN64:       # %bb.0:
1127; WIN64-NEXT:    movl %ecx, %eax
1128; WIN64-NEXT:    mulw %dx
1129; WIN64-NEXT:    jo .LBB20_1
1130; WIN64-NEXT:  # %bb.2: # %continue
1131; WIN64-NEXT:    movb $1, %al
1132; WIN64-NEXT:    retq
1133; WIN64-NEXT:  .LBB20_1: # %overflow
1134; WIN64-NEXT:    xorl %eax, %eax
1135; WIN64-NEXT:    retq
1136;
1137; WIN32-LABEL: umulobri16:
1138; WIN32:       # %bb.0:
1139; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1140; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
1141; WIN32-NEXT:    jo LBB20_1
1142; WIN32-NEXT:  # %bb.2: # %continue
1143; WIN32-NEXT:    movb $1, %al
1144; WIN32-NEXT:    retl
1145; WIN32-NEXT:  LBB20_1: # %overflow
1146; WIN32-NEXT:    xorl %eax, %eax
1147; WIN32-NEXT:    retl
1148  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1149  %val = extractvalue {i16, i1} %t, 0
1150  %obit = extractvalue {i16, i1} %t, 1
; Only the overflow bit steers control flow; the !0 metadata node attached
; via !prof is defined outside this function.
1151  br i1 %obit, label %overflow, label %continue, !prof !0
1152
1153overflow:
1154  ret i1 false
1155
1156continue:
1157  ret i1 true
1158}
1159
; umulobri32 - unsigned i32 multiply-with-overflow whose overflow bit drives a
; conditional branch. Returns false on the %overflow path and true on the
; %continue path; the product (%val) is extracted but not otherwise used.
1160define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
1161; SDAG-LABEL: umulobri32:
1162; SDAG:       # %bb.0:
1163; SDAG-NEXT:    movl %edi, %eax
1164; SDAG-NEXT:    mull %esi
1165; SDAG-NEXT:    jo .LBB21_1
1166; SDAG-NEXT:  # %bb.2: # %continue
1167; SDAG-NEXT:    movb $1, %al
1168; SDAG-NEXT:    retq
1169; SDAG-NEXT:  .LBB21_1: # %overflow
1170; SDAG-NEXT:    xorl %eax, %eax
1171; SDAG-NEXT:    retq
1172;
1173; FAST-LABEL: umulobri32:
1174; FAST:       # %bb.0:
1175; FAST-NEXT:    movl %edi, %eax
1176; FAST-NEXT:    mull %esi
1177; FAST-NEXT:    jo .LBB21_1
1178; FAST-NEXT:  # %bb.2: # %continue
1179; FAST-NEXT:    movb $1, %al
1180; FAST-NEXT:    andb $1, %al
1181; FAST-NEXT:    retq
1182; FAST-NEXT:  .LBB21_1: # %overflow
1183; FAST-NEXT:    xorl %eax, %eax
1184; FAST-NEXT:    andb $1, %al
1185; FAST-NEXT:    # kill: def $al killed $al killed $eax
1186; FAST-NEXT:    retq
1187;
1188; WIN64-LABEL: umulobri32:
1189; WIN64:       # %bb.0:
1190; WIN64-NEXT:    movl %ecx, %eax
1191; WIN64-NEXT:    mull %edx
1192; WIN64-NEXT:    jo .LBB21_1
1193; WIN64-NEXT:  # %bb.2: # %continue
1194; WIN64-NEXT:    movb $1, %al
1195; WIN64-NEXT:    retq
1196; WIN64-NEXT:  .LBB21_1: # %overflow
1197; WIN64-NEXT:    xorl %eax, %eax
1198; WIN64-NEXT:    retq
1199;
1200; WIN32-LABEL: umulobri32:
1201; WIN32:       # %bb.0:
1202; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1203; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1204; WIN32-NEXT:    jo LBB21_1
1205; WIN32-NEXT:  # %bb.2: # %continue
1206; WIN32-NEXT:    movb $1, %al
1207; WIN32-NEXT:    retl
1208; WIN32-NEXT:  LBB21_1: # %overflow
1209; WIN32-NEXT:    xorl %eax, %eax
1210; WIN32-NEXT:    retl
1211  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
1212  %val = extractvalue {i32, i1} %t, 0
1213  %obit = extractvalue {i32, i1} %t, 1
; Only the overflow bit steers control flow; the !0 metadata node attached
; via !prof is defined outside this function.
1214  br i1 %obit, label %overflow, label %continue, !prof !0
1215
1216overflow:
1217  ret i1 false
1218
1219continue:
1220  ret i1 true
1221}
1222
; umulobri64 - unsigned i64 multiply-with-overflow whose overflow bit drives a
; conditional branch. Returns false on the %overflow path and true on the
; %continue path; the product (%val) is extracted but not otherwise used.
; The 64-bit runs expect a single mulq followed by jo, whereas the i386 run
; expects the overflow condition to be assembled from three 32-bit mull
; instructions and their seto/setb results.
1223define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
1224; SDAG-LABEL: umulobri64:
1225; SDAG:       # %bb.0:
1226; SDAG-NEXT:    movq %rdi, %rax
1227; SDAG-NEXT:    mulq %rsi
1228; SDAG-NEXT:    jo .LBB22_1
1229; SDAG-NEXT:  # %bb.2: # %continue
1230; SDAG-NEXT:    movb $1, %al
1231; SDAG-NEXT:    retq
1232; SDAG-NEXT:  .LBB22_1: # %overflow
1233; SDAG-NEXT:    xorl %eax, %eax
1234; SDAG-NEXT:    retq
1235;
1236; FAST-LABEL: umulobri64:
1237; FAST:       # %bb.0:
1238; FAST-NEXT:    movq %rdi, %rax
1239; FAST-NEXT:    mulq %rsi
1240; FAST-NEXT:    jo .LBB22_1
1241; FAST-NEXT:  # %bb.2: # %continue
1242; FAST-NEXT:    movb $1, %al
1243; FAST-NEXT:    andb $1, %al
1244; FAST-NEXT:    retq
1245; FAST-NEXT:  .LBB22_1: # %overflow
1246; FAST-NEXT:    xorl %eax, %eax
1247; FAST-NEXT:    andb $1, %al
1248; FAST-NEXT:    # kill: def $al killed $al killed $eax
1249; FAST-NEXT:    retq
1250;
1251; WIN64-LABEL: umulobri64:
1252; WIN64:       # %bb.0:
1253; WIN64-NEXT:    movq %rcx, %rax
1254; WIN64-NEXT:    mulq %rdx
1255; WIN64-NEXT:    jo .LBB22_1
1256; WIN64-NEXT:  # %bb.2: # %continue
1257; WIN64-NEXT:    movb $1, %al
1258; WIN64-NEXT:    retq
1259; WIN64-NEXT:  .LBB22_1: # %overflow
1260; WIN64-NEXT:    xorl %eax, %eax
1261; WIN64-NEXT:    retq
1262;
1263; WIN32-LABEL: umulobri64:
1264; WIN32:       # %bb.0:
1265; WIN32-NEXT:    pushl %ebp
1266; WIN32-NEXT:    pushl %ebx
1267; WIN32-NEXT:    pushl %edi
1268; WIN32-NEXT:    pushl %esi
1269; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1270; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1271; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1272; WIN32-NEXT:    testl %esi, %esi
1273; WIN32-NEXT:    setne %dl
1274; WIN32-NEXT:    testl %eax, %eax
1275; WIN32-NEXT:    setne %cl
1276; WIN32-NEXT:    andb %dl, %cl
1277; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1278; WIN32-NEXT:    movl %eax, %edi
1279; WIN32-NEXT:    seto %bl
1280; WIN32-NEXT:    movl %esi, %eax
1281; WIN32-NEXT:    mull %ebp
1282; WIN32-NEXT:    seto %ch
1283; WIN32-NEXT:    orb %bl, %ch
1284; WIN32-NEXT:    orb %cl, %ch
1285; WIN32-NEXT:    leal (%edi,%eax), %esi
1286; WIN32-NEXT:    movl %ebp, %eax
1287; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1288; WIN32-NEXT:    addl %esi, %edx
1289; WIN32-NEXT:    setb %al
1290; WIN32-NEXT:    orb %ch, %al
1291; WIN32-NEXT:    subb $1, %al
1292; WIN32-NEXT:    je LBB22_1
1293; WIN32-NEXT:  # %bb.3: # %continue
1294; WIN32-NEXT:    movb $1, %al
1295; WIN32-NEXT:  LBB22_2: # %overflow
1296; WIN32-NEXT:    popl %esi
1297; WIN32-NEXT:    popl %edi
1298; WIN32-NEXT:    popl %ebx
1299; WIN32-NEXT:    popl %ebp
1300; WIN32-NEXT:    retl
1301; WIN32-NEXT:  LBB22_1: # %overflow
1302; WIN32-NEXT:    xorl %eax, %eax
1303; WIN32-NEXT:    jmp LBB22_2
1304  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
1305  %val = extractvalue {i64, i1} %t, 0
1306  %obit = extractvalue {i64, i1} %t, 1
; Only the overflow bit steers control flow; the !0 metadata node attached
; via !prof is defined outside this function.
1307  br i1 %obit, label %overflow, label %continue, !prof !0
1308
1309overflow:
1310  ret i1 false
1311
1312continue:
1313  ret i1 true
1314}
1315
; bug27873 - multiplies %c1 by the constant 160 with unsigned overflow
; detection, ORs the resulting overflow bit with the incoming i1 %c2 and
; returns that value. The product itself is never extracted.
; NOTE(review): the name presumably refers to an LLVM bug report number;
; confirm against the bug tracker.
1316define i1 @bug27873(i64 %c1, i1 %c2) {
1317; LINUX-LABEL: bug27873:
1318; LINUX:       # %bb.0:
1319; LINUX-NEXT:    movq %rdi, %rax
1320; LINUX-NEXT:    movl $160, %ecx
1321; LINUX-NEXT:    mulq %rcx
1322; LINUX-NEXT:    seto %al
1323; LINUX-NEXT:    orb %sil, %al
1324; LINUX-NEXT:    retq
1325;
1326; WIN64-LABEL: bug27873:
1327; WIN64:       # %bb.0:
1328; WIN64-NEXT:    movl %edx, %r8d
1329; WIN64-NEXT:    movq %rcx, %rax
1330; WIN64-NEXT:    movl $160, %ecx
1331; WIN64-NEXT:    mulq %rcx
1332; WIN64-NEXT:    seto %al
1333; WIN64-NEXT:    orb %r8b, %al
1334; WIN64-NEXT:    retq
1335;
1336; WIN32-LABEL: bug27873:
1337; WIN32:       # %bb.0:
1338; WIN32-NEXT:    pushl %ebx
1339; WIN32-NEXT:    movl $160, %eax
1340; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1341; WIN32-NEXT:    movl %eax, %ecx
1342; WIN32-NEXT:    seto %bl
1343; WIN32-NEXT:    movl $160, %eax
1344; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1345; WIN32-NEXT:    addl %ecx, %edx
1346; WIN32-NEXT:    setb %al
1347; WIN32-NEXT:    orb %bl, %al
1348; WIN32-NEXT:    orb {{[0-9]+}}(%esp), %al
1349; WIN32-NEXT:    popl %ebx
1350; WIN32-NEXT:    retl
1351  %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
1352  %mul.overflow = extractvalue { i64, i1 } %mul, 1
1353  %x1 = or i1 %c2, %mul.overflow
1354  ret i1 %x1
1355}
1356
; smuloi8_load - signed i8 multiply-with-overflow whose FIRST operand is
; loaded from %ptr1 (the second operand %v2 is passed by value). Stores the
; product to %res and returns the overflow bit.
1357define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
1358; SDAG-LABEL: smuloi8_load:
1359; SDAG:       # %bb.0:
1360; SDAG-NEXT:    movl %esi, %eax
1361; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1362; SDAG-NEXT:    imulb (%rdi)
1363; SDAG-NEXT:    seto %cl
1364; SDAG-NEXT:    movb %al, (%rdx)
1365; SDAG-NEXT:    movl %ecx, %eax
1366; SDAG-NEXT:    retq
1367;
1368; FAST-LABEL: smuloi8_load:
1369; FAST:       # %bb.0:
1370; FAST-NEXT:    movzbl (%rdi), %eax
1371; FAST-NEXT:    imulb %sil
1372; FAST-NEXT:    seto %cl
1373; FAST-NEXT:    movb %al, (%rdx)
1374; FAST-NEXT:    andb $1, %cl
1375; FAST-NEXT:    movl %ecx, %eax
1376; FAST-NEXT:    retq
1377;
1378; WIN64-LABEL: smuloi8_load:
1379; WIN64:       # %bb.0:
1380; WIN64-NEXT:    movl %edx, %eax
1381; WIN64-NEXT:    imulb (%rcx)
1382; WIN64-NEXT:    seto %cl
1383; WIN64-NEXT:    movb %al, (%r8)
1384; WIN64-NEXT:    movl %ecx, %eax
1385; WIN64-NEXT:    retq
1386;
1387; WIN32-LABEL: smuloi8_load:
1388; WIN32:       # %bb.0:
1389; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1390; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1391; WIN32-NEXT:    movzbl (%eax), %eax
1392; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
1393; WIN32-NEXT:    seto %cl
1394; WIN32-NEXT:    movb %al, (%edx)
1395; WIN32-NEXT:    movl %ecx, %eax
1396; WIN32-NEXT:    retl
1397  %v1 = load i8, ptr %ptr1
1398  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
1399  %val = extractvalue {i8, i1} %t, 0
1400  %obit = extractvalue {i8, i1} %t, 1
1401  store i8 %val, ptr %res
1402  ret i1 %obit
1403}
1404
; smuloi8_load2 - signed i8 multiply-with-overflow whose SECOND operand is
; loaded from %ptr2 (the first operand %v1 is passed by value). Stores the
; product to %res and returns the overflow bit.
1405define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
1406; SDAG-LABEL: smuloi8_load2:
1407; SDAG:       # %bb.0:
1408; SDAG-NEXT:    movl %edi, %eax
1409; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1410; SDAG-NEXT:    imulb (%rsi)
1411; SDAG-NEXT:    seto %cl
1412; SDAG-NEXT:    movb %al, (%rdx)
1413; SDAG-NEXT:    movl %ecx, %eax
1414; SDAG-NEXT:    retq
1415;
1416; FAST-LABEL: smuloi8_load2:
1417; FAST:       # %bb.0:
1418; FAST-NEXT:    movl %edi, %eax
1419; FAST-NEXT:    # kill: def $al killed $al killed $eax
1420; FAST-NEXT:    imulb (%rsi)
1421; FAST-NEXT:    seto %cl
1422; FAST-NEXT:    movb %al, (%rdx)
1423; FAST-NEXT:    andb $1, %cl
1424; FAST-NEXT:    movl %ecx, %eax
1425; FAST-NEXT:    retq
1426;
1427; WIN64-LABEL: smuloi8_load2:
1428; WIN64:       # %bb.0:
1429; WIN64-NEXT:    movl %ecx, %eax
1430; WIN64-NEXT:    imulb (%rdx)
1431; WIN64-NEXT:    seto %cl
1432; WIN64-NEXT:    movb %al, (%r8)
1433; WIN64-NEXT:    movl %ecx, %eax
1434; WIN64-NEXT:    retq
1435;
1436; WIN32-LABEL: smuloi8_load2:
1437; WIN32:       # %bb.0:
1438; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1439; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1440; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1441; WIN32-NEXT:    imulb (%ecx)
1442; WIN32-NEXT:    seto %cl
1443; WIN32-NEXT:    movb %al, (%edx)
1444; WIN32-NEXT:    movl %ecx, %eax
1445; WIN32-NEXT:    retl
1446  %v2 = load i8, ptr %ptr2
1447  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
1448  %val = extractvalue {i8, i1} %t, 0
1449  %obit = extractvalue {i8, i1} %t, 1
1450  store i8 %val, ptr %res
1451  ret i1 %obit
1452}
1453
; smuloi16_load - signed i16 multiply-with-overflow whose FIRST operand is
; loaded from %ptr1 (the second operand %v2 is passed by value). Stores the
; product to %res and returns the overflow bit.
1454define zeroext i1 @smuloi16_load(ptr %ptr1, i16 %v2, ptr %res) {
1455; SDAG-LABEL: smuloi16_load:
1456; SDAG:       # %bb.0:
1457; SDAG-NEXT:    imulw (%rdi), %si
1458; SDAG-NEXT:    seto %al
1459; SDAG-NEXT:    movw %si, (%rdx)
1460; SDAG-NEXT:    retq
1461;
1462; FAST-LABEL: smuloi16_load:
1463; FAST:       # %bb.0:
1464; FAST-NEXT:    imulw (%rdi), %si
1465; FAST-NEXT:    seto %al
1466; FAST-NEXT:    movw %si, (%rdx)
1467; FAST-NEXT:    andb $1, %al
1468; FAST-NEXT:    retq
1469;
1470; WIN64-LABEL: smuloi16_load:
1471; WIN64:       # %bb.0:
1472; WIN64-NEXT:    imulw (%rcx), %dx
1473; WIN64-NEXT:    seto %al
1474; WIN64-NEXT:    movw %dx, (%r8)
1475; WIN64-NEXT:    retq
1476;
1477; WIN32-LABEL: smuloi16_load:
1478; WIN32:       # %bb.0:
1479; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1480; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1481; WIN32-NEXT:    movzwl (%eax), %edx
1482; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %dx
1483; WIN32-NEXT:    seto %al
1484; WIN32-NEXT:    movw %dx, (%ecx)
1485; WIN32-NEXT:    retl
1486  %v1 = load i16, ptr %ptr1
1487  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
1488  %val = extractvalue {i16, i1} %t, 0
1489  %obit = extractvalue {i16, i1} %t, 1
1490  store i16 %val, ptr %res
1491  ret i1 %obit
1492}
1493
; smuloi16_load2 - signed i16 multiply-with-overflow whose SECOND operand is
; loaded from %ptr2 (the first operand %v1 is passed by value). Stores the
; product to %res and returns the overflow bit.
1494define zeroext i1 @smuloi16_load2(i16 %v1, ptr %ptr2, ptr %res) {
1495; SDAG-LABEL: smuloi16_load2:
1496; SDAG:       # %bb.0:
1497; SDAG-NEXT:    imulw (%rsi), %di
1498; SDAG-NEXT:    seto %al
1499; SDAG-NEXT:    movw %di, (%rdx)
1500; SDAG-NEXT:    retq
1501;
1502; FAST-LABEL: smuloi16_load2:
1503; FAST:       # %bb.0:
1504; FAST-NEXT:    imulw (%rsi), %di
1505; FAST-NEXT:    seto %al
1506; FAST-NEXT:    movw %di, (%rdx)
1507; FAST-NEXT:    andb $1, %al
1508; FAST-NEXT:    retq
1509;
1510; WIN64-LABEL: smuloi16_load2:
1511; WIN64:       # %bb.0:
1512; WIN64-NEXT:    imulw (%rdx), %cx
1513; WIN64-NEXT:    seto %al
1514; WIN64-NEXT:    movw %cx, (%r8)
1515; WIN64-NEXT:    retq
1516;
1517; WIN32-LABEL: smuloi16_load2:
1518; WIN32:       # %bb.0:
1519; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1520; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1521; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
1522; WIN32-NEXT:    imulw (%eax), %dx
1523; WIN32-NEXT:    seto %al
1524; WIN32-NEXT:    movw %dx, (%ecx)
1525; WIN32-NEXT:    retl
1526  %v2 = load i16, ptr %ptr2
1527  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
1528  %val = extractvalue {i16, i1} %t, 0
1529  %obit = extractvalue {i16, i1} %t, 1
1530  store i16 %val, ptr %res
1531  ret i1 %obit
1532}
1533
; smuloi32_load - signed i32 multiply-with-overflow whose FIRST operand is
; loaded from %ptr1 (the second operand %v2 is passed by value). Stores the
; product to %res and returns the overflow bit.
1534define zeroext i1 @smuloi32_load(ptr %ptr1, i32 %v2, ptr %res) {
1535; SDAG-LABEL: smuloi32_load:
1536; SDAG:       # %bb.0:
1537; SDAG-NEXT:    imull (%rdi), %esi
1538; SDAG-NEXT:    seto %al
1539; SDAG-NEXT:    movl %esi, (%rdx)
1540; SDAG-NEXT:    retq
1541;
1542; FAST-LABEL: smuloi32_load:
1543; FAST:       # %bb.0:
1544; FAST-NEXT:    imull (%rdi), %esi
1545; FAST-NEXT:    seto %al
1546; FAST-NEXT:    movl %esi, (%rdx)
1547; FAST-NEXT:    andb $1, %al
1548; FAST-NEXT:    retq
1549;
1550; WIN64-LABEL: smuloi32_load:
1551; WIN64:       # %bb.0:
1552; WIN64-NEXT:    imull (%rcx), %edx
1553; WIN64-NEXT:    seto %al
1554; WIN64-NEXT:    movl %edx, (%r8)
1555; WIN64-NEXT:    retq
1556;
1557; WIN32-LABEL: smuloi32_load:
1558; WIN32:       # %bb.0:
1559; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1560; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1561; WIN32-NEXT:    movl (%eax), %edx
1562; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %edx
1563; WIN32-NEXT:    seto %al
1564; WIN32-NEXT:    movl %edx, (%ecx)
1565; WIN32-NEXT:    retl
1566  %v1 = load i32, ptr %ptr1
1567  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
1568  %val = extractvalue {i32, i1} %t, 0
1569  %obit = extractvalue {i32, i1} %t, 1
1570  store i32 %val, ptr %res
1571  ret i1 %obit
1572}
1573
; smuloi32_load2 - signed i32 multiply-with-overflow whose SECOND operand is
; loaded from %ptr2 (the first operand %v1 is passed by value). Stores the
; product to %res and returns the overflow bit.
1574define zeroext i1 @smuloi32_load2(i32 %v1, ptr %ptr2, ptr %res) {
1575; SDAG-LABEL: smuloi32_load2:
1576; SDAG:       # %bb.0:
1577; SDAG-NEXT:    imull (%rsi), %edi
1578; SDAG-NEXT:    seto %al
1579; SDAG-NEXT:    movl %edi, (%rdx)
1580; SDAG-NEXT:    retq
1581;
1582; FAST-LABEL: smuloi32_load2:
1583; FAST:       # %bb.0:
1584; FAST-NEXT:    imull (%rsi), %edi
1585; FAST-NEXT:    seto %al
1586; FAST-NEXT:    movl %edi, (%rdx)
1587; FAST-NEXT:    andb $1, %al
1588; FAST-NEXT:    retq
1589;
1590; WIN64-LABEL: smuloi32_load2:
1591; WIN64:       # %bb.0:
1592; WIN64-NEXT:    imull (%rdx), %ecx
1593; WIN64-NEXT:    seto %al
1594; WIN64-NEXT:    movl %ecx, (%r8)
1595; WIN64-NEXT:    retq
1596;
1597; WIN32-LABEL: smuloi32_load2:
1598; WIN32:       # %bb.0:
1599; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1600; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1601; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1602; WIN32-NEXT:    imull (%eax), %edx
1603; WIN32-NEXT:    seto %al
1604; WIN32-NEXT:    movl %edx, (%ecx)
1605; WIN32-NEXT:    retl
1606  %v2 = load i32, ptr %ptr2
1607  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
1608  %val = extractvalue {i32, i1} %t, 0
1609  %obit = extractvalue {i32, i1} %t, 1
1610  store i32 %val, ptr %res
1611  ret i1 %obit
1612}
1613
; smuloi64_load - signed i64 multiply-with-overflow whose FIRST operand is
; loaded from %ptr1 (the second operand %v2 is passed by value). Stores the
; product to %res and returns the overflow bit.
; The 64-bit runs expect a single imulq with a memory operand; the i386 run
; expects the loaded value to be read as two 32-bit halves ((%eax) and
; 4(%eax)) and the result to be written back as two 32-bit stores.
1614define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
1615; SDAG-LABEL: smuloi64_load:
1616; SDAG:       # %bb.0:
1617; SDAG-NEXT:    imulq (%rdi), %rsi
1618; SDAG-NEXT:    seto %al
1619; SDAG-NEXT:    movq %rsi, (%rdx)
1620; SDAG-NEXT:    retq
1621;
1622; FAST-LABEL: smuloi64_load:
1623; FAST:       # %bb.0:
1624; FAST-NEXT:    imulq (%rdi), %rsi
1625; FAST-NEXT:    seto %al
1626; FAST-NEXT:    movq %rsi, (%rdx)
1627; FAST-NEXT:    andb $1, %al
1628; FAST-NEXT:    retq
1629;
1630; WIN64-LABEL: smuloi64_load:
1631; WIN64:       # %bb.0:
1632; WIN64-NEXT:    imulq (%rcx), %rdx
1633; WIN64-NEXT:    seto %al
1634; WIN64-NEXT:    movq %rdx, (%r8)
1635; WIN64-NEXT:    retq
1636;
1637; WIN32-LABEL: smuloi64_load:
1638; WIN32:       # %bb.0:
1639; WIN32-NEXT:    pushl %ebp
1640; WIN32-NEXT:    pushl %ebx
1641; WIN32-NEXT:    pushl %edi
1642; WIN32-NEXT:    pushl %esi
1643; WIN32-NEXT:    subl $12, %esp
1644; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1645; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1646; WIN32-NEXT:    movl (%eax), %ecx
1647; WIN32-NEXT:    movl 4(%eax), %ebp
1648; WIN32-NEXT:    movl %ebp, %esi
1649; WIN32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1650; WIN32-NEXT:    sarl $31, %esi
1651; WIN32-NEXT:    imull %ebx, %esi
1652; WIN32-NEXT:    movl %ecx, %eax
1653; WIN32-NEXT:    mull %ebx
1654; WIN32-NEXT:    movl %edx, %edi
1655; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1656; WIN32-NEXT:    movl %ebp, %eax
1657; WIN32-NEXT:    mull %ebx
1658; WIN32-NEXT:    movl %edx, %ebx
1659; WIN32-NEXT:    movl %eax, %ebp
1660; WIN32-NEXT:    addl %edi, %ebp
1661; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1662; WIN32-NEXT:    adcl %esi, %ebx
1663; WIN32-NEXT:    movl %ebx, %edi
1664; WIN32-NEXT:    sarl $31, %edi
1665; WIN32-NEXT:    movl %eax, %esi
1666; WIN32-NEXT:    sarl $31, %esi
1667; WIN32-NEXT:    imull %ecx, %esi
1668; WIN32-NEXT:    movl %ecx, %eax
1669; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1670; WIN32-NEXT:    movl %edx, %ecx
1671; WIN32-NEXT:    addl %ebp, %eax
1672; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
1673; WIN32-NEXT:    adcl %esi, %ecx
1674; WIN32-NEXT:    movl %ecx, %ebp
1675; WIN32-NEXT:    sarl $31, %ebp
1676; WIN32-NEXT:    addl %ebx, %ecx
1677; WIN32-NEXT:    adcl %edi, %ebp
1678; WIN32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1679; WIN32-NEXT:    imull {{[0-9]+}}(%esp)
1680; WIN32-NEXT:    addl %ecx, %eax
1681; WIN32-NEXT:    adcl %ebp, %edx
1682; WIN32-NEXT:    movl (%esp), %esi # 4-byte Reload
1683; WIN32-NEXT:    movl %esi, %ecx
1684; WIN32-NEXT:    sarl $31, %ecx
1685; WIN32-NEXT:    xorl %ecx, %edx
1686; WIN32-NEXT:    xorl %eax, %ecx
1687; WIN32-NEXT:    orl %edx, %ecx
1688; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1689; WIN32-NEXT:    movl %esi, 4(%eax)
1690; WIN32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1691; WIN32-NEXT:    movl %ecx, (%eax)
1692; WIN32-NEXT:    setne %al
1693; WIN32-NEXT:    addl $12, %esp
1694; WIN32-NEXT:    popl %esi
1695; WIN32-NEXT:    popl %edi
1696; WIN32-NEXT:    popl %ebx
1697; WIN32-NEXT:    popl %ebp
1698; WIN32-NEXT:    retl
1699  %v1 = load i64, ptr %ptr1
1700  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
1701  %val = extractvalue {i64, i1} %t, 0
1702  %obit = extractvalue {i64, i1} %t, 1
1703  store i64 %val, ptr %res
1704  ret i1 %obit
1705}
1706
; smuloi64_load2 - signed i64 multiply-with-overflow whose SECOND operand is
; loaded from %ptr2 (the first operand %v1 is passed by value). Stores the
; product to %res and returns the overflow bit.
; The 64-bit runs expect a single imulq with a memory operand; the i386 run
; expects the loaded value to be read as two 32-bit halves ((%ecx) and
; 4(%eax)) and the result to be written back as two 32-bit stores.
1707define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
1708; SDAG-LABEL: smuloi64_load2:
1709; SDAG:       # %bb.0:
1710; SDAG-NEXT:    imulq (%rsi), %rdi
1711; SDAG-NEXT:    seto %al
1712; SDAG-NEXT:    movq %rdi, (%rdx)
1713; SDAG-NEXT:    retq
1714;
1715; FAST-LABEL: smuloi64_load2:
1716; FAST:       # %bb.0:
1717; FAST-NEXT:    imulq (%rsi), %rdi
1718; FAST-NEXT:    seto %al
1719; FAST-NEXT:    movq %rdi, (%rdx)
1720; FAST-NEXT:    andb $1, %al
1721; FAST-NEXT:    retq
1722;
1723; WIN64-LABEL: smuloi64_load2:
1724; WIN64:       # %bb.0:
1725; WIN64-NEXT:    imulq (%rdx), %rcx
1726; WIN64-NEXT:    seto %al
1727; WIN64-NEXT:    movq %rcx, (%r8)
1728; WIN64-NEXT:    retq
1729;
1730; WIN32-LABEL: smuloi64_load2:
1731; WIN32:       # %bb.0:
1732; WIN32-NEXT:    pushl %ebp
1733; WIN32-NEXT:    pushl %ebx
1734; WIN32-NEXT:    pushl %edi
1735; WIN32-NEXT:    pushl %esi
1736; WIN32-NEXT:    subl $12, %esp
1737; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1738; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
1739; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1740; WIN32-NEXT:    movl (%ecx), %ebx
1741; WIN32-NEXT:    movl %edi, %esi
1742; WIN32-NEXT:    sarl $31, %esi
1743; WIN32-NEXT:    imull %ebx, %esi
1744; WIN32-NEXT:    mull %ebx
1745; WIN32-NEXT:    movl %edx, %ecx
1746; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1747; WIN32-NEXT:    movl %edi, %eax
1748; WIN32-NEXT:    mull %ebx
1749; WIN32-NEXT:    movl %edx, %ebx
1750; WIN32-NEXT:    movl %eax, %ebp
1751; WIN32-NEXT:    addl %ecx, %ebp
1752; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1753; WIN32-NEXT:    movl 4(%eax), %ecx
1754; WIN32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
1755; WIN32-NEXT:    adcl %esi, %ebx
1756; WIN32-NEXT:    movl %ebx, %edi
1757; WIN32-NEXT:    sarl $31, %edi
1758; WIN32-NEXT:    movl %ecx, %esi
1759; WIN32-NEXT:    sarl $31, %esi
1760; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1761; WIN32-NEXT:    imull %eax, %esi
1762; WIN32-NEXT:    mull %ecx
1763; WIN32-NEXT:    movl %edx, %ecx
1764; WIN32-NEXT:    addl %ebp, %eax
1765; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1766; WIN32-NEXT:    adcl %esi, %ecx
1767; WIN32-NEXT:    movl %ecx, %ebp
1768; WIN32-NEXT:    sarl $31, %ebp
1769; WIN32-NEXT:    addl %ebx, %ecx
1770; WIN32-NEXT:    adcl %edi, %ebp
1771; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1772; WIN32-NEXT:    imull (%esp) # 4-byte Folded Reload
1773; WIN32-NEXT:    addl %ecx, %eax
1774; WIN32-NEXT:    adcl %ebp, %edx
1775; WIN32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1776; WIN32-NEXT:    movl %esi, %ecx
1777; WIN32-NEXT:    sarl $31, %ecx
1778; WIN32-NEXT:    xorl %ecx, %edx
1779; WIN32-NEXT:    xorl %eax, %ecx
1780; WIN32-NEXT:    orl %edx, %ecx
1781; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1782; WIN32-NEXT:    movl %esi, 4(%eax)
1783; WIN32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1784; WIN32-NEXT:    movl %ecx, (%eax)
1785; WIN32-NEXT:    setne %al
1786; WIN32-NEXT:    addl $12, %esp
1787; WIN32-NEXT:    popl %esi
1788; WIN32-NEXT:    popl %edi
1789; WIN32-NEXT:    popl %ebx
1790; WIN32-NEXT:    popl %ebp
1791; WIN32-NEXT:    retl
1792  %v2 = load i64, ptr %ptr2
1793  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
1794  %val = extractvalue {i64, i1} %t, 0
1795  %obit = extractvalue {i64, i1} %t, 1
1796  store i64 %val, ptr %res
1797  ret i1 %obit
1798}
1799
; umuloi8_load - unsigned i8 multiply-with-overflow whose FIRST operand is
; loaded from %ptr1 (the second operand %v2 is passed by value). Stores the
; product to %res and returns the overflow bit.
1800define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
1801; SDAG-LABEL: umuloi8_load:
1802; SDAG:       # %bb.0:
1803; SDAG-NEXT:    movl %esi, %eax
1804; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1805; SDAG-NEXT:    mulb (%rdi)
1806; SDAG-NEXT:    seto %cl
1807; SDAG-NEXT:    movb %al, (%rdx)
1808; SDAG-NEXT:    movl %ecx, %eax
1809; SDAG-NEXT:    retq
1810;
1811; FAST-LABEL: umuloi8_load:
1812; FAST:       # %bb.0:
1813; FAST-NEXT:    movzbl (%rdi), %eax
1814; FAST-NEXT:    mulb %sil
1815; FAST-NEXT:    seto %cl
1816; FAST-NEXT:    movb %al, (%rdx)
1817; FAST-NEXT:    andb $1, %cl
1818; FAST-NEXT:    movl %ecx, %eax
1819; FAST-NEXT:    retq
1820;
1821; WIN64-LABEL: umuloi8_load:
1822; WIN64:       # %bb.0:
1823; WIN64-NEXT:    movl %edx, %eax
1824; WIN64-NEXT:    mulb (%rcx)
1825; WIN64-NEXT:    seto %cl
1826; WIN64-NEXT:    movb %al, (%r8)
1827; WIN64-NEXT:    movl %ecx, %eax
1828; WIN64-NEXT:    retq
1829;
1830; WIN32-LABEL: umuloi8_load:
1831; WIN32:       # %bb.0:
1832; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1833; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1834; WIN32-NEXT:    movzbl (%eax), %eax
1835; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
1836; WIN32-NEXT:    seto %cl
1837; WIN32-NEXT:    movb %al, (%edx)
1838; WIN32-NEXT:    movl %ecx, %eax
1839; WIN32-NEXT:    retl
1840  %v1 = load i8, ptr %ptr1
1841  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1842  %val = extractvalue {i8, i1} %t, 0
1843  %obit = extractvalue {i8, i1} %t, 1
1844  store i8 %val, ptr %res
1845  ret i1 %obit
1846}
1847
; umuloi8_load2 - unsigned i8 multiply-with-overflow whose SECOND operand is
; loaded from %ptr2 (the first operand %v1 is passed by value). Stores the
; product to %res and returns the overflow bit.
1848define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
1849; SDAG-LABEL: umuloi8_load2:
1850; SDAG:       # %bb.0:
1851; SDAG-NEXT:    movl %edi, %eax
1852; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1853; SDAG-NEXT:    mulb (%rsi)
1854; SDAG-NEXT:    seto %cl
1855; SDAG-NEXT:    movb %al, (%rdx)
1856; SDAG-NEXT:    movl %ecx, %eax
1857; SDAG-NEXT:    retq
1858;
1859; FAST-LABEL: umuloi8_load2:
1860; FAST:       # %bb.0:
1861; FAST-NEXT:    movl %edi, %eax
1862; FAST-NEXT:    # kill: def $al killed $al killed $eax
1863; FAST-NEXT:    mulb (%rsi)
1864; FAST-NEXT:    seto %cl
1865; FAST-NEXT:    movb %al, (%rdx)
1866; FAST-NEXT:    andb $1, %cl
1867; FAST-NEXT:    movl %ecx, %eax
1868; FAST-NEXT:    retq
1869;
1870; WIN64-LABEL: umuloi8_load2:
1871; WIN64:       # %bb.0:
1872; WIN64-NEXT:    movl %ecx, %eax
1873; WIN64-NEXT:    mulb (%rdx)
1874; WIN64-NEXT:    seto %cl
1875; WIN64-NEXT:    movb %al, (%r8)
1876; WIN64-NEXT:    movl %ecx, %eax
1877; WIN64-NEXT:    retq
1878;
1879; WIN32-LABEL: umuloi8_load2:
1880; WIN32:       # %bb.0:
1881; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1882; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1883; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1884; WIN32-NEXT:    mulb (%ecx)
1885; WIN32-NEXT:    seto %cl
1886; WIN32-NEXT:    movb %al, (%edx)
1887; WIN32-NEXT:    movl %ecx, %eax
1888; WIN32-NEXT:    retl
1889  %v2 = load i8, ptr %ptr2
1890  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1891  %val = extractvalue {i8, i1} %t, 0
1892  %obit = extractvalue {i8, i1} %t, 1
1893  store i8 %val, ptr %res
1894  ret i1 %obit
1895}
1896
; umuloi16_load - unsigned i16 multiply-with-overflow whose FIRST operand is
; loaded from %ptr1 (the second operand %v2 is passed by value). Stores the
; product to %res and returns the overflow bit.
; In the Linux expected output the %res pointer is first copied from %rdx to
; %rcx, and the overflow flag is then captured in %dl after the mulw.
1897define zeroext i1 @umuloi16_load(ptr %ptr1, i16 %v2, ptr %res) {
1898; SDAG-LABEL: umuloi16_load:
1899; SDAG:       # %bb.0:
1900; SDAG-NEXT:    movq %rdx, %rcx
1901; SDAG-NEXT:    movl %esi, %eax
1902; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
1903; SDAG-NEXT:    mulw (%rdi)
1904; SDAG-NEXT:    seto %dl
1905; SDAG-NEXT:    movw %ax, (%rcx)
1906; SDAG-NEXT:    movl %edx, %eax
1907; SDAG-NEXT:    retq
1908;
1909; FAST-LABEL: umuloi16_load:
1910; FAST:       # %bb.0:
1911; FAST-NEXT:    movq %rdx, %rcx
1912; FAST-NEXT:    movzwl (%rdi), %eax
1913; FAST-NEXT:    mulw %si
1914; FAST-NEXT:    seto %dl
1915; FAST-NEXT:    movw %ax, (%rcx)
1916; FAST-NEXT:    andb $1, %dl
1917; FAST-NEXT:    movl %edx, %eax
1918; FAST-NEXT:    retq
1919;
1920; WIN64-LABEL: umuloi16_load:
1921; WIN64:       # %bb.0:
1922; WIN64-NEXT:    movl %edx, %eax
1923; WIN64-NEXT:    mulw (%rcx)
1924; WIN64-NEXT:    seto %cl
1925; WIN64-NEXT:    movw %ax, (%r8)
1926; WIN64-NEXT:    movl %ecx, %eax
1927; WIN64-NEXT:    retq
1928;
1929; WIN32-LABEL: umuloi16_load:
1930; WIN32:       # %bb.0:
1931; WIN32-NEXT:    pushl %esi
1932; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1933; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1934; WIN32-NEXT:    movzwl (%eax), %eax
1935; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
1936; WIN32-NEXT:    seto %cl
1937; WIN32-NEXT:    movw %ax, (%esi)
1938; WIN32-NEXT:    movl %ecx, %eax
1939; WIN32-NEXT:    popl %esi
1940; WIN32-NEXT:    retl
1941  %v1 = load i16, ptr %ptr1
1942  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1943  %val = extractvalue {i16, i1} %t, 0
1944  %obit = extractvalue {i16, i1} %t, 1
1945  store i16 %val, ptr %res
1946  ret i1 %obit
1947}
1948
; umuloi16_load2 - unsigned i16 multiply-with-overflow whose SECOND operand is
; loaded from %ptr2 (the first operand %v1 is passed by value). Stores the
; product to %res and returns the overflow bit.
; In the Linux expected output the %res pointer is first copied from %rdx to
; %rcx, and the overflow flag is then captured in %dl after the mulw.
1949define zeroext i1 @umuloi16_load2(i16 %v1, ptr %ptr2, ptr %res) {
1950; SDAG-LABEL: umuloi16_load2:
1951; SDAG:       # %bb.0:
1952; SDAG-NEXT:    movq %rdx, %rcx
1953; SDAG-NEXT:    movl %edi, %eax
1954; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
1955; SDAG-NEXT:    mulw (%rsi)
1956; SDAG-NEXT:    seto %dl
1957; SDAG-NEXT:    movw %ax, (%rcx)
1958; SDAG-NEXT:    movl %edx, %eax
1959; SDAG-NEXT:    retq
1960;
1961; FAST-LABEL: umuloi16_load2:
1962; FAST:       # %bb.0:
1963; FAST-NEXT:    movq %rdx, %rcx
1964; FAST-NEXT:    movl %edi, %eax
1965; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
1966; FAST-NEXT:    mulw (%rsi)
1967; FAST-NEXT:    seto %dl
1968; FAST-NEXT:    movw %ax, (%rcx)
1969; FAST-NEXT:    andb $1, %dl
1970; FAST-NEXT:    movl %edx, %eax
1971; FAST-NEXT:    retq
1972;
1973; WIN64-LABEL: umuloi16_load2:
1974; WIN64:       # %bb.0:
1975; WIN64-NEXT:    movl %ecx, %eax
1976; WIN64-NEXT:    mulw (%rdx)
1977; WIN64-NEXT:    seto %cl
1978; WIN64-NEXT:    movw %ax, (%r8)
1979; WIN64-NEXT:    movl %ecx, %eax
1980; WIN64-NEXT:    retq
1981;
1982; WIN32-LABEL: umuloi16_load2:
1983; WIN32:       # %bb.0:
1984; WIN32-NEXT:    pushl %esi
1985; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1986; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1987; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1988; WIN32-NEXT:    mulw (%ecx)
1989; WIN32-NEXT:    seto %cl
1990; WIN32-NEXT:    movw %ax, (%esi)
1991; WIN32-NEXT:    movl %ecx, %eax
1992; WIN32-NEXT:    popl %esi
1993; WIN32-NEXT:    retl
1994  %v2 = load i16, ptr %ptr2
1995  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1996  %val = extractvalue {i16, i1} %t, 0
1997  %obit = extractvalue {i16, i1} %t, 1
1998  store i16 %val, ptr %res
1999  ret i1 %obit
2000}
2001
; Unsigned i32 multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Per the assertions below, the SDAG and WIN64 runs use the pointer directly as
; the memory operand of mull, the FAST run loads with a separate movl and
; multiplies by the register argument (and masks the flag with andb $1), and
; the WIN32 run loads %v1 and multiplies by the stack slot of %v2.
2002define zeroext i1 @umuloi32_load(ptr %ptr1, i32 %v2, ptr %res) {
2003; SDAG-LABEL: umuloi32_load:
2004; SDAG:       # %bb.0:
2005; SDAG-NEXT:    movq %rdx, %rcx
2006; SDAG-NEXT:    movl %esi, %eax
2007; SDAG-NEXT:    mull (%rdi)
2008; SDAG-NEXT:    seto %dl
2009; SDAG-NEXT:    movl %eax, (%rcx)
2010; SDAG-NEXT:    movl %edx, %eax
2011; SDAG-NEXT:    retq
2012;
2013; FAST-LABEL: umuloi32_load:
2014; FAST:       # %bb.0:
2015; FAST-NEXT:    movq %rdx, %rcx
2016; FAST-NEXT:    movl (%rdi), %eax
2017; FAST-NEXT:    mull %esi
2018; FAST-NEXT:    seto %dl
2019; FAST-NEXT:    movl %eax, (%rcx)
2020; FAST-NEXT:    andb $1, %dl
2021; FAST-NEXT:    movl %edx, %eax
2022; FAST-NEXT:    retq
2023;
2024; WIN64-LABEL: umuloi32_load:
2025; WIN64:       # %bb.0:
2026; WIN64-NEXT:    movl %edx, %eax
2027; WIN64-NEXT:    mull (%rcx)
2028; WIN64-NEXT:    seto %cl
2029; WIN64-NEXT:    movl %eax, (%r8)
2030; WIN64-NEXT:    movl %ecx, %eax
2031; WIN64-NEXT:    retq
2032;
2033; WIN32-LABEL: umuloi32_load:
2034; WIN32:       # %bb.0:
2035; WIN32-NEXT:    pushl %esi
2036; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2037; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2038; WIN32-NEXT:    movl (%eax), %eax
2039; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
2040; WIN32-NEXT:    seto %cl
2041; WIN32-NEXT:    movl %eax, (%esi)
2042; WIN32-NEXT:    movl %ecx, %eax
2043; WIN32-NEXT:    popl %esi
2044; WIN32-NEXT:    retl
2045  %v1 = load i32, ptr %ptr1
2046  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  ; Element 0 of the result pair is the i32 product, element 1 the overflow bit.
2047  %val = extractvalue {i32, i1} %t, 0
2048  %obit = extractvalue {i32, i1} %t, 1
2049  store i32 %val, ptr %res
2050  ret i1 %obit
2051}
2052
; Unsigned i32 multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; Per the assertions below, every configuration multiplies straight from the
; pointer (mull with a register-indirect memory operand); the FAST run
; additionally masks the flag with andb $1.
2053define zeroext i1 @umuloi32_load2(i32 %v1, ptr %ptr2, ptr %res) {
2054; SDAG-LABEL: umuloi32_load2:
2055; SDAG:       # %bb.0:
2056; SDAG-NEXT:    movq %rdx, %rcx
2057; SDAG-NEXT:    movl %edi, %eax
2058; SDAG-NEXT:    mull (%rsi)
2059; SDAG-NEXT:    seto %dl
2060; SDAG-NEXT:    movl %eax, (%rcx)
2061; SDAG-NEXT:    movl %edx, %eax
2062; SDAG-NEXT:    retq
2063;
2064; FAST-LABEL: umuloi32_load2:
2065; FAST:       # %bb.0:
2066; FAST-NEXT:    movq %rdx, %rcx
2067; FAST-NEXT:    movl %edi, %eax
2068; FAST-NEXT:    mull (%rsi)
2069; FAST-NEXT:    seto %dl
2070; FAST-NEXT:    movl %eax, (%rcx)
2071; FAST-NEXT:    andb $1, %dl
2072; FAST-NEXT:    movl %edx, %eax
2073; FAST-NEXT:    retq
2074;
2075; WIN64-LABEL: umuloi32_load2:
2076; WIN64:       # %bb.0:
2077; WIN64-NEXT:    movl %ecx, %eax
2078; WIN64-NEXT:    mull (%rdx)
2079; WIN64-NEXT:    seto %cl
2080; WIN64-NEXT:    movl %eax, (%r8)
2081; WIN64-NEXT:    movl %ecx, %eax
2082; WIN64-NEXT:    retq
2083;
2084; WIN32-LABEL: umuloi32_load2:
2085; WIN32:       # %bb.0:
2086; WIN32-NEXT:    pushl %esi
2087; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2088; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2089; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2090; WIN32-NEXT:    mull (%ecx)
2091; WIN32-NEXT:    seto %cl
2092; WIN32-NEXT:    movl %eax, (%esi)
2093; WIN32-NEXT:    movl %ecx, %eax
2094; WIN32-NEXT:    popl %esi
2095; WIN32-NEXT:    retl
2096  %v2 = load i32, ptr %ptr2
2097  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  ; Element 0 of the result pair is the i32 product, element 1 the overflow bit.
2098  %val = extractvalue {i32, i1} %t, 0
2099  %obit = extractvalue {i32, i1} %t, 1
2100  store i32 %val, ptr %res
2101  ret i1 %obit
2102}
2103
; Unsigned i64 multiply-with-overflow whose first operand (%v1) is loaded from
; %ptr1. The product is stored to %res and the overflow bit is returned.
; Per the assertions below, on the 64-bit targets the SDAG and WIN64 runs use
; the pointer directly as the memory operand of mulq, while the FAST run loads
; with a separate movq and multiplies by the register argument (and masks the
; flag with andb $1). On WIN32 the two 32-bit halves of %v1 are loaded
; separately, the multiply is expanded into three mull instructions, and the
; overflow bit is assembled from setne/andb, seto and setb results or'ed
; together; the product is written back as two 32-bit stores.
2104define zeroext i1 @umuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
2105; SDAG-LABEL: umuloi64_load:
2106; SDAG:       # %bb.0:
2107; SDAG-NEXT:    movq %rdx, %rcx
2108; SDAG-NEXT:    movq %rsi, %rax
2109; SDAG-NEXT:    mulq (%rdi)
2110; SDAG-NEXT:    seto %dl
2111; SDAG-NEXT:    movq %rax, (%rcx)
2112; SDAG-NEXT:    movl %edx, %eax
2113; SDAG-NEXT:    retq
2114;
2115; FAST-LABEL: umuloi64_load:
2116; FAST:       # %bb.0:
2117; FAST-NEXT:    movq %rdx, %rcx
2118; FAST-NEXT:    movq (%rdi), %rax
2119; FAST-NEXT:    mulq %rsi
2120; FAST-NEXT:    seto %dl
2121; FAST-NEXT:    movq %rax, (%rcx)
2122; FAST-NEXT:    andb $1, %dl
2123; FAST-NEXT:    movl %edx, %eax
2124; FAST-NEXT:    retq
2125;
2126; WIN64-LABEL: umuloi64_load:
2127; WIN64:       # %bb.0:
2128; WIN64-NEXT:    movq %rdx, %rax
2129; WIN64-NEXT:    mulq (%rcx)
2130; WIN64-NEXT:    seto %cl
2131; WIN64-NEXT:    movq %rax, (%r8)
2132; WIN64-NEXT:    movl %ecx, %eax
2133; WIN64-NEXT:    retq
2134;
2135; WIN32-LABEL: umuloi64_load:
2136; WIN32:       # %bb.0:
2137; WIN32-NEXT:    pushl %ebp
2138; WIN32-NEXT:    pushl %ebx
2139; WIN32-NEXT:    pushl %edi
2140; WIN32-NEXT:    pushl %esi
2141; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2142; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2143; WIN32-NEXT:    movl (%eax), %ebp
2144; WIN32-NEXT:    movl 4(%eax), %eax
2145; WIN32-NEXT:    testl %esi, %esi
2146; WIN32-NEXT:    setne %dl
2147; WIN32-NEXT:    testl %eax, %eax
2148; WIN32-NEXT:    setne %cl
2149; WIN32-NEXT:    andb %dl, %cl
2150; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
2151; WIN32-NEXT:    movl %eax, %edi
2152; WIN32-NEXT:    seto %bl
2153; WIN32-NEXT:    movl %esi, %eax
2154; WIN32-NEXT:    mull %ebp
2155; WIN32-NEXT:    seto %ch
2156; WIN32-NEXT:    orb %bl, %ch
2157; WIN32-NEXT:    orb %cl, %ch
2158; WIN32-NEXT:    leal (%edi,%eax), %esi
2159; WIN32-NEXT:    movl %ebp, %eax
2160; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
2161; WIN32-NEXT:    addl %esi, %edx
2162; WIN32-NEXT:    setb %cl
2163; WIN32-NEXT:    orb %ch, %cl
2164; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2165; WIN32-NEXT:    movl %eax, (%esi)
2166; WIN32-NEXT:    movl %edx, 4(%esi)
2167; WIN32-NEXT:    movl %ecx, %eax
2168; WIN32-NEXT:    popl %esi
2169; WIN32-NEXT:    popl %edi
2170; WIN32-NEXT:    popl %ebx
2171; WIN32-NEXT:    popl %ebp
2172; WIN32-NEXT:    retl
2173  %v1 = load i64, ptr %ptr1
2174  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  ; Element 0 of the result pair is the i64 product, element 1 the overflow bit.
2175  %val = extractvalue {i64, i1} %t, 0
2176  %obit = extractvalue {i64, i1} %t, 1
2177  store i64 %val, ptr %res
2178  ret i1 %obit
2179}
2180
; Unsigned i64 multiply-with-overflow whose second operand (%v2) is loaded from
; %ptr2. The product is stored to %res and the overflow bit is returned.
; Per the assertions below, all three 64-bit configurations multiply straight
; from the pointer (mulq with a register-indirect memory operand); the FAST run
; additionally masks the flag with andb $1. On WIN32 the two 32-bit halves of
; %v2 are loaded separately, the multiply is expanded into three mull
; instructions, and the overflow bit is assembled from setne/andb, seto and
; setb results or'ed together; the product is written back as two 32-bit
; stores.
2181define zeroext i1 @umuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
2182; SDAG-LABEL: umuloi64_load2:
2183; SDAG:       # %bb.0:
2184; SDAG-NEXT:    movq %rdx, %rcx
2185; SDAG-NEXT:    movq %rdi, %rax
2186; SDAG-NEXT:    mulq (%rsi)
2187; SDAG-NEXT:    seto %dl
2188; SDAG-NEXT:    movq %rax, (%rcx)
2189; SDAG-NEXT:    movl %edx, %eax
2190; SDAG-NEXT:    retq
2191;
2192; FAST-LABEL: umuloi64_load2:
2193; FAST:       # %bb.0:
2194; FAST-NEXT:    movq %rdx, %rcx
2195; FAST-NEXT:    movq %rdi, %rax
2196; FAST-NEXT:    mulq (%rsi)
2197; FAST-NEXT:    seto %dl
2198; FAST-NEXT:    movq %rax, (%rcx)
2199; FAST-NEXT:    andb $1, %dl
2200; FAST-NEXT:    movl %edx, %eax
2201; FAST-NEXT:    retq
2202;
2203; WIN64-LABEL: umuloi64_load2:
2204; WIN64:       # %bb.0:
2205; WIN64-NEXT:    movq %rcx, %rax
2206; WIN64-NEXT:    mulq (%rdx)
2207; WIN64-NEXT:    seto %cl
2208; WIN64-NEXT:    movq %rax, (%r8)
2209; WIN64-NEXT:    movl %ecx, %eax
2210; WIN64-NEXT:    retq
2211;
2212; WIN32-LABEL: umuloi64_load2:
2213; WIN32:       # %bb.0:
2214; WIN32-NEXT:    pushl %ebp
2215; WIN32-NEXT:    pushl %ebx
2216; WIN32-NEXT:    pushl %edi
2217; WIN32-NEXT:    pushl %esi
2218; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2219; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2220; WIN32-NEXT:    movl (%ecx), %ebp
2221; WIN32-NEXT:    movl 4(%ecx), %esi
2222; WIN32-NEXT:    testl %eax, %eax
2223; WIN32-NEXT:    setne %dl
2224; WIN32-NEXT:    testl %esi, %esi
2225; WIN32-NEXT:    setne %cl
2226; WIN32-NEXT:    andb %dl, %cl
2227; WIN32-NEXT:    mull %ebp
2228; WIN32-NEXT:    movl %eax, %edi
2229; WIN32-NEXT:    seto %bl
2230; WIN32-NEXT:    movl %esi, %eax
2231; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
2232; WIN32-NEXT:    seto %ch
2233; WIN32-NEXT:    orb %bl, %ch
2234; WIN32-NEXT:    orb %cl, %ch
2235; WIN32-NEXT:    leal (%edi,%eax), %esi
2236; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2237; WIN32-NEXT:    mull %ebp
2238; WIN32-NEXT:    addl %esi, %edx
2239; WIN32-NEXT:    setb %cl
2240; WIN32-NEXT:    orb %ch, %cl
2241; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2242; WIN32-NEXT:    movl %eax, (%esi)
2243; WIN32-NEXT:    movl %edx, 4(%esi)
2244; WIN32-NEXT:    movl %ecx, %eax
2245; WIN32-NEXT:    popl %esi
2246; WIN32-NEXT:    popl %edi
2247; WIN32-NEXT:    popl %ebx
2248; WIN32-NEXT:    popl %ebp
2249; WIN32-NEXT:    retl
2250  %v2 = load i64, ptr %ptr2
2251  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  ; Element 0 of the result pair is the i64 product, element 1 the overflow bit.
2252  %val = extractvalue {i64, i1} %t, 0
2253  %obit = extractvalue {i64, i1} %t, 1
2254  store i64 %val, ptr %res
2255  ret i1 %obit
2256}
2257
; Declarations of the signed (smul) and unsigned (umul) multiply-with-overflow
; intrinsics for i8, i16, i32 and i64. Each returns a pair of the product and
; an i1 overflow flag.
2258declare {i8,  i1} @llvm.smul.with.overflow.i8 (i8,  i8 ) nounwind readnone
2259declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
2260declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
2261declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
2262declare {i8,  i1} @llvm.umul.with.overflow.i8 (i8,  i8 ) nounwind readnone
2263declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
2264declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
2265declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
2266
; Branch-weight metadata node with weights 0 and 2147483647. Its users are not
; in this part of the file; presumably it is attached to a conditional branch
; in an earlier test via !prof (TODO confirm).
2267!0 = !{!"branch_weights", i32 0, i32 2147483647}
2268