xref: /llvm-project/llvm/test/CodeGen/X86/midpoint-int.ll (revision 401d123a1fdcbbf4ae7a20178957b7e3a625c044)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
3; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
4
5; These test cases are inspired by C++2a std::midpoint().
6; See https://bugs.llvm.org/show_bug.cgi?id=40965
7
8; ---------------------------------------------------------------------------- ;
9; 32-bit width
10; ---------------------------------------------------------------------------- ;
11
12; Values come from regs
13
14define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; Signed i32 midpoint, both operands in registers: a1 + ((|a1 - a2| >> 1) * (a1 > a2 ? -1 : 1)).
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
15; X64-LABEL: scalar_i32_signed_reg_reg:
16; X64:       # %bb.0:
17; X64-NEXT:    xorl %ecx, %ecx
18; X64-NEXT:    movl %edi, %eax
19; X64-NEXT:    subl %esi, %eax
20; X64-NEXT:    setle %cl
21; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
22; X64-NEXT:    subl %edi, %esi
23; X64-NEXT:    cmovgel %esi, %eax
24; X64-NEXT:    shrl %eax
25; X64-NEXT:    imull %ecx, %eax
26; X64-NEXT:    addl %edi, %eax
27; X64-NEXT:    retq
28;
29; X86-LABEL: scalar_i32_signed_reg_reg:
30; X86:       # %bb.0:
31; X86-NEXT:    pushl %esi
32; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
33; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
34; X86-NEXT:    xorl %eax, %eax
35; X86-NEXT:    cmpl %esi, %ecx
36; X86-NEXT:    setle %al
37; X86-NEXT:    leal -1(%eax,%eax), %edx
38; X86-NEXT:    movl %ecx, %eax
39; X86-NEXT:    subl %esi, %eax
40; X86-NEXT:    jg .LBB0_2
41; X86-NEXT:  # %bb.1:
42; X86-NEXT:    subl %ecx, %esi
43; X86-NEXT:    movl %esi, %eax
44; X86-NEXT:  .LBB0_2:
45; X86-NEXT:    shrl %eax
46; X86-NEXT:    imull %edx, %eax
47; X86-NEXT:    addl %ecx, %eax
48; X86-NEXT:    popl %esi
49; X86-NEXT:    retl
; IR under test (the input to llc): select-based min/max midpoint pattern from PR40965.
50  %t3 = icmp sgt i32 %a1, %a2 ; signed
51  %t4 = select i1 %t3, i32 -1, i32 1
52  %t5 = select i1 %t3, i32 %a2, i32 %a1
53  %t6 = select i1 %t3, i32 %a1, i32 %a2
54  %t7 = sub i32 %t6, %t5
55  %t8 = lshr i32 %t7, 1
56  %t9 = mul nsw i32 %t8, %t4 ; signed
57  %a10 = add nsw i32 %t9, %a1 ; signed
58  ret i32 %a10
59}
60
61define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; Unsigned i32 midpoint, both operands in registers (icmp ugt; no nsw on mul/add).
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
62; X64-LABEL: scalar_i32_unsigned_reg_reg:
63; X64:       # %bb.0:
64; X64-NEXT:    xorl %ecx, %ecx
65; X64-NEXT:    cmpl %edi, %esi
66; X64-NEXT:    sbbl %ecx, %ecx
67; X64-NEXT:    movl %edi, %eax
68; X64-NEXT:    subl %esi, %eax
69; X64-NEXT:    subl %edi, %esi
70; X64-NEXT:    cmovael %esi, %eax
71; X64-NEXT:    orl $1, %ecx
72; X64-NEXT:    shrl %eax
73; X64-NEXT:    imull %ecx, %eax
74; X64-NEXT:    addl %edi, %eax
75; X64-NEXT:    retq
76;
77; X86-LABEL: scalar_i32_unsigned_reg_reg:
78; X86:       # %bb.0:
79; X86-NEXT:    pushl %edi
80; X86-NEXT:    pushl %esi
81; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
82; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
83; X86-NEXT:    xorl %edx, %edx
84; X86-NEXT:    movl %edi, %esi
85; X86-NEXT:    subl %ecx, %esi
86; X86-NEXT:    sbbl %edx, %edx
87; X86-NEXT:    orl $1, %edx
88; X86-NEXT:    movl %ecx, %eax
89; X86-NEXT:    subl %edi, %eax
90; X86-NEXT:    ja .LBB1_2
91; X86-NEXT:  # %bb.1:
92; X86-NEXT:    movl %esi, %eax
93; X86-NEXT:  .LBB1_2:
94; X86-NEXT:    shrl %eax
95; X86-NEXT:    imull %edx, %eax
96; X86-NEXT:    addl %ecx, %eax
97; X86-NEXT:    popl %esi
98; X86-NEXT:    popl %edi
99; X86-NEXT:    retl
; IR under test: same midpoint shape as the signed case, but with an unsigned compare.
100  %t3 = icmp ugt i32 %a1, %a2
101  %t4 = select i1 %t3, i32 -1, i32 1
102  %t5 = select i1 %t3, i32 %a2, i32 %a1
103  %t6 = select i1 %t3, i32 %a1, i32 %a2
104  %t7 = sub i32 %t6, %t5
105  %t8 = lshr i32 %t7, 1
106  %t9 = mul i32 %t8, %t4
107  %a10 = add i32 %t9, %a1
108  ret i32 %a10
109}
110
111; Values are loaded. Only check signed case.
112
113define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; Signed i32 midpoint where the first operand is loaded from memory; checks load folding into the pattern.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
114; X64-LABEL: scalar_i32_signed_mem_reg:
115; X64:       # %bb.0:
116; X64-NEXT:    movl (%rdi), %ecx
117; X64-NEXT:    xorl %edx, %edx
118; X64-NEXT:    movl %ecx, %eax
119; X64-NEXT:    subl %esi, %eax
120; X64-NEXT:    setle %dl
121; X64-NEXT:    leal -1(%rdx,%rdx), %edx
122; X64-NEXT:    subl %ecx, %esi
123; X64-NEXT:    cmovgel %esi, %eax
124; X64-NEXT:    shrl %eax
125; X64-NEXT:    imull %edx, %eax
126; X64-NEXT:    addl %ecx, %eax
127; X64-NEXT:    retq
128;
129; X86-LABEL: scalar_i32_signed_mem_reg:
130; X86:       # %bb.0:
131; X86-NEXT:    pushl %esi
132; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
133; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
134; X86-NEXT:    movl (%eax), %ecx
135; X86-NEXT:    xorl %eax, %eax
136; X86-NEXT:    cmpl %edx, %ecx
137; X86-NEXT:    setle %al
138; X86-NEXT:    leal -1(%eax,%eax), %esi
139; X86-NEXT:    movl %ecx, %eax
140; X86-NEXT:    subl %edx, %eax
141; X86-NEXT:    jg .LBB2_2
142; X86-NEXT:  # %bb.1:
143; X86-NEXT:    subl %ecx, %edx
144; X86-NEXT:    movl %edx, %eax
145; X86-NEXT:  .LBB2_2:
146; X86-NEXT:    shrl %eax
147; X86-NEXT:    imull %esi, %eax
148; X86-NEXT:    addl %ecx, %eax
149; X86-NEXT:    popl %esi
150; X86-NEXT:    retl
; IR under test: load %a1 first, then the same signed midpoint pattern.
151  %a1 = load i32, ptr %a1_addr
152  %t3 = icmp sgt i32 %a1, %a2 ; signed
153  %t4 = select i1 %t3, i32 -1, i32 1
154  %t5 = select i1 %t3, i32 %a2, i32 %a1
155  %t6 = select i1 %t3, i32 %a1, i32 %a2
156  %t7 = sub i32 %t6, %t5
157  %t8 = lshr i32 %t7, 1
158  %t9 = mul nsw i32 %t8, %t4 ; signed
159  %a10 = add nsw i32 %t9, %a1 ; signed
160  ret i32 %a10
161}
162
163define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; Signed i32 midpoint where the second operand is loaded from memory.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
164; X64-LABEL: scalar_i32_signed_reg_mem:
165; X64:       # %bb.0:
166; X64-NEXT:    movl (%rsi), %eax
167; X64-NEXT:    xorl %ecx, %ecx
168; X64-NEXT:    movl %edi, %edx
169; X64-NEXT:    subl %eax, %edx
170; X64-NEXT:    setle %cl
171; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
172; X64-NEXT:    subl %edi, %eax
173; X64-NEXT:    cmovll %edx, %eax
174; X64-NEXT:    shrl %eax
175; X64-NEXT:    imull %ecx, %eax
176; X64-NEXT:    addl %edi, %eax
177; X64-NEXT:    retq
178;
179; X86-LABEL: scalar_i32_signed_reg_mem:
180; X86:       # %bb.0:
181; X86-NEXT:    pushl %esi
182; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
183; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
184; X86-NEXT:    movl (%eax), %esi
185; X86-NEXT:    xorl %eax, %eax
186; X86-NEXT:    cmpl %esi, %ecx
187; X86-NEXT:    setle %al
188; X86-NEXT:    leal -1(%eax,%eax), %edx
189; X86-NEXT:    movl %ecx, %eax
190; X86-NEXT:    subl %esi, %eax
191; X86-NEXT:    jg .LBB3_2
192; X86-NEXT:  # %bb.1:
193; X86-NEXT:    subl %ecx, %esi
194; X86-NEXT:    movl %esi, %eax
195; X86-NEXT:  .LBB3_2:
196; X86-NEXT:    shrl %eax
197; X86-NEXT:    imull %edx, %eax
198; X86-NEXT:    addl %ecx, %eax
199; X86-NEXT:    popl %esi
200; X86-NEXT:    retl
; IR under test: load %a2 first, then the same signed midpoint pattern.
201  %a2 = load i32, ptr %a2_addr
202  %t3 = icmp sgt i32 %a1, %a2 ; signed
203  %t4 = select i1 %t3, i32 -1, i32 1
204  %t5 = select i1 %t3, i32 %a2, i32 %a1
205  %t6 = select i1 %t3, i32 %a1, i32 %a2
206  %t7 = sub i32 %t6, %t5
207  %t8 = lshr i32 %t7, 1
208  %t9 = mul nsw i32 %t8, %t4 ; signed
209  %a10 = add nsw i32 %t9, %a1 ; signed
210  ret i32 %a10
211}
212
213define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; Signed i32 midpoint where both operands are loaded from memory.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
214; X64-LABEL: scalar_i32_signed_mem_mem:
215; X64:       # %bb.0:
216; X64-NEXT:    movl (%rdi), %ecx
217; X64-NEXT:    movl (%rsi), %eax
218; X64-NEXT:    xorl %edx, %edx
219; X64-NEXT:    movl %ecx, %esi
220; X64-NEXT:    subl %eax, %esi
221; X64-NEXT:    setle %dl
222; X64-NEXT:    leal -1(%rdx,%rdx), %edx
223; X64-NEXT:    subl %ecx, %eax
224; X64-NEXT:    cmovll %esi, %eax
225; X64-NEXT:    shrl %eax
226; X64-NEXT:    imull %edx, %eax
227; X64-NEXT:    addl %ecx, %eax
228; X64-NEXT:    retq
229;
230; X86-LABEL: scalar_i32_signed_mem_mem:
231; X86:       # %bb.0:
232; X86-NEXT:    pushl %esi
233; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
234; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
235; X86-NEXT:    movl (%ecx), %ecx
236; X86-NEXT:    movl (%eax), %esi
237; X86-NEXT:    xorl %eax, %eax
238; X86-NEXT:    cmpl %esi, %ecx
239; X86-NEXT:    setle %al
240; X86-NEXT:    leal -1(%eax,%eax), %edx
241; X86-NEXT:    movl %ecx, %eax
242; X86-NEXT:    subl %esi, %eax
243; X86-NEXT:    jg .LBB4_2
244; X86-NEXT:  # %bb.1:
245; X86-NEXT:    subl %ecx, %esi
246; X86-NEXT:    movl %esi, %eax
247; X86-NEXT:  .LBB4_2:
248; X86-NEXT:    shrl %eax
249; X86-NEXT:    imull %edx, %eax
250; X86-NEXT:    addl %ecx, %eax
251; X86-NEXT:    popl %esi
252; X86-NEXT:    retl
; IR under test: load both operands, then the same signed midpoint pattern.
253  %a1 = load i32, ptr %a1_addr
254  %a2 = load i32, ptr %a2_addr
255  %t3 = icmp sgt i32 %a1, %a2 ; signed
256  %t4 = select i1 %t3, i32 -1, i32 1
257  %t5 = select i1 %t3, i32 %a2, i32 %a1
258  %t6 = select i1 %t3, i32 %a1, i32 %a2
259  %t7 = sub i32 %t6, %t5
260  %t8 = lshr i32 %t7, 1
261  %t9 = mul nsw i32 %t8, %t4 ; signed
262  %a10 = add nsw i32 %t9, %a1 ; signed
263  ret i32 %a10
264}
265
266; ---------------------------------------------------------------------------- ;
267; 64-bit width
268; ---------------------------------------------------------------------------- ;
269
270; Values come from regs
271
272define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; Signed i64 midpoint, both operands in registers; on 32-bit x86 this expands to paired cmp/sbb and a 64-bit multiply sequence.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
273; X64-LABEL: scalar_i64_signed_reg_reg:
274; X64:       # %bb.0:
275; X64-NEXT:    xorl %ecx, %ecx
276; X64-NEXT:    movq %rdi, %rax
277; X64-NEXT:    subq %rsi, %rax
278; X64-NEXT:    setle %cl
279; X64-NEXT:    leaq -1(%rcx,%rcx), %rcx
280; X64-NEXT:    subq %rdi, %rsi
281; X64-NEXT:    cmovgeq %rsi, %rax
282; X64-NEXT:    shrq %rax
283; X64-NEXT:    imulq %rcx, %rax
284; X64-NEXT:    addq %rdi, %rax
285; X64-NEXT:    retq
286;
287; X86-LABEL: scalar_i64_signed_reg_reg:
288; X86:       # %bb.0:
289; X86-NEXT:    pushl %ebp
290; X86-NEXT:    pushl %ebx
291; X86-NEXT:    pushl %edi
292; X86-NEXT:    pushl %esi
293; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
294; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
295; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
296; X86-NEXT:    cmpl %esi, %edx
297; X86-NEXT:    movl %ecx, %eax
298; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
299; X86-NEXT:    setl %al
300; X86-NEXT:    movzbl %al, %edi
301; X86-NEXT:    negl %edi
302; X86-NEXT:    movl %edi, %ebx
303; X86-NEXT:    orl $1, %ebx
304; X86-NEXT:    movl %esi, %eax
305; X86-NEXT:    subl %edx, %eax
306; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
307; X86-NEXT:    sbbl %ecx, %ebp
308; X86-NEXT:    subl %esi, %edx
309; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
310; X86-NEXT:    jl .LBB5_2
311; X86-NEXT:  # %bb.1:
312; X86-NEXT:    movl %edx, %eax
313; X86-NEXT:    movl %ecx, %ebp
314; X86-NEXT:  .LBB5_2:
315; X86-NEXT:    shrdl $1, %ebp, %eax
316; X86-NEXT:    shrl %ebp
317; X86-NEXT:    imull %eax, %edi
318; X86-NEXT:    mull %ebx
319; X86-NEXT:    addl %edi, %edx
320; X86-NEXT:    imull %ebx, %ebp
321; X86-NEXT:    addl %ebp, %edx
322; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
323; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
324; X86-NEXT:    popl %esi
325; X86-NEXT:    popl %edi
326; X86-NEXT:    popl %ebx
327; X86-NEXT:    popl %ebp
328; X86-NEXT:    retl
; IR under test: 64-bit version of the select-based signed midpoint pattern.
329  %t3 = icmp sgt i64 %a1, %a2 ; signed
330  %t4 = select i1 %t3, i64 -1, i64 1
331  %t5 = select i1 %t3, i64 %a2, i64 %a1
332  %t6 = select i1 %t3, i64 %a1, i64 %a2
333  %t7 = sub i64 %t6, %t5
334  %t8 = lshr i64 %t7, 1
335  %t9 = mul nsw i64 %t8, %t4 ; signed
336  %a10 = add nsw i64 %t9, %a1 ; signed
337  ret i64 %a10
338}
339
340define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; Unsigned i64 midpoint, both operands in registers (icmp ugt; no nsw on mul/add).
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
341; X64-LABEL: scalar_i64_unsigned_reg_reg:
342; X64:       # %bb.0:
343; X64-NEXT:    xorl %ecx, %ecx
344; X64-NEXT:    cmpq %rdi, %rsi
345; X64-NEXT:    sbbq %rcx, %rcx
346; X64-NEXT:    movq %rdi, %rax
347; X64-NEXT:    subq %rsi, %rax
348; X64-NEXT:    subq %rdi, %rsi
349; X64-NEXT:    cmovaeq %rsi, %rax
350; X64-NEXT:    orq $1, %rcx
351; X64-NEXT:    shrq %rax
352; X64-NEXT:    imulq %rcx, %rax
353; X64-NEXT:    addq %rdi, %rax
354; X64-NEXT:    retq
355;
356; X86-LABEL: scalar_i64_unsigned_reg_reg:
357; X86:       # %bb.0:
358; X86-NEXT:    pushl %ebp
359; X86-NEXT:    pushl %ebx
360; X86-NEXT:    pushl %edi
361; X86-NEXT:    pushl %esi
362; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
363; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
364; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
365; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
366; X86-NEXT:    xorl %edx, %edx
367; X86-NEXT:    cmpl %eax, %ebp
368; X86-NEXT:    sbbl %ecx, %esi
369; X86-NEXT:    movl $0, %ebx
370; X86-NEXT:    sbbl %ebx, %ebx
371; X86-NEXT:    movl %ebx, %edi
372; X86-NEXT:    orl $1, %edi
373; X86-NEXT:    movl %eax, %esi
374; X86-NEXT:    subl %ebp, %esi
375; X86-NEXT:    movl %ecx, %eax
376; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
377; X86-NEXT:    sbbl %edx, %edx
378; X86-NEXT:    xorl %edx, %eax
379; X86-NEXT:    xorl %edx, %esi
380; X86-NEXT:    subl %edx, %esi
381; X86-NEXT:    sbbl %edx, %eax
382; X86-NEXT:    movl %eax, %ebp
383; X86-NEXT:    shldl $31, %esi, %eax
384; X86-NEXT:    imull %eax, %ebx
385; X86-NEXT:    mull %edi
386; X86-NEXT:    addl %ebx, %edx
387; X86-NEXT:    shrl %ebp
388; X86-NEXT:    imull %edi, %ebp
389; X86-NEXT:    addl %ebp, %edx
390; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
391; X86-NEXT:    adcl %ecx, %edx
392; X86-NEXT:    popl %esi
393; X86-NEXT:    popl %edi
394; X86-NEXT:    popl %ebx
395; X86-NEXT:    popl %ebp
396; X86-NEXT:    retl
; IR under test: 64-bit midpoint with unsigned compare.
397  %t3 = icmp ugt i64 %a1, %a2
398  %t4 = select i1 %t3, i64 -1, i64 1
399  %t5 = select i1 %t3, i64 %a2, i64 %a1
400  %t6 = select i1 %t3, i64 %a1, i64 %a2
401  %t7 = sub i64 %t6, %t5
402  %t8 = lshr i64 %t7, 1
403  %t9 = mul i64 %t8, %t4
404  %a10 = add i64 %t9, %a1
405  ret i64 %a10
406}
407
408; Values are loaded. Only check signed case.
409
410define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; Signed i64 midpoint with the first operand loaded from memory; 32-bit lowering needs stack spills.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
411; X64-LABEL: scalar_i64_signed_mem_reg:
412; X64:       # %bb.0:
413; X64-NEXT:    movq (%rdi), %rcx
414; X64-NEXT:    xorl %edx, %edx
415; X64-NEXT:    movq %rcx, %rax
416; X64-NEXT:    subq %rsi, %rax
417; X64-NEXT:    setle %dl
418; X64-NEXT:    leaq -1(%rdx,%rdx), %rdx
419; X64-NEXT:    subq %rcx, %rsi
420; X64-NEXT:    cmovgeq %rsi, %rax
421; X64-NEXT:    shrq %rax
422; X64-NEXT:    imulq %rdx, %rax
423; X64-NEXT:    addq %rcx, %rax
424; X64-NEXT:    retq
425;
426; X86-LABEL: scalar_i64_signed_mem_reg:
427; X86:       # %bb.0:
428; X86-NEXT:    pushl %ebp
429; X86-NEXT:    pushl %ebx
430; X86-NEXT:    pushl %edi
431; X86-NEXT:    pushl %esi
432; X86-NEXT:    subl $12, %esp
433; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
434; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
435; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
436; X86-NEXT:    movl (%eax), %ebx
437; X86-NEXT:    movl 4(%eax), %esi
438; X86-NEXT:    cmpl %ebx, %edx
439; X86-NEXT:    movl %ecx, %eax
440; X86-NEXT:    sbbl %esi, %eax
441; X86-NEXT:    setl %al
442; X86-NEXT:    movzbl %al, %edi
443; X86-NEXT:    negl %edi
444; X86-NEXT:    movl %edi, %eax
445; X86-NEXT:    orl $1, %eax
446; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
447; X86-NEXT:    movl %ebx, %eax
448; X86-NEXT:    subl %edx, %eax
449; X86-NEXT:    movl %esi, %ebp
450; X86-NEXT:    sbbl %ecx, %ebp
451; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
452; X86-NEXT:    subl %ebx, %edx
453; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
454; X86-NEXT:    sbbl %esi, %ecx
455; X86-NEXT:    jl .LBB7_2
456; X86-NEXT:  # %bb.1:
457; X86-NEXT:    movl %edx, %eax
458; X86-NEXT:    movl %ecx, %ebp
459; X86-NEXT:  .LBB7_2:
460; X86-NEXT:    shrdl $1, %ebp, %eax
461; X86-NEXT:    shrl %ebp
462; X86-NEXT:    imull %eax, %edi
463; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
464; X86-NEXT:    mull %ecx
465; X86-NEXT:    addl %edi, %edx
466; X86-NEXT:    imull %ecx, %ebp
467; X86-NEXT:    addl %ebp, %edx
468; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
469; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
470; X86-NEXT:    addl $12, %esp
471; X86-NEXT:    popl %esi
472; X86-NEXT:    popl %edi
473; X86-NEXT:    popl %ebx
474; X86-NEXT:    popl %ebp
475; X86-NEXT:    retl
; IR under test: load %a1, then the 64-bit signed midpoint pattern.
476  %a1 = load i64, ptr %a1_addr
477  %t3 = icmp sgt i64 %a1, %a2 ; signed
478  %t4 = select i1 %t3, i64 -1, i64 1
479  %t5 = select i1 %t3, i64 %a2, i64 %a1
480  %t6 = select i1 %t3, i64 %a1, i64 %a2
481  %t7 = sub i64 %t6, %t5
482  %t8 = lshr i64 %t7, 1
483  %t9 = mul nsw i64 %t8, %t4 ; signed
484  %a10 = add nsw i64 %t9, %a1 ; signed
485  ret i64 %a10
486}
487
488define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; Signed i64 midpoint with the second operand loaded from memory.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
489; X64-LABEL: scalar_i64_signed_reg_mem:
490; X64:       # %bb.0:
491; X64-NEXT:    movq (%rsi), %rax
492; X64-NEXT:    xorl %ecx, %ecx
493; X64-NEXT:    movq %rdi, %rdx
494; X64-NEXT:    subq %rax, %rdx
495; X64-NEXT:    setle %cl
496; X64-NEXT:    leaq -1(%rcx,%rcx), %rcx
497; X64-NEXT:    subq %rdi, %rax
498; X64-NEXT:    cmovlq %rdx, %rax
499; X64-NEXT:    shrq %rax
500; X64-NEXT:    imulq %rcx, %rax
501; X64-NEXT:    addq %rdi, %rax
502; X64-NEXT:    retq
503;
504; X86-LABEL: scalar_i64_signed_reg_mem:
505; X86:       # %bb.0:
506; X86-NEXT:    pushl %ebp
507; X86-NEXT:    pushl %ebx
508; X86-NEXT:    pushl %edi
509; X86-NEXT:    pushl %esi
510; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
511; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
512; X86-NEXT:    movl (%eax), %edx
513; X86-NEXT:    movl 4(%eax), %ecx
514; X86-NEXT:    cmpl %esi, %edx
515; X86-NEXT:    movl %ecx, %eax
516; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
517; X86-NEXT:    setl %al
518; X86-NEXT:    movzbl %al, %edi
519; X86-NEXT:    negl %edi
520; X86-NEXT:    movl %edi, %ebx
521; X86-NEXT:    orl $1, %ebx
522; X86-NEXT:    movl %esi, %eax
523; X86-NEXT:    subl %edx, %eax
524; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
525; X86-NEXT:    sbbl %ecx, %ebp
526; X86-NEXT:    subl %esi, %edx
527; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
528; X86-NEXT:    jl .LBB8_2
529; X86-NEXT:  # %bb.1:
530; X86-NEXT:    movl %edx, %eax
531; X86-NEXT:    movl %ecx, %ebp
532; X86-NEXT:  .LBB8_2:
533; X86-NEXT:    shrdl $1, %ebp, %eax
534; X86-NEXT:    shrl %ebp
535; X86-NEXT:    imull %eax, %edi
536; X86-NEXT:    mull %ebx
537; X86-NEXT:    addl %edi, %edx
538; X86-NEXT:    imull %ebx, %ebp
539; X86-NEXT:    addl %ebp, %edx
540; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
541; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
542; X86-NEXT:    popl %esi
543; X86-NEXT:    popl %edi
544; X86-NEXT:    popl %ebx
545; X86-NEXT:    popl %ebp
546; X86-NEXT:    retl
; IR under test: load %a2, then the 64-bit signed midpoint pattern.
547  %a2 = load i64, ptr %a2_addr
548  %t3 = icmp sgt i64 %a1, %a2 ; signed
549  %t4 = select i1 %t3, i64 -1, i64 1
550  %t5 = select i1 %t3, i64 %a2, i64 %a1
551  %t6 = select i1 %t3, i64 %a1, i64 %a2
552  %t7 = sub i64 %t6, %t5
553  %t8 = lshr i64 %t7, 1
554  %t9 = mul nsw i64 %t8, %t4 ; signed
555  %a10 = add nsw i64 %t9, %a1 ; signed
556  ret i64 %a10
557}
558
559define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; Signed i64 midpoint with both operands loaded from memory; 32-bit lowering needs stack spills.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
560; X64-LABEL: scalar_i64_signed_mem_mem:
561; X64:       # %bb.0:
562; X64-NEXT:    movq (%rdi), %rcx
563; X64-NEXT:    movq (%rsi), %rax
564; X64-NEXT:    xorl %edx, %edx
565; X64-NEXT:    movq %rcx, %rsi
566; X64-NEXT:    subq %rax, %rsi
567; X64-NEXT:    setle %dl
568; X64-NEXT:    leaq -1(%rdx,%rdx), %rdx
569; X64-NEXT:    subq %rcx, %rax
570; X64-NEXT:    cmovlq %rsi, %rax
571; X64-NEXT:    shrq %rax
572; X64-NEXT:    imulq %rdx, %rax
573; X64-NEXT:    addq %rcx, %rax
574; X64-NEXT:    retq
575;
576; X86-LABEL: scalar_i64_signed_mem_mem:
577; X86:       # %bb.0:
578; X86-NEXT:    pushl %ebp
579; X86-NEXT:    pushl %ebx
580; X86-NEXT:    pushl %edi
581; X86-NEXT:    pushl %esi
582; X86-NEXT:    subl $12, %esp
583; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
584; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
585; X86-NEXT:    movl (%ecx), %ebx
586; X86-NEXT:    movl 4(%ecx), %esi
587; X86-NEXT:    movl (%eax), %edx
588; X86-NEXT:    movl 4(%eax), %ecx
589; X86-NEXT:    cmpl %ebx, %edx
590; X86-NEXT:    movl %ecx, %eax
591; X86-NEXT:    sbbl %esi, %eax
592; X86-NEXT:    setl %al
593; X86-NEXT:    movzbl %al, %edi
594; X86-NEXT:    negl %edi
595; X86-NEXT:    movl %edi, %eax
596; X86-NEXT:    orl $1, %eax
597; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
598; X86-NEXT:    movl %ebx, %eax
599; X86-NEXT:    subl %edx, %eax
600; X86-NEXT:    movl %esi, %ebp
601; X86-NEXT:    sbbl %ecx, %ebp
602; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
603; X86-NEXT:    subl %ebx, %edx
604; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
605; X86-NEXT:    sbbl %esi, %ecx
606; X86-NEXT:    jl .LBB9_2
607; X86-NEXT:  # %bb.1:
608; X86-NEXT:    movl %edx, %eax
609; X86-NEXT:    movl %ecx, %ebp
610; X86-NEXT:  .LBB9_2:
611; X86-NEXT:    shrdl $1, %ebp, %eax
612; X86-NEXT:    shrl %ebp
613; X86-NEXT:    imull %eax, %edi
614; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
615; X86-NEXT:    mull %ecx
616; X86-NEXT:    addl %edi, %edx
617; X86-NEXT:    imull %ecx, %ebp
618; X86-NEXT:    addl %ebp, %edx
619; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
620; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
621; X86-NEXT:    addl $12, %esp
622; X86-NEXT:    popl %esi
623; X86-NEXT:    popl %edi
624; X86-NEXT:    popl %ebx
625; X86-NEXT:    popl %ebp
626; X86-NEXT:    retl
; IR under test: load both operands, then the 64-bit signed midpoint pattern.
627  %a1 = load i64, ptr %a1_addr
628  %a2 = load i64, ptr %a2_addr
629  %t3 = icmp sgt i64 %a1, %a2 ; signed
630  %t4 = select i1 %t3, i64 -1, i64 1
631  %t5 = select i1 %t3, i64 %a2, i64 %a1
632  %t6 = select i1 %t3, i64 %a1, i64 %a2
633  %t7 = sub i64 %t6, %t5
634  %t8 = lshr i64 %t7, 1
635  %t9 = mul nsw i64 %t8, %t4 ; signed
636  %a10 = add nsw i64 %t9, %a1 ; signed
637  ret i64 %a10
638}
639
640; ---------------------------------------------------------------------------- ;
641; 16-bit width
642; ---------------------------------------------------------------------------- ;
643
644; Values come from regs
645
646define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; Signed i16 midpoint, both operands in registers; lowering widens to 32-bit ops with sign/zero extensions.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
647; X64-LABEL: scalar_i16_signed_reg_reg:
648; X64:       # %bb.0:
649; X64-NEXT:    xorl %eax, %eax
650; X64-NEXT:    cmpw %si, %di
651; X64-NEXT:    setle %al
652; X64-NEXT:    leal -1(%rax,%rax), %ecx
653; X64-NEXT:    movl %edi, %eax
654; X64-NEXT:    subl %esi, %eax
655; X64-NEXT:    movswl %di, %edx
656; X64-NEXT:    movswl %si, %esi
657; X64-NEXT:    subl %edx, %esi
658; X64-NEXT:    cmovll %eax, %esi
659; X64-NEXT:    movzwl %si, %eax
660; X64-NEXT:    shrl %eax
661; X64-NEXT:    imull %ecx, %eax
662; X64-NEXT:    addl %edi, %eax
663; X64-NEXT:    # kill: def $ax killed $ax killed $eax
664; X64-NEXT:    retq
665;
666; X86-LABEL: scalar_i16_signed_reg_reg:
667; X86:       # %bb.0:
668; X86-NEXT:    pushl %ebx
669; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
670; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
671; X86-NEXT:    movl %ecx, %eax
672; X86-NEXT:    subw %dx, %ax
673; X86-NEXT:    jg .LBB10_2
674; X86-NEXT:  # %bb.1:
675; X86-NEXT:    negl %eax
676; X86-NEXT:  .LBB10_2:
677; X86-NEXT:    xorl %ebx, %ebx
678; X86-NEXT:    cmpw %dx, %cx
679; X86-NEXT:    setle %bl
680; X86-NEXT:    leal -1(%ebx,%ebx), %edx
681; X86-NEXT:    movzwl %ax, %eax
682; X86-NEXT:    shrl %eax
683; X86-NEXT:    imull %edx, %eax
684; X86-NEXT:    addl %ecx, %eax
685; X86-NEXT:    # kill: def $ax killed $ax killed $eax
686; X86-NEXT:    popl %ebx
687; X86-NEXT:    retl
; IR under test: 16-bit version of the select-based signed midpoint pattern.
688  %t3 = icmp sgt i16 %a1, %a2 ; signed
689  %t4 = select i1 %t3, i16 -1, i16 1
690  %t5 = select i1 %t3, i16 %a2, i16 %a1
691  %t6 = select i1 %t3, i16 %a1, i16 %a2
692  %t7 = sub i16 %t6, %t5
693  %t8 = lshr i16 %t7, 1
694  %t9 = mul nsw i16 %t8, %t4 ; signed
695  %a10 = add nsw i16 %t9, %a1 ; signed
696  ret i16 %a10
697}
698
699define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; Unsigned i16 midpoint, both operands in registers (icmp ugt; no nsw on mul/add).
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
700; X64-LABEL: scalar_i16_unsigned_reg_reg:
701; X64:       # %bb.0:
702; X64-NEXT:    xorl %ecx, %ecx
703; X64-NEXT:    cmpw %di, %si
704; X64-NEXT:    sbbl %ecx, %ecx
705; X64-NEXT:    orl $1, %ecx
706; X64-NEXT:    movl %edi, %eax
707; X64-NEXT:    subl %esi, %eax
708; X64-NEXT:    movzwl %di, %edx
709; X64-NEXT:    movzwl %si, %esi
710; X64-NEXT:    subl %edx, %esi
711; X64-NEXT:    cmovbl %eax, %esi
712; X64-NEXT:    movzwl %si, %eax
713; X64-NEXT:    shrl %eax
714; X64-NEXT:    imull %ecx, %eax
715; X64-NEXT:    addl %edi, %eax
716; X64-NEXT:    # kill: def $ax killed $ax killed $eax
717; X64-NEXT:    retq
718;
719; X86-LABEL: scalar_i16_unsigned_reg_reg:
720; X86:       # %bb.0:
721; X86-NEXT:    pushl %esi
722; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
723; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
724; X86-NEXT:    movl %ecx, %eax
725; X86-NEXT:    subw %dx, %ax
726; X86-NEXT:    ja .LBB11_2
727; X86-NEXT:  # %bb.1:
728; X86-NEXT:    negl %eax
729; X86-NEXT:  .LBB11_2:
730; X86-NEXT:    xorl %esi, %esi
731; X86-NEXT:    cmpw %cx, %dx
732; X86-NEXT:    sbbl %esi, %esi
733; X86-NEXT:    orl $1, %esi
734; X86-NEXT:    movzwl %ax, %eax
735; X86-NEXT:    shrl %eax
736; X86-NEXT:    imull %esi, %eax
737; X86-NEXT:    addl %ecx, %eax
738; X86-NEXT:    # kill: def $ax killed $ax killed $eax
739; X86-NEXT:    popl %esi
740; X86-NEXT:    retl
; IR under test: 16-bit midpoint with unsigned compare.
741  %t3 = icmp ugt i16 %a1, %a2
742  %t4 = select i1 %t3, i16 -1, i16 1
743  %t5 = select i1 %t3, i16 %a2, i16 %a1
744  %t6 = select i1 %t3, i16 %a1, i16 %a2
745  %t7 = sub i16 %t6, %t5
746  %t8 = lshr i16 %t7, 1
747  %t9 = mul i16 %t8, %t4
748  %a10 = add i16 %t9, %a1
749  ret i16 %a10
750}
751
752; Values are loaded. Only check signed case.
753
754define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; Signed i16 midpoint with the first operand loaded (sign-extending load) from memory.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
755; X64-LABEL: scalar_i16_signed_mem_reg:
756; X64:       # %bb.0:
757; X64-NEXT:    movswl (%rdi), %ecx
758; X64-NEXT:    xorl %eax, %eax
759; X64-NEXT:    cmpw %si, %cx
760; X64-NEXT:    setle %al
761; X64-NEXT:    leal -1(%rax,%rax), %edx
762; X64-NEXT:    movl %ecx, %eax
763; X64-NEXT:    subl %esi, %eax
764; X64-NEXT:    movswl %si, %esi
765; X64-NEXT:    subl %ecx, %esi
766; X64-NEXT:    cmovll %eax, %esi
767; X64-NEXT:    movzwl %si, %eax
768; X64-NEXT:    shrl %eax
769; X64-NEXT:    imull %edx, %eax
770; X64-NEXT:    addl %ecx, %eax
771; X64-NEXT:    # kill: def $ax killed $ax killed $eax
772; X64-NEXT:    retq
773;
774; X86-LABEL: scalar_i16_signed_mem_reg:
775; X86:       # %bb.0:
776; X86-NEXT:    pushl %ebx
777; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
778; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
779; X86-NEXT:    movzwl (%eax), %ecx
780; X86-NEXT:    movl %ecx, %eax
781; X86-NEXT:    subw %dx, %ax
782; X86-NEXT:    jg .LBB12_2
783; X86-NEXT:  # %bb.1:
784; X86-NEXT:    negl %eax
785; X86-NEXT:  .LBB12_2:
786; X86-NEXT:    xorl %ebx, %ebx
787; X86-NEXT:    cmpw %dx, %cx
788; X86-NEXT:    setle %bl
789; X86-NEXT:    leal -1(%ebx,%ebx), %edx
790; X86-NEXT:    movzwl %ax, %eax
791; X86-NEXT:    shrl %eax
792; X86-NEXT:    imull %edx, %eax
793; X86-NEXT:    addl %ecx, %eax
794; X86-NEXT:    # kill: def $ax killed $ax killed $eax
795; X86-NEXT:    popl %ebx
796; X86-NEXT:    retl
; IR under test: load %a1, then the 16-bit signed midpoint pattern.
797  %a1 = load i16, ptr %a1_addr
798  %t3 = icmp sgt i16 %a1, %a2 ; signed
799  %t4 = select i1 %t3, i16 -1, i16 1
800  %t5 = select i1 %t3, i16 %a2, i16 %a1
801  %t6 = select i1 %t3, i16 %a1, i16 %a2
802  %t7 = sub i16 %t6, %t5
803  %t8 = lshr i16 %t7, 1
804  %t9 = mul nsw i16 %t8, %t4 ; signed
805  %a10 = add nsw i16 %t9, %a1 ; signed
806  ret i16 %a10
807}
808
809define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; Signed i16 midpoint with the second operand loaded from memory.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
810; X64-LABEL: scalar_i16_signed_reg_mem:
811; X64:       # %bb.0:
812; X64-NEXT:    movswl (%rsi), %eax
813; X64-NEXT:    xorl %ecx, %ecx
814; X64-NEXT:    cmpw %ax, %di
815; X64-NEXT:    setle %cl
816; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
817; X64-NEXT:    movl %edi, %edx
818; X64-NEXT:    subl %eax, %edx
819; X64-NEXT:    movswl %di, %esi
820; X64-NEXT:    subl %esi, %eax
821; X64-NEXT:    cmovll %edx, %eax
822; X64-NEXT:    movzwl %ax, %eax
823; X64-NEXT:    shrl %eax
824; X64-NEXT:    imull %ecx, %eax
825; X64-NEXT:    addl %edi, %eax
826; X64-NEXT:    # kill: def $ax killed $ax killed $eax
827; X64-NEXT:    retq
828;
829; X86-LABEL: scalar_i16_signed_reg_mem:
830; X86:       # %bb.0:
831; X86-NEXT:    pushl %ebx
832; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
833; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
834; X86-NEXT:    movzwl (%eax), %edx
835; X86-NEXT:    movl %ecx, %eax
836; X86-NEXT:    subw %dx, %ax
837; X86-NEXT:    jg .LBB13_2
838; X86-NEXT:  # %bb.1:
839; X86-NEXT:    negl %eax
840; X86-NEXT:  .LBB13_2:
841; X86-NEXT:    xorl %ebx, %ebx
842; X86-NEXT:    cmpw %dx, %cx
843; X86-NEXT:    setle %bl
844; X86-NEXT:    leal -1(%ebx,%ebx), %edx
845; X86-NEXT:    movzwl %ax, %eax
846; X86-NEXT:    shrl %eax
847; X86-NEXT:    imull %edx, %eax
848; X86-NEXT:    addl %ecx, %eax
849; X86-NEXT:    # kill: def $ax killed $ax killed $eax
850; X86-NEXT:    popl %ebx
851; X86-NEXT:    retl
; IR under test: load %a2, then the 16-bit signed midpoint pattern.
852  %a2 = load i16, ptr %a2_addr
853  %t3 = icmp sgt i16 %a1, %a2 ; signed
854  %t4 = select i1 %t3, i16 -1, i16 1
855  %t5 = select i1 %t3, i16 %a2, i16 %a1
856  %t6 = select i1 %t3, i16 %a1, i16 %a2
857  %t7 = sub i16 %t6, %t5
858  %t8 = lshr i16 %t7, 1
859  %t9 = mul nsw i16 %t8, %t4 ; signed
860  %a10 = add nsw i16 %t9, %a1 ; signed
861  ret i16 %a10
862}
863
864define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; Signed i16 midpoint with both operands loaded from memory.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
865; X64-LABEL: scalar_i16_signed_mem_mem:
866; X64:       # %bb.0:
867; X64-NEXT:    movswl (%rdi), %ecx
868; X64-NEXT:    movswl (%rsi), %eax
869; X64-NEXT:    xorl %edx, %edx
870; X64-NEXT:    cmpw %ax, %cx
871; X64-NEXT:    setle %dl
872; X64-NEXT:    leal -1(%rdx,%rdx), %edx
873; X64-NEXT:    movl %ecx, %esi
874; X64-NEXT:    subl %eax, %esi
875; X64-NEXT:    subl %ecx, %eax
876; X64-NEXT:    cmovll %esi, %eax
877; X64-NEXT:    movzwl %ax, %eax
878; X64-NEXT:    shrl %eax
879; X64-NEXT:    imull %edx, %eax
880; X64-NEXT:    addl %ecx, %eax
881; X64-NEXT:    # kill: def $ax killed $ax killed $eax
882; X64-NEXT:    retq
883;
884; X86-LABEL: scalar_i16_signed_mem_mem:
885; X86:       # %bb.0:
886; X86-NEXT:    pushl %ebx
887; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
888; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
889; X86-NEXT:    movzwl (%ecx), %ecx
890; X86-NEXT:    movzwl (%eax), %edx
891; X86-NEXT:    movl %ecx, %eax
892; X86-NEXT:    subw %dx, %ax
893; X86-NEXT:    jg .LBB14_2
894; X86-NEXT:  # %bb.1:
895; X86-NEXT:    negl %eax
896; X86-NEXT:  .LBB14_2:
897; X86-NEXT:    xorl %ebx, %ebx
898; X86-NEXT:    cmpw %dx, %cx
899; X86-NEXT:    setle %bl
900; X86-NEXT:    leal -1(%ebx,%ebx), %edx
901; X86-NEXT:    movzwl %ax, %eax
902; X86-NEXT:    shrl %eax
903; X86-NEXT:    imull %edx, %eax
904; X86-NEXT:    addl %ecx, %eax
905; X86-NEXT:    # kill: def $ax killed $ax killed $eax
906; X86-NEXT:    popl %ebx
907; X86-NEXT:    retl
; IR under test: load both operands, then the 16-bit signed midpoint pattern.
908  %a1 = load i16, ptr %a1_addr
909  %a2 = load i16, ptr %a2_addr
910  %t3 = icmp sgt i16 %a1, %a2 ; signed
911  %t4 = select i1 %t3, i16 -1, i16 1
912  %t5 = select i1 %t3, i16 %a2, i16 %a1
913  %t6 = select i1 %t3, i16 %a1, i16 %a2
914  %t7 = sub i16 %t6, %t5
915  %t8 = lshr i16 %t7, 1
916  %t9 = mul nsw i16 %t8, %t4 ; signed
917  %a10 = add nsw i16 %t9, %a1 ; signed
918  ret i16 %a10
919}
920
921; ---------------------------------------------------------------------------- ;
922; 8-bit width
923; ---------------------------------------------------------------------------- ;
924
925; Values come from regs
926
927define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; Signed i8 midpoint, both operands in registers; uses byte ops (mulb, shrb) and high-byte registers on 32-bit.
; Codegen assertions below were produced by update_llc_test_checks.py; regenerate rather than hand-edit.
928; X64-LABEL: scalar_i8_signed_reg_reg:
929; X64:       # %bb.0:
930; X64-NEXT:    cmpb %sil, %dil
931; X64-NEXT:    setg %cl
932; X64-NEXT:    negb %cl
933; X64-NEXT:    orb $1, %cl
934; X64-NEXT:    movsbl %dil, %edx
935; X64-NEXT:    subl %esi, %edi
936; X64-NEXT:    movsbl %sil, %eax
937; X64-NEXT:    subl %edx, %eax
938; X64-NEXT:    cmovll %edi, %eax
939; X64-NEXT:    shrb %al
940; X64-NEXT:    # kill: def $al killed $al killed $eax
941; X64-NEXT:    mulb %cl
942; X64-NEXT:    addb %dl, %al
943; X64-NEXT:    retq
944;
945; X86-LABEL: scalar_i8_signed_reg_reg:
946; X86:       # %bb.0:
947; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
948; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
949; X86-NEXT:    cmpb %ah, %cl
950; X86-NEXT:    setg %dl
951; X86-NEXT:    negb %dl
952; X86-NEXT:    orb $1, %dl
953; X86-NEXT:    movb %cl, %al
954; X86-NEXT:    subb %ah, %al
955; X86-NEXT:    jg .LBB15_2
956; X86-NEXT:  # %bb.1:
957; X86-NEXT:    subb %cl, %ah
958; X86-NEXT:    movb %ah, %al
959; X86-NEXT:  .LBB15_2:
960; X86-NEXT:    shrb %al
961; X86-NEXT:    mulb %dl
962; X86-NEXT:    addb %cl, %al
963; X86-NEXT:    retl
; IR under test: 8-bit version of the select-based signed midpoint pattern.
964  %t3 = icmp sgt i8 %a1, %a2 ; signed
965  %t4 = select i1 %t3, i8 -1, i8 1
966  %t5 = select i1 %t3, i8 %a2, i8 %a1
967  %t6 = select i1 %t3, i8 %a1, i8 %a2
968  %t7 = sub i8 %t6, %t5
969  %t8 = lshr i8 %t7, 1
970  %t9 = mul nsw i8 %t8, %t4 ; signed
971  %a10 = add nsw i8 %t9, %a1 ; signed
972  ret i8 %a10
973}
974
975define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; Unsigned i8 midpoint, both operands in registers. Same shape as the
; signed case but with ugt/unsigned lowering: the ±1 step is materialized
; via sbb (carry) instead of setg, and the selection uses cmovbl (X64) /
; ja (X86) — the unsigned condition-code family.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
976; X64-LABEL: scalar_i8_unsigned_reg_reg:
977; X64:       # %bb.0:
978; X64-NEXT:    xorl %ecx, %ecx
979; X64-NEXT:    cmpb %dil, %sil
980; X64-NEXT:    sbbl %ecx, %ecx
981; X64-NEXT:    orb $1, %cl
982; X64-NEXT:    movzbl %dil, %edx
983; X64-NEXT:    subl %esi, %edi
984; X64-NEXT:    movzbl %sil, %eax
985; X64-NEXT:    subl %edx, %eax
986; X64-NEXT:    cmovbl %edi, %eax
987; X64-NEXT:    shrb %al
988; X64-NEXT:    # kill: def $al killed $al killed $eax
989; X64-NEXT:    mulb %cl
990; X64-NEXT:    addb %dl, %al
991; X64-NEXT:    retq
992;
993; X86-LABEL: scalar_i8_unsigned_reg_reg:
994; X86:       # %bb.0:
995; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
996; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
997; X86-NEXT:    xorl %edx, %edx
998; X86-NEXT:    movb %ch, %ah
999; X86-NEXT:    subb %cl, %ah
1000; X86-NEXT:    sbbl %edx, %edx
1001; X86-NEXT:    orb $1, %dl
1002; X86-NEXT:    movb %cl, %al
1003; X86-NEXT:    subb %ch, %al
1004; X86-NEXT:    ja .LBB16_2
1005; X86-NEXT:  # %bb.1:
1006; X86-NEXT:    movb %ah, %al
1007; X86-NEXT:  .LBB16_2:
1008; X86-NEXT:    shrb %al
1009; X86-NEXT:    mulb %dl
1010; X86-NEXT:    addb %cl, %al
1011; X86-NEXT:    retl
; a1 + (((umax - umin) >> 1) * step), step = -1 if a1 u> a2 else +1.
; No nsw flags here, unlike the signed variant.
1012  %t3 = icmp ugt i8 %a1, %a2
1013  %t4 = select i1 %t3, i8 -1, i8 1
1014  %t5 = select i1 %t3, i8 %a2, i8 %a1
1015  %t6 = select i1 %t3, i8 %a1, i8 %a2
1016  %t7 = sub i8 %t6, %t5
1017  %t8 = lshr i8 %t7, 1
1018  %t9 = mul i8 %t8, %t4
1019  %a10 = add i8 %t9, %a1
1020  ret i8 %a10
1021}
1022
1023; Values are loaded. Only check signed case.
1024
1025define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
; Signed i8 midpoint with the first operand loaded from memory (a1) and
; the second in a register. Checks the load folds into a sign-extending
; movsbl on X64 / movzbl on X86 ahead of the same midpoint lowering as
; the reg/reg case.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
1026; X64-LABEL: scalar_i8_signed_mem_reg:
1027; X64:       # %bb.0:
1028; X64-NEXT:    movsbl (%rdi), %ecx
1029; X64-NEXT:    cmpb %sil, %cl
1030; X64-NEXT:    setg %dl
1031; X64-NEXT:    negb %dl
1032; X64-NEXT:    orb $1, %dl
1033; X64-NEXT:    movl %ecx, %edi
1034; X64-NEXT:    subl %esi, %edi
1035; X64-NEXT:    movsbl %sil, %eax
1036; X64-NEXT:    subl %ecx, %eax
1037; X64-NEXT:    cmovll %edi, %eax
1038; X64-NEXT:    shrb %al
1039; X64-NEXT:    # kill: def $al killed $al killed $eax
1040; X64-NEXT:    mulb %dl
1041; X64-NEXT:    addb %cl, %al
1042; X64-NEXT:    retq
1043;
1044; X86-LABEL: scalar_i8_signed_mem_reg:
1045; X86:       # %bb.0:
1046; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
1047; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1048; X86-NEXT:    movzbl (%ecx), %ecx
1049; X86-NEXT:    cmpb %ah, %cl
1050; X86-NEXT:    setg %dl
1051; X86-NEXT:    negb %dl
1052; X86-NEXT:    orb $1, %dl
1053; X86-NEXT:    movb %cl, %al
1054; X86-NEXT:    subb %ah, %al
1055; X86-NEXT:    jg .LBB17_2
1056; X86-NEXT:  # %bb.1:
1057; X86-NEXT:    subb %cl, %ah
1058; X86-NEXT:    movb %ah, %al
1059; X86-NEXT:  .LBB17_2:
1060; X86-NEXT:    shrb %al
1061; X86-NEXT:    mulb %dl
1062; X86-NEXT:    addb %cl, %al
1063; X86-NEXT:    retl
; Load a1, then a1 + (((max - min) >> 1) * (a1 > a2 ? -1 : 1)).
1064  %a1 = load i8, ptr %a1_addr
1065  %t3 = icmp sgt i8 %a1, %a2 ; signed
1066  %t4 = select i1 %t3, i8 -1, i8 1
1067  %t5 = select i1 %t3, i8 %a2, i8 %a1
1068  %t6 = select i1 %t3, i8 %a1, i8 %a2
1069  %t7 = sub i8 %t6, %t5
1070  %t8 = lshr i8 %t7, 1
1071  %t9 = mul nsw i8 %t8, %t4 ; signed
1072  %a10 = add nsw i8 %t9, %a1 ; signed
1073  ret i8 %a10
1074}
1075
1076define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
; Signed i8 midpoint with the first operand in a register and the second
; operand loaded from memory (a2). Mirror image of the mem/reg case;
; same cmov (X64) / branch (X86) midpoint lowering.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
1077; X64-LABEL: scalar_i8_signed_reg_mem:
1078; X64:       # %bb.0:
1079; X64-NEXT:    movsbl (%rsi), %eax
1080; X64-NEXT:    cmpb %al, %dil
1081; X64-NEXT:    setg %cl
1082; X64-NEXT:    negb %cl
1083; X64-NEXT:    orb $1, %cl
1084; X64-NEXT:    movsbl %dil, %edx
1085; X64-NEXT:    subl %eax, %edi
1086; X64-NEXT:    subl %edx, %eax
1087; X64-NEXT:    cmovll %edi, %eax
1088; X64-NEXT:    shrb %al
1089; X64-NEXT:    # kill: def $al killed $al killed $eax
1090; X64-NEXT:    mulb %cl
1091; X64-NEXT:    addb %dl, %al
1092; X64-NEXT:    retq
1093;
1094; X86-LABEL: scalar_i8_signed_reg_mem:
1095; X86:       # %bb.0:
1096; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1097; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1098; X86-NEXT:    movb (%eax), %ah
1099; X86-NEXT:    cmpb %ah, %cl
1100; X86-NEXT:    setg %dl
1101; X86-NEXT:    negb %dl
1102; X86-NEXT:    orb $1, %dl
1103; X86-NEXT:    movb %cl, %al
1104; X86-NEXT:    subb %ah, %al
1105; X86-NEXT:    jg .LBB18_2
1106; X86-NEXT:  # %bb.1:
1107; X86-NEXT:    subb %cl, %ah
1108; X86-NEXT:    movb %ah, %al
1109; X86-NEXT:  .LBB18_2:
1110; X86-NEXT:    shrb %al
1111; X86-NEXT:    mulb %dl
1112; X86-NEXT:    addb %cl, %al
1113; X86-NEXT:    retl
; Load a2, then a1 + (((max - min) >> 1) * (a1 > a2 ? -1 : 1)).
1114  %a2 = load i8, ptr %a2_addr
1115  %t3 = icmp sgt i8 %a1, %a2 ; signed
1116  %t4 = select i1 %t3, i8 -1, i8 1
1117  %t5 = select i1 %t3, i8 %a2, i8 %a1
1118  %t6 = select i1 %t3, i8 %a1, i8 %a2
1119  %t7 = sub i8 %t6, %t5
1120  %t8 = lshr i8 %t7, 1
1121  %t9 = mul nsw i8 %t8, %t4 ; signed
1122  %a10 = add nsw i8 %t9, %a1 ; signed
1123  ret i8 %a10
1124}
1125
1126define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; Signed i8 midpoint with both operands loaded from memory. Both loads
; are sign-extended (movsbl) on X64; the midpoint lowering then matches
; the other signed i8 variants (cmov on X64, branch on X86).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
1127; X64-LABEL: scalar_i8_signed_mem_mem:
1128; X64:       # %bb.0:
1129; X64-NEXT:    movsbl (%rdi), %ecx
1130; X64-NEXT:    movsbl (%rsi), %eax
1131; X64-NEXT:    cmpb %al, %cl
1132; X64-NEXT:    setg %dl
1133; X64-NEXT:    negb %dl
1134; X64-NEXT:    orb $1, %dl
1135; X64-NEXT:    movl %ecx, %esi
1136; X64-NEXT:    subl %eax, %esi
1137; X64-NEXT:    subl %ecx, %eax
1138; X64-NEXT:    cmovll %esi, %eax
1139; X64-NEXT:    shrb %al
1140; X64-NEXT:    # kill: def $al killed $al killed $eax
1141; X64-NEXT:    mulb %dl
1142; X64-NEXT:    addb %cl, %al
1143; X64-NEXT:    retq
1144;
1145; X86-LABEL: scalar_i8_signed_mem_mem:
1146; X86:       # %bb.0:
1147; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1148; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1149; X86-NEXT:    movzbl (%ecx), %ecx
1150; X86-NEXT:    movb (%eax), %ah
1151; X86-NEXT:    cmpb %ah, %cl
1152; X86-NEXT:    setg %dl
1153; X86-NEXT:    negb %dl
1154; X86-NEXT:    orb $1, %dl
1155; X86-NEXT:    movb %cl, %al
1156; X86-NEXT:    subb %ah, %al
1157; X86-NEXT:    jg .LBB19_2
1158; X86-NEXT:  # %bb.1:
1159; X86-NEXT:    subb %cl, %ah
1160; X86-NEXT:    movb %ah, %al
1161; X86-NEXT:  .LBB19_2:
1162; X86-NEXT:    shrb %al
1163; X86-NEXT:    mulb %dl
1164; X86-NEXT:    addb %cl, %al
1165; X86-NEXT:    retl
; Load both operands, then a1 + (((max - min) >> 1) * (a1 > a2 ? -1 : 1)).
1166  %a1 = load i8, ptr %a1_addr
1167  %a2 = load i8, ptr %a2_addr
1168  %t3 = icmp sgt i8 %a1, %a2 ; signed
1169  %t4 = select i1 %t3, i8 -1, i8 1
1170  %t5 = select i1 %t3, i8 %a2, i8 %a1
1171  %t6 = select i1 %t3, i8 %a1, i8 %a2
1172  %t7 = sub i8 %t6, %t5
1173  %t8 = lshr i8 %t7, 1
1174  %t9 = mul nsw i8 %t8, %t4 ; signed
1175  %a10 = add nsw i8 %t9, %a1 ; signed
1176  ret i8 %a10
1177}
1178