; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR

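; 'andn' computes ~x & y in a single instruction, so the xor with -1 folds away.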
define i32 @andn32(i32 %x, i32 %y)   {
; X86-LABEL: andn32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: andn32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1
  ret i32 %tmp2
}

define i32 @andn32_load(i32 %x, ptr %y)   {
; X86-LABEL: andn32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32_load:
; X64:       # %bb.0:
; X64-NEXT:    andnl (%rsi), %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: andn32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0x06]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %y1 = load i32, ptr %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1
  ret i32 %tmp2
}

define i64 @andn64(i64 %x, i64 %y)   {
; X86-LABEL: andn64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: andn64:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: andn64:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y
  ret i64 %tmp2
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; EGPR-LABEL: andn_cmp:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
}

; Recognize a disguised andn in the following 4 tests.
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp1:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp1:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp2:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp3:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp3:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and
  ret i1 %cmp
}

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp4:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp4:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and
  ret i1 %cmp
}

; A mask and compare against a constant is ok for an 'andn' too, even though
; the BMI instruction doesn't have an immediate form.
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    notl %eax
; X86-NEXT:    testb $43, %al
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const:
; X64:       # %bb.0:
; X64-NEXT:    notl %edi
; X64-NEXT:    testb $43, %dil
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp_const:
; EGPR:       # %bb.0:
; EGPR-NEXT:    notl %edi # encoding: [0xf7,0xd7]
; EGPR-NEXT:    testb $43, %dil # encoding: [0x40,0xf6,0xc7,0x2b]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43
  ret i1 %cmp
}

; But don't use 'andn' if the mask is a power of two.
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    btl %ecx, %eax
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const_power_of_two:
; X64:       # %bb.0:
; X64-NEXT:    btl %esi, %edi
; X64-NEXT:    setae %al
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp_const_power_of_two:
; EGPR:       # %bb.0:
; EGPR-NEXT:    btl %esi, %edi # encoding: [0x0f,0xa3,0xf7]
; EGPR-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %shl = shl i32 1, %y
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl
  ret i1 %cmp
}

; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $37, %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl $37, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_not_one_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $37, %edi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl $37, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: and_cmp_not_one_use:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andl $37, %edi # encoding: [0x83,0xe7,0x25]
; EGPR-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; EGPR-NEXT:    cmpl $37, %edi # encoding: [0x83,0xff,0x25]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext
  ret i32 %add
}

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setg %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn1:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setg %al
; X64-NEXT:    retq
;
; EGPR-LABEL: not_an_andn1:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andl %esi, %edi # encoding: [0x21,0xf7]
; EGPR-NEXT:    cmpl %edi, %esi # encoding: [0x39,0xfe]
; EGPR-NEXT:    setg %al # encoding: [0x0f,0x9f,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and
  ret i1 %cmp
}

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setbe %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn2:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setbe %al
; X64-NEXT:    retq
;
; EGPR-LABEL: not_an_andn2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andl %esi, %edi # encoding: [0x21,0xf7]
; EGPR-NEXT:    cmpl %edi, %esi # encoding: [0x39,0xfe]
; EGPR-NEXT:    setbe %al # encoding: [0x0f,0x96,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and
  ret i1 %cmp
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_swap_ops:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; EGPR-LABEL: andn_cmp_swap_ops:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6]
; EGPR-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}

; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    notb %al
; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_i8:
; X64:       # %bb.0:
; X64-NEXT:    notb %sil
; X64-NEXT:    testb %sil, %dil
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; EGPR-LABEL: andn_cmp_i8:
; EGPR:       # %bb.0:
; EGPR-NEXT:    notb %sil # encoding: [0x40,0xf6,0xd6]
; EGPR-NEXT:    testb %sil, %dil # encoding: [0x40,0x84,0xf7]
; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0
  ret i1 %cmp
}

; PR48768 - 'andn' clears the overflow flag, so we don't need a separate 'test'.
define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp_i32_overflow:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setle %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_i32_overflow:
; X64:       # %bb.0:
; X64-NEXT:    andnl %edi, %esi, %eax
; X64-NEXT:    setle %al
; X64-NEXT:    retq
;
; EGPR-LABEL: andn_cmp_i32_overflow:
; EGPR:       # %bb.0:
; EGPR-NEXT:    andnl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf2,0xc7]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    setle %al # encoding: [0x0f,0x9e,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %noty = xor i32 %y, -1
  %and = and i32 %x, %noty
  %cmp = icmp slt i32 %and, 1
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

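; The bextr control operand packs the start bit into bits 7:0 and the field
; length into bits 15:8.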
define i32 @bextr32(i32 %x, i32 %y)   {
; X86-LABEL: bextr32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: bextr32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32_load(ptr %x, i32 %y)   {
; X86-LABEL: bextr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bextrl %eax, (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_load:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, (%rdi), %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: bextr32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    bextrl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0x07]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = load i32, ptr %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

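; Control value 0xC04 encodes start = 4, length = 12, i.e. (x >> 4) & 0xFFF.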
define i32 @bextr32b(i32 %x)  uwtable  ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b:
; X86-SLOW-BEXTR:       # %bb.0:
; X86-SLOW-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X86-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT:    retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b:
; X64-SLOW-BEXTR:       # %bb.0:
; X64-SLOW-BEXTR-NEXT:    movl %edi, %eax
; X64-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X64-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT:    retq
;
; X86-FAST-BEXTR-LABEL: bextr32b:
; X86-FAST-BEXTR:       # %bb.0:
; X86-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
; X86-FAST-BEXTR-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT:    retl
;
; X64-FAST-BEXTR-LABEL: bextr32b:
; X64-FAST-BEXTR:       # %bb.0:
; X64-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT:    bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT:    retq
;
; EGPR-LABEL: bextr32b:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00]
; EGPR-NEXT:    # imm = 0xC04
; EGPR-NEXT:    bextrl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0xc7]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %1 = lshr i32 %x, 4
  %2 = and i32 %1, 4095
  ret i32 %2
}

; Make sure we still use the AH subreg trick to extract bits 15:8.
define i32 @bextr32_subreg(i32 %x)  uwtable  ssp {
; X86-LABEL: bextr32_subreg:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_subreg:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: bextr32_subreg:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT:    movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %1 = lshr i32 %x, 8
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @bextr32b_load(ptr %x)  uwtable  ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b_load:
; X86-SLOW-BEXTR:       # %bb.0:
; X86-SLOW-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT:    movl (%eax), %eax
; X86-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X86-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT:    retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b_load:
; X64-SLOW-BEXTR:       # %bb.0:
; X64-SLOW-BEXTR-NEXT:    movl (%rdi), %eax
; X64-SLOW-BEXTR-NEXT:    shrl $4, %eax
; X64-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT:    retq
;
; X86-FAST-BEXTR-LABEL: bextr32b_load:
; X86-FAST-BEXTR:       # %bb.0:
; X86-FAST-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT:    movl $3076, %ecx # imm = 0xC04
; X86-FAST-BEXTR-NEXT:    bextrl %ecx, (%eax), %eax
; X86-FAST-BEXTR-NEXT:    retl
;
; X64-FAST-BEXTR-LABEL: bextr32b_load:
; X64-FAST-BEXTR:       # %bb.0:
; X64-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT:    bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT:    retq
;
; EGPR-LABEL: bextr32b_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00]
; EGPR-NEXT:    # imm = 0xC04
; EGPR-NEXT:    bextrl %eax, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0x07]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %1 = load i32, ptr %x
  %2 = lshr i32 %1, 4
  %3 = and i32 %2, 4095
  ret i32 %3
}

; PR34042
define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32c:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: bextr32c:
; EGPR:       # %bb.0:
; EGPR-NEXT:    bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
  ret i32 %tmp1
}

define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $111, %eax
; X86-NEXT:    retl
;
; X64-LABEL: non_bextr32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $2, %eax
; X64-NEXT:    andl $111, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: non_bextr32:
; EGPR:       # %bb.0: # %entry
; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT:    shrl $2, %eax # encoding: [0xc1,0xe8,0x02]
; EGPR-NEXT:    andl $111, %eax # encoding: [0x83,0xe0,0x6f]
; EGPR-NEXT:    retq # encoding: [0xc3]
entry:
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
  ret i32 %and
}

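; 'blsi' isolates the lowest set bit: x & -x.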
define i32 @blsi32(i32 %x)   {
; X86-LABEL: blsi32:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_load(ptr %x)   {
; X86-LABEL: blsi32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsil (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsil (%rdi), %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsil (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x1f]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = load i32, ptr %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

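; 'blsi' sets ZF based on its result, so the compare with zero below should
; fold into the flags from blsil (no separate 'test' on the X86/X64 paths).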
define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsi32_z:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB25_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB25_2:
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_z:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    cmovel %esi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi32_z:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_z2:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_z2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsil %edi, %ecx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi32_z2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT:    blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf]
; EGPR-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
; EGPR-NEXT:    cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

; Inspired by PR48768, but using cmovcc instead of setcc. There should be
; no test instruction.
define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_sle:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovlel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_sle:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsil %edi, %ecx
; X64-NEXT:    cmovgl %edx, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi32_sle:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT:    blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf]
; EGPR-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
; EGPR-NEXT:    cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

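; There is no 64-bit 'blsi' on 32-bit targets, so the pattern is expanded
; into neg/sbb plus two ands on the register pair.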
define i64 @blsi64(i64 %x)   {
; X86-LABEL: blsi64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi64:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi64:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsi64_z:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    jne .LBB29_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB29_2:
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_z:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    cmoveq %rsi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi64_z:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
; EGPR-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
; EGPR-NEXT:    cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_z2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    negl %esi
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    andl %ecx, %edx
; X86-NEXT:    andl %eax, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_z2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsiq %rdi, %rcx
; X64-NEXT:    cmovneq %rdx, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi64_z2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
; EGPR-NEXT:    blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf]
; EGPR-NEXT:    testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
; EGPR-NEXT:    cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_sle:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    negl %esi
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    andl %ecx, %edx
; X86-NEXT:    andl %eax, %esi
; X86-NEXT:    cmpl $1, %esi
; X86-NEXT:    sbbl $0, %edx
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovll %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_sle:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsiq %rdi, %rcx
; X64-NEXT:    cmovgq %rdx, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi64_sle:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
; EGPR-NEXT:    blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf]
; EGPR-NEXT:    testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
; EGPR-NEXT:    cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

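; 'blsmsk' produces a mask up to and including the lowest set bit: x ^ (x - 1).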
define i32 @blsmsk32(i32 %x)   {
; X86-LABEL: blsmsk32:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i32 %x, 1
  %tmp2 = xor i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_load(ptr %x)   {
; X86-LABEL: blsmsk32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsmskl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl (%rdi), %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsmskl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x17]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = load i32, ptr %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsmsk32_z:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB34_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB34_2:
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_z:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl %edi, %eax
; X64-NEXT:    cmovel %esi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk32_z:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_z2:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_z2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsmskl %edi, %ecx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk32_z2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT:    blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7]
; EGPR-NEXT:    cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_sle:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovlel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_sle:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsmskl %edi, %ecx
; X64-NEXT:    cmovgl %edx, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk32_sle:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT:    blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7]
; EGPR-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
; EGPR-NEXT:    cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsmsk64(i64 %x)   {
; X86-LABEL: blsmsk64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    xorl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64:
; X64:       # %bb.0:
; X64-NEXT:    blsmskq %rdi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk64:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i64 %x, 1
  %tmp2 = xor i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsmsk64_z:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    xorl %esi, %edx
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    jne .LBB38_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:  .LBB38_2:
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64_z:
; X64:       # %bb.0:
; X64-NEXT:    blsmskq %rdi, %rax
; X64-NEXT:    cmoveq %rsi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk64_z:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7]
; EGPR-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
; EGPR-NEXT:    cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_z2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    xorl %eax, %edx
; X86-NEXT:    xorl %ecx, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64_z2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsmskq %rdi, %rcx
; X64-NEXT:    cmovneq %rdx, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk64_z2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
; EGPR-NEXT:    blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7]
; EGPR-NEXT:    cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i64 @blsmsk64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_sle:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    xorl %ecx, %esi
; X86-NEXT:    xorl %eax, %edx
; X86-NEXT:    cmpl $1, %edx
; X86-NEXT:    sbbl $0, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovll %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64_sle:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsmskq %rdi, %rcx
; X64-NEXT:    cmovgq %rdx, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsmsk64_sle:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
; EGPR-NEXT:    blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7]
; EGPR-NEXT:    testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
; EGPR-NEXT:    cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

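; 'blsr' resets the lowest set bit: x & (x - 1).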
define i32 @blsr32(i32 %x)   {
; X86-LABEL: blsr32:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_load(ptr %x)   {
; X86-LABEL: blsr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsrl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsrl (%rdi), %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr32_load:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x0f]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %x1 = load i32, ptr %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsr32_z:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jne .LBB43_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:  .LBB43_2:
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_z:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    cmovel %esi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr32_z:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_z2:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_z2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsrl %edi, %ecx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr32_z2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT:    blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf]
; EGPR-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
; EGPR-NEXT:    cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_sle:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovlel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_sle:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    blsrl %edi, %ecx
; X64-NEXT:    cmovgl %edx, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr32_sle:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT:    blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf]
; EGPR-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
; EGPR-NEXT:    cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsr64(i64 %x)   {
; X86-LABEL: blsr64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr64:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr64:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsr64_z:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl %edx, %ecx
; X86-NEXT:    jne .LBB47_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:  .LBB47_2:
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_z:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    cmoveq %rsi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr64_z:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
; EGPR-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
; EGPR-NEXT:    cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_z2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    andl %eax, %edx
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovel %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_z2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsrq %rdi, %rcx
; X64-NEXT:    cmovneq %rdx, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr64_z2:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
; EGPR-NEXT:    blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
; EGPR-NEXT:    testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
; EGPR-NEXT:    cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_sle:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    andl %eax, %edx
; X86-NEXT:    cmpl $1, %edx
; X86-NEXT:    sbbl $0, %esi
; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmovll %eax, %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_sle:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    blsrq %rdi, %rcx
; X64-NEXT:    cmovgq %rdx, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr64_sle:
; EGPR:       # %bb.0:
; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
; EGPR-NEXT:    blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
; EGPR-NEXT:    testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
; EGPR-NEXT:    cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792

define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_constant:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr_disguised_constant:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
; EGPR-NEXT:    movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2
  ret i64 %r
}

; The add here used to get shrunk, but the and did not, which hid the blsr
; pattern. We now use the fact that the shift leaves the upper bits zero, so
; the and result has zeros there too, and shrink the and as well.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $16, %eax
; X86-NEXT:    blsrl %eax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_shrunk_add:
; X64:       # %bb.0:
; X64-NEXT:    shrq $48, %rdi
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr_disguised_shrunk_add:
; EGPR:       # %bb.0:
; EGPR-NEXT:    shrq $48, %rdi # encoding: [0x48,0xc1,0xef,0x30]
; EGPR-NEXT:    blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %a = lshr i64 %x, 48
  %b = add i64 %a, -1
  %c = and i64 %b, %a
  ret i64 %c
}

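; PR40060 - 'icmp sgt X, -1' is a sign check, so it lowers to test + jns.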
define void @pr40060(i32, i32) {
; X86-LABEL: pr40060:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    jns bar # TAILCALL
; X86-NEXT:  # %bb.1:
; X86-NEXT:    retl
;
; X64-LABEL: pr40060:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    jns bar # TAILCALL
; X64-NEXT:  # %bb.1:
; X64-NEXT:    retq
;
; EGPR-LABEL: pr40060:
; EGPR:       # %bb.0:
; EGPR-NEXT:    bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    jns bar # TAILCALL
; EGPR-NEXT:    # encoding: [0x79,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; EGPR-NEXT:  # %bb.1:
; EGPR-NEXT:    retq # encoding: [0xc3]
  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %6

  tail call void @bar()
  br label %6

  ret void
}

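; The zero flag produced by 'blsr' feeds the branch directly; no separate
; 'test' should be emitted before the jne.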
1600define i32 @blsr32_branch(i32 %x) {
1601; X86-LABEL: blsr32_branch:
1602; X86:       # %bb.0:
1603; X86-NEXT:    pushl %esi
1604; X86-NEXT:    .cfi_def_cfa_offset 8
1605; X86-NEXT:    .cfi_offset %esi, -8
1606; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %esi
1607; X86-NEXT:    jne .LBB53_2
1608; X86-NEXT:  # %bb.1:
1609; X86-NEXT:    calll bar
1610; X86-NEXT:  .LBB53_2:
1611; X86-NEXT:    movl %esi, %eax
1612; X86-NEXT:    popl %esi
1613; X86-NEXT:    .cfi_def_cfa_offset 4
1614; X86-NEXT:    retl
1615;
1616; X64-LABEL: blsr32_branch:
1617; X64:       # %bb.0:
1618; X64-NEXT:    pushq %rbx
1619; X64-NEXT:    .cfi_def_cfa_offset 16
1620; X64-NEXT:    .cfi_offset %rbx, -16
1621; X64-NEXT:    blsrl %edi, %ebx
1622; X64-NEXT:    jne .LBB53_2
1623; X64-NEXT:  # %bb.1:
1624; X64-NEXT:    callq bar
1625; X64-NEXT:  .LBB53_2:
1626; X64-NEXT:    movl %ebx, %eax
1627; X64-NEXT:    popq %rbx
1628; X64-NEXT:    .cfi_def_cfa_offset 8
1629; X64-NEXT:    retq
1630;
1631; EGPR-LABEL: blsr32_branch:
1632; EGPR:       # %bb.0:
1633; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
1634; EGPR-NEXT:    .cfi_def_cfa_offset 16
1635; EGPR-NEXT:    .cfi_offset %rbx, -16
1636; EGPR-NEXT:    blsrl %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xcf]
1637; EGPR-NEXT:    jne .LBB53_2 # encoding: [0x75,A]
1638; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB53_2-1, kind: FK_PCRel_1
1639; EGPR-NEXT:  # %bb.1:
1640; EGPR-NEXT:    callq bar # encoding: [0xe8,A,A,A,A]
1641; EGPR-NEXT:    # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
1642; EGPR-NEXT:  .LBB53_2:
1643; EGPR-NEXT:    movl %ebx, %eax # encoding: [0x89,0xd8]
1644; EGPR-NEXT:    popq %rbx # encoding: [0x5b]
1645; EGPR-NEXT:    .cfi_def_cfa_offset 8
1646; EGPR-NEXT:    retq # encoding: [0xc3]
1647  %tmp = sub i32 %x, 1
1648  %tmp2 = and i32 %x, %tmp
1649  %cmp = icmp eq i32 %tmp2, 0
1650  br i1 %cmp, label %1, label %2
1651
1652  tail call void @bar()
1653  br label %2
1654  ret i32 %tmp2
1655}
1656
define i64 @blsr64_branch(i64 %x) {
; X86-LABEL: blsr64_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    addl $-1, %esi
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    adcl $-1, %edi
; X86-NEXT:    andl %eax, %esi
; X86-NEXT:    andl %ecx, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    orl %edi, %eax
; X86-NEXT:    jne .LBB54_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB54_2:
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr64_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsrq %rdi, %rbx
; X64-NEXT:    jne .LBB54_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB54_2:
; X64-NEXT:    movq %rbx, %rax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; EGPR-LABEL: blsr64_branch:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
; EGPR-NEXT:    .cfi_def_cfa_offset 16
; EGPR-NEXT:    .cfi_offset %rbx, -16
; EGPR-NEXT:    blsrq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xcf]
; EGPR-NEXT:    jne .LBB54_2 # encoding: [0x75,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB54_2-1, kind: FK_PCRel_1
; EGPR-NEXT:  # %bb.1:
; EGPR-NEXT:    callq bar # encoding: [0xe8,A,A,A,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
; EGPR-NEXT:  .LBB54_2:
; EGPR-NEXT:    movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
; EGPR-NEXT:    popq %rbx # encoding: [0x5b]
; EGPR-NEXT:    .cfi_def_cfa_offset 8
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret i64 %tmp2
}

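; The ZF set by BLSI feeds the branch directly, again avoiding a 'test'.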
define i32 @blsi32_branch(i32 %x) {
; X86-LABEL: blsi32_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %esi
; X86-NEXT:    jne .LBB55_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB55_2:
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsil %edi, %ebx
; X64-NEXT:    jne .LBB55_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB55_2:
; X64-NEXT:    movl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi32_branch:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
; EGPR-NEXT:    .cfi_def_cfa_offset 16
; EGPR-NEXT:    .cfi_offset %rbx, -16
; EGPR-NEXT:    blsil %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xdf]
; EGPR-NEXT:    jne .LBB55_2 # encoding: [0x75,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB55_2-1, kind: FK_PCRel_1
; EGPR-NEXT:  # %bb.1:
; EGPR-NEXT:    callq bar # encoding: [0xe8,A,A,A,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
; EGPR-NEXT:  .LBB55_2:
; EGPR-NEXT:    movl %ebx, %eax # encoding: [0x89,0xd8]
; EGPR-NEXT:    popq %rbx # encoding: [0x5b]
; EGPR-NEXT:    .cfi_def_cfa_offset 8
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret i32 %tmp2
}

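; Same for i64. On 32-bit targets the i64 BLSI is expanded with neg/sbb
; before the 'and's, and the OR of the two halves supplies the zero flag.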
define i64 @blsi64_branch(i64 %x) {
; X86-LABEL: blsi64_branch:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %esi, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    negl %edi
; X86-NEXT:    sbbl %ecx, %esi
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    andl %eax, %edi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    orl %esi, %eax
; X86-NEXT:    jne .LBB56_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    calll bar
; X86-NEXT:  .LBB56_2:
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi64_branch:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    blsiq %rdi, %rbx
; X64-NEXT:    jne .LBB56_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    callq bar
; X64-NEXT:  .LBB56_2:
; X64-NEXT:    movq %rbx, %rax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi64_branch:
; EGPR:       # %bb.0:
; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
; EGPR-NEXT:    .cfi_def_cfa_offset 16
; EGPR-NEXT:    .cfi_offset %rbx, -16
; EGPR-NEXT:    blsiq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xdf]
; EGPR-NEXT:    jne .LBB56_2 # encoding: [0x75,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB56_2-1, kind: FK_PCRel_1
; EGPR-NEXT:  # %bb.1:
; EGPR-NEXT:    callq bar # encoding: [0xe8,A,A,A,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
; EGPR-NEXT:  .LBB56_2:
; EGPR-NEXT:    movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
; EGPR-NEXT:    popq %rbx # encoding: [0x5b]
; EGPR-NEXT:    .cfi_def_cfa_offset 8
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret i64 %tmp2
}

declare dso_local void @bar()

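; PR42118: (x & -x) == x holds exactly when x has at most one set bit, i.e.
; when (x & (x - 1)) == 0, so this should lower to BLSR plus a zero test.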
define void @pr42118_i32(i32 %x) {
; X86-LABEL: pr42118_i32:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    je bar # TAILCALL
; X86-NEXT:  # %bb.1:
; X86-NEXT:    retl
;
; X64-LABEL: pr42118_i32:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    je bar # TAILCALL
; X64-NEXT:  # %bb.1:
; X64-NEXT:    retq
;
; EGPR-LABEL: pr42118_i32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
; EGPR-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
; EGPR-NEXT:    je bar # TAILCALL
; EGPR-NEXT:    # encoding: [0x74,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; EGPR-NEXT:  # %bb.1:
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i32 0, %x
  %tmp1 = and i32 %tmp, %x
  %cmp = icmp eq i32 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

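; Same fold for i64; 32-bit targets expand the BLSR as above.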
define void @pr42118_i64(i64 %x) {
; X86-LABEL: pr42118_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    addl $-1, %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    adcl $-1, %esi
; X86-NEXT:    andl %eax, %edx
; X86-NEXT:    andl %ecx, %esi
; X86-NEXT:    orl %edx, %esi
; X86-NEXT:    jne .LBB58_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    jmp bar # TAILCALL
; X86-NEXT:  .LBB58_1:
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: pr42118_i64:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    je bar # TAILCALL
; X64-NEXT:  # %bb.1:
; X64-NEXT:    retq
;
; EGPR-LABEL: pr42118_i64:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
; EGPR-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
; EGPR-NEXT:    je bar # TAILCALL
; EGPR-NEXT:    # encoding: [0x74,A]
; EGPR-NEXT:    # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
; EGPR-NEXT:  # %bb.1:
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tmp = sub i64 0, %x
  %tmp1 = and i64 %tmp, %x
  %cmp = icmp eq i64 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

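; BLSI sets CF when its source is non-zero, so the X64 lowering folds the
; 'x == 0' select into a CMOVAE on that carry flag instead of a 'test'.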
define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X86-LABEL: blsi_cflag_32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    jne .LBB59_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB59_1:
; X86-NEXT:    blsil %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi_cflag_32:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    cmovael %esi, %eax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi_cflag_32:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
; EGPR-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
; EGPR-NEXT:    cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tobool = icmp eq i32 %x, 0
  %sub = sub nsw i32 0, %x
  %and = and i32 %sub, %x
  %cond = select i1 %tobool, i32 %y, i32 %and
  ret i32 %cond
}

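; Same for i64: BLSI's carry flag drives the CMOVAE on 64-bit targets.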
define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: blsi_cflag_64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    orl %esi, %edi
; X86-NEXT:    jne .LBB60_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jmp .LBB60_3
; X86-NEXT:  .LBB60_1:
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:  .LBB60_3:
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: blsi_cflag_64:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    cmovaeq %rsi, %rax
; X64-NEXT:    retq
;
; EGPR-LABEL: blsi_cflag_64:
; EGPR:       # %bb.0:
; EGPR-NEXT:    blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
; EGPR-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
; EGPR-NEXT:    cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
; EGPR-NEXT:    retq # encoding: [0xc3]
  %tobool = icmp eq i64 %x, 0
  %sub = sub nsw i64 0, %x
  %and = and i64 %sub, %x
  %cond = select i1 %tobool, i64 %y, i64 %and
  ret i64 %cond
}
