xref: /llvm-project/llvm/test/CodeGen/X86/extract-lowbits.ll (revision 8b6e1dc924fd3c35670b46d744091d2f7ce94c5f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM
12
13; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll
14
15; https://bugs.llvm.org/show_bug.cgi?id=36419
16; https://bugs.llvm.org/show_bug.cgi?id=37603
17; https://bugs.llvm.org/show_bug.cgi?id=37610
18
19; Patterns:
20;   a) x &  (1 << nbits) - 1
21;   b) x & ~(-1 << nbits)
22;   c) x &  (-1 >> (32 - y))
23;   d) x << (32 - y) >> (32 - y)
24; are equivalent.
25
26; ---------------------------------------------------------------------------- ;
27; Pattern a. 32-bit
28; ---------------------------------------------------------------------------- ;
29
; bzhi32_a0 -- pattern a on i32: returns %val & ((1 << %numlowbits) - 1).
; Per the assertions below, both triples are expected to select:
;   - without BMI: a shll/decl/andl sequence;
;   - with BMI1:   bextrl, with the bit count shifted left by 8 as its control;
;   - with BMI2:   a single bzhil.
30define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
31; X86-NOBMI-LABEL: bzhi32_a0:
32; X86-NOBMI:       # %bb.0:
33; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
34; X86-NOBMI-NEXT:    movl $1, %eax
35; X86-NOBMI-NEXT:    shll %cl, %eax
36; X86-NOBMI-NEXT:    decl %eax
37; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
38; X86-NOBMI-NEXT:    retl
39;
40; X86-BMI1-LABEL: bzhi32_a0:
41; X86-BMI1:       # %bb.0:
42; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
43; X86-BMI1-NEXT:    shll $8, %eax
44; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
45; X86-BMI1-NEXT:    retl
46;
47; X86-BMI2-LABEL: bzhi32_a0:
48; X86-BMI2:       # %bb.0:
49; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
50; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
51; X86-BMI2-NEXT:    retl
52;
53; X64-NOBMI-LABEL: bzhi32_a0:
54; X64-NOBMI:       # %bb.0:
55; X64-NOBMI-NEXT:    movl %esi, %ecx
56; X64-NOBMI-NEXT:    movl $1, %eax
57; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
58; X64-NOBMI-NEXT:    shll %cl, %eax
59; X64-NOBMI-NEXT:    decl %eax
60; X64-NOBMI-NEXT:    andl %edi, %eax
61; X64-NOBMI-NEXT:    retq
62;
63; X64-BMI1-LABEL: bzhi32_a0:
64; X64-BMI1:       # %bb.0:
65; X64-BMI1-NEXT:    shll $8, %esi
66; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
67; X64-BMI1-NEXT:    retq
68;
69; X64-BMI2-LABEL: bzhi32_a0:
70; X64-BMI2:       # %bb.0:
71; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
72; X64-BMI2-NEXT:    retq
73  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
74  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1, i.e. the low-bit mask
75  %masked = and i32 %mask, %val ; mask is the first 'and' operand in this variant
76  ret i32 %masked
77}
78
; bzhi32_a1_indexzext -- pattern a on i32 where the bit count is passed as a
; zeroext i8 and widened to i32 with zext before the shift.
; The assertions below expect shll/decl/andl without BMI, bextrl (count
; shifted left by 8 as control) with BMI1, and bzhil with BMI2; no separate
; extension instruction appears in the x86-64 sequences.
79define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
80; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
81; X86-NOBMI:       # %bb.0:
82; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
83; X86-NOBMI-NEXT:    movl $1, %eax
84; X86-NOBMI-NEXT:    shll %cl, %eax
85; X86-NOBMI-NEXT:    decl %eax
86; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
87; X86-NOBMI-NEXT:    retl
88;
89; X86-BMI1-LABEL: bzhi32_a1_indexzext:
90; X86-BMI1:       # %bb.0:
91; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
92; X86-BMI1-NEXT:    shll $8, %eax
93; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
94; X86-BMI1-NEXT:    retl
95;
96; X86-BMI2-LABEL: bzhi32_a1_indexzext:
97; X86-BMI2:       # %bb.0:
98; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
99; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
100; X86-BMI2-NEXT:    retl
101;
102; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
103; X64-NOBMI:       # %bb.0:
104; X64-NOBMI-NEXT:    movl %esi, %ecx
105; X64-NOBMI-NEXT:    movl $1, %eax
106; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
107; X64-NOBMI-NEXT:    shll %cl, %eax
108; X64-NOBMI-NEXT:    decl %eax
109; X64-NOBMI-NEXT:    andl %edi, %eax
110; X64-NOBMI-NEXT:    retq
111;
112; X64-BMI1-LABEL: bzhi32_a1_indexzext:
113; X64-BMI1:       # %bb.0:
114; X64-BMI1-NEXT:    shll $8, %esi
115; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
116; X64-BMI1-NEXT:    retq
117;
118; X64-BMI2-LABEL: bzhi32_a1_indexzext:
119; X64-BMI2:       # %bb.0:
120; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
121; X64-BMI2-NEXT:    retq
122  %conv = zext i8 %numlowbits to i32 ; widen the 8-bit count to the shift width
123  %onebit = shl i32 1, %conv ; 1 << count
124  %mask = add nsw i32 %onebit, -1 ; (1 << count) - 1
125  %masked = and i32 %mask, %val
126  ret i32 %masked
127}
128
; bzhi32_a2_load -- pattern a on i32 where the value to mask is loaded from
; the pointer %w rather than passed directly.
; The assertions below expect the load to be folded into the memory operand
; of the final andl (no BMI), bextrl (BMI1) or bzhil (BMI2).
129define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
130; X86-NOBMI-LABEL: bzhi32_a2_load:
131; X86-NOBMI:       # %bb.0:
132; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
133; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
134; X86-NOBMI-NEXT:    movl $1, %eax
135; X86-NOBMI-NEXT:    shll %cl, %eax
136; X86-NOBMI-NEXT:    decl %eax
137; X86-NOBMI-NEXT:    andl (%edx), %eax
138; X86-NOBMI-NEXT:    retl
139;
140; X86-BMI1-LABEL: bzhi32_a2_load:
141; X86-BMI1:       # %bb.0:
142; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
143; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
144; X86-BMI1-NEXT:    shll $8, %ecx
145; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
146; X86-BMI1-NEXT:    retl
147;
148; X86-BMI2-LABEL: bzhi32_a2_load:
149; X86-BMI2:       # %bb.0:
150; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
151; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
152; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
153; X86-BMI2-NEXT:    retl
154;
155; X64-NOBMI-LABEL: bzhi32_a2_load:
156; X64-NOBMI:       # %bb.0:
157; X64-NOBMI-NEXT:    movl %esi, %ecx
158; X64-NOBMI-NEXT:    movl $1, %eax
159; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
160; X64-NOBMI-NEXT:    shll %cl, %eax
161; X64-NOBMI-NEXT:    decl %eax
162; X64-NOBMI-NEXT:    andl (%rdi), %eax
163; X64-NOBMI-NEXT:    retq
164;
165; X64-BMI1-LABEL: bzhi32_a2_load:
166; X64-BMI1:       # %bb.0:
167; X64-BMI1-NEXT:    shll $8, %esi
168; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
169; X64-BMI1-NEXT:    retq
170;
171; X64-BMI2-LABEL: bzhi32_a2_load:
172; X64-BMI2:       # %bb.0:
173; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
174; X64-BMI2-NEXT:    retq
175  %val = load i32, ptr %w ; value to mask comes from memory
176  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
177  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
178  %masked = and i32 %mask, %val
179  ret i32 %masked
180}
181
; bzhi32_a3_load_indexzext -- pattern a on i32 combining both variations:
; the value is loaded from %w and the bit count is a zeroext i8 widened with
; zext. The assertions below expect the load folded into andl / bextrl /
; bzhil, as in the plain-load variant.
182define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
183; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
184; X86-NOBMI:       # %bb.0:
185; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
186; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
187; X86-NOBMI-NEXT:    movl $1, %eax
188; X86-NOBMI-NEXT:    shll %cl, %eax
189; X86-NOBMI-NEXT:    decl %eax
190; X86-NOBMI-NEXT:    andl (%edx), %eax
191; X86-NOBMI-NEXT:    retl
192;
193; X86-BMI1-LABEL: bzhi32_a3_load_indexzext:
194; X86-BMI1:       # %bb.0:
195; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
196; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
197; X86-BMI1-NEXT:    shll $8, %ecx
198; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
199; X86-BMI1-NEXT:    retl
200;
201; X86-BMI2-LABEL: bzhi32_a3_load_indexzext:
202; X86-BMI2:       # %bb.0:
203; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
204; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
205; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
206; X86-BMI2-NEXT:    retl
207;
208; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
209; X64-NOBMI:       # %bb.0:
210; X64-NOBMI-NEXT:    movl %esi, %ecx
211; X64-NOBMI-NEXT:    movl $1, %eax
212; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
213; X64-NOBMI-NEXT:    shll %cl, %eax
214; X64-NOBMI-NEXT:    decl %eax
215; X64-NOBMI-NEXT:    andl (%rdi), %eax
216; X64-NOBMI-NEXT:    retq
217;
218; X64-BMI1-LABEL: bzhi32_a3_load_indexzext:
219; X64-BMI1:       # %bb.0:
220; X64-BMI1-NEXT:    shll $8, %esi
221; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
222; X64-BMI1-NEXT:    retq
223;
224; X64-BMI2-LABEL: bzhi32_a3_load_indexzext:
225; X64-BMI2:       # %bb.0:
226; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
227; X64-BMI2-NEXT:    retq
228  %val = load i32, ptr %w ; value to mask comes from memory
229  %conv = zext i8 %numlowbits to i32 ; widen the 8-bit count to the shift width
230  %onebit = shl i32 1, %conv ; 1 << count
231  %mask = add nsw i32 %onebit, -1 ; (1 << count) - 1
232  %masked = and i32 %mask, %val
233  ret i32 %masked
234}
235
; bzhi32_a4_commutative -- pattern a on i32 with the operands of the final
; 'and' swapped (%val first, %mask second). The assertions below expect the
; same shll/decl/andl, bextrl and bzhil sequences as for the unswapped form,
; i.e. operand order of the 'and' is not expected to affect selection.
236define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
237; X86-NOBMI-LABEL: bzhi32_a4_commutative:
238; X86-NOBMI:       # %bb.0:
239; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
240; X86-NOBMI-NEXT:    movl $1, %eax
241; X86-NOBMI-NEXT:    shll %cl, %eax
242; X86-NOBMI-NEXT:    decl %eax
243; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
244; X86-NOBMI-NEXT:    retl
245;
246; X86-BMI1-LABEL: bzhi32_a4_commutative:
247; X86-BMI1:       # %bb.0:
248; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
249; X86-BMI1-NEXT:    shll $8, %eax
250; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
251; X86-BMI1-NEXT:    retl
252;
253; X86-BMI2-LABEL: bzhi32_a4_commutative:
254; X86-BMI2:       # %bb.0:
255; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
256; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
257; X86-BMI2-NEXT:    retl
258;
259; X64-NOBMI-LABEL: bzhi32_a4_commutative:
260; X64-NOBMI:       # %bb.0:
261; X64-NOBMI-NEXT:    movl %esi, %ecx
262; X64-NOBMI-NEXT:    movl $1, %eax
263; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
264; X64-NOBMI-NEXT:    shll %cl, %eax
265; X64-NOBMI-NEXT:    decl %eax
266; X64-NOBMI-NEXT:    andl %edi, %eax
267; X64-NOBMI-NEXT:    retq
268;
269; X64-BMI1-LABEL: bzhi32_a4_commutative:
270; X64-BMI1:       # %bb.0:
271; X64-BMI1-NEXT:    shll $8, %esi
272; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
273; X64-BMI1-NEXT:    retq
274;
275; X64-BMI2-LABEL: bzhi32_a4_commutative:
276; X64-BMI2:       # %bb.0:
277; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
278; X64-BMI2-NEXT:    retq
279  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
280  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
281  %masked = and i32 %val, %mask ; swapped order
282  ret i32 %masked
283}
284
285; 64-bit
286
; bzhi64_a0 -- pattern a on i64: returns %val & ((1 << %numlowbits) - 1).
; Per the assertions below:
;   - i686 (all three feature sets): no bextr/bzhi is expected; the 64-bit
;     shift is expanded over the edx:eax pair (shldl plus shll, or shlxl with
;     BMI2), fixed up with a branch on bit 5 of the count, decremented with
;     addl/adcl $-1, and each half is and-ed with the matching half of %val.
;   - x86-64: shlq/decq/andq without BMI, bextrq (count shifted left by 8 as
;     control) with BMI1, bzhiq with BMI2.
287define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
288; X86-NOBMI-LABEL: bzhi64_a0:
289; X86-NOBMI:       # %bb.0:
290; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
291; X86-NOBMI-NEXT:    movl $1, %eax
292; X86-NOBMI-NEXT:    xorl %edx, %edx
293; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
294; X86-NOBMI-NEXT:    shll %cl, %eax
295; X86-NOBMI-NEXT:    testb $32, %cl
296; X86-NOBMI-NEXT:    je .LBB5_2
297; X86-NOBMI-NEXT:  # %bb.1:
298; X86-NOBMI-NEXT:    movl %eax, %edx
299; X86-NOBMI-NEXT:    xorl %eax, %eax
300; X86-NOBMI-NEXT:  .LBB5_2:
301; X86-NOBMI-NEXT:    addl $-1, %eax
302; X86-NOBMI-NEXT:    adcl $-1, %edx
303; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
304; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
305; X86-NOBMI-NEXT:    retl
306;
307; X86-BMI1-LABEL: bzhi64_a0:
308; X86-BMI1:       # %bb.0:
309; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
310; X86-BMI1-NEXT:    movl $1, %eax
311; X86-BMI1-NEXT:    xorl %edx, %edx
312; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
313; X86-BMI1-NEXT:    shll %cl, %eax
314; X86-BMI1-NEXT:    testb $32, %cl
315; X86-BMI1-NEXT:    je .LBB5_2
316; X86-BMI1-NEXT:  # %bb.1:
317; X86-BMI1-NEXT:    movl %eax, %edx
318; X86-BMI1-NEXT:    xorl %eax, %eax
319; X86-BMI1-NEXT:  .LBB5_2:
320; X86-BMI1-NEXT:    addl $-1, %eax
321; X86-BMI1-NEXT:    adcl $-1, %edx
322; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
323; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
324; X86-BMI1-NEXT:    retl
325;
326; X86-BMI2-LABEL: bzhi64_a0:
327; X86-BMI2:       # %bb.0:
328; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
329; X86-BMI2-NEXT:    movl $1, %eax
330; X86-BMI2-NEXT:    xorl %edx, %edx
331; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
332; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
333; X86-BMI2-NEXT:    testb $32, %cl
334; X86-BMI2-NEXT:    je .LBB5_2
335; X86-BMI2-NEXT:  # %bb.1:
336; X86-BMI2-NEXT:    movl %eax, %edx
337; X86-BMI2-NEXT:    xorl %eax, %eax
338; X86-BMI2-NEXT:  .LBB5_2:
339; X86-BMI2-NEXT:    addl $-1, %eax
340; X86-BMI2-NEXT:    adcl $-1, %edx
341; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
342; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
343; X86-BMI2-NEXT:    retl
344;
345; X64-NOBMI-LABEL: bzhi64_a0:
346; X64-NOBMI:       # %bb.0:
347; X64-NOBMI-NEXT:    movq %rsi, %rcx
348; X64-NOBMI-NEXT:    movl $1, %eax
349; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
350; X64-NOBMI-NEXT:    shlq %cl, %rax
351; X64-NOBMI-NEXT:    decq %rax
352; X64-NOBMI-NEXT:    andq %rdi, %rax
353; X64-NOBMI-NEXT:    retq
354;
355; X64-BMI1-LABEL: bzhi64_a0:
356; X64-BMI1:       # %bb.0:
357; X64-BMI1-NEXT:    shll $8, %esi
358; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
359; X64-BMI1-NEXT:    retq
360;
361; X64-BMI2-LABEL: bzhi64_a0:
362; X64-BMI2:       # %bb.0:
363; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
364; X64-BMI2-NEXT:    retq
365  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
366  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
367  %masked = and i64 %mask, %val
368  ret i64 %masked
369}
370
371; Check that we don't throw away the vreg_width-1 mask if not using shifts
; bzhi64_a0_masked -- pattern a on i64 where the bit count is first and-ed
; with 63 before being used as the shift amount.
; Per the assertions below, the shift-based sequences (all i686 variants and
; x86-64 without BMI) contain no separate masking instruction, whereas the
; x86-64 bextrq (BMI1) and bzhiq (BMI2) sequences are expected to keep an
; explicit `andb $63, %sil` in front of the extract instruction.
372define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind {
373; X86-NOBMI-LABEL: bzhi64_a0_masked:
374; X86-NOBMI:       # %bb.0:
375; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
376; X86-NOBMI-NEXT:    movl $1, %eax
377; X86-NOBMI-NEXT:    xorl %edx, %edx
378; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
379; X86-NOBMI-NEXT:    shll %cl, %eax
380; X86-NOBMI-NEXT:    testb $32, %cl
381; X86-NOBMI-NEXT:    je .LBB6_2
382; X86-NOBMI-NEXT:  # %bb.1:
383; X86-NOBMI-NEXT:    movl %eax, %edx
384; X86-NOBMI-NEXT:    xorl %eax, %eax
385; X86-NOBMI-NEXT:  .LBB6_2:
386; X86-NOBMI-NEXT:    addl $-1, %eax
387; X86-NOBMI-NEXT:    adcl $-1, %edx
388; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
389; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
390; X86-NOBMI-NEXT:    retl
391;
392; X86-BMI1-LABEL: bzhi64_a0_masked:
393; X86-BMI1:       # %bb.0:
394; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
395; X86-BMI1-NEXT:    movl $1, %eax
396; X86-BMI1-NEXT:    xorl %edx, %edx
397; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
398; X86-BMI1-NEXT:    shll %cl, %eax
399; X86-BMI1-NEXT:    testb $32, %cl
400; X86-BMI1-NEXT:    je .LBB6_2
401; X86-BMI1-NEXT:  # %bb.1:
402; X86-BMI1-NEXT:    movl %eax, %edx
403; X86-BMI1-NEXT:    xorl %eax, %eax
404; X86-BMI1-NEXT:  .LBB6_2:
405; X86-BMI1-NEXT:    addl $-1, %eax
406; X86-BMI1-NEXT:    adcl $-1, %edx
407; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
408; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
409; X86-BMI1-NEXT:    retl
410;
411; X86-BMI2-LABEL: bzhi64_a0_masked:
412; X86-BMI2:       # %bb.0:
413; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
414; X86-BMI2-NEXT:    movl $1, %eax
415; X86-BMI2-NEXT:    xorl %edx, %edx
416; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
417; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
418; X86-BMI2-NEXT:    testb $32, %cl
419; X86-BMI2-NEXT:    je .LBB6_2
420; X86-BMI2-NEXT:  # %bb.1:
421; X86-BMI2-NEXT:    movl %eax, %edx
422; X86-BMI2-NEXT:    xorl %eax, %eax
423; X86-BMI2-NEXT:  .LBB6_2:
424; X86-BMI2-NEXT:    addl $-1, %eax
425; X86-BMI2-NEXT:    adcl $-1, %edx
426; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
427; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
428; X86-BMI2-NEXT:    retl
429;
430; X64-NOBMI-LABEL: bzhi64_a0_masked:
431; X64-NOBMI:       # %bb.0:
432; X64-NOBMI-NEXT:    movq %rsi, %rcx
433; X64-NOBMI-NEXT:    movl $1, %eax
434; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
435; X64-NOBMI-NEXT:    shlq %cl, %rax
436; X64-NOBMI-NEXT:    decq %rax
437; X64-NOBMI-NEXT:    andq %rdi, %rax
438; X64-NOBMI-NEXT:    retq
439;
440; X64-BMI1-LABEL: bzhi64_a0_masked:
441; X64-BMI1:       # %bb.0:
442; X64-BMI1-NEXT:    andb $63, %sil
443; X64-BMI1-NEXT:    shll $8, %esi
444; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
445; X64-BMI1-NEXT:    retq
446;
447; X64-BMI2-LABEL: bzhi64_a0_masked:
448; X64-BMI2:       # %bb.0:
449; X64-BMI2-NEXT:    andb $63, %sil
450; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
451; X64-BMI2-NEXT:    retq
452  %numlowbits.masked = and i64 %numlowbits, 63 ; clamp the count to 0..63 (bit width - 1)
453  %onebit = shl i64 1, %numlowbits.masked ; 1 << clamped count
454  %mask = add nsw i64 %onebit, -1 ; (1 << clamped count) - 1
455  %masked = and i64 %mask, %val
456  ret i64 %masked
457}
458
; bzhi64_a1_indexzext -- pattern a on i64 where the bit count is passed as a
; zeroext i8 and widened to i64 with zext before the shift.
; Per the assertions below: i686 expects the expanded edx:eax shift /
; addl+adcl / two-andl sequence for every feature set; x86-64 expects
; shlq/decq/andq without BMI, bextrq with BMI1 and bzhiq with BMI2. The BMI
; sequences carry a `# kill` annotation that redefines $esi as $rsi.
459define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
460; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
461; X86-NOBMI:       # %bb.0:
462; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
463; X86-NOBMI-NEXT:    movl $1, %eax
464; X86-NOBMI-NEXT:    xorl %edx, %edx
465; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
466; X86-NOBMI-NEXT:    shll %cl, %eax
467; X86-NOBMI-NEXT:    testb $32, %cl
468; X86-NOBMI-NEXT:    je .LBB7_2
469; X86-NOBMI-NEXT:  # %bb.1:
470; X86-NOBMI-NEXT:    movl %eax, %edx
471; X86-NOBMI-NEXT:    xorl %eax, %eax
472; X86-NOBMI-NEXT:  .LBB7_2:
473; X86-NOBMI-NEXT:    addl $-1, %eax
474; X86-NOBMI-NEXT:    adcl $-1, %edx
475; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
476; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
477; X86-NOBMI-NEXT:    retl
478;
479; X86-BMI1-LABEL: bzhi64_a1_indexzext:
480; X86-BMI1:       # %bb.0:
481; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
482; X86-BMI1-NEXT:    movl $1, %eax
483; X86-BMI1-NEXT:    xorl %edx, %edx
484; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
485; X86-BMI1-NEXT:    shll %cl, %eax
486; X86-BMI1-NEXT:    testb $32, %cl
487; X86-BMI1-NEXT:    je .LBB7_2
488; X86-BMI1-NEXT:  # %bb.1:
489; X86-BMI1-NEXT:    movl %eax, %edx
490; X86-BMI1-NEXT:    xorl %eax, %eax
491; X86-BMI1-NEXT:  .LBB7_2:
492; X86-BMI1-NEXT:    addl $-1, %eax
493; X86-BMI1-NEXT:    adcl $-1, %edx
494; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
495; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
496; X86-BMI1-NEXT:    retl
497;
498; X86-BMI2-LABEL: bzhi64_a1_indexzext:
499; X86-BMI2:       # %bb.0:
500; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
501; X86-BMI2-NEXT:    movl $1, %eax
502; X86-BMI2-NEXT:    xorl %edx, %edx
503; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
504; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
505; X86-BMI2-NEXT:    testb $32, %cl
506; X86-BMI2-NEXT:    je .LBB7_2
507; X86-BMI2-NEXT:  # %bb.1:
508; X86-BMI2-NEXT:    movl %eax, %edx
509; X86-BMI2-NEXT:    xorl %eax, %eax
510; X86-BMI2-NEXT:  .LBB7_2:
511; X86-BMI2-NEXT:    addl $-1, %eax
512; X86-BMI2-NEXT:    adcl $-1, %edx
513; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
514; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
515; X86-BMI2-NEXT:    retl
516;
517; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
518; X64-NOBMI:       # %bb.0:
519; X64-NOBMI-NEXT:    movl %esi, %ecx
520; X64-NOBMI-NEXT:    movl $1, %eax
521; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
522; X64-NOBMI-NEXT:    shlq %cl, %rax
523; X64-NOBMI-NEXT:    decq %rax
524; X64-NOBMI-NEXT:    andq %rdi, %rax
525; X64-NOBMI-NEXT:    retq
526;
527; X64-BMI1-LABEL: bzhi64_a1_indexzext:
528; X64-BMI1:       # %bb.0:
529; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
530; X64-BMI1-NEXT:    shll $8, %esi
531; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
532; X64-BMI1-NEXT:    retq
533;
534; X64-BMI2-LABEL: bzhi64_a1_indexzext:
535; X64-BMI2:       # %bb.0:
536; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
537; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
538; X64-BMI2-NEXT:    retq
539  %conv = zext i8 %numlowbits to i64 ; widen the 8-bit count to the shift width
540  %onebit = shl i64 1, %conv ; 1 << count
541  %mask = add nsw i64 %onebit, -1 ; (1 << count) - 1
542  %masked = and i64 %mask, %val
543  ret i64 %masked
544}
545
; bzhi64_a2_load -- pattern a on i64 where the value to mask is loaded from
; the pointer %w.
; Per the assertions below: i686 keeps the pointer in %esi (saved and
; restored with pushl/popl) and and-s the two mask halves with 4(%esi) and
; (%esi); x86-64 folds the load into the memory operand of andq (no BMI),
; bextrq (BMI1) or bzhiq (BMI2).
546define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
547; X86-NOBMI-LABEL: bzhi64_a2_load:
548; X86-NOBMI:       # %bb.0:
549; X86-NOBMI-NEXT:    pushl %esi
550; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
551; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
552; X86-NOBMI-NEXT:    movl $1, %eax
553; X86-NOBMI-NEXT:    xorl %edx, %edx
554; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
555; X86-NOBMI-NEXT:    shll %cl, %eax
556; X86-NOBMI-NEXT:    testb $32, %cl
557; X86-NOBMI-NEXT:    je .LBB8_2
558; X86-NOBMI-NEXT:  # %bb.1:
559; X86-NOBMI-NEXT:    movl %eax, %edx
560; X86-NOBMI-NEXT:    xorl %eax, %eax
561; X86-NOBMI-NEXT:  .LBB8_2:
562; X86-NOBMI-NEXT:    addl $-1, %eax
563; X86-NOBMI-NEXT:    adcl $-1, %edx
564; X86-NOBMI-NEXT:    andl 4(%esi), %edx
565; X86-NOBMI-NEXT:    andl (%esi), %eax
566; X86-NOBMI-NEXT:    popl %esi
567; X86-NOBMI-NEXT:    retl
568;
569; X86-BMI1-LABEL: bzhi64_a2_load:
570; X86-BMI1:       # %bb.0:
571; X86-BMI1-NEXT:    pushl %esi
572; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
573; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
574; X86-BMI1-NEXT:    movl $1, %eax
575; X86-BMI1-NEXT:    xorl %edx, %edx
576; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
577; X86-BMI1-NEXT:    shll %cl, %eax
578; X86-BMI1-NEXT:    testb $32, %cl
579; X86-BMI1-NEXT:    je .LBB8_2
580; X86-BMI1-NEXT:  # %bb.1:
581; X86-BMI1-NEXT:    movl %eax, %edx
582; X86-BMI1-NEXT:    xorl %eax, %eax
583; X86-BMI1-NEXT:  .LBB8_2:
584; X86-BMI1-NEXT:    addl $-1, %eax
585; X86-BMI1-NEXT:    adcl $-1, %edx
586; X86-BMI1-NEXT:    andl 4(%esi), %edx
587; X86-BMI1-NEXT:    andl (%esi), %eax
588; X86-BMI1-NEXT:    popl %esi
589; X86-BMI1-NEXT:    retl
590;
591; X86-BMI2-LABEL: bzhi64_a2_load:
592; X86-BMI2:       # %bb.0:
593; X86-BMI2-NEXT:    pushl %esi
594; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
595; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
596; X86-BMI2-NEXT:    movl $1, %eax
597; X86-BMI2-NEXT:    xorl %edx, %edx
598; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
599; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
600; X86-BMI2-NEXT:    testb $32, %cl
601; X86-BMI2-NEXT:    je .LBB8_2
602; X86-BMI2-NEXT:  # %bb.1:
603; X86-BMI2-NEXT:    movl %eax, %edx
604; X86-BMI2-NEXT:    xorl %eax, %eax
605; X86-BMI2-NEXT:  .LBB8_2:
606; X86-BMI2-NEXT:    addl $-1, %eax
607; X86-BMI2-NEXT:    adcl $-1, %edx
608; X86-BMI2-NEXT:    andl 4(%esi), %edx
609; X86-BMI2-NEXT:    andl (%esi), %eax
610; X86-BMI2-NEXT:    popl %esi
611; X86-BMI2-NEXT:    retl
612;
613; X64-NOBMI-LABEL: bzhi64_a2_load:
614; X64-NOBMI:       # %bb.0:
615; X64-NOBMI-NEXT:    movq %rsi, %rcx
616; X64-NOBMI-NEXT:    movl $1, %eax
617; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
618; X64-NOBMI-NEXT:    shlq %cl, %rax
619; X64-NOBMI-NEXT:    decq %rax
620; X64-NOBMI-NEXT:    andq (%rdi), %rax
621; X64-NOBMI-NEXT:    retq
622;
623; X64-BMI1-LABEL: bzhi64_a2_load:
624; X64-BMI1:       # %bb.0:
625; X64-BMI1-NEXT:    shll $8, %esi
626; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
627; X64-BMI1-NEXT:    retq
628;
629; X64-BMI2-LABEL: bzhi64_a2_load:
630; X64-BMI2:       # %bb.0:
631; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
632; X64-BMI2-NEXT:    retq
633  %val = load i64, ptr %w ; value to mask comes from memory
634  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
635  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
636  %masked = and i64 %mask, %val
637  ret i64 %masked
638}
639
; bzhi64_a3_load_indexzext -- pattern a on i64 combining both variations:
; the value is loaded from %w and the bit count is a zeroext i8 widened to
; i64 with zext.
; Per the assertions below: i686 expects the expanded edx:eax sequence with
; the mask halves and-ed against 4(%esi) and (%esi); x86-64 expects the load
; folded into andq / bextrq / bzhiq, with a `# kill` annotation redefining
; $esi as $rsi in the two BMI sequences.
640define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
641; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext:
642; X86-NOBMI:       # %bb.0:
643; X86-NOBMI-NEXT:    pushl %esi
644; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
645; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
646; X86-NOBMI-NEXT:    movl $1, %eax
647; X86-NOBMI-NEXT:    xorl %edx, %edx
648; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
649; X86-NOBMI-NEXT:    shll %cl, %eax
650; X86-NOBMI-NEXT:    testb $32, %cl
651; X86-NOBMI-NEXT:    je .LBB9_2
652; X86-NOBMI-NEXT:  # %bb.1:
653; X86-NOBMI-NEXT:    movl %eax, %edx
654; X86-NOBMI-NEXT:    xorl %eax, %eax
655; X86-NOBMI-NEXT:  .LBB9_2:
656; X86-NOBMI-NEXT:    addl $-1, %eax
657; X86-NOBMI-NEXT:    adcl $-1, %edx
658; X86-NOBMI-NEXT:    andl 4(%esi), %edx
659; X86-NOBMI-NEXT:    andl (%esi), %eax
660; X86-NOBMI-NEXT:    popl %esi
661; X86-NOBMI-NEXT:    retl
662;
663; X86-BMI1-LABEL: bzhi64_a3_load_indexzext:
664; X86-BMI1:       # %bb.0:
665; X86-BMI1-NEXT:    pushl %esi
666; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
667; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
668; X86-BMI1-NEXT:    movl $1, %eax
669; X86-BMI1-NEXT:    xorl %edx, %edx
670; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
671; X86-BMI1-NEXT:    shll %cl, %eax
672; X86-BMI1-NEXT:    testb $32, %cl
673; X86-BMI1-NEXT:    je .LBB9_2
674; X86-BMI1-NEXT:  # %bb.1:
675; X86-BMI1-NEXT:    movl %eax, %edx
676; X86-BMI1-NEXT:    xorl %eax, %eax
677; X86-BMI1-NEXT:  .LBB9_2:
678; X86-BMI1-NEXT:    addl $-1, %eax
679; X86-BMI1-NEXT:    adcl $-1, %edx
680; X86-BMI1-NEXT:    andl 4(%esi), %edx
681; X86-BMI1-NEXT:    andl (%esi), %eax
682; X86-BMI1-NEXT:    popl %esi
683; X86-BMI1-NEXT:    retl
684;
685; X86-BMI2-LABEL: bzhi64_a3_load_indexzext:
686; X86-BMI2:       # %bb.0:
687; X86-BMI2-NEXT:    pushl %esi
688; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
689; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
690; X86-BMI2-NEXT:    movl $1, %eax
691; X86-BMI2-NEXT:    xorl %edx, %edx
692; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
693; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
694; X86-BMI2-NEXT:    testb $32, %cl
695; X86-BMI2-NEXT:    je .LBB9_2
696; X86-BMI2-NEXT:  # %bb.1:
697; X86-BMI2-NEXT:    movl %eax, %edx
698; X86-BMI2-NEXT:    xorl %eax, %eax
699; X86-BMI2-NEXT:  .LBB9_2:
700; X86-BMI2-NEXT:    addl $-1, %eax
701; X86-BMI2-NEXT:    adcl $-1, %edx
702; X86-BMI2-NEXT:    andl 4(%esi), %edx
703; X86-BMI2-NEXT:    andl (%esi), %eax
704; X86-BMI2-NEXT:    popl %esi
705; X86-BMI2-NEXT:    retl
706;
707; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
708; X64-NOBMI:       # %bb.0:
709; X64-NOBMI-NEXT:    movl %esi, %ecx
710; X64-NOBMI-NEXT:    movl $1, %eax
711; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
712; X64-NOBMI-NEXT:    shlq %cl, %rax
713; X64-NOBMI-NEXT:    decq %rax
714; X64-NOBMI-NEXT:    andq (%rdi), %rax
715; X64-NOBMI-NEXT:    retq
716;
717; X64-BMI1-LABEL: bzhi64_a3_load_indexzext:
718; X64-BMI1:       # %bb.0:
719; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
720; X64-BMI1-NEXT:    shll $8, %esi
721; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
722; X64-BMI1-NEXT:    retq
723;
724; X64-BMI2-LABEL: bzhi64_a3_load_indexzext:
725; X64-BMI2:       # %bb.0:
726; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
727; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
728; X64-BMI2-NEXT:    retq
729  %val = load i64, ptr %w ; value to mask comes from memory
730  %conv = zext i8 %numlowbits to i64 ; widen the 8-bit count to the shift width
731  %onebit = shl i64 1, %conv ; 1 << count
732  %mask = add nsw i64 %onebit, -1 ; (1 << count) - 1
733  %masked = and i64 %mask, %val
734  ret i64 %masked
735}
736
; bzhi64_a4_commutative -- pattern a on i64 with the operands of the final
; 'and' swapped (%val first, %mask second).
; Per the assertions below: i686 expects the expanded edx:eax shift /
; addl+adcl / two-andl sequence for every feature set; x86-64 expects
; shlq/decq/andq without BMI, bextrq with BMI1 and bzhiq with BMI2.
737define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
738; X86-NOBMI-LABEL: bzhi64_a4_commutative:
739; X86-NOBMI:       # %bb.0:
740; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
741; X86-NOBMI-NEXT:    movl $1, %eax
742; X86-NOBMI-NEXT:    xorl %edx, %edx
743; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
744; X86-NOBMI-NEXT:    shll %cl, %eax
745; X86-NOBMI-NEXT:    testb $32, %cl
746; X86-NOBMI-NEXT:    je .LBB10_2
747; X86-NOBMI-NEXT:  # %bb.1:
748; X86-NOBMI-NEXT:    movl %eax, %edx
749; X86-NOBMI-NEXT:    xorl %eax, %eax
750; X86-NOBMI-NEXT:  .LBB10_2:
751; X86-NOBMI-NEXT:    addl $-1, %eax
752; X86-NOBMI-NEXT:    adcl $-1, %edx
753; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
754; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
755; X86-NOBMI-NEXT:    retl
756;
757; X86-BMI1-LABEL: bzhi64_a4_commutative:
758; X86-BMI1:       # %bb.0:
759; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
760; X86-BMI1-NEXT:    movl $1, %eax
761; X86-BMI1-NEXT:    xorl %edx, %edx
762; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
763; X86-BMI1-NEXT:    shll %cl, %eax
764; X86-BMI1-NEXT:    testb $32, %cl
765; X86-BMI1-NEXT:    je .LBB10_2
766; X86-BMI1-NEXT:  # %bb.1:
767; X86-BMI1-NEXT:    movl %eax, %edx
768; X86-BMI1-NEXT:    xorl %eax, %eax
769; X86-BMI1-NEXT:  .LBB10_2:
770; X86-BMI1-NEXT:    addl $-1, %eax
771; X86-BMI1-NEXT:    adcl $-1, %edx
772; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
773; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
774; X86-BMI1-NEXT:    retl
775;
776; X86-BMI2-LABEL: bzhi64_a4_commutative:
777; X86-BMI2:       # %bb.0:
778; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
779; X86-BMI2-NEXT:    movl $1, %eax
780; X86-BMI2-NEXT:    xorl %edx, %edx
781; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
782; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
783; X86-BMI2-NEXT:    testb $32, %cl
784; X86-BMI2-NEXT:    je .LBB10_2
785; X86-BMI2-NEXT:  # %bb.1:
786; X86-BMI2-NEXT:    movl %eax, %edx
787; X86-BMI2-NEXT:    xorl %eax, %eax
788; X86-BMI2-NEXT:  .LBB10_2:
789; X86-BMI2-NEXT:    addl $-1, %eax
790; X86-BMI2-NEXT:    adcl $-1, %edx
791; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
792; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
793; X86-BMI2-NEXT:    retl
794;
795; X64-NOBMI-LABEL: bzhi64_a4_commutative:
796; X64-NOBMI:       # %bb.0:
797; X64-NOBMI-NEXT:    movq %rsi, %rcx
798; X64-NOBMI-NEXT:    movl $1, %eax
799; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
800; X64-NOBMI-NEXT:    shlq %cl, %rax
801; X64-NOBMI-NEXT:    decq %rax
802; X64-NOBMI-NEXT:    andq %rdi, %rax
803; X64-NOBMI-NEXT:    retq
804;
805; X64-BMI1-LABEL: bzhi64_a4_commutative:
806; X64-BMI1:       # %bb.0:
807; X64-BMI1-NEXT:    shll $8, %esi
808; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
809; X64-BMI1-NEXT:    retq
810;
811; X64-BMI2-LABEL: bzhi64_a4_commutative:
812; X64-BMI2:       # %bb.0:
813; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
814; X64-BMI2-NEXT:    retq
815  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
816  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
817  %masked = and i64 %val, %mask ; swapped order
818  ret i64 %masked
819}
820
821; 64-bit, but with 32-bit output
822
823; Everything done in 64-bit, truncation happens last.
; bzhi64_32_a0 -- pattern a computed entirely in i64 (shl, add, and), with the
; result truncated to i32 only at the very end.
; Per the assertions below: i686 expects just the low 32 bits to be computed
; (a 32-bit shift, zeroed when bit 5 of the count is set, then decl/andl);
; x86-64 without BMI shifts in 64 bits but decrements and masks in 32 bits;
; with BMI1/BMI2 the 32-bit bextrl/bzhil forms are expected.
824define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
825; X86-NOBMI-LABEL: bzhi64_32_a0:
826; X86-NOBMI:       # %bb.0:
827; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
828; X86-NOBMI-NEXT:    movl $1, %edx
829; X86-NOBMI-NEXT:    shll %cl, %edx
830; X86-NOBMI-NEXT:    xorl %eax, %eax
831; X86-NOBMI-NEXT:    testb $32, %cl
832; X86-NOBMI-NEXT:    jne .LBB11_2
833; X86-NOBMI-NEXT:  # %bb.1:
834; X86-NOBMI-NEXT:    movl %edx, %eax
835; X86-NOBMI-NEXT:  .LBB11_2:
836; X86-NOBMI-NEXT:    decl %eax
837; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
838; X86-NOBMI-NEXT:    retl
839;
840; X86-BMI1-LABEL: bzhi64_32_a0:
841; X86-BMI1:       # %bb.0:
842; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
843; X86-BMI1-NEXT:    movl $1, %edx
844; X86-BMI1-NEXT:    shll %cl, %edx
845; X86-BMI1-NEXT:    xorl %eax, %eax
846; X86-BMI1-NEXT:    testb $32, %cl
847; X86-BMI1-NEXT:    jne .LBB11_2
848; X86-BMI1-NEXT:  # %bb.1:
849; X86-BMI1-NEXT:    movl %edx, %eax
850; X86-BMI1-NEXT:  .LBB11_2:
851; X86-BMI1-NEXT:    decl %eax
852; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
853; X86-BMI1-NEXT:    retl
854;
855; X86-BMI2-LABEL: bzhi64_32_a0:
856; X86-BMI2:       # %bb.0:
857; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
858; X86-BMI2-NEXT:    xorl %eax, %eax
859; X86-BMI2-NEXT:    testb $32, %cl
860; X86-BMI2-NEXT:    jne .LBB11_2
861; X86-BMI2-NEXT:  # %bb.1:
862; X86-BMI2-NEXT:    movl $1, %eax
863; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
864; X86-BMI2-NEXT:  .LBB11_2:
865; X86-BMI2-NEXT:    decl %eax
866; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
867; X86-BMI2-NEXT:    retl
868;
869; X64-NOBMI-LABEL: bzhi64_32_a0:
870; X64-NOBMI:       # %bb.0:
871; X64-NOBMI-NEXT:    movq %rsi, %rcx
872; X64-NOBMI-NEXT:    movl $1, %eax
873; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
874; X64-NOBMI-NEXT:    shlq %cl, %rax
875; X64-NOBMI-NEXT:    decl %eax
876; X64-NOBMI-NEXT:    andl %edi, %eax
877; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
878; X64-NOBMI-NEXT:    retq
879;
880; X64-BMI1-LABEL: bzhi64_32_a0:
881; X64-BMI1:       # %bb.0:
882; X64-BMI1-NEXT:    shll $8, %esi
883; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
884; X64-BMI1-NEXT:    retq
885;
886; X64-BMI2-LABEL: bzhi64_32_a0:
887; X64-BMI2:       # %bb.0:
888; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
889; X64-BMI2-NEXT:    retq
890  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits, in 64 bits
891  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1, in 64 bits
892  %masked = and i64 %mask, %val
893  %res = trunc i64 %masked to i32 ; only the low 32 bits are returned
894  ret i32 %res
895}
896
897; Truncation happens first. Shifting and masking are 32-bit.
898define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
899; X86-NOBMI-LABEL: bzhi64_32_a1:
900; X86-NOBMI:       # %bb.0:
901; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
902; X86-NOBMI-NEXT:    movl $1, %eax
903; X86-NOBMI-NEXT:    shll %cl, %eax
904; X86-NOBMI-NEXT:    decl %eax
905; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
906; X86-NOBMI-NEXT:    retl
907;
908; X86-BMI1-LABEL: bzhi64_32_a1:
909; X86-BMI1:       # %bb.0:
910; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
911; X86-BMI1-NEXT:    shll $8, %eax
912; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
913; X86-BMI1-NEXT:    retl
914;
915; X86-BMI2-LABEL: bzhi64_32_a1:
916; X86-BMI2:       # %bb.0:
917; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
918; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
919; X86-BMI2-NEXT:    retl
920;
921; X64-NOBMI-LABEL: bzhi64_32_a1:
922; X64-NOBMI:       # %bb.0:
923; X64-NOBMI-NEXT:    movl %esi, %ecx
924; X64-NOBMI-NEXT:    movl $1, %eax
925; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
926; X64-NOBMI-NEXT:    shll %cl, %eax
927; X64-NOBMI-NEXT:    decl %eax
928; X64-NOBMI-NEXT:    andl %edi, %eax
929; X64-NOBMI-NEXT:    retq
930;
931; X64-BMI1-LABEL: bzhi64_32_a1:
932; X64-BMI1:       # %bb.0:
933; X64-BMI1-NEXT:    shll $8, %esi
934; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
935; X64-BMI1-NEXT:    retq
936;
937; X64-BMI2-LABEL: bzhi64_32_a1:
938; X64-BMI2:       # %bb.0:
939; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
940; X64-BMI2-NEXT:    retq
  ; IR under test: %val is truncated to i32 first; the shift, the mask
  ; (1 << %numlowbits) - 1 and the 'and' are all i32 operations.
941  %truncval = trunc i64 %val to i32
942  %onebit = shl i32 1, %numlowbits
943  %mask = add nsw i32 %onebit, -1
944  %masked = and i32 %mask, %truncval
945  ret i32 %masked
946}
947
948; Truncation happens first (with extra use). Shifting is 32-bit.
949; Masking is 32-bit.
950define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind {
951; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
952; X86-NOBMI:       # %bb.0:
953; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
954; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
955; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
956; X86-NOBMI-NEXT:    movl %edx, (%eax)
957; X86-NOBMI-NEXT:    movl $1, %eax
958; X86-NOBMI-NEXT:    shll %cl, %eax
959; X86-NOBMI-NEXT:    decl %eax
960; X86-NOBMI-NEXT:    andl %edx, %eax
961; X86-NOBMI-NEXT:    retl
962;
963; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
964; X86-BMI1:       # %bb.0:
965; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
966; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
967; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
968; X86-BMI1-NEXT:    movl %ecx, (%edx)
969; X86-BMI1-NEXT:    shll $8, %eax
970; X86-BMI1-NEXT:    bextrl %eax, %ecx, %eax
971; X86-BMI1-NEXT:    retl
972;
973; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
974; X86-BMI2:       # %bb.0:
975; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
976; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
977; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
978; X86-BMI2-NEXT:    movl %ecx, (%edx)
979; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
980; X86-BMI2-NEXT:    retl
981;
982; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
983; X64-NOBMI:       # %bb.0:
984; X64-NOBMI-NEXT:    movl %esi, %ecx
985; X64-NOBMI-NEXT:    movl %edi, (%rdx)
986; X64-NOBMI-NEXT:    movl $1, %eax
987; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
988; X64-NOBMI-NEXT:    shll %cl, %eax
989; X64-NOBMI-NEXT:    decl %eax
990; X64-NOBMI-NEXT:    andl %edi, %eax
991; X64-NOBMI-NEXT:    retq
992;
993; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
994; X64-BMI1:       # %bb.0:
995; X64-BMI1-NEXT:    movl %edi, (%rdx)
996; X64-BMI1-NEXT:    shll $8, %esi
997; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
998; X64-BMI1-NEXT:    retq
999;
1000; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
1001; X64-BMI2:       # %bb.0:
1002; X64-BMI2-NEXT:    movl %edi, (%rdx)
1003; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1004; X64-BMI2-NEXT:    retq
  ; IR under test: %val is truncated to i32 and the truncated value is also
  ; stored through %escape, so the trunc has a second use besides the 'and'
  ; with the i32 mask (1 << %numlowbits) - 1.
1005  %truncval = trunc i64 %val to i32
1006  store i32 %truncval, ptr %escape
1007  %onebit = shl i32 1, %numlowbits
1008  %mask = add nsw i32 %onebit, -1
1009  %masked = and i32 %mask, %truncval
1010  ret i32 %masked
1011}
1012
1013; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit.
1014; Masking is 64-bit. Then truncation.
1015define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
1016; X86-NOBMI-LABEL: bzhi64_32_a2:
1017; X86-NOBMI:       # %bb.0:
1018; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1019; X86-NOBMI-NEXT:    movl $1, %eax
1020; X86-NOBMI-NEXT:    shll %cl, %eax
1021; X86-NOBMI-NEXT:    decl %eax
1022; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1023; X86-NOBMI-NEXT:    retl
1024;
1025; X86-BMI1-LABEL: bzhi64_32_a2:
1026; X86-BMI1:       # %bb.0:
1027; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1028; X86-BMI1-NEXT:    shll $8, %eax
1029; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1030; X86-BMI1-NEXT:    retl
1031;
1032; X86-BMI2-LABEL: bzhi64_32_a2:
1033; X86-BMI2:       # %bb.0:
1034; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1035; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1036; X86-BMI2-NEXT:    retl
1037;
1038; X64-NOBMI-LABEL: bzhi64_32_a2:
1039; X64-NOBMI:       # %bb.0:
1040; X64-NOBMI-NEXT:    movl %esi, %ecx
1041; X64-NOBMI-NEXT:    movl $1, %eax
1042; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1043; X64-NOBMI-NEXT:    shll %cl, %eax
1044; X64-NOBMI-NEXT:    decl %eax
1045; X64-NOBMI-NEXT:    andl %edi, %eax
1046; X64-NOBMI-NEXT:    retq
1047;
1048; X64-BMI1-LABEL: bzhi64_32_a2:
1049; X64-BMI1:       # %bb.0:
1050; X64-BMI1-NEXT:    shll $8, %esi
1051; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1052; X64-BMI1-NEXT:    retq
1053;
1054; X64-BMI2-LABEL: bzhi64_32_a2:
1055; X64-BMI2:       # %bb.0:
1056; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1057; X64-BMI2-NEXT:    retq
  ; IR under test: the mask (1 << %numlowbits) - 1 is built in i32 and
  ; zero-extended to i64; the 'and' with %val is i64 and its result is
  ; truncated to i32.
1058  %onebit = shl i32 1, %numlowbits
1059  %mask = add nsw i32 %onebit, -1
1060  %zextmask = zext i32 %mask to i64
1061  %masked = and i64 %zextmask, %val
1062  %truncmasked = trunc i64 %masked to i32
1063  ret i32 %truncmasked
1064}
1065
1066; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
1067; Masking is 64-bit. Then truncation.
1068define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
1069; X86-NOBMI-LABEL: bzhi64_32_a3:
1070; X86-NOBMI:       # %bb.0:
1071; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1072; X86-NOBMI-NEXT:    movl $1, %edx
1073; X86-NOBMI-NEXT:    shll %cl, %edx
1074; X86-NOBMI-NEXT:    xorl %eax, %eax
1075; X86-NOBMI-NEXT:    testb $32, %cl
1076; X86-NOBMI-NEXT:    jne .LBB15_2
1077; X86-NOBMI-NEXT:  # %bb.1:
1078; X86-NOBMI-NEXT:    movl %edx, %eax
1079; X86-NOBMI-NEXT:  .LBB15_2:
1080; X86-NOBMI-NEXT:    decl %eax
1081; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1082; X86-NOBMI-NEXT:    retl
1083;
1084; X86-BMI1-LABEL: bzhi64_32_a3:
1085; X86-BMI1:       # %bb.0:
1086; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1087; X86-BMI1-NEXT:    movl $1, %edx
1088; X86-BMI1-NEXT:    shll %cl, %edx
1089; X86-BMI1-NEXT:    xorl %eax, %eax
1090; X86-BMI1-NEXT:    testb $32, %cl
1091; X86-BMI1-NEXT:    jne .LBB15_2
1092; X86-BMI1-NEXT:  # %bb.1:
1093; X86-BMI1-NEXT:    movl %edx, %eax
1094; X86-BMI1-NEXT:  .LBB15_2:
1095; X86-BMI1-NEXT:    decl %eax
1096; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
1097; X86-BMI1-NEXT:    retl
1098;
1099; X86-BMI2-LABEL: bzhi64_32_a3:
1100; X86-BMI2:       # %bb.0:
1101; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1102; X86-BMI2-NEXT:    xorl %eax, %eax
1103; X86-BMI2-NEXT:    testb $32, %cl
1104; X86-BMI2-NEXT:    jne .LBB15_2
1105; X86-BMI2-NEXT:  # %bb.1:
1106; X86-BMI2-NEXT:    movl $1, %eax
1107; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1108; X86-BMI2-NEXT:  .LBB15_2:
1109; X86-BMI2-NEXT:    decl %eax
1110; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1111; X86-BMI2-NEXT:    retl
1112;
1113; X64-NOBMI-LABEL: bzhi64_32_a3:
1114; X64-NOBMI:       # %bb.0:
1115; X64-NOBMI-NEXT:    movq %rsi, %rcx
1116; X64-NOBMI-NEXT:    movl $1, %eax
1117; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1118; X64-NOBMI-NEXT:    shlq %cl, %rax
1119; X64-NOBMI-NEXT:    decl %eax
1120; X64-NOBMI-NEXT:    andl %edi, %eax
1121; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1122; X64-NOBMI-NEXT:    retq
1123;
1124; X64-BMI1-LABEL: bzhi64_32_a3:
1125; X64-BMI1:       # %bb.0:
1126; X64-BMI1-NEXT:    shll $8, %esi
1127; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1128; X64-BMI1-NEXT:    retq
1129;
1130; X64-BMI2-LABEL: bzhi64_32_a3:
1131; X64-BMI2:       # %bb.0:
1132; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1133; X64-BMI2-NEXT:    retq
  ; IR under test: the shift is i64 and the mask is formed by adding
  ; 4294967295 (0xFFFFFFFF) instead of -1; the 'and' with %val is i64 and its
  ; result is truncated to i32.
1134  %onebit = shl i64 1, %numlowbits
1135  %mask = add nsw i64 %onebit, 4294967295
1136  %masked = and i64 %mask, %val
1137  %truncmasked = trunc i64 %masked to i32
1138  ret i32 %truncmasked
1139}
1140
1141; ---------------------------------------------------------------------------- ;
1142; Pattern b. 32-bit
1143; ---------------------------------------------------------------------------- ;
1144
1145define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
1146; X86-NOBMI-LABEL: bzhi32_b0:
1147; X86-NOBMI:       # %bb.0:
1148; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1149; X86-NOBMI-NEXT:    movl $-1, %eax
1150; X86-NOBMI-NEXT:    shll %cl, %eax
1151; X86-NOBMI-NEXT:    notl %eax
1152; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1153; X86-NOBMI-NEXT:    retl
1154;
1155; X86-BMI1-LABEL: bzhi32_b0:
1156; X86-BMI1:       # %bb.0:
1157; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1158; X86-BMI1-NEXT:    shll $8, %eax
1159; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1160; X86-BMI1-NEXT:    retl
1161;
1162; X86-BMI2-LABEL: bzhi32_b0:
1163; X86-BMI2:       # %bb.0:
1164; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1165; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1166; X86-BMI2-NEXT:    retl
1167;
1168; X64-NOBMI-LABEL: bzhi32_b0:
1169; X64-NOBMI:       # %bb.0:
1170; X64-NOBMI-NEXT:    movl %esi, %ecx
1171; X64-NOBMI-NEXT:    movl $-1, %eax
1172; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1173; X64-NOBMI-NEXT:    shll %cl, %eax
1174; X64-NOBMI-NEXT:    notl %eax
1175; X64-NOBMI-NEXT:    andl %edi, %eax
1176; X64-NOBMI-NEXT:    retq
1177;
1178; X64-BMI1-LABEL: bzhi32_b0:
1179; X64-BMI1:       # %bb.0:
1180; X64-BMI1-NEXT:    shll $8, %esi
1181; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1182; X64-BMI1-NEXT:    retq
1183;
1184; X64-BMI2-LABEL: bzhi32_b0:
1185; X64-BMI2:       # %bb.0:
1186; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1187; X64-BMI2-NEXT:    retq
  ; IR under test: mask = ~(-1 << %numlowbits), with the 'not' written as an
  ; xor with -1, followed by %mask & %val, all in i32.
1188  %notmask = shl i32 -1, %numlowbits
1189  %mask = xor i32 %notmask, -1
1190  %masked = and i32 %mask, %val
1191  ret i32 %masked
1192}
1193
1194define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
1195; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
1196; X86-NOBMI:       # %bb.0:
1197; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1198; X86-NOBMI-NEXT:    movl $-1, %eax
1199; X86-NOBMI-NEXT:    shll %cl, %eax
1200; X86-NOBMI-NEXT:    notl %eax
1201; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1202; X86-NOBMI-NEXT:    retl
1203;
1204; X86-BMI1-LABEL: bzhi32_b1_indexzext:
1205; X86-BMI1:       # %bb.0:
1206; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1207; X86-BMI1-NEXT:    shll $8, %eax
1208; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1209; X86-BMI1-NEXT:    retl
1210;
1211; X86-BMI2-LABEL: bzhi32_b1_indexzext:
1212; X86-BMI2:       # %bb.0:
1213; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1214; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1215; X86-BMI2-NEXT:    retl
1216;
1217; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
1218; X64-NOBMI:       # %bb.0:
1219; X64-NOBMI-NEXT:    movl %esi, %ecx
1220; X64-NOBMI-NEXT:    movl $-1, %eax
1221; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1222; X64-NOBMI-NEXT:    shll %cl, %eax
1223; X64-NOBMI-NEXT:    notl %eax
1224; X64-NOBMI-NEXT:    andl %edi, %eax
1225; X64-NOBMI-NEXT:    retq
1226;
1227; X64-BMI1-LABEL: bzhi32_b1_indexzext:
1228; X64-BMI1:       # %bb.0:
1229; X64-BMI1-NEXT:    shll $8, %esi
1230; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1231; X64-BMI1-NEXT:    retq
1232;
1233; X64-BMI2-LABEL: bzhi32_b1_indexzext:
1234; X64-BMI2:       # %bb.0:
1235; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1236; X64-BMI2-NEXT:    retq
  ; IR under test: the bit count arrives as a zeroext i8 and is widened to i32
  ; with zext before it is used as the shift amount in
  ; mask = ~(-1 << count); the result is %mask & %val.
1237  %conv = zext i8 %numlowbits to i32
1238  %notmask = shl i32 -1, %conv
1239  %mask = xor i32 %notmask, -1
1240  %masked = and i32 %mask, %val
1241  ret i32 %masked
1242}
1243
1244define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
1245; X86-NOBMI-LABEL: bzhi32_b2_load:
1246; X86-NOBMI:       # %bb.0:
1247; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1248; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1249; X86-NOBMI-NEXT:    movl $-1, %eax
1250; X86-NOBMI-NEXT:    shll %cl, %eax
1251; X86-NOBMI-NEXT:    notl %eax
1252; X86-NOBMI-NEXT:    andl (%edx), %eax
1253; X86-NOBMI-NEXT:    retl
1254;
1255; X86-BMI1-LABEL: bzhi32_b2_load:
1256; X86-BMI1:       # %bb.0:
1257; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1258; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1259; X86-BMI1-NEXT:    shll $8, %ecx
1260; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
1261; X86-BMI1-NEXT:    retl
1262;
1263; X86-BMI2-LABEL: bzhi32_b2_load:
1264; X86-BMI2:       # %bb.0:
1265; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1266; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1267; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1268; X86-BMI2-NEXT:    retl
1269;
1270; X64-NOBMI-LABEL: bzhi32_b2_load:
1271; X64-NOBMI:       # %bb.0:
1272; X64-NOBMI-NEXT:    movl %esi, %ecx
1273; X64-NOBMI-NEXT:    movl $-1, %eax
1274; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1275; X64-NOBMI-NEXT:    shll %cl, %eax
1276; X64-NOBMI-NEXT:    notl %eax
1277; X64-NOBMI-NEXT:    andl (%rdi), %eax
1278; X64-NOBMI-NEXT:    retq
1279;
1280; X64-BMI1-LABEL: bzhi32_b2_load:
1281; X64-BMI1:       # %bb.0:
1282; X64-BMI1-NEXT:    shll $8, %esi
1283; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
1284; X64-BMI1-NEXT:    retq
1285;
1286; X64-BMI2-LABEL: bzhi32_b2_load:
1287; X64-BMI2:       # %bb.0:
1288; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1289; X64-BMI2-NEXT:    retq
  ; IR under test: the i32 value to mask is loaded from %w and then and'ed
  ; with mask = ~(-1 << %numlowbits).
1290  %val = load i32, ptr %w
1291  %notmask = shl i32 -1, %numlowbits
1292  %mask = xor i32 %notmask, -1
1293  %masked = and i32 %mask, %val
1294  ret i32 %masked
1295}
1296
1297define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
1298; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
1299; X86-NOBMI:       # %bb.0:
1300; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1301; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1302; X86-NOBMI-NEXT:    movl $-1, %eax
1303; X86-NOBMI-NEXT:    shll %cl, %eax
1304; X86-NOBMI-NEXT:    notl %eax
1305; X86-NOBMI-NEXT:    andl (%edx), %eax
1306; X86-NOBMI-NEXT:    retl
1307;
1308; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
1309; X86-BMI1:       # %bb.0:
1310; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1311; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1312; X86-BMI1-NEXT:    shll $8, %ecx
1313; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
1314; X86-BMI1-NEXT:    retl
1315;
1316; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
1317; X86-BMI2:       # %bb.0:
1318; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1319; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1320; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1321; X86-BMI2-NEXT:    retl
1322;
1323; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
1324; X64-NOBMI:       # %bb.0:
1325; X64-NOBMI-NEXT:    movl %esi, %ecx
1326; X64-NOBMI-NEXT:    movl $-1, %eax
1327; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1328; X64-NOBMI-NEXT:    shll %cl, %eax
1329; X64-NOBMI-NEXT:    notl %eax
1330; X64-NOBMI-NEXT:    andl (%rdi), %eax
1331; X64-NOBMI-NEXT:    retq
1332;
1333; X64-BMI1-LABEL: bzhi32_b3_load_indexzext:
1334; X64-BMI1:       # %bb.0:
1335; X64-BMI1-NEXT:    shll $8, %esi
1336; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
1337; X64-BMI1-NEXT:    retq
1338;
1339; X64-BMI2-LABEL: bzhi32_b3_load_indexzext:
1340; X64-BMI2:       # %bb.0:
1341; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1342; X64-BMI2-NEXT:    retq
  ; IR under test: the i32 value is loaded from %w and the zeroext i8 bit
  ; count is widened to i32 with zext before it is used as the shift amount
  ; in mask = ~(-1 << count).
1343  %val = load i32, ptr %w
1344  %conv = zext i8 %numlowbits to i32
1345  %notmask = shl i32 -1, %conv
1346  %mask = xor i32 %notmask, -1
1347  %masked = and i32 %mask, %val
1348  ret i32 %masked
1349}
1350
1351define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
1352; X86-NOBMI-LABEL: bzhi32_b4_commutative:
1353; X86-NOBMI:       # %bb.0:
1354; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1355; X86-NOBMI-NEXT:    movl $-1, %eax
1356; X86-NOBMI-NEXT:    shll %cl, %eax
1357; X86-NOBMI-NEXT:    notl %eax
1358; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1359; X86-NOBMI-NEXT:    retl
1360;
1361; X86-BMI1-LABEL: bzhi32_b4_commutative:
1362; X86-BMI1:       # %bb.0:
1363; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1364; X86-BMI1-NEXT:    shll $8, %eax
1365; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1366; X86-BMI1-NEXT:    retl
1367;
1368; X86-BMI2-LABEL: bzhi32_b4_commutative:
1369; X86-BMI2:       # %bb.0:
1370; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1371; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1372; X86-BMI2-NEXT:    retl
1373;
1374; X64-NOBMI-LABEL: bzhi32_b4_commutative:
1375; X64-NOBMI:       # %bb.0:
1376; X64-NOBMI-NEXT:    movl %esi, %ecx
1377; X64-NOBMI-NEXT:    movl $-1, %eax
1378; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1379; X64-NOBMI-NEXT:    shll %cl, %eax
1380; X64-NOBMI-NEXT:    notl %eax
1381; X64-NOBMI-NEXT:    andl %edi, %eax
1382; X64-NOBMI-NEXT:    retq
1383;
1384; X64-BMI1-LABEL: bzhi32_b4_commutative:
1385; X64-BMI1:       # %bb.0:
1386; X64-BMI1-NEXT:    shll $8, %esi
1387; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1388; X64-BMI1-NEXT:    retq
1389;
1390; X64-BMI2-LABEL: bzhi32_b4_commutative:
1391; X64-BMI2:       # %bb.0:
1392; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1393; X64-BMI2-NEXT:    retq
  ; IR under test: mask = ~(-1 << %numlowbits) in i32, with the operands of
  ; the final 'and' in the order (%val, %mask).
1394  %notmask = shl i32 -1, %numlowbits
1395  %mask = xor i32 %notmask, -1
1396  %masked = and i32 %val, %mask ; swapped order
1397  ret i32 %masked
1398}
1399
1400; 64-bit
1401
1402define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
1403; X86-NOBMI-LABEL: bzhi64_b0:
1404; X86-NOBMI:       # %bb.0:
1405; X86-NOBMI-NEXT:    pushl %esi
1406; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1407; X86-NOBMI-NEXT:    movl $-1, %edx
1408; X86-NOBMI-NEXT:    movl $-1, %esi
1409; X86-NOBMI-NEXT:    shll %cl, %esi
1410; X86-NOBMI-NEXT:    xorl %eax, %eax
1411; X86-NOBMI-NEXT:    testb $32, %cl
1412; X86-NOBMI-NEXT:    jne .LBB21_1
1413; X86-NOBMI-NEXT:  # %bb.2:
1414; X86-NOBMI-NEXT:    movl %esi, %eax
1415; X86-NOBMI-NEXT:    jmp .LBB21_3
1416; X86-NOBMI-NEXT:  .LBB21_1:
1417; X86-NOBMI-NEXT:    movl %esi, %edx
1418; X86-NOBMI-NEXT:  .LBB21_3:
1419; X86-NOBMI-NEXT:    notl %edx
1420; X86-NOBMI-NEXT:    notl %eax
1421; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1422; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1423; X86-NOBMI-NEXT:    popl %esi
1424; X86-NOBMI-NEXT:    retl
1425;
1426; X86-BMI1-LABEL: bzhi64_b0:
1427; X86-BMI1:       # %bb.0:
1428; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1429; X86-BMI1-NEXT:    movl $-1, %edx
1430; X86-BMI1-NEXT:    movl $-1, %eax
1431; X86-BMI1-NEXT:    shll %cl, %eax
1432; X86-BMI1-NEXT:    testb $32, %cl
1433; X86-BMI1-NEXT:    je .LBB21_2
1434; X86-BMI1-NEXT:  # %bb.1:
1435; X86-BMI1-NEXT:    movl %eax, %edx
1436; X86-BMI1-NEXT:    xorl %eax, %eax
1437; X86-BMI1-NEXT:  .LBB21_2:
1438; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1439; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1440; X86-BMI1-NEXT:    retl
1441;
1442; X86-BMI2-LABEL: bzhi64_b0:
1443; X86-BMI2:       # %bb.0:
1444; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
1445; X86-BMI2-NEXT:    movl $-1, %ecx
1446; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1447; X86-BMI2-NEXT:    testb $32, %dl
1448; X86-BMI2-NEXT:    je .LBB21_2
1449; X86-BMI2-NEXT:  # %bb.1:
1450; X86-BMI2-NEXT:    movl %eax, %ecx
1451; X86-BMI2-NEXT:    xorl %eax, %eax
1452; X86-BMI2-NEXT:  .LBB21_2:
1453; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1454; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1455; X86-BMI2-NEXT:    retl
1456;
1457; X64-NOBMI-LABEL: bzhi64_b0:
1458; X64-NOBMI:       # %bb.0:
1459; X64-NOBMI-NEXT:    movq %rsi, %rcx
1460; X64-NOBMI-NEXT:    movq $-1, %rax
1461; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1462; X64-NOBMI-NEXT:    shlq %cl, %rax
1463; X64-NOBMI-NEXT:    notq %rax
1464; X64-NOBMI-NEXT:    andq %rdi, %rax
1465; X64-NOBMI-NEXT:    retq
1466;
1467; X64-BMI1-LABEL: bzhi64_b0:
1468; X64-BMI1:       # %bb.0:
1469; X64-BMI1-NEXT:    shll $8, %esi
1470; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1471; X64-BMI1-NEXT:    retq
1472;
1473; X64-BMI2-LABEL: bzhi64_b0:
1474; X64-BMI2:       # %bb.0:
1475; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1476; X64-BMI2-NEXT:    retq
  ; IR under test: mask = ~(-1 << %numlowbits), with the 'not' written as an
  ; xor with -1, followed by %mask & %val, all in i64.
1477  %notmask = shl i64 -1, %numlowbits
1478  %mask = xor i64 %notmask, -1
1479  %masked = and i64 %mask, %val
1480  ret i64 %masked
1481}
1482
1483define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
1484; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
1485; X86-NOBMI:       # %bb.0:
1486; X86-NOBMI-NEXT:    pushl %esi
1487; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1488; X86-NOBMI-NEXT:    movl $-1, %edx
1489; X86-NOBMI-NEXT:    movl $-1, %esi
1490; X86-NOBMI-NEXT:    shll %cl, %esi
1491; X86-NOBMI-NEXT:    xorl %eax, %eax
1492; X86-NOBMI-NEXT:    testb $32, %cl
1493; X86-NOBMI-NEXT:    jne .LBB22_1
1494; X86-NOBMI-NEXT:  # %bb.2:
1495; X86-NOBMI-NEXT:    movl %esi, %eax
1496; X86-NOBMI-NEXT:    jmp .LBB22_3
1497; X86-NOBMI-NEXT:  .LBB22_1:
1498; X86-NOBMI-NEXT:    movl %esi, %edx
1499; X86-NOBMI-NEXT:  .LBB22_3:
1500; X86-NOBMI-NEXT:    notl %edx
1501; X86-NOBMI-NEXT:    notl %eax
1502; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1503; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1504; X86-NOBMI-NEXT:    popl %esi
1505; X86-NOBMI-NEXT:    retl
1506;
1507; X86-BMI1-LABEL: bzhi64_b1_indexzext:
1508; X86-BMI1:       # %bb.0:
1509; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1510; X86-BMI1-NEXT:    movl $-1, %edx
1511; X86-BMI1-NEXT:    movl $-1, %eax
1512; X86-BMI1-NEXT:    shll %cl, %eax
1513; X86-BMI1-NEXT:    testb $32, %cl
1514; X86-BMI1-NEXT:    je .LBB22_2
1515; X86-BMI1-NEXT:  # %bb.1:
1516; X86-BMI1-NEXT:    movl %eax, %edx
1517; X86-BMI1-NEXT:    xorl %eax, %eax
1518; X86-BMI1-NEXT:  .LBB22_2:
1519; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1520; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1521; X86-BMI1-NEXT:    retl
1522;
1523; X86-BMI2-LABEL: bzhi64_b1_indexzext:
1524; X86-BMI2:       # %bb.0:
1525; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
1526; X86-BMI2-NEXT:    movl $-1, %ecx
1527; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1528; X86-BMI2-NEXT:    testb $32, %dl
1529; X86-BMI2-NEXT:    je .LBB22_2
1530; X86-BMI2-NEXT:  # %bb.1:
1531; X86-BMI2-NEXT:    movl %eax, %ecx
1532; X86-BMI2-NEXT:    xorl %eax, %eax
1533; X86-BMI2-NEXT:  .LBB22_2:
1534; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1535; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1536; X86-BMI2-NEXT:    retl
1537;
1538; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
1539; X64-NOBMI:       # %bb.0:
1540; X64-NOBMI-NEXT:    movl %esi, %ecx
1541; X64-NOBMI-NEXT:    movq $-1, %rax
1542; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1543; X64-NOBMI-NEXT:    shlq %cl, %rax
1544; X64-NOBMI-NEXT:    notq %rax
1545; X64-NOBMI-NEXT:    andq %rdi, %rax
1546; X64-NOBMI-NEXT:    retq
1547;
1548; X64-BMI1-LABEL: bzhi64_b1_indexzext:
1549; X64-BMI1:       # %bb.0:
1550; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
1551; X64-BMI1-NEXT:    shll $8, %esi
1552; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1553; X64-BMI1-NEXT:    retq
1554;
1555; X64-BMI2-LABEL: bzhi64_b1_indexzext:
1556; X64-BMI2:       # %bb.0:
1557; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1558; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1559; X64-BMI2-NEXT:    retq
  ; IR under test: the bit count arrives as a zeroext i8 and is widened to i64
  ; with zext before it is used as the shift amount in
  ; mask = ~(-1 << count); the result is %mask & %val in i64.
1560  %conv = zext i8 %numlowbits to i64
1561  %notmask = shl i64 -1, %conv
1562  %mask = xor i64 %notmask, -1
1563  %masked = and i64 %mask, %val
1564  ret i64 %masked
1565}
1566
1567define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
1568; X86-NOBMI-LABEL: bzhi64_b2_load:
1569; X86-NOBMI:       # %bb.0:
1570; X86-NOBMI-NEXT:    pushl %edi
1571; X86-NOBMI-NEXT:    pushl %esi
1572; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1573; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1574; X86-NOBMI-NEXT:    movl $-1, %edx
1575; X86-NOBMI-NEXT:    movl $-1, %edi
1576; X86-NOBMI-NEXT:    shll %cl, %edi
1577; X86-NOBMI-NEXT:    xorl %eax, %eax
1578; X86-NOBMI-NEXT:    testb $32, %cl
1579; X86-NOBMI-NEXT:    jne .LBB23_1
1580; X86-NOBMI-NEXT:  # %bb.2:
1581; X86-NOBMI-NEXT:    movl %edi, %eax
1582; X86-NOBMI-NEXT:    jmp .LBB23_3
1583; X86-NOBMI-NEXT:  .LBB23_1:
1584; X86-NOBMI-NEXT:    movl %edi, %edx
1585; X86-NOBMI-NEXT:  .LBB23_3:
1586; X86-NOBMI-NEXT:    notl %edx
1587; X86-NOBMI-NEXT:    notl %eax
1588; X86-NOBMI-NEXT:    andl (%esi), %eax
1589; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1590; X86-NOBMI-NEXT:    popl %esi
1591; X86-NOBMI-NEXT:    popl %edi
1592; X86-NOBMI-NEXT:    retl
1593;
1594; X86-BMI1-LABEL: bzhi64_b2_load:
1595; X86-BMI1:       # %bb.0:
1596; X86-BMI1-NEXT:    pushl %esi
1597; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1598; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1599; X86-BMI1-NEXT:    movl $-1, %esi
1600; X86-BMI1-NEXT:    movl $-1, %eax
1601; X86-BMI1-NEXT:    shll %cl, %eax
1602; X86-BMI1-NEXT:    testb $32, %cl
1603; X86-BMI1-NEXT:    je .LBB23_2
1604; X86-BMI1-NEXT:  # %bb.1:
1605; X86-BMI1-NEXT:    movl %eax, %esi
1606; X86-BMI1-NEXT:    xorl %eax, %eax
1607; X86-BMI1-NEXT:  .LBB23_2:
1608; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
1609; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
1610; X86-BMI1-NEXT:    popl %esi
1611; X86-BMI1-NEXT:    retl
1612;
1613; X86-BMI2-LABEL: bzhi64_b2_load:
1614; X86-BMI2:       # %bb.0:
1615; X86-BMI2-NEXT:    pushl %ebx
1616; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1617; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
1618; X86-BMI2-NEXT:    movl $-1, %edx
1619; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
1620; X86-BMI2-NEXT:    testb $32, %bl
1621; X86-BMI2-NEXT:    je .LBB23_2
1622; X86-BMI2-NEXT:  # %bb.1:
1623; X86-BMI2-NEXT:    movl %eax, %edx
1624; X86-BMI2-NEXT:    xorl %eax, %eax
1625; X86-BMI2-NEXT:  .LBB23_2:
1626; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
1627; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
1628; X86-BMI2-NEXT:    popl %ebx
1629; X86-BMI2-NEXT:    retl
1630;
1631; X64-NOBMI-LABEL: bzhi64_b2_load:
1632; X64-NOBMI:       # %bb.0:
1633; X64-NOBMI-NEXT:    movq %rsi, %rcx
1634; X64-NOBMI-NEXT:    movq $-1, %rax
1635; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1636; X64-NOBMI-NEXT:    shlq %cl, %rax
1637; X64-NOBMI-NEXT:    notq %rax
1638; X64-NOBMI-NEXT:    andq (%rdi), %rax
1639; X64-NOBMI-NEXT:    retq
1640;
1641; X64-BMI1-LABEL: bzhi64_b2_load:
1642; X64-BMI1:       # %bb.0:
1643; X64-BMI1-NEXT:    shll $8, %esi
1644; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
1645; X64-BMI1-NEXT:    retq
1646;
1647; X64-BMI2-LABEL: bzhi64_b2_load:
1648; X64-BMI2:       # %bb.0:
1649; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1650; X64-BMI2-NEXT:    retq
  ; IR under test: the i64 value to mask is loaded from %w and then and'ed
  ; with mask = ~(-1 << %numlowbits).
1651  %val = load i64, ptr %w
1652  %notmask = shl i64 -1, %numlowbits
1653  %mask = xor i64 %notmask, -1
1654  %masked = and i64 %mask, %val
1655  ret i64 %masked
1656}
1657
1658define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
1659; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
1660; X86-NOBMI:       # %bb.0:
1661; X86-NOBMI-NEXT:    pushl %edi
1662; X86-NOBMI-NEXT:    pushl %esi
1663; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1664; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1665; X86-NOBMI-NEXT:    movl $-1, %edx
1666; X86-NOBMI-NEXT:    movl $-1, %edi
1667; X86-NOBMI-NEXT:    shll %cl, %edi
1668; X86-NOBMI-NEXT:    xorl %eax, %eax
1669; X86-NOBMI-NEXT:    testb $32, %cl
1670; X86-NOBMI-NEXT:    jne .LBB24_1
1671; X86-NOBMI-NEXT:  # %bb.2:
1672; X86-NOBMI-NEXT:    movl %edi, %eax
1673; X86-NOBMI-NEXT:    jmp .LBB24_3
1674; X86-NOBMI-NEXT:  .LBB24_1:
1675; X86-NOBMI-NEXT:    movl %edi, %edx
1676; X86-NOBMI-NEXT:  .LBB24_3:
1677; X86-NOBMI-NEXT:    notl %edx
1678; X86-NOBMI-NEXT:    notl %eax
1679; X86-NOBMI-NEXT:    andl (%esi), %eax
1680; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1681; X86-NOBMI-NEXT:    popl %esi
1682; X86-NOBMI-NEXT:    popl %edi
1683; X86-NOBMI-NEXT:    retl
1684;
1685; X86-BMI1-LABEL: bzhi64_b3_load_indexzext:
1686; X86-BMI1:       # %bb.0:
1687; X86-BMI1-NEXT:    pushl %esi
1688; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1689; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1690; X86-BMI1-NEXT:    movl $-1, %esi
1691; X86-BMI1-NEXT:    movl $-1, %eax
1692; X86-BMI1-NEXT:    shll %cl, %eax
1693; X86-BMI1-NEXT:    testb $32, %cl
1694; X86-BMI1-NEXT:    je .LBB24_2
1695; X86-BMI1-NEXT:  # %bb.1:
1696; X86-BMI1-NEXT:    movl %eax, %esi
1697; X86-BMI1-NEXT:    xorl %eax, %eax
1698; X86-BMI1-NEXT:  .LBB24_2:
1699; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
1700; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
1701; X86-BMI1-NEXT:    popl %esi
1702; X86-BMI1-NEXT:    retl
1703;
1704; X86-BMI2-LABEL: bzhi64_b3_load_indexzext:
1705; X86-BMI2:       # %bb.0:
1706; X86-BMI2-NEXT:    pushl %ebx
1707; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1708; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
1709; X86-BMI2-NEXT:    movl $-1, %edx
1710; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
1711; X86-BMI2-NEXT:    testb $32, %bl
1712; X86-BMI2-NEXT:    je .LBB24_2
1713; X86-BMI2-NEXT:  # %bb.1:
1714; X86-BMI2-NEXT:    movl %eax, %edx
1715; X86-BMI2-NEXT:    xorl %eax, %eax
1716; X86-BMI2-NEXT:  .LBB24_2:
1717; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
1718; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
1719; X86-BMI2-NEXT:    popl %ebx
1720; X86-BMI2-NEXT:    retl
1721;
1722; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
1723; X64-NOBMI:       # %bb.0:
1724; X64-NOBMI-NEXT:    movl %esi, %ecx
1725; X64-NOBMI-NEXT:    movq $-1, %rax
1726; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1727; X64-NOBMI-NEXT:    shlq %cl, %rax
1728; X64-NOBMI-NEXT:    notq %rax
1729; X64-NOBMI-NEXT:    andq (%rdi), %rax
1730; X64-NOBMI-NEXT:    retq
1731;
1732; X64-BMI1-LABEL: bzhi64_b3_load_indexzext:
1733; X64-BMI1:       # %bb.0:
1734; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
1735; X64-BMI1-NEXT:    shll $8, %esi
1736; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
1737; X64-BMI1-NEXT:    retq
1738;
1739; X64-BMI2-LABEL: bzhi64_b3_load_indexzext:
1740; X64-BMI2:       # %bb.0:
1741; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1742; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1743; X64-BMI2-NEXT:    retq
  ; IR under test: the i64 value is loaded from %w and the zeroext i8 bit
  ; count is widened to i64 with zext before it is used as the shift amount
  ; in mask = ~(-1 << count).
1744  %val = load i64, ptr %w
1745  %conv = zext i8 %numlowbits to i64
1746  %notmask = shl i64 -1, %conv
1747  %mask = xor i64 %notmask, -1
1748  %masked = and i64 %mask, %val
1749  ret i64 %masked
1750}
1751
; Pattern b, 64-bit: computes %val & ~(-1 << %numlowbits).
; The 'and' takes %val as its first operand (marked "swapped order" below).
1752define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
1753; X86-NOBMI-LABEL: bzhi64_b4_commutative:
1754; X86-NOBMI:       # %bb.0:
1755; X86-NOBMI-NEXT:    pushl %esi
1756; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1757; X86-NOBMI-NEXT:    movl $-1, %edx
1758; X86-NOBMI-NEXT:    movl $-1, %esi
1759; X86-NOBMI-NEXT:    shll %cl, %esi
1760; X86-NOBMI-NEXT:    xorl %eax, %eax
1761; X86-NOBMI-NEXT:    testb $32, %cl
1762; X86-NOBMI-NEXT:    jne .LBB25_1
1763; X86-NOBMI-NEXT:  # %bb.2:
1764; X86-NOBMI-NEXT:    movl %esi, %eax
1765; X86-NOBMI-NEXT:    jmp .LBB25_3
1766; X86-NOBMI-NEXT:  .LBB25_1:
1767; X86-NOBMI-NEXT:    movl %esi, %edx
1768; X86-NOBMI-NEXT:  .LBB25_3:
1769; X86-NOBMI-NEXT:    notl %edx
1770; X86-NOBMI-NEXT:    notl %eax
1771; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1772; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1773; X86-NOBMI-NEXT:    popl %esi
1774; X86-NOBMI-NEXT:    retl
1775;
1776; X86-BMI1-LABEL: bzhi64_b4_commutative:
1777; X86-BMI1:       # %bb.0:
1778; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1779; X86-BMI1-NEXT:    movl $-1, %edx
1780; X86-BMI1-NEXT:    movl $-1, %eax
1781; X86-BMI1-NEXT:    shll %cl, %eax
1782; X86-BMI1-NEXT:    testb $32, %cl
1783; X86-BMI1-NEXT:    je .LBB25_2
1784; X86-BMI1-NEXT:  # %bb.1:
1785; X86-BMI1-NEXT:    movl %eax, %edx
1786; X86-BMI1-NEXT:    xorl %eax, %eax
1787; X86-BMI1-NEXT:  .LBB25_2:
1788; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1789; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1790; X86-BMI1-NEXT:    retl
1791;
1792; X86-BMI2-LABEL: bzhi64_b4_commutative:
1793; X86-BMI2:       # %bb.0:
1794; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
1795; X86-BMI2-NEXT:    movl $-1, %ecx
1796; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1797; X86-BMI2-NEXT:    testb $32, %dl
1798; X86-BMI2-NEXT:    je .LBB25_2
1799; X86-BMI2-NEXT:  # %bb.1:
1800; X86-BMI2-NEXT:    movl %eax, %ecx
1801; X86-BMI2-NEXT:    xorl %eax, %eax
1802; X86-BMI2-NEXT:  .LBB25_2:
1803; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1804; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1805; X86-BMI2-NEXT:    retl
1806;
1807; X64-NOBMI-LABEL: bzhi64_b4_commutative:
1808; X64-NOBMI:       # %bb.0:
1809; X64-NOBMI-NEXT:    movq %rsi, %rcx
1810; X64-NOBMI-NEXT:    movq $-1, %rax
1811; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1812; X64-NOBMI-NEXT:    shlq %cl, %rax
1813; X64-NOBMI-NEXT:    notq %rax
1814; X64-NOBMI-NEXT:    andq %rdi, %rax
1815; X64-NOBMI-NEXT:    retq
1816;
1817; X64-BMI1-LABEL: bzhi64_b4_commutative:
1818; X64-BMI1:       # %bb.0:
1819; X64-BMI1-NEXT:    shll $8, %esi
1820; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1821; X64-BMI1-NEXT:    retq
1822;
1823; X64-BMI2-LABEL: bzhi64_b4_commutative:
1824; X64-BMI2:       # %bb.0:
1825; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1826; X64-BMI2-NEXT:    retq
; IR under test: shl builds the inverted mask, xor with -1 flips it, then 'and'.
1827  %notmask = shl i64 -1, %numlowbits
1828  %mask = xor i64 %notmask, -1
1829  %masked = and i64 %val, %mask ; swapped order
1830  ret i64 %masked
1831}
1832
1833; 64-bit, but with 32-bit output
1834
1835; Everything done in 64-bit, truncation happens last.
; The i8 bit count is zero-extended to i64; the mask ~(-1 << n) and the 'and'
; are both i64, and the result is truncated to i32 as the final step.
1836define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
1837; X86-NOBMI-LABEL: bzhi64_32_b0:
1838; X86-NOBMI:       # %bb.0:
1839; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1840; X86-NOBMI-NEXT:    movl $-1, %edx
1841; X86-NOBMI-NEXT:    shll %cl, %edx
1842; X86-NOBMI-NEXT:    xorl %eax, %eax
1843; X86-NOBMI-NEXT:    testb $32, %cl
1844; X86-NOBMI-NEXT:    jne .LBB26_2
1845; X86-NOBMI-NEXT:  # %bb.1:
1846; X86-NOBMI-NEXT:    movl %edx, %eax
1847; X86-NOBMI-NEXT:  .LBB26_2:
1848; X86-NOBMI-NEXT:    notl %eax
1849; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1850; X86-NOBMI-NEXT:    retl
1851;
1852; X86-BMI1-LABEL: bzhi64_32_b0:
1853; X86-BMI1:       # %bb.0:
1854; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1855; X86-BMI1-NEXT:    movl $-1, %eax
1856; X86-BMI1-NEXT:    shll %cl, %eax
1857; X86-BMI1-NEXT:    xorl %edx, %edx
1858; X86-BMI1-NEXT:    testb $32, %cl
1859; X86-BMI1-NEXT:    jne .LBB26_2
1860; X86-BMI1-NEXT:  # %bb.1:
1861; X86-BMI1-NEXT:    movl %eax, %edx
1862; X86-BMI1-NEXT:  .LBB26_2:
1863; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
1864; X86-BMI1-NEXT:    retl
1865;
1866; X86-BMI2-LABEL: bzhi64_32_b0:
1867; X86-BMI2:       # %bb.0:
1868; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1869; X86-BMI2-NEXT:    xorl %ecx, %ecx
1870; X86-BMI2-NEXT:    testb $32, %al
1871; X86-BMI2-NEXT:    jne .LBB26_2
1872; X86-BMI2-NEXT:  # %bb.1:
1873; X86-BMI2-NEXT:    movl $-1, %ecx
1874; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
1875; X86-BMI2-NEXT:  .LBB26_2:
1876; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
1877; X86-BMI2-NEXT:    retl
1878;
1879; X64-NOBMI-LABEL: bzhi64_32_b0:
1880; X64-NOBMI:       # %bb.0:
1881; X64-NOBMI-NEXT:    movl %esi, %ecx
1882; X64-NOBMI-NEXT:    movq $-1, %rax
1883; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1884; X64-NOBMI-NEXT:    shlq %cl, %rax
1885; X64-NOBMI-NEXT:    notl %eax
1886; X64-NOBMI-NEXT:    andl %edi, %eax
1887; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1888; X64-NOBMI-NEXT:    retq
1889;
1890; X64-BMI1-LABEL: bzhi64_32_b0:
1891; X64-BMI1:       # %bb.0:
1892; X64-BMI1-NEXT:    shll $8, %esi
1893; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1894; X64-BMI1-NEXT:    retq
1895;
1896; X64-BMI2-LABEL: bzhi64_32_b0:
1897; X64-BMI2:       # %bb.0:
1898; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1899; X64-BMI2-NEXT:    retq
; IR under test: widen the count, mask in i64, truncate the masked value last.
1900  %widenumlowbits = zext i8 %numlowbits to i64
1901  %notmask = shl nsw i64 -1, %widenumlowbits
1902  %mask = xor i64 %notmask, -1
1903  %wideres = and i64 %val, %mask
1904  %res = trunc i64 %wideres to i32
1905  ret i32 %res
1906}
1907
1908; The value is truncated to 32-bit first. Shifting and masking are 32-bit.
; %val is truncated to i32 up front; the bit count is zero-extended to i32 and
; the shl, xor and 'and' all operate on i32.
1909define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
1910; X86-NOBMI-LABEL: bzhi64_32_b1:
1911; X86-NOBMI:       # %bb.0:
1912; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1913; X86-NOBMI-NEXT:    movl $-1, %eax
1914; X86-NOBMI-NEXT:    shll %cl, %eax
1915; X86-NOBMI-NEXT:    notl %eax
1916; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1917; X86-NOBMI-NEXT:    retl
1918;
1919; X86-BMI1-LABEL: bzhi64_32_b1:
1920; X86-BMI1:       # %bb.0:
1921; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1922; X86-BMI1-NEXT:    shll $8, %eax
1923; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1924; X86-BMI1-NEXT:    retl
1925;
1926; X86-BMI2-LABEL: bzhi64_32_b1:
1927; X86-BMI2:       # %bb.0:
1928; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1929; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1930; X86-BMI2-NEXT:    retl
1931;
1932; X64-NOBMI-LABEL: bzhi64_32_b1:
1933; X64-NOBMI:       # %bb.0:
1934; X64-NOBMI-NEXT:    movl %esi, %ecx
1935; X64-NOBMI-NEXT:    movl $-1, %eax
1936; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1937; X64-NOBMI-NEXT:    shll %cl, %eax
1938; X64-NOBMI-NEXT:    notl %eax
1939; X64-NOBMI-NEXT:    andl %edi, %eax
1940; X64-NOBMI-NEXT:    retq
1941;
1942; X64-BMI1-LABEL: bzhi64_32_b1:
1943; X64-BMI1:       # %bb.0:
1944; X64-BMI1-NEXT:    shll $8, %esi
1945; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1946; X64-BMI1-NEXT:    retq
1947;
1948; X64-BMI2-LABEL: bzhi64_32_b1:
1949; X64-BMI2:       # %bb.0:
1950; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1951; X64-BMI2-NEXT:    retq
; IR under test: truncate the value first, then build and apply the mask in i32.
1952  %truncval = trunc i64 %val to i32
1953  %widenumlowbits = zext i8 %numlowbits to i32
1954  %notmask = shl nsw i32 -1, %widenumlowbits
1955  %mask = xor i32 %notmask, -1
1956  %res = and i32 %truncval, %mask
1957  ret i32 %res
1958}
1959
1960; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit.
1961; Masking is 64-bit. Then truncation.
; The mask ~(-1 << n) is built in i32, zero-extended to i64, and-ed with the
; i64 %val, and the result is truncated to i32.
1962define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
1963; X86-NOBMI-LABEL: bzhi64_32_b2:
1964; X86-NOBMI:       # %bb.0:
1965; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1966; X86-NOBMI-NEXT:    movl $-1, %eax
1967; X86-NOBMI-NEXT:    shll %cl, %eax
1968; X86-NOBMI-NEXT:    notl %eax
1969; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1970; X86-NOBMI-NEXT:    retl
1971;
1972; X86-BMI1-LABEL: bzhi64_32_b2:
1973; X86-BMI1:       # %bb.0:
1974; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1975; X86-BMI1-NEXT:    shll $8, %eax
1976; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1977; X86-BMI1-NEXT:    retl
1978;
1979; X86-BMI2-LABEL: bzhi64_32_b2:
1980; X86-BMI2:       # %bb.0:
1981; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1982; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1983; X86-BMI2-NEXT:    retl
1984;
1985; X64-NOBMI-LABEL: bzhi64_32_b2:
1986; X64-NOBMI:       # %bb.0:
1987; X64-NOBMI-NEXT:    movl %esi, %ecx
1988; X64-NOBMI-NEXT:    movl $-1, %eax
1989; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1990; X64-NOBMI-NEXT:    shll %cl, %eax
1991; X64-NOBMI-NEXT:    notl %eax
1992; X64-NOBMI-NEXT:    andl %edi, %eax
1993; X64-NOBMI-NEXT:    retq
1994;
1995; X64-BMI1-LABEL: bzhi64_32_b2:
1996; X64-BMI1:       # %bb.0:
1997; X64-BMI1-NEXT:    shll $8, %esi
1998; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1999; X64-BMI1-NEXT:    retq
2000;
2001; X64-BMI2-LABEL: bzhi64_32_b2:
2002; X64-BMI2:       # %bb.0:
2003; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2004; X64-BMI2-NEXT:    retq
; IR under test: i32 mask, widened with zext, applied in i64, then truncated.
2005  %widenumlowbits = zext i8 %numlowbits to i32
2006  %notmask = shl nsw i32 -1, %widenumlowbits
2007  %mask = xor i32 %notmask, -1
2008  %zextmask = zext i32 %mask to i64
2009  %wideres = and i64 %val, %zextmask
2010  %res = trunc i64 %wideres to i32
2011  ret i32 %res
2012}
2013
2014; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
2015; Masking is 64-bit. Then truncation.
; The shl and xor use the i64 constant 4294967295 (0xFFFFFFFF) in place of -1;
; the i64 'and' result is truncated to i32.
2016define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
2017; X86-NOBMI-LABEL: bzhi64_32_b3:
2018; X86-NOBMI:       # %bb.0:
2019; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2020; X86-NOBMI-NEXT:    movl $-1, %edx
2021; X86-NOBMI-NEXT:    shll %cl, %edx
2022; X86-NOBMI-NEXT:    xorl %eax, %eax
2023; X86-NOBMI-NEXT:    testb $32, %cl
2024; X86-NOBMI-NEXT:    jne .LBB29_2
2025; X86-NOBMI-NEXT:  # %bb.1:
2026; X86-NOBMI-NEXT:    movl %edx, %eax
2027; X86-NOBMI-NEXT:  .LBB29_2:
2028; X86-NOBMI-NEXT:    notl %eax
2029; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2030; X86-NOBMI-NEXT:    retl
2031;
2032; X86-BMI1-LABEL: bzhi64_32_b3:
2033; X86-BMI1:       # %bb.0:
2034; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2035; X86-BMI1-NEXT:    movl $-1, %eax
2036; X86-BMI1-NEXT:    shll %cl, %eax
2037; X86-BMI1-NEXT:    xorl %edx, %edx
2038; X86-BMI1-NEXT:    testb $32, %cl
2039; X86-BMI1-NEXT:    jne .LBB29_2
2040; X86-BMI1-NEXT:  # %bb.1:
2041; X86-BMI1-NEXT:    movl %eax, %edx
2042; X86-BMI1-NEXT:  .LBB29_2:
2043; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
2044; X86-BMI1-NEXT:    retl
2045;
2046; X86-BMI2-LABEL: bzhi64_32_b3:
2047; X86-BMI2:       # %bb.0:
2048; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2049; X86-BMI2-NEXT:    xorl %ecx, %ecx
2050; X86-BMI2-NEXT:    testb $32, %al
2051; X86-BMI2-NEXT:    jne .LBB29_2
2052; X86-BMI2-NEXT:  # %bb.1:
2053; X86-BMI2-NEXT:    movl $-1, %ecx
2054; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
2055; X86-BMI2-NEXT:  .LBB29_2:
2056; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
2057; X86-BMI2-NEXT:    retl
2058;
2059; X64-NOBMI-LABEL: bzhi64_32_b3:
2060; X64-NOBMI:       # %bb.0:
2061; X64-NOBMI-NEXT:    movl %esi, %ecx
2062; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
2063; X64-NOBMI-NEXT:    movl $4294967295, %edx # imm = 0xFFFFFFFF
2064; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2065; X64-NOBMI-NEXT:    shlq %cl, %rdx
2066; X64-NOBMI-NEXT:    xorl %edx, %eax
2067; X64-NOBMI-NEXT:    andl %edi, %eax
2068; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
2069; X64-NOBMI-NEXT:    retq
2070;
2071; X64-BMI1-LABEL: bzhi64_32_b3:
2072; X64-BMI1:       # %bb.0:
2073; X64-BMI1-NEXT:    shll $8, %esi
2074; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
2075; X64-BMI1-NEXT:    retq
2076;
2077; X64-BMI2-LABEL: bzhi64_32_b3:
2078; X64-BMI2:       # %bb.0:
2079; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2080; X64-BMI2-NEXT:    retq
; IR under test: the all-ones operand is only 32 bits wide, but the ops are i64.
2081  %widenumlowbits = zext i8 %numlowbits to i64
2082  %notmask = shl nsw i64 4294967295, %widenumlowbits
2083  %mask = xor i64 %notmask, 4294967295
2084  %wideres = and i64 %val, %mask
2085  %res = trunc i64 %wideres to i32
2086  ret i32 %res
2087}
2088
2089; ---------------------------------------------------------------------------- ;
2090; Pattern c. 32-bit
2091; ---------------------------------------------------------------------------- ;
2092
; Pattern c, 32-bit: computes %val & (-1 >> (32 - %numlowbits)).
; The mask is also stored through %escape, so it has a use besides the 'and'.
2093define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
2094; X86-NOBMI-LABEL: bzhi32_c0:
2095; X86-NOBMI:       # %bb.0:
2096; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2097; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2098; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2099; X86-NOBMI-NEXT:    movl $-1, %eax
2100; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2101; X86-NOBMI-NEXT:    shrl %cl, %eax
2102; X86-NOBMI-NEXT:    movl %eax, (%edx)
2103; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2104; X86-NOBMI-NEXT:    retl
2105;
2106; X86-BMI1-LABEL: bzhi32_c0:
2107; X86-BMI1:       # %bb.0:
2108; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2109; X86-BMI1-NEXT:    xorl %ecx, %ecx
2110; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2111; X86-BMI1-NEXT:    movl $-1, %eax
2112; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2113; X86-BMI1-NEXT:    shrl %cl, %eax
2114; X86-BMI1-NEXT:    movl %eax, (%edx)
2115; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2116; X86-BMI1-NEXT:    retl
2117;
2118; X86-BMI2-LABEL: bzhi32_c0:
2119; X86-BMI2:       # %bb.0:
2120; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2121; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2122; X86-BMI2-NEXT:    movl $-1, %edx
2123; X86-BMI2-NEXT:    bzhil %ecx, %edx, %edx
2124; X86-BMI2-NEXT:    movl %edx, (%eax)
2125; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
2126; X86-BMI2-NEXT:    retl
2127;
2128; X64-NOBMI-LABEL: bzhi32_c0:
2129; X64-NOBMI:       # %bb.0:
2130; X64-NOBMI-NEXT:    movl %esi, %ecx
2131; X64-NOBMI-NEXT:    negb %cl
2132; X64-NOBMI-NEXT:    movl $-1, %eax
2133; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2134; X64-NOBMI-NEXT:    shrl %cl, %eax
2135; X64-NOBMI-NEXT:    movl %eax, (%rdx)
2136; X64-NOBMI-NEXT:    andl %edi, %eax
2137; X64-NOBMI-NEXT:    retq
2138;
2139; X64-BMI1-LABEL: bzhi32_c0:
2140; X64-BMI1:       # %bb.0:
2141; X64-BMI1-NEXT:    movl %esi, %ecx
2142; X64-BMI1-NEXT:    negb %cl
2143; X64-BMI1-NEXT:    movl $-1, %eax
2144; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2145; X64-BMI1-NEXT:    shrl %cl, %eax
2146; X64-BMI1-NEXT:    movl %eax, (%rdx)
2147; X64-BMI1-NEXT:    andl %edi, %eax
2148; X64-BMI1-NEXT:    retq
2149;
2150; X64-BMI2-LABEL: bzhi32_c0:
2151; X64-BMI2:       # %bb.0:
2152; X64-BMI2-NEXT:    movl $-1, %eax
2153; X64-BMI2-NEXT:    bzhil %esi, %eax, %eax
2154; X64-BMI2-NEXT:    movl %eax, (%rdx)
2155; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2156; X64-BMI2-NEXT:    retq
; IR under test: lshr of all-ones by the high-bit count yields the low-bit mask.
2157  %numhighbits = sub i32 32, %numlowbits
2158  %mask = lshr i32 -1, %numhighbits
2159  store i32 %mask, ptr %escape
2160  %masked = and i32 %mask, %val
2161  ret i32 %masked
2162}
2163
; Pattern c, 32-bit, with an i8 bit count: 32 - %numlowbits is computed in i8
; and zero-extended to i32 before the lshr. The mask is stored through %escape.
2164define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind {
2165; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
2166; X86-NOBMI:       # %bb.0:
2167; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2168; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2169; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2170; X86-NOBMI-NEXT:    movl $-1, %eax
2171; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2172; X86-NOBMI-NEXT:    shrl %cl, %eax
2173; X86-NOBMI-NEXT:    movl %eax, (%edx)
2174; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2175; X86-NOBMI-NEXT:    retl
2176;
2177; X86-BMI1-LABEL: bzhi32_c1_indexzext:
2178; X86-BMI1:       # %bb.0:
2179; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2180; X86-BMI1-NEXT:    xorl %ecx, %ecx
2181; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2182; X86-BMI1-NEXT:    movl $-1, %eax
2183; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2184; X86-BMI1-NEXT:    shrl %cl, %eax
2185; X86-BMI1-NEXT:    movl %eax, (%edx)
2186; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2187; X86-BMI1-NEXT:    retl
2188;
2189; X86-BMI2-LABEL: bzhi32_c1_indexzext:
2190; X86-BMI2:       # %bb.0:
2191; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2192; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2193; X86-BMI2-NEXT:    movl $-1, %edx
2194; X86-BMI2-NEXT:    bzhil %ecx, %edx, %edx
2195; X86-BMI2-NEXT:    movl %edx, (%eax)
2196; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
2197; X86-BMI2-NEXT:    retl
2198;
2199; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
2200; X64-NOBMI:       # %bb.0:
2201; X64-NOBMI-NEXT:    movl %esi, %ecx
2202; X64-NOBMI-NEXT:    negb %cl
2203; X64-NOBMI-NEXT:    movl $-1, %eax
2204; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2205; X64-NOBMI-NEXT:    shrl %cl, %eax
2206; X64-NOBMI-NEXT:    movl %eax, (%rdx)
2207; X64-NOBMI-NEXT:    andl %edi, %eax
2208; X64-NOBMI-NEXT:    retq
2209;
2210; X64-BMI1-LABEL: bzhi32_c1_indexzext:
2211; X64-BMI1:       # %bb.0:
2212; X64-BMI1-NEXT:    movl %esi, %ecx
2213; X64-BMI1-NEXT:    negb %cl
2214; X64-BMI1-NEXT:    movl $-1, %eax
2215; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2216; X64-BMI1-NEXT:    shrl %cl, %eax
2217; X64-BMI1-NEXT:    movl %eax, (%rdx)
2218; X64-BMI1-NEXT:    andl %edi, %eax
2219; X64-BMI1-NEXT:    retq
2220;
2221; X64-BMI2-LABEL: bzhi32_c1_indexzext:
2222; X64-BMI2:       # %bb.0:
2223; X64-BMI2-NEXT:    movl $-1, %eax
2224; X64-BMI2-NEXT:    bzhil %esi, %eax, %eax
2225; X64-BMI2-NEXT:    movl %eax, (%rdx)
2226; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2227; X64-BMI2-NEXT:    retq
; IR under test: the subtraction is done in i8 and only then widened with zext.
2228  %numhighbits = sub i8 32, %numlowbits
2229  %sh_prom = zext i8 %numhighbits to i32
2230  %mask = lshr i32 -1, %sh_prom
2231  store i32 %mask, ptr %escape
2232  %masked = and i32 %mask, %val
2233  ret i32 %masked
2234}
2235
; Pattern c, 32-bit, with the value loaded from %w instead of passed directly.
; The mask is also stored through %escape.
2236define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind {
2237; X86-NOBMI-LABEL: bzhi32_c2_load:
2238; X86-NOBMI:       # %bb.0:
2239; X86-NOBMI-NEXT:    pushl %esi
2240; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2241; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2242; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2243; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2244; X86-NOBMI-NEXT:    movl $-1, %esi
2245; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2246; X86-NOBMI-NEXT:    shrl %cl, %esi
2247; X86-NOBMI-NEXT:    movl (%eax), %eax
2248; X86-NOBMI-NEXT:    andl %esi, %eax
2249; X86-NOBMI-NEXT:    movl %esi, (%edx)
2250; X86-NOBMI-NEXT:    popl %esi
2251; X86-NOBMI-NEXT:    retl
2252;
2253; X86-BMI1-LABEL: bzhi32_c2_load:
2254; X86-BMI1:       # %bb.0:
2255; X86-BMI1-NEXT:    pushl %esi
2256; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2257; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2258; X86-BMI1-NEXT:    xorl %ecx, %ecx
2259; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2260; X86-BMI1-NEXT:    movl $-1, %esi
2261; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2262; X86-BMI1-NEXT:    shrl %cl, %esi
2263; X86-BMI1-NEXT:    movl (%eax), %eax
2264; X86-BMI1-NEXT:    andl %esi, %eax
2265; X86-BMI1-NEXT:    movl %esi, (%edx)
2266; X86-BMI1-NEXT:    popl %esi
2267; X86-BMI1-NEXT:    retl
2268;
2269; X86-BMI2-LABEL: bzhi32_c2_load:
2270; X86-BMI2:       # %bb.0:
2271; X86-BMI2-NEXT:    pushl %esi
2272; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2273; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2274; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2275; X86-BMI2-NEXT:    movl $-1, %esi
2276; X86-BMI2-NEXT:    bzhil %edx, %esi, %esi
2277; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
2278; X86-BMI2-NEXT:    movl %esi, (%ecx)
2279; X86-BMI2-NEXT:    popl %esi
2280; X86-BMI2-NEXT:    retl
2281;
2282; X64-NOBMI-LABEL: bzhi32_c2_load:
2283; X64-NOBMI:       # %bb.0:
2284; X64-NOBMI-NEXT:    movl %esi, %ecx
2285; X64-NOBMI-NEXT:    negb %cl
2286; X64-NOBMI-NEXT:    movl $-1, %esi
2287; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2288; X64-NOBMI-NEXT:    shrl %cl, %esi
2289; X64-NOBMI-NEXT:    movl (%rdi), %eax
2290; X64-NOBMI-NEXT:    andl %esi, %eax
2291; X64-NOBMI-NEXT:    movl %esi, (%rdx)
2292; X64-NOBMI-NEXT:    retq
2293;
2294; X64-BMI1-LABEL: bzhi32_c2_load:
2295; X64-BMI1:       # %bb.0:
2296; X64-BMI1-NEXT:    movl %esi, %ecx
2297; X64-BMI1-NEXT:    negb %cl
2298; X64-BMI1-NEXT:    movl $-1, %esi
2299; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2300; X64-BMI1-NEXT:    shrl %cl, %esi
2301; X64-BMI1-NEXT:    movl (%rdi), %eax
2302; X64-BMI1-NEXT:    andl %esi, %eax
2303; X64-BMI1-NEXT:    movl %esi, (%rdx)
2304; X64-BMI1-NEXT:    retq
2305;
2306; X64-BMI2-LABEL: bzhi32_c2_load:
2307; X64-BMI2:       # %bb.0:
2308; X64-BMI2-NEXT:    movl $-1, %eax
2309; X64-BMI2-NEXT:    bzhil %esi, %eax, %ecx
2310; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
2311; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2312; X64-BMI2-NEXT:    retq
; IR under test: the load comes first in the IR, ahead of the mask computation.
2313  %val = load i32, ptr %w
2314  %numhighbits = sub i32 32, %numlowbits
2315  %mask = lshr i32 -1, %numhighbits
2316  store i32 %mask, ptr %escape
2317  %masked = and i32 %mask, %val
2318  ret i32 %masked
2319}
2320
; Pattern c, 32-bit, with the value loaded from %w and an i8 bit count:
; 32 - %numlowbits is computed in i8 and zero-extended to i32.
; The mask is also stored through %escape.
2321define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind {
2322; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext:
2323; X86-NOBMI:       # %bb.0:
2324; X86-NOBMI-NEXT:    pushl %esi
2325; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2326; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2327; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2328; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2329; X86-NOBMI-NEXT:    movl $-1, %esi
2330; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2331; X86-NOBMI-NEXT:    shrl %cl, %esi
2332; X86-NOBMI-NEXT:    movl (%eax), %eax
2333; X86-NOBMI-NEXT:    andl %esi, %eax
2334; X86-NOBMI-NEXT:    movl %esi, (%edx)
2335; X86-NOBMI-NEXT:    popl %esi
2336; X86-NOBMI-NEXT:    retl
2337;
2338; X86-BMI1-LABEL: bzhi32_c3_load_indexzext:
2339; X86-BMI1:       # %bb.0:
2340; X86-BMI1-NEXT:    pushl %esi
2341; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2342; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2343; X86-BMI1-NEXT:    xorl %ecx, %ecx
2344; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2345; X86-BMI1-NEXT:    movl $-1, %esi
2346; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2347; X86-BMI1-NEXT:    shrl %cl, %esi
2348; X86-BMI1-NEXT:    movl (%eax), %eax
2349; X86-BMI1-NEXT:    andl %esi, %eax
2350; X86-BMI1-NEXT:    movl %esi, (%edx)
2351; X86-BMI1-NEXT:    popl %esi
2352; X86-BMI1-NEXT:    retl
2353;
2354; X86-BMI2-LABEL: bzhi32_c3_load_indexzext:
2355; X86-BMI2:       # %bb.0:
2356; X86-BMI2-NEXT:    pushl %esi
2357; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2358; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2359; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2360; X86-BMI2-NEXT:    movl $-1, %esi
2361; X86-BMI2-NEXT:    bzhil %edx, %esi, %esi
2362; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
2363; X86-BMI2-NEXT:    movl %esi, (%ecx)
2364; X86-BMI2-NEXT:    popl %esi
2365; X86-BMI2-NEXT:    retl
2366;
2367; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
2368; X64-NOBMI:       # %bb.0:
2369; X64-NOBMI-NEXT:    movl %esi, %ecx
2370; X64-NOBMI-NEXT:    negb %cl
2371; X64-NOBMI-NEXT:    movl $-1, %esi
2372; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2373; X64-NOBMI-NEXT:    shrl %cl, %esi
2374; X64-NOBMI-NEXT:    movl (%rdi), %eax
2375; X64-NOBMI-NEXT:    andl %esi, %eax
2376; X64-NOBMI-NEXT:    movl %esi, (%rdx)
2377; X64-NOBMI-NEXT:    retq
2378;
2379; X64-BMI1-LABEL: bzhi32_c3_load_indexzext:
2380; X64-BMI1:       # %bb.0:
2381; X64-BMI1-NEXT:    movl %esi, %ecx
2382; X64-BMI1-NEXT:    negb %cl
2383; X64-BMI1-NEXT:    movl $-1, %esi
2384; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2385; X64-BMI1-NEXT:    shrl %cl, %esi
2386; X64-BMI1-NEXT:    movl (%rdi), %eax
2387; X64-BMI1-NEXT:    andl %esi, %eax
2388; X64-BMI1-NEXT:    movl %esi, (%rdx)
2389; X64-BMI1-NEXT:    retq
2390;
2391; X64-BMI2-LABEL: bzhi32_c3_load_indexzext:
2392; X64-BMI2:       # %bb.0:
2393; X64-BMI2-NEXT:    movl $-1, %eax
2394; X64-BMI2-NEXT:    bzhil %esi, %eax, %ecx
2395; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
2396; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2397; X64-BMI2-NEXT:    retq
; IR under test: loaded value plus an i8 high-bit count widened with zext.
2398  %val = load i32, ptr %w
2399  %numhighbits = sub i8 32, %numlowbits
2400  %sh_prom = zext i8 %numhighbits to i32
2401  %mask = lshr i32 -1, %sh_prom
2402  store i32 %mask, ptr %escape
2403  %masked = and i32 %mask, %val
2404  ret i32 %masked
2405}
2406
; Pattern c, 32-bit, with %val as the first operand of the 'and'
; (marked "swapped order" below). The mask is also stored through %escape.
2407define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
2408; X86-NOBMI-LABEL: bzhi32_c4_commutative:
2409; X86-NOBMI:       # %bb.0:
2410; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2411; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2412; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2413; X86-NOBMI-NEXT:    movl $-1, %eax
2414; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2415; X86-NOBMI-NEXT:    shrl %cl, %eax
2416; X86-NOBMI-NEXT:    movl %eax, (%edx)
2417; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2418; X86-NOBMI-NEXT:    retl
2419;
2420; X86-BMI1-LABEL: bzhi32_c4_commutative:
2421; X86-BMI1:       # %bb.0:
2422; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2423; X86-BMI1-NEXT:    xorl %ecx, %ecx
2424; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2425; X86-BMI1-NEXT:    movl $-1, %eax
2426; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2427; X86-BMI1-NEXT:    shrl %cl, %eax
2428; X86-BMI1-NEXT:    movl %eax, (%edx)
2429; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2430; X86-BMI1-NEXT:    retl
2431;
2432; X86-BMI2-LABEL: bzhi32_c4_commutative:
2433; X86-BMI2:       # %bb.0:
2434; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2435; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2436; X86-BMI2-NEXT:    movl $-1, %edx
2437; X86-BMI2-NEXT:    bzhil %ecx, %edx, %edx
2438; X86-BMI2-NEXT:    movl %edx, (%eax)
2439; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
2440; X86-BMI2-NEXT:    retl
2441;
2442; X64-NOBMI-LABEL: bzhi32_c4_commutative:
2443; X64-NOBMI:       # %bb.0:
2444; X64-NOBMI-NEXT:    movl %esi, %ecx
2445; X64-NOBMI-NEXT:    negb %cl
2446; X64-NOBMI-NEXT:    movl $-1, %eax
2447; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2448; X64-NOBMI-NEXT:    shrl %cl, %eax
2449; X64-NOBMI-NEXT:    movl %eax, (%rdx)
2450; X64-NOBMI-NEXT:    andl %edi, %eax
2451; X64-NOBMI-NEXT:    retq
2452;
2453; X64-BMI1-LABEL: bzhi32_c4_commutative:
2454; X64-BMI1:       # %bb.0:
2455; X64-BMI1-NEXT:    movl %esi, %ecx
2456; X64-BMI1-NEXT:    negb %cl
2457; X64-BMI1-NEXT:    movl $-1, %eax
2458; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2459; X64-BMI1-NEXT:    shrl %cl, %eax
2460; X64-BMI1-NEXT:    movl %eax, (%rdx)
2461; X64-BMI1-NEXT:    andl %edi, %eax
2462; X64-BMI1-NEXT:    retq
2463;
2464; X64-BMI2-LABEL: bzhi32_c4_commutative:
2465; X64-BMI2:       # %bb.0:
2466; X64-BMI2-NEXT:    movl $-1, %eax
2467; X64-BMI2-NEXT:    bzhil %esi, %eax, %eax
2468; X64-BMI2-NEXT:    movl %eax, (%rdx)
2469; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2470; X64-BMI2-NEXT:    retq
; IR under test: same mask computation as bzhi32_c0, 'and' operands exchanged.
2471  %numhighbits = sub i32 32, %numlowbits
2472  %mask = lshr i32 -1, %numhighbits
2473  store i32 %mask, ptr %escape
2474  %masked = and i32 %val, %mask ; swapped order
2475  ret i32 %masked
2476}
2477
2478; 64-bit
2479
; Pattern c, 64-bit: computes %val & (-1 >> (64 - %numlowbits)).
; The i64 mask is also stored through %escape.
2480define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
2481; X86-NOBMI-LABEL: bzhi64_c0:
2482; X86-NOBMI:       # %bb.0:
2483; X86-NOBMI-NEXT:    pushl %esi
2484; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2485; X86-NOBMI-NEXT:    movb $64, %cl
2486; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2487; X86-NOBMI-NEXT:    movl $-1, %eax
2488; X86-NOBMI-NEXT:    movl $-1, %edx
2489; X86-NOBMI-NEXT:    shrl %cl, %edx
2490; X86-NOBMI-NEXT:    testb $32, %cl
2491; X86-NOBMI-NEXT:    je .LBB35_2
2492; X86-NOBMI-NEXT:  # %bb.1:
2493; X86-NOBMI-NEXT:    movl %edx, %eax
2494; X86-NOBMI-NEXT:    xorl %edx, %edx
2495; X86-NOBMI-NEXT:  .LBB35_2:
2496; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
2497; X86-NOBMI-NEXT:    movl %eax, (%esi)
2498; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2499; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
2500; X86-NOBMI-NEXT:    popl %esi
2501; X86-NOBMI-NEXT:    retl
2502;
2503; X86-BMI1-LABEL: bzhi64_c0:
2504; X86-BMI1:       # %bb.0:
2505; X86-BMI1-NEXT:    pushl %esi
2506; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2507; X86-BMI1-NEXT:    movb $64, %cl
2508; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2509; X86-BMI1-NEXT:    movl $-1, %eax
2510; X86-BMI1-NEXT:    movl $-1, %edx
2511; X86-BMI1-NEXT:    shrl %cl, %edx
2512; X86-BMI1-NEXT:    testb $32, %cl
2513; X86-BMI1-NEXT:    je .LBB35_2
2514; X86-BMI1-NEXT:  # %bb.1:
2515; X86-BMI1-NEXT:    movl %edx, %eax
2516; X86-BMI1-NEXT:    xorl %edx, %edx
2517; X86-BMI1-NEXT:  .LBB35_2:
2518; X86-BMI1-NEXT:    movl %edx, 4(%esi)
2519; X86-BMI1-NEXT:    movl %eax, (%esi)
2520; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2521; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
2522; X86-BMI1-NEXT:    popl %esi
2523; X86-BMI1-NEXT:    retl
2524;
2525; X86-BMI2-LABEL: bzhi64_c0:
2526; X86-BMI2:       # %bb.0:
2527; X86-BMI2-NEXT:    pushl %ebx
2528; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2529; X86-BMI2-NEXT:    movb $64, %bl
2530; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
2531; X86-BMI2-NEXT:    movl $-1, %eax
2532; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
2533; X86-BMI2-NEXT:    testb $32, %bl
2534; X86-BMI2-NEXT:    je .LBB35_2
2535; X86-BMI2-NEXT:  # %bb.1:
2536; X86-BMI2-NEXT:    movl %edx, %eax
2537; X86-BMI2-NEXT:    xorl %edx, %edx
2538; X86-BMI2-NEXT:  .LBB35_2:
2539; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
2540; X86-BMI2-NEXT:    movl %eax, (%ecx)
2541; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
2542; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
2543; X86-BMI2-NEXT:    popl %ebx
2544; X86-BMI2-NEXT:    retl
2545;
2546; X64-NOBMI-LABEL: bzhi64_c0:
2547; X64-NOBMI:       # %bb.0:
2548; X64-NOBMI-NEXT:    movq %rsi, %rcx
2549; X64-NOBMI-NEXT:    negb %cl
2550; X64-NOBMI-NEXT:    movq $-1, %rax
2551; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
2552; X64-NOBMI-NEXT:    shrq %cl, %rax
2553; X64-NOBMI-NEXT:    movq %rax, (%rdx)
2554; X64-NOBMI-NEXT:    andq %rdi, %rax
2555; X64-NOBMI-NEXT:    retq
2556;
2557; X64-BMI1-LABEL: bzhi64_c0:
2558; X64-BMI1:       # %bb.0:
2559; X64-BMI1-NEXT:    movq %rsi, %rcx
2560; X64-BMI1-NEXT:    negb %cl
2561; X64-BMI1-NEXT:    movq $-1, %rax
2562; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
2563; X64-BMI1-NEXT:    shrq %cl, %rax
2564; X64-BMI1-NEXT:    movq %rax, (%rdx)
2565; X64-BMI1-NEXT:    andq %rdi, %rax
2566; X64-BMI1-NEXT:    retq
2567;
2568; X64-BMI2-LABEL: bzhi64_c0:
2569; X64-BMI2:       # %bb.0:
2570; X64-BMI2-NEXT:    movq $-1, %rax
2571; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rax
2572; X64-BMI2-NEXT:    movq %rax, (%rdx)
2573; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
2574; X64-BMI2-NEXT:    retq
; IR under test: lshr of all-ones by the high-bit count yields the low-bit mask.
2575  %numhighbits = sub i64 64, %numlowbits
2576  %mask = lshr i64 -1, %numhighbits
2577  store i64 %mask, ptr %escape
2578  %masked = and i64 %mask, %val
2579  ret i64 %masked
2580}
2581
; Pattern c, 64-bit, with the bit count passed as i8:
;   %val & (-1 >> (64 - %numlowbits))
; The subtraction is done in i8 and its result is zero-extended to i64 before
; being used as the shift amount. The mask is also stored through %escape, so
; it has a second use besides the 'and'.
2582define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind {
2583; X86-NOBMI-LABEL: bzhi64_c1_indexzext:
2584; X86-NOBMI:       # %bb.0:
2585; X86-NOBMI-NEXT:    pushl %esi
2586; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2587; X86-NOBMI-NEXT:    movb $64, %cl
2588; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2589; X86-NOBMI-NEXT:    movl $-1, %eax
2590; X86-NOBMI-NEXT:    movl $-1, %edx
2591; X86-NOBMI-NEXT:    shrl %cl, %edx
2592; X86-NOBMI-NEXT:    testb $32, %cl
2593; X86-NOBMI-NEXT:    je .LBB36_2
2594; X86-NOBMI-NEXT:  # %bb.1:
2595; X86-NOBMI-NEXT:    movl %edx, %eax
2596; X86-NOBMI-NEXT:    xorl %edx, %edx
2597; X86-NOBMI-NEXT:  .LBB36_2:
2598; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
2599; X86-NOBMI-NEXT:    movl %eax, (%esi)
2600; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2601; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
2602; X86-NOBMI-NEXT:    popl %esi
2603; X86-NOBMI-NEXT:    retl
2604;
2605; X86-BMI1-LABEL: bzhi64_c1_indexzext:
2606; X86-BMI1:       # %bb.0:
2607; X86-BMI1-NEXT:    pushl %esi
2608; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2609; X86-BMI1-NEXT:    movb $64, %cl
2610; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2611; X86-BMI1-NEXT:    movl $-1, %eax
2612; X86-BMI1-NEXT:    movl $-1, %edx
2613; X86-BMI1-NEXT:    shrl %cl, %edx
2614; X86-BMI1-NEXT:    testb $32, %cl
2615; X86-BMI1-NEXT:    je .LBB36_2
2616; X86-BMI1-NEXT:  # %bb.1:
2617; X86-BMI1-NEXT:    movl %edx, %eax
2618; X86-BMI1-NEXT:    xorl %edx, %edx
2619; X86-BMI1-NEXT:  .LBB36_2:
2620; X86-BMI1-NEXT:    movl %edx, 4(%esi)
2621; X86-BMI1-NEXT:    movl %eax, (%esi)
2622; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2623; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
2624; X86-BMI1-NEXT:    popl %esi
2625; X86-BMI1-NEXT:    retl
2626;
2627; X86-BMI2-LABEL: bzhi64_c1_indexzext:
2628; X86-BMI2:       # %bb.0:
2629; X86-BMI2-NEXT:    pushl %ebx
2630; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2631; X86-BMI2-NEXT:    movb $64, %bl
2632; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
2633; X86-BMI2-NEXT:    movl $-1, %eax
2634; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
2635; X86-BMI2-NEXT:    testb $32, %bl
2636; X86-BMI2-NEXT:    je .LBB36_2
2637; X86-BMI2-NEXT:  # %bb.1:
2638; X86-BMI2-NEXT:    movl %edx, %eax
2639; X86-BMI2-NEXT:    xorl %edx, %edx
2640; X86-BMI2-NEXT:  .LBB36_2:
2641; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
2642; X86-BMI2-NEXT:    movl %eax, (%ecx)
2643; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
2644; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
2645; X86-BMI2-NEXT:    popl %ebx
2646; X86-BMI2-NEXT:    retl
2647;
2648; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
2649; X64-NOBMI:       # %bb.0:
2650; X64-NOBMI-NEXT:    movl %esi, %ecx
2651; X64-NOBMI-NEXT:    negb %cl
2652; X64-NOBMI-NEXT:    movq $-1, %rax
2653; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2654; X64-NOBMI-NEXT:    shrq %cl, %rax
2655; X64-NOBMI-NEXT:    movq %rax, (%rdx)
2656; X64-NOBMI-NEXT:    andq %rdi, %rax
2657; X64-NOBMI-NEXT:    retq
2658;
2659; X64-BMI1-LABEL: bzhi64_c1_indexzext:
2660; X64-BMI1:       # %bb.0:
2661; X64-BMI1-NEXT:    movl %esi, %ecx
2662; X64-BMI1-NEXT:    negb %cl
2663; X64-BMI1-NEXT:    movq $-1, %rax
2664; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2665; X64-BMI1-NEXT:    shrq %cl, %rax
2666; X64-BMI1-NEXT:    movq %rax, (%rdx)
2667; X64-BMI1-NEXT:    andq %rdi, %rax
2668; X64-BMI1-NEXT:    retq
2669;
2670; X64-BMI2-LABEL: bzhi64_c1_indexzext:
2671; X64-BMI2:       # %bb.0:
2672; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
2673; X64-BMI2-NEXT:    movq $-1, %rax
2674; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rax
2675; X64-BMI2-NEXT:    movq %rax, (%rdx)
2676; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
2677; X64-BMI2-NEXT:    retq
2678  %numhighbits = sub i8 64, %numlowbits
2679  %sh_prom = zext i8 %numhighbits to i64
2680  %mask = lshr i64 -1, %sh_prom
2681  store i64 %mask, ptr %escape
2682  %masked = and i64 %mask, %val
2683  ret i64 %masked
2684}
2685
; Pattern c, 64-bit, with the value operand loaded from memory:
;   load(%w) & (-1 >> (64 - %numlowbits))
; The mask is also stored through %escape, so it has a second use besides the
; 'and'.
2686define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind {
2687; X86-NOBMI-LABEL: bzhi64_c2_load:
2688; X86-NOBMI:       # %bb.0:
2689; X86-NOBMI-NEXT:    pushl %ebx
2690; X86-NOBMI-NEXT:    pushl %edi
2691; X86-NOBMI-NEXT:    pushl %esi
2692; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2693; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2694; X86-NOBMI-NEXT:    movb $64, %cl
2695; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2696; X86-NOBMI-NEXT:    movl $-1, %edi
2697; X86-NOBMI-NEXT:    movl $-1, %ebx
2698; X86-NOBMI-NEXT:    shrl %cl, %ebx
2699; X86-NOBMI-NEXT:    testb $32, %cl
2700; X86-NOBMI-NEXT:    je .LBB37_2
2701; X86-NOBMI-NEXT:  # %bb.1:
2702; X86-NOBMI-NEXT:    movl %ebx, %edi
2703; X86-NOBMI-NEXT:    xorl %ebx, %ebx
2704; X86-NOBMI-NEXT:  .LBB37_2:
2705; X86-NOBMI-NEXT:    movl 4(%eax), %edx
2706; X86-NOBMI-NEXT:    andl %ebx, %edx
2707; X86-NOBMI-NEXT:    movl (%eax), %eax
2708; X86-NOBMI-NEXT:    andl %edi, %eax
2709; X86-NOBMI-NEXT:    movl %ebx, 4(%esi)
2710; X86-NOBMI-NEXT:    movl %edi, (%esi)
2711; X86-NOBMI-NEXT:    popl %esi
2712; X86-NOBMI-NEXT:    popl %edi
2713; X86-NOBMI-NEXT:    popl %ebx
2714; X86-NOBMI-NEXT:    retl
2715;
2716; X86-BMI1-LABEL: bzhi64_c2_load:
2717; X86-BMI1:       # %bb.0:
2718; X86-BMI1-NEXT:    pushl %ebx
2719; X86-BMI1-NEXT:    pushl %edi
2720; X86-BMI1-NEXT:    pushl %esi
2721; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2722; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2723; X86-BMI1-NEXT:    movb $64, %cl
2724; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2725; X86-BMI1-NEXT:    movl $-1, %edi
2726; X86-BMI1-NEXT:    movl $-1, %ebx
2727; X86-BMI1-NEXT:    shrl %cl, %ebx
2728; X86-BMI1-NEXT:    testb $32, %cl
2729; X86-BMI1-NEXT:    je .LBB37_2
2730; X86-BMI1-NEXT:  # %bb.1:
2731; X86-BMI1-NEXT:    movl %ebx, %edi
2732; X86-BMI1-NEXT:    xorl %ebx, %ebx
2733; X86-BMI1-NEXT:  .LBB37_2:
2734; X86-BMI1-NEXT:    movl 4(%eax), %edx
2735; X86-BMI1-NEXT:    andl %ebx, %edx
2736; X86-BMI1-NEXT:    movl (%eax), %eax
2737; X86-BMI1-NEXT:    andl %edi, %eax
2738; X86-BMI1-NEXT:    movl %ebx, 4(%esi)
2739; X86-BMI1-NEXT:    movl %edi, (%esi)
2740; X86-BMI1-NEXT:    popl %esi
2741; X86-BMI1-NEXT:    popl %edi
2742; X86-BMI1-NEXT:    popl %ebx
2743; X86-BMI1-NEXT:    retl
2744;
2745; X86-BMI2-LABEL: bzhi64_c2_load:
2746; X86-BMI2:       # %bb.0:
2747; X86-BMI2-NEXT:    pushl %edi
2748; X86-BMI2-NEXT:    pushl %esi
2749; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2750; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2751; X86-BMI2-NEXT:    movb $64, %dl
2752; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %dl
2753; X86-BMI2-NEXT:    movl $-1, %esi
2754; X86-BMI2-NEXT:    shrxl %edx, %esi, %edi
2755; X86-BMI2-NEXT:    testb $32, %dl
2756; X86-BMI2-NEXT:    je .LBB37_2
2757; X86-BMI2-NEXT:  # %bb.1:
2758; X86-BMI2-NEXT:    movl %edi, %esi
2759; X86-BMI2-NEXT:    xorl %edi, %edi
2760; X86-BMI2-NEXT:  .LBB37_2:
2761; X86-BMI2-NEXT:    movl 4(%eax), %edx
2762; X86-BMI2-NEXT:    andl %edi, %edx
2763; X86-BMI2-NEXT:    movl (%eax), %eax
2764; X86-BMI2-NEXT:    andl %esi, %eax
2765; X86-BMI2-NEXT:    movl %edi, 4(%ecx)
2766; X86-BMI2-NEXT:    movl %esi, (%ecx)
2767; X86-BMI2-NEXT:    popl %esi
2768; X86-BMI2-NEXT:    popl %edi
2769; X86-BMI2-NEXT:    retl
2770;
2771; X64-NOBMI-LABEL: bzhi64_c2_load:
2772; X64-NOBMI:       # %bb.0:
2773; X64-NOBMI-NEXT:    movq %rsi, %rcx
2774; X64-NOBMI-NEXT:    negb %cl
2775; X64-NOBMI-NEXT:    movq $-1, %rsi
2776; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
2777; X64-NOBMI-NEXT:    shrq %cl, %rsi
2778; X64-NOBMI-NEXT:    movq (%rdi), %rax
2779; X64-NOBMI-NEXT:    andq %rsi, %rax
2780; X64-NOBMI-NEXT:    movq %rsi, (%rdx)
2781; X64-NOBMI-NEXT:    retq
2782;
2783; X64-BMI1-LABEL: bzhi64_c2_load:
2784; X64-BMI1:       # %bb.0:
2785; X64-BMI1-NEXT:    movq %rsi, %rcx
2786; X64-BMI1-NEXT:    negb %cl
2787; X64-BMI1-NEXT:    movq $-1, %rsi
2788; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
2789; X64-BMI1-NEXT:    shrq %cl, %rsi
2790; X64-BMI1-NEXT:    movq (%rdi), %rax
2791; X64-BMI1-NEXT:    andq %rsi, %rax
2792; X64-BMI1-NEXT:    movq %rsi, (%rdx)
2793; X64-BMI1-NEXT:    retq
2794;
2795; X64-BMI2-LABEL: bzhi64_c2_load:
2796; X64-BMI2:       # %bb.0:
2797; X64-BMI2-NEXT:    movq $-1, %rax
2798; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rcx
2799; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
2800; X64-BMI2-NEXT:    movq %rcx, (%rdx)
2801; X64-BMI2-NEXT:    retq
2802  %val = load i64, ptr %w
2803  %numhighbits = sub i64 64, %numlowbits
2804  %mask = lshr i64 -1, %numhighbits
2805  store i64 %mask, ptr %escape
2806  %masked = and i64 %mask, %val
2807  ret i64 %masked
2808}
2809
; Pattern c, 64-bit, combining a loaded value operand with an i8 bit count:
;   load(%w) & (-1 >> zext(64 - %numlowbits))
; The subtraction is done in i8 and zero-extended to i64 for the shift. The
; mask is also stored through %escape, so it has a second use besides the
; 'and'.
2810define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind {
2811; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext:
2812; X86-NOBMI:       # %bb.0:
2813; X86-NOBMI-NEXT:    pushl %ebx
2814; X86-NOBMI-NEXT:    pushl %edi
2815; X86-NOBMI-NEXT:    pushl %esi
2816; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2817; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2818; X86-NOBMI-NEXT:    movb $64, %cl
2819; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2820; X86-NOBMI-NEXT:    movl $-1, %edi
2821; X86-NOBMI-NEXT:    movl $-1, %ebx
2822; X86-NOBMI-NEXT:    shrl %cl, %ebx
2823; X86-NOBMI-NEXT:    testb $32, %cl
2824; X86-NOBMI-NEXT:    je .LBB38_2
2825; X86-NOBMI-NEXT:  # %bb.1:
2826; X86-NOBMI-NEXT:    movl %ebx, %edi
2827; X86-NOBMI-NEXT:    xorl %ebx, %ebx
2828; X86-NOBMI-NEXT:  .LBB38_2:
2829; X86-NOBMI-NEXT:    movl 4(%eax), %edx
2830; X86-NOBMI-NEXT:    andl %ebx, %edx
2831; X86-NOBMI-NEXT:    movl (%eax), %eax
2832; X86-NOBMI-NEXT:    andl %edi, %eax
2833; X86-NOBMI-NEXT:    movl %ebx, 4(%esi)
2834; X86-NOBMI-NEXT:    movl %edi, (%esi)
2835; X86-NOBMI-NEXT:    popl %esi
2836; X86-NOBMI-NEXT:    popl %edi
2837; X86-NOBMI-NEXT:    popl %ebx
2838; X86-NOBMI-NEXT:    retl
2839;
2840; X86-BMI1-LABEL: bzhi64_c3_load_indexzext:
2841; X86-BMI1:       # %bb.0:
2842; X86-BMI1-NEXT:    pushl %ebx
2843; X86-BMI1-NEXT:    pushl %edi
2844; X86-BMI1-NEXT:    pushl %esi
2845; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2846; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2847; X86-BMI1-NEXT:    movb $64, %cl
2848; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2849; X86-BMI1-NEXT:    movl $-1, %edi
2850; X86-BMI1-NEXT:    movl $-1, %ebx
2851; X86-BMI1-NEXT:    shrl %cl, %ebx
2852; X86-BMI1-NEXT:    testb $32, %cl
2853; X86-BMI1-NEXT:    je .LBB38_2
2854; X86-BMI1-NEXT:  # %bb.1:
2855; X86-BMI1-NEXT:    movl %ebx, %edi
2856; X86-BMI1-NEXT:    xorl %ebx, %ebx
2857; X86-BMI1-NEXT:  .LBB38_2:
2858; X86-BMI1-NEXT:    movl 4(%eax), %edx
2859; X86-BMI1-NEXT:    andl %ebx, %edx
2860; X86-BMI1-NEXT:    movl (%eax), %eax
2861; X86-BMI1-NEXT:    andl %edi, %eax
2862; X86-BMI1-NEXT:    movl %ebx, 4(%esi)
2863; X86-BMI1-NEXT:    movl %edi, (%esi)
2864; X86-BMI1-NEXT:    popl %esi
2865; X86-BMI1-NEXT:    popl %edi
2866; X86-BMI1-NEXT:    popl %ebx
2867; X86-BMI1-NEXT:    retl
2868;
2869; X86-BMI2-LABEL: bzhi64_c3_load_indexzext:
2870; X86-BMI2:       # %bb.0:
2871; X86-BMI2-NEXT:    pushl %edi
2872; X86-BMI2-NEXT:    pushl %esi
2873; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2874; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2875; X86-BMI2-NEXT:    movb $64, %dl
2876; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %dl
2877; X86-BMI2-NEXT:    movl $-1, %esi
2878; X86-BMI2-NEXT:    shrxl %edx, %esi, %edi
2879; X86-BMI2-NEXT:    testb $32, %dl
2880; X86-BMI2-NEXT:    je .LBB38_2
2881; X86-BMI2-NEXT:  # %bb.1:
2882; X86-BMI2-NEXT:    movl %edi, %esi
2883; X86-BMI2-NEXT:    xorl %edi, %edi
2884; X86-BMI2-NEXT:  .LBB38_2:
2885; X86-BMI2-NEXT:    movl 4(%eax), %edx
2886; X86-BMI2-NEXT:    andl %edi, %edx
2887; X86-BMI2-NEXT:    movl (%eax), %eax
2888; X86-BMI2-NEXT:    andl %esi, %eax
2889; X86-BMI2-NEXT:    movl %edi, 4(%ecx)
2890; X86-BMI2-NEXT:    movl %esi, (%ecx)
2891; X86-BMI2-NEXT:    popl %esi
2892; X86-BMI2-NEXT:    popl %edi
2893; X86-BMI2-NEXT:    retl
2894;
2895; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
2896; X64-NOBMI:       # %bb.0:
2897; X64-NOBMI-NEXT:    movl %esi, %ecx
2898; X64-NOBMI-NEXT:    negb %cl
2899; X64-NOBMI-NEXT:    movq $-1, %rsi
2900; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2901; X64-NOBMI-NEXT:    shrq %cl, %rsi
2902; X64-NOBMI-NEXT:    movq (%rdi), %rax
2903; X64-NOBMI-NEXT:    andq %rsi, %rax
2904; X64-NOBMI-NEXT:    movq %rsi, (%rdx)
2905; X64-NOBMI-NEXT:    retq
2906;
2907; X64-BMI1-LABEL: bzhi64_c3_load_indexzext:
2908; X64-BMI1:       # %bb.0:
2909; X64-BMI1-NEXT:    movl %esi, %ecx
2910; X64-BMI1-NEXT:    negb %cl
2911; X64-BMI1-NEXT:    movq $-1, %rsi
2912; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2913; X64-BMI1-NEXT:    shrq %cl, %rsi
2914; X64-BMI1-NEXT:    movq (%rdi), %rax
2915; X64-BMI1-NEXT:    andq %rsi, %rax
2916; X64-BMI1-NEXT:    movq %rsi, (%rdx)
2917; X64-BMI1-NEXT:    retq
2918;
2919; X64-BMI2-LABEL: bzhi64_c3_load_indexzext:
2920; X64-BMI2:       # %bb.0:
2921; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
2922; X64-BMI2-NEXT:    movq $-1, %rax
2923; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rcx
2924; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
2925; X64-BMI2-NEXT:    movq %rcx, (%rdx)
2926; X64-BMI2-NEXT:    retq
2927  %val = load i64, ptr %w
2928  %numhighbits = sub i8 64, %numlowbits
2929  %sh_prom = zext i8 %numhighbits to i64
2930  %mask = lshr i64 -1, %sh_prom
2931  store i64 %mask, ptr %escape
2932  %masked = and i64 %mask, %val
2933  ret i64 %masked
2934}
2935
; Pattern c, 64-bit, with the operands of the final 'and' written in the
; opposite order (%val first, %mask second):
;   %val & (-1 >> (64 - %numlowbits))
; The mask is also stored through %escape, so it has a second use besides the
; 'and'.
2936define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
2937; X86-NOBMI-LABEL: bzhi64_c4_commutative:
2938; X86-NOBMI:       # %bb.0:
2939; X86-NOBMI-NEXT:    pushl %esi
2940; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2941; X86-NOBMI-NEXT:    movb $64, %cl
2942; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2943; X86-NOBMI-NEXT:    movl $-1, %eax
2944; X86-NOBMI-NEXT:    movl $-1, %edx
2945; X86-NOBMI-NEXT:    shrl %cl, %edx
2946; X86-NOBMI-NEXT:    testb $32, %cl
2947; X86-NOBMI-NEXT:    je .LBB39_2
2948; X86-NOBMI-NEXT:  # %bb.1:
2949; X86-NOBMI-NEXT:    movl %edx, %eax
2950; X86-NOBMI-NEXT:    xorl %edx, %edx
2951; X86-NOBMI-NEXT:  .LBB39_2:
2952; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
2953; X86-NOBMI-NEXT:    movl %eax, (%esi)
2954; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2955; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
2956; X86-NOBMI-NEXT:    popl %esi
2957; X86-NOBMI-NEXT:    retl
2958;
2959; X86-BMI1-LABEL: bzhi64_c4_commutative:
2960; X86-BMI1:       # %bb.0:
2961; X86-BMI1-NEXT:    pushl %esi
2962; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2963; X86-BMI1-NEXT:    movb $64, %cl
2964; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2965; X86-BMI1-NEXT:    movl $-1, %eax
2966; X86-BMI1-NEXT:    movl $-1, %edx
2967; X86-BMI1-NEXT:    shrl %cl, %edx
2968; X86-BMI1-NEXT:    testb $32, %cl
2969; X86-BMI1-NEXT:    je .LBB39_2
2970; X86-BMI1-NEXT:  # %bb.1:
2971; X86-BMI1-NEXT:    movl %edx, %eax
2972; X86-BMI1-NEXT:    xorl %edx, %edx
2973; X86-BMI1-NEXT:  .LBB39_2:
2974; X86-BMI1-NEXT:    movl %edx, 4(%esi)
2975; X86-BMI1-NEXT:    movl %eax, (%esi)
2976; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2977; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
2978; X86-BMI1-NEXT:    popl %esi
2979; X86-BMI1-NEXT:    retl
2980;
2981; X86-BMI2-LABEL: bzhi64_c4_commutative:
2982; X86-BMI2:       # %bb.0:
2983; X86-BMI2-NEXT:    pushl %ebx
2984; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2985; X86-BMI2-NEXT:    movb $64, %bl
2986; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
2987; X86-BMI2-NEXT:    movl $-1, %eax
2988; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
2989; X86-BMI2-NEXT:    testb $32, %bl
2990; X86-BMI2-NEXT:    je .LBB39_2
2991; X86-BMI2-NEXT:  # %bb.1:
2992; X86-BMI2-NEXT:    movl %edx, %eax
2993; X86-BMI2-NEXT:    xorl %edx, %edx
2994; X86-BMI2-NEXT:  .LBB39_2:
2995; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
2996; X86-BMI2-NEXT:    movl %eax, (%ecx)
2997; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
2998; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
2999; X86-BMI2-NEXT:    popl %ebx
3000; X86-BMI2-NEXT:    retl
3001;
3002; X64-NOBMI-LABEL: bzhi64_c4_commutative:
3003; X64-NOBMI:       # %bb.0:
3004; X64-NOBMI-NEXT:    movq %rsi, %rcx
3005; X64-NOBMI-NEXT:    negb %cl
3006; X64-NOBMI-NEXT:    movq $-1, %rax
3007; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3008; X64-NOBMI-NEXT:    shrq %cl, %rax
3009; X64-NOBMI-NEXT:    movq %rax, (%rdx)
3010; X64-NOBMI-NEXT:    andq %rdi, %rax
3011; X64-NOBMI-NEXT:    retq
3012;
3013; X64-BMI1-LABEL: bzhi64_c4_commutative:
3014; X64-BMI1:       # %bb.0:
3015; X64-BMI1-NEXT:    movq %rsi, %rcx
3016; X64-BMI1-NEXT:    negb %cl
3017; X64-BMI1-NEXT:    movq $-1, %rax
3018; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
3019; X64-BMI1-NEXT:    shrq %cl, %rax
3020; X64-BMI1-NEXT:    movq %rax, (%rdx)
3021; X64-BMI1-NEXT:    andq %rdi, %rax
3022; X64-BMI1-NEXT:    retq
3023;
3024; X64-BMI2-LABEL: bzhi64_c4_commutative:
3025; X64-BMI2:       # %bb.0:
3026; X64-BMI2-NEXT:    movq $-1, %rax
3027; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rax
3028; X64-BMI2-NEXT:    movq %rax, (%rdx)
3029; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
3030; X64-BMI2-NEXT:    retq
3031  %numhighbits = sub i64 64, %numlowbits
3032  %mask = lshr i64 -1, %numhighbits
3033  store i64 %mask, ptr %escape
3034  %masked = and i64 %val, %mask ; swapped order
3035  ret i64 %masked
3036}
3037
3038; 64-bit, but with 32-bit output
3039
3040; Everything done in 64-bit, truncation happens last.
; That is, (-1 >> (64 - %numlowbits)) & %val is computed in i64 and only the
; result is truncated to i32. The mask is not stored anywhere in this test.
3041define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
3042; X86-NOBMI-LABEL: bzhi64_32_c0:
3043; X86-NOBMI:       # %bb.0:
3044; X86-NOBMI-NEXT:    movb $64, %cl
3045; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3046; X86-NOBMI-NEXT:    movl $-1, %eax
3047; X86-NOBMI-NEXT:    shrl %cl, %eax
3048; X86-NOBMI-NEXT:    testb $32, %cl
3049; X86-NOBMI-NEXT:    jne .LBB40_2
3050; X86-NOBMI-NEXT:  # %bb.1:
3051; X86-NOBMI-NEXT:    movl $-1, %eax
3052; X86-NOBMI-NEXT:  .LBB40_2:
3053; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
3054; X86-NOBMI-NEXT:    retl
3055;
3056; X86-BMI1-LABEL: bzhi64_32_c0:
3057; X86-BMI1:       # %bb.0:
3058; X86-BMI1-NEXT:    movb $64, %cl
3059; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3060; X86-BMI1-NEXT:    movl $-1, %eax
3061; X86-BMI1-NEXT:    shrl %cl, %eax
3062; X86-BMI1-NEXT:    testb $32, %cl
3063; X86-BMI1-NEXT:    jne .LBB40_2
3064; X86-BMI1-NEXT:  # %bb.1:
3065; X86-BMI1-NEXT:    movl $-1, %eax
3066; X86-BMI1-NEXT:  .LBB40_2:
3067; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
3068; X86-BMI1-NEXT:    retl
3069;
3070; X86-BMI2-LABEL: bzhi64_32_c0:
3071; X86-BMI2:       # %bb.0:
3072; X86-BMI2-NEXT:    movb $64, %cl
3073; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3074; X86-BMI2-NEXT:    movl $-1, %eax
3075; X86-BMI2-NEXT:    testb $32, %cl
3076; X86-BMI2-NEXT:    je .LBB40_2
3077; X86-BMI2-NEXT:  # %bb.1:
3078; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
3079; X86-BMI2-NEXT:  .LBB40_2:
3080; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
3081; X86-BMI2-NEXT:    retl
3082;
3083; X64-NOBMI-LABEL: bzhi64_32_c0:
3084; X64-NOBMI:       # %bb.0:
3085; X64-NOBMI-NEXT:    movq %rsi, %rcx
3086; X64-NOBMI-NEXT:    negb %cl
3087; X64-NOBMI-NEXT:    movq $-1, %rax
3088; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3089; X64-NOBMI-NEXT:    shrq %cl, %rax
3090; X64-NOBMI-NEXT:    andl %edi, %eax
3091; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3092; X64-NOBMI-NEXT:    retq
3093;
3094; X64-BMI1-LABEL: bzhi64_32_c0:
3095; X64-BMI1:       # %bb.0:
3096; X64-BMI1-NEXT:    shll $8, %esi
3097; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3098; X64-BMI1-NEXT:    retq
3099;
3100; X64-BMI2-LABEL: bzhi64_32_c0:
3101; X64-BMI2:       # %bb.0:
3102; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3103; X64-BMI2-NEXT:    retq
3104  %numhighbits = sub i64 64, %numlowbits
3105  %mask = lshr i64 -1, %numhighbits
3106  %masked = and i64 %mask, %val
3107  %res = trunc i64 %masked to i32
3108  ret i32 %res
3109}
3110
3111; The 64-bit value is truncated to 32-bit first. Mask shifting and masking are 32-bit.
; That is, trunc(%val) & (-1 >> (32 - %numlowbits)), with every operation after
; the truncation performed on i32.
3112define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
3113; X86-NOBMI-LABEL: bzhi64_32_c1:
3114; X86-NOBMI:       # %bb.0:
3115; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3116; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3117; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3118; X86-NOBMI-NEXT:    shll %cl, %eax
3119; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3120; X86-NOBMI-NEXT:    shrl %cl, %eax
3121; X86-NOBMI-NEXT:    retl
3122;
3123; X86-BMI1-LABEL: bzhi64_32_c1:
3124; X86-BMI1:       # %bb.0:
3125; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3126; X86-BMI1-NEXT:    shll $8, %eax
3127; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3128; X86-BMI1-NEXT:    retl
3129;
3130; X86-BMI2-LABEL: bzhi64_32_c1:
3131; X86-BMI2:       # %bb.0:
3132; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3133; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3134; X86-BMI2-NEXT:    retl
3135;
3136; X64-NOBMI-LABEL: bzhi64_32_c1:
3137; X64-NOBMI:       # %bb.0:
3138; X64-NOBMI-NEXT:    movl %esi, %ecx
3139; X64-NOBMI-NEXT:    movq %rdi, %rax
3140; X64-NOBMI-NEXT:    negb %cl
3141; X64-NOBMI-NEXT:    shll %cl, %eax
3142; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3143; X64-NOBMI-NEXT:    shrl %cl, %eax
3144; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3145; X64-NOBMI-NEXT:    retq
3146;
3147; X64-BMI1-LABEL: bzhi64_32_c1:
3148; X64-BMI1:       # %bb.0:
3149; X64-BMI1-NEXT:    shll $8, %esi
3150; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3151; X64-BMI1-NEXT:    retq
3152;
3153; X64-BMI2-LABEL: bzhi64_32_c1:
3154; X64-BMI2:       # %bb.0:
3155; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3156; X64-BMI2-NEXT:    retq
3157  %truncval = trunc i64 %val to i32
3158  %numhighbits = sub i32 32, %numlowbits
3159  %mask = lshr i32 -1, %numhighbits
3160  %masked = and i32 %mask, %truncval
3161  ret i32 %masked
3162}
3163
3164; Mask shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit.
3165; Masking is 64-bit. Then truncation.
; That is, trunc(zext(-1 >> (32 - %numlowbits)) & %val): the i32 mask is
; zero-extended to i64, and'ed with the i64 value, and the result truncated.
3166define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
3167; X86-NOBMI-LABEL: bzhi64_32_c2:
3168; X86-NOBMI:       # %bb.0:
3169; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3170; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3171; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3172; X86-NOBMI-NEXT:    shll %cl, %eax
3173; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3174; X86-NOBMI-NEXT:    shrl %cl, %eax
3175; X86-NOBMI-NEXT:    retl
3176;
3177; X86-BMI1-LABEL: bzhi64_32_c2:
3178; X86-BMI1:       # %bb.0:
3179; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3180; X86-BMI1-NEXT:    shll $8, %eax
3181; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3182; X86-BMI1-NEXT:    retl
3183;
3184; X86-BMI2-LABEL: bzhi64_32_c2:
3185; X86-BMI2:       # %bb.0:
3186; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3187; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3188; X86-BMI2-NEXT:    retl
3189;
3190; X64-NOBMI-LABEL: bzhi64_32_c2:
3191; X64-NOBMI:       # %bb.0:
3192; X64-NOBMI-NEXT:    movl %esi, %ecx
3193; X64-NOBMI-NEXT:    movq %rdi, %rax
3194; X64-NOBMI-NEXT:    negb %cl
3195; X64-NOBMI-NEXT:    shll %cl, %eax
3196; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3197; X64-NOBMI-NEXT:    shrl %cl, %eax
3198; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3199; X64-NOBMI-NEXT:    retq
3200;
3201; X64-BMI1-LABEL: bzhi64_32_c2:
3202; X64-BMI1:       # %bb.0:
3203; X64-BMI1-NEXT:    shll $8, %esi
3204; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3205; X64-BMI1-NEXT:    retq
3206;
3207; X64-BMI2-LABEL: bzhi64_32_c2:
3208; X64-BMI2:       # %bb.0:
3209; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3210; X64-BMI2-NEXT:    retq
3211  %numhighbits = sub i32 32, %numlowbits
3212  %mask = lshr i32 -1, %numhighbits
3213  %zextmask = zext i32 %mask to i64
3214  %masked = and i64 %zextmask, %val
3215  %truncmasked = trunc i64 %masked to i32
3216  ret i32 %truncmasked
3217}
3218
3219; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
3220; Masking is 64-bit. Then truncation.
; The shifted constant is 4294967295 (0xFFFFFFFF) rather than -1, and the
; shift amount is 64 - %numlowbits. The i686 expectations below use the common
; X86 prefix, i.e. they are shared by every i686 RUN line.
3221define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
3222; X86-LABEL: bzhi64_32_c3:
3223; X86:       # %bb.0:
3224; X86-NEXT:    movb $64, %cl
3225; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
3226; X86-NEXT:    xorl %eax, %eax
3227; X86-NEXT:    movl $-1, %edx
3228; X86-NEXT:    shrdl %cl, %eax, %edx
3229; X86-NEXT:    testb $32, %cl
3230; X86-NEXT:    jne .LBB43_2
3231; X86-NEXT:  # %bb.1:
3232; X86-NEXT:    movl %edx, %eax
3233; X86-NEXT:  .LBB43_2:
3234; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
3235; X86-NEXT:    retl
3236;
3237; X64-NOBMI-LABEL: bzhi64_32_c3:
3238; X64-NOBMI:       # %bb.0:
3239; X64-NOBMI-NEXT:    movq %rsi, %rcx
3240; X64-NOBMI-NEXT:    negb %cl
3241; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
3242; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3243; X64-NOBMI-NEXT:    shrq %cl, %rax
3244; X64-NOBMI-NEXT:    andl %edi, %eax
3245; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
3246; X64-NOBMI-NEXT:    retq
3247;
3248; X64-BMI1-LABEL: bzhi64_32_c3:
3249; X64-BMI1:       # %bb.0:
3250; X64-BMI1-NEXT:    movq %rsi, %rcx
3251; X64-BMI1-NEXT:    negb %cl
3252; X64-BMI1-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
3253; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
3254; X64-BMI1-NEXT:    shrq %cl, %rax
3255; X64-BMI1-NEXT:    andl %edi, %eax
3256; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
3257; X64-BMI1-NEXT:    retq
3258;
3259; X64-BMI2-LABEL: bzhi64_32_c3:
3260; X64-BMI2:       # %bb.0:
3261; X64-BMI2-NEXT:    negb %sil
3262; X64-BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
3263; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
3264; X64-BMI2-NEXT:    andl %edi, %eax
3265; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
3266; X64-BMI2-NEXT:    retq
3267  %numhighbits = sub i64 64, %numlowbits
3268  %mask = lshr i64 4294967295, %numhighbits
3269  %masked = and i64 %mask, %val
3270  %truncmasked = trunc i64 %masked to i32
3271  ret i32 %truncmasked
3272}
3273
3274; ---------------------------------------------------------------------------- ;
3275; Pattern d. 32-bit.
3276; ---------------------------------------------------------------------------- ;
3277
; Pattern d, 32-bit, base case:
;   (%val << (32 - %numlowbits)) >> (32 - %numlowbits)   (logical right shift)
; Both shifts use the same %numhighbits value.
3278define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
3279; X86-NOBMI-LABEL: bzhi32_d0:
3280; X86-NOBMI:       # %bb.0:
3281; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3282; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3283; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3284; X86-NOBMI-NEXT:    shll %cl, %eax
3285; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3286; X86-NOBMI-NEXT:    shrl %cl, %eax
3287; X86-NOBMI-NEXT:    retl
3288;
3289; X86-BMI1-LABEL: bzhi32_d0:
3290; X86-BMI1:       # %bb.0:
3291; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3292; X86-BMI1-NEXT:    shll $8, %eax
3293; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3294; X86-BMI1-NEXT:    retl
3295;
3296; X86-BMI2-LABEL: bzhi32_d0:
3297; X86-BMI2:       # %bb.0:
3298; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3299; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3300; X86-BMI2-NEXT:    retl
3301;
3302; X64-NOBMI-LABEL: bzhi32_d0:
3303; X64-NOBMI:       # %bb.0:
3304; X64-NOBMI-NEXT:    movl %esi, %ecx
3305; X64-NOBMI-NEXT:    movl %edi, %eax
3306; X64-NOBMI-NEXT:    negb %cl
3307; X64-NOBMI-NEXT:    shll %cl, %eax
3308; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3309; X64-NOBMI-NEXT:    shrl %cl, %eax
3310; X64-NOBMI-NEXT:    retq
3311;
3312; X64-BMI1-LABEL: bzhi32_d0:
3313; X64-BMI1:       # %bb.0:
3314; X64-BMI1-NEXT:    shll $8, %esi
3315; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3316; X64-BMI1-NEXT:    retq
3317;
3318; X64-BMI2-LABEL: bzhi32_d0:
3319; X64-BMI2:       # %bb.0:
3320; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3321; X64-BMI2-NEXT:    retq
3322  %numhighbits = sub i32 32, %numlowbits
3323  %highbitscleared = shl i32 %val, %numhighbits
3324  %masked = lshr i32 %highbitscleared, %numhighbits
3325  ret i32 %masked
3326}
3327
; Pattern d, 32-bit, with the bit count passed as i8: 32 - %numlowbits is
; computed in i8 and zero-extended to i32, and that one extended value is the
; shift amount for both the shl and the lshr.
3328define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
3329; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
3330; X86-NOBMI:       # %bb.0:
3331; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3332; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3333; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3334; X86-NOBMI-NEXT:    shll %cl, %eax
3335; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3336; X86-NOBMI-NEXT:    shrl %cl, %eax
3337; X86-NOBMI-NEXT:    retl
3338;
3339; X86-BMI1-LABEL: bzhi32_d1_indexzext:
3340; X86-BMI1:       # %bb.0:
3341; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3342; X86-BMI1-NEXT:    shll $8, %eax
3343; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
3344; X86-BMI1-NEXT:    retl
3345;
3346; X86-BMI2-LABEL: bzhi32_d1_indexzext:
3347; X86-BMI2:       # %bb.0:
3348; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
3349; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
3350; X86-BMI2-NEXT:    retl
3351;
3352; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
3353; X64-NOBMI:       # %bb.0:
3354; X64-NOBMI-NEXT:    movl %esi, %ecx
3355; X64-NOBMI-NEXT:    movl %edi, %eax
3356; X64-NOBMI-NEXT:    negb %cl
3357; X64-NOBMI-NEXT:    shll %cl, %eax
3358; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3359; X64-NOBMI-NEXT:    shrl %cl, %eax
3360; X64-NOBMI-NEXT:    retq
3361;
3362; X64-BMI1-LABEL: bzhi32_d1_indexzext:
3363; X64-BMI1:       # %bb.0:
3364; X64-BMI1-NEXT:    shll $8, %esi
3365; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
3366; X64-BMI1-NEXT:    retq
3367;
3368; X64-BMI2-LABEL: bzhi32_d1_indexzext:
3369; X64-BMI2:       # %bb.0:
3370; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
3371; X64-BMI2-NEXT:    retq
3372  %numhighbits = sub i8 32, %numlowbits
3373  %sh_prom = zext i8 %numhighbits to i32
3374  %highbitscleared = shl i32 %val, %sh_prom
3375  %masked = lshr i32 %highbitscleared, %sh_prom
3376  ret i32 %masked
3377}
3378
; Pattern d, 32-bit, with the value operand loaded from memory through %w:
;   (load(%w) << (32 - %numlowbits)) >> (32 - %numlowbits)   (logical right shift)
3379define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
3380; X86-NOBMI-LABEL: bzhi32_d2_load:
3381; X86-NOBMI:       # %bb.0:
3382; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3383; X86-NOBMI-NEXT:    movl (%eax), %eax
3384; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3385; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3386; X86-NOBMI-NEXT:    shll %cl, %eax
3387; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3388; X86-NOBMI-NEXT:    shrl %cl, %eax
3389; X86-NOBMI-NEXT:    retl
3390;
3391; X86-BMI1-LABEL: bzhi32_d2_load:
3392; X86-BMI1:       # %bb.0:
3393; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3394; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
3395; X86-BMI1-NEXT:    shll $8, %ecx
3396; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
3397; X86-BMI1-NEXT:    retl
3398;
3399; X86-BMI2-LABEL: bzhi32_d2_load:
3400; X86-BMI2:       # %bb.0:
3401; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3402; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
3403; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
3404; X86-BMI2-NEXT:    retl
3405;
3406; X64-NOBMI-LABEL: bzhi32_d2_load:
3407; X64-NOBMI:       # %bb.0:
3408; X64-NOBMI-NEXT:    movl %esi, %ecx
3409; X64-NOBMI-NEXT:    movl (%rdi), %eax
3410; X64-NOBMI-NEXT:    negb %cl
3411; X64-NOBMI-NEXT:    shll %cl, %eax
3412; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3413; X64-NOBMI-NEXT:    shrl %cl, %eax
3414; X64-NOBMI-NEXT:    retq
3415;
3416; X64-BMI1-LABEL: bzhi32_d2_load:
3417; X64-BMI1:       # %bb.0:
3418; X64-BMI1-NEXT:    shll $8, %esi
3419; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
3420; X64-BMI1-NEXT:    retq
3421;
3422; X64-BMI2-LABEL: bzhi32_d2_load:
3423; X64-BMI2:       # %bb.0:
3424; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
3425; X64-BMI2-NEXT:    retq
3426  %val = load i32, ptr %w
3427  %numhighbits = sub i32 32, %numlowbits
3428  %highbitscleared = shl i32 %val, %numhighbits
3429  %masked = lshr i32 %highbitscleared, %numhighbits
3430  ret i32 %masked
3431}
3432
; Pattern d, 32-bit, combining a loaded value operand with an i8 bit count:
; the value comes from a load through %w, and 32 - %numlowbits is computed in
; i8 and zero-extended to i32 before being used for both the shl and the lshr.
3433define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
3434; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext:
3435; X86-NOBMI:       # %bb.0:
3436; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3437; X86-NOBMI-NEXT:    movl (%eax), %eax
3438; X86-NOBMI-NEXT:    xorl %ecx, %ecx
3439; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3440; X86-NOBMI-NEXT:    shll %cl, %eax
3441; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3442; X86-NOBMI-NEXT:    shrl %cl, %eax
3443; X86-NOBMI-NEXT:    retl
3444;
3445; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
3446; X86-BMI1:       # %bb.0:
3447; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3448; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
3449; X86-BMI1-NEXT:    shll $8, %ecx
3450; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
3451; X86-BMI1-NEXT:    retl
3452;
3453; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
3454; X86-BMI2:       # %bb.0:
3455; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3456; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
3457; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
3458; X86-BMI2-NEXT:    retl
3459;
3460; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
3461; X64-NOBMI:       # %bb.0:
3462; X64-NOBMI-NEXT:    movl %esi, %ecx
3463; X64-NOBMI-NEXT:    movl (%rdi), %eax
3464; X64-NOBMI-NEXT:    negb %cl
3465; X64-NOBMI-NEXT:    shll %cl, %eax
3466; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3467; X64-NOBMI-NEXT:    shrl %cl, %eax
3468; X64-NOBMI-NEXT:    retq
3469;
3470; X64-BMI1-LABEL: bzhi32_d3_load_indexzext:
3471; X64-BMI1:       # %bb.0:
3472; X64-BMI1-NEXT:    shll $8, %esi
3473; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
3474; X64-BMI1-NEXT:    retq
3475;
3476; X64-BMI2-LABEL: bzhi32_d3_load_indexzext:
3477; X64-BMI2:       # %bb.0:
3478; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
3479; X64-BMI2-NEXT:    retq
3480  %val = load i32, ptr %w
3481  %numhighbits = sub i8 32, %numlowbits
3482  %sh_prom = zext i8 %numhighbits to i32
3483  %highbitscleared = shl i32 %val, %sh_prom
3484  %masked = lshr i32 %highbitscleared, %sh_prom
3485  ret i32 %masked
3486}
3487
3488; 64-bit.
3489
; Pattern d, 64-bit: value and bit count are both passed as i64.
; Keeps the low %numlowbits bits of %val: shl by (64 - %numlowbits), then lshr
; by the same amount.
; On the 32-bit triple every configuration is expected to expand the i64 shifts
; into shld/shrd sequences that branch on bit 5 (value 32) of the shift amount.
; On the 64-bit triple the expectation is a shl/shr pair without BMI, bextrq
; with BMI1 only, and a single bzhiq with BMI2.
3490define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
3491; X86-NOBMI-LABEL: bzhi64_d0:
3492; X86-NOBMI:       # %bb.0:
3493; X86-NOBMI-NEXT:    pushl %ebx
3494; X86-NOBMI-NEXT:    pushl %edi
3495; X86-NOBMI-NEXT:    pushl %esi
3496; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3497; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3498; X86-NOBMI-NEXT:    movb $64, %cl
3499; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3500; X86-NOBMI-NEXT:    movl %edx, %esi
3501; X86-NOBMI-NEXT:    shll %cl, %esi
3502; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3503; X86-NOBMI-NEXT:    testb $32, %cl
3504; X86-NOBMI-NEXT:    movl %esi, %edi
3505; X86-NOBMI-NEXT:    jne .LBB48_2
3506; X86-NOBMI-NEXT:  # %bb.1:
3507; X86-NOBMI-NEXT:    movl %eax, %edi
3508; X86-NOBMI-NEXT:  .LBB48_2:
3509; X86-NOBMI-NEXT:    movl %edi, %eax
3510; X86-NOBMI-NEXT:    shrl %cl, %eax
3511; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3512; X86-NOBMI-NEXT:    testb $32, %cl
3513; X86-NOBMI-NEXT:    movl $0, %edx
3514; X86-NOBMI-NEXT:    jne .LBB48_4
3515; X86-NOBMI-NEXT:  # %bb.3:
3516; X86-NOBMI-NEXT:    movl %esi, %ebx
3517; X86-NOBMI-NEXT:    movl %eax, %edx
3518; X86-NOBMI-NEXT:  .LBB48_4:
3519; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3520; X86-NOBMI-NEXT:    testb $32, %cl
3521; X86-NOBMI-NEXT:    jne .LBB48_6
3522; X86-NOBMI-NEXT:  # %bb.5:
3523; X86-NOBMI-NEXT:    movl %ebx, %eax
3524; X86-NOBMI-NEXT:  .LBB48_6:
3525; X86-NOBMI-NEXT:    popl %esi
3526; X86-NOBMI-NEXT:    popl %edi
3527; X86-NOBMI-NEXT:    popl %ebx
3528; X86-NOBMI-NEXT:    retl
3529;
3530; X86-BMI1-LABEL: bzhi64_d0:
3531; X86-BMI1:       # %bb.0:
3532; X86-BMI1-NEXT:    pushl %ebx
3533; X86-BMI1-NEXT:    pushl %edi
3534; X86-BMI1-NEXT:    pushl %esi
3535; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
3536; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3537; X86-BMI1-NEXT:    movb $64, %cl
3538; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3539; X86-BMI1-NEXT:    movl %edx, %esi
3540; X86-BMI1-NEXT:    shll %cl, %esi
3541; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3542; X86-BMI1-NEXT:    testb $32, %cl
3543; X86-BMI1-NEXT:    movl %esi, %edi
3544; X86-BMI1-NEXT:    jne .LBB48_2
3545; X86-BMI1-NEXT:  # %bb.1:
3546; X86-BMI1-NEXT:    movl %eax, %edi
3547; X86-BMI1-NEXT:  .LBB48_2:
3548; X86-BMI1-NEXT:    movl %edi, %eax
3549; X86-BMI1-NEXT:    shrl %cl, %eax
3550; X86-BMI1-NEXT:    xorl %ebx, %ebx
3551; X86-BMI1-NEXT:    testb $32, %cl
3552; X86-BMI1-NEXT:    movl $0, %edx
3553; X86-BMI1-NEXT:    jne .LBB48_4
3554; X86-BMI1-NEXT:  # %bb.3:
3555; X86-BMI1-NEXT:    movl %esi, %ebx
3556; X86-BMI1-NEXT:    movl %eax, %edx
3557; X86-BMI1-NEXT:  .LBB48_4:
3558; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3559; X86-BMI1-NEXT:    testb $32, %cl
3560; X86-BMI1-NEXT:    jne .LBB48_6
3561; X86-BMI1-NEXT:  # %bb.5:
3562; X86-BMI1-NEXT:    movl %ebx, %eax
3563; X86-BMI1-NEXT:  .LBB48_6:
3564; X86-BMI1-NEXT:    popl %esi
3565; X86-BMI1-NEXT:    popl %edi
3566; X86-BMI1-NEXT:    popl %ebx
3567; X86-BMI1-NEXT:    retl
3568;
3569; X86-BMI2-LABEL: bzhi64_d0:
3570; X86-BMI2:       # %bb.0:
3571; X86-BMI2-NEXT:    pushl %edi
3572; X86-BMI2-NEXT:    pushl %esi
3573; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3574; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
3575; X86-BMI2-NEXT:    movb $64, %cl
3576; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3577; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
3578; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
3579; X86-BMI2-NEXT:    xorl %edx, %edx
3580; X86-BMI2-NEXT:    testb $32, %cl
3581; X86-BMI2-NEXT:    je .LBB48_2
3582; X86-BMI2-NEXT:  # %bb.1:
3583; X86-BMI2-NEXT:    movl %edi, %esi
3584; X86-BMI2-NEXT:    movl $0, %edi
3585; X86-BMI2-NEXT:  .LBB48_2:
3586; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3587; X86-BMI2-NEXT:    jne .LBB48_4
3588; X86-BMI2-NEXT:  # %bb.3:
3589; X86-BMI2-NEXT:    movl %eax, %edx
3590; X86-BMI2-NEXT:  .LBB48_4:
3591; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3592; X86-BMI2-NEXT:    testb $32, %cl
3593; X86-BMI2-NEXT:    jne .LBB48_6
3594; X86-BMI2-NEXT:  # %bb.5:
3595; X86-BMI2-NEXT:    movl %edi, %eax
3596; X86-BMI2-NEXT:  .LBB48_6:
3597; X86-BMI2-NEXT:    popl %esi
3598; X86-BMI2-NEXT:    popl %edi
3599; X86-BMI2-NEXT:    retl
3600;
3601; X64-NOBMI-LABEL: bzhi64_d0:
3602; X64-NOBMI:       # %bb.0:
3603; X64-NOBMI-NEXT:    movq %rsi, %rcx
3604; X64-NOBMI-NEXT:    movq %rdi, %rax
3605; X64-NOBMI-NEXT:    negb %cl
3606; X64-NOBMI-NEXT:    shlq %cl, %rax
3607; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3608; X64-NOBMI-NEXT:    shrq %cl, %rax
3609; X64-NOBMI-NEXT:    retq
3610;
3611; X64-BMI1-LABEL: bzhi64_d0:
3612; X64-BMI1:       # %bb.0:
3613; X64-BMI1-NEXT:    shll $8, %esi
3614; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
3615; X64-BMI1-NEXT:    retq
3616;
3617; X64-BMI2-LABEL: bzhi64_d0:
3618; X64-BMI2:       # %bb.0:
3619; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
3620; X64-BMI2-NEXT:    retq
  ; Number of high bits to discard.
3621  %numhighbits = sub i64 64, %numlowbits
  ; Same amount for both shifts: the discarded high bits come back as zeros.
3622  %highbitscleared = shl i64 %val, %numhighbits
3623  %masked = lshr i64 %highbitscleared, %numhighbits
3624  ret i64 %masked
3625}
3626
; Pattern d, 64-bit: the bit count is an i8 that is zero-extended to i64 after
; the subtraction from 64.
; Keeps the low %numlowbits bits of %val via shl followed by lshr by
; (64 - %numlowbits).
; On the 32-bit triple every configuration is expected to expand the i64 shifts
; into shld/shrd sequences that branch on bit 5 (value 32) of the shift amount.
; On the 64-bit triple the expectation is a shl/shr pair without BMI, bextrq
; with BMI1 only, and a single bzhiq with BMI2.
3627define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
3628; X86-NOBMI-LABEL: bzhi64_d1_indexzext:
3629; X86-NOBMI:       # %bb.0:
3630; X86-NOBMI-NEXT:    pushl %ebx
3631; X86-NOBMI-NEXT:    pushl %edi
3632; X86-NOBMI-NEXT:    pushl %esi
3633; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3634; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3635; X86-NOBMI-NEXT:    movb $64, %cl
3636; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3637; X86-NOBMI-NEXT:    movl %edx, %esi
3638; X86-NOBMI-NEXT:    shll %cl, %esi
3639; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3640; X86-NOBMI-NEXT:    testb $32, %cl
3641; X86-NOBMI-NEXT:    movl %esi, %edi
3642; X86-NOBMI-NEXT:    jne .LBB49_2
3643; X86-NOBMI-NEXT:  # %bb.1:
3644; X86-NOBMI-NEXT:    movl %eax, %edi
3645; X86-NOBMI-NEXT:  .LBB49_2:
3646; X86-NOBMI-NEXT:    movl %edi, %eax
3647; X86-NOBMI-NEXT:    shrl %cl, %eax
3648; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3649; X86-NOBMI-NEXT:    testb $32, %cl
3650; X86-NOBMI-NEXT:    movl $0, %edx
3651; X86-NOBMI-NEXT:    jne .LBB49_4
3652; X86-NOBMI-NEXT:  # %bb.3:
3653; X86-NOBMI-NEXT:    movl %esi, %ebx
3654; X86-NOBMI-NEXT:    movl %eax, %edx
3655; X86-NOBMI-NEXT:  .LBB49_4:
3656; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3657; X86-NOBMI-NEXT:    testb $32, %cl
3658; X86-NOBMI-NEXT:    jne .LBB49_6
3659; X86-NOBMI-NEXT:  # %bb.5:
3660; X86-NOBMI-NEXT:    movl %ebx, %eax
3661; X86-NOBMI-NEXT:  .LBB49_6:
3662; X86-NOBMI-NEXT:    popl %esi
3663; X86-NOBMI-NEXT:    popl %edi
3664; X86-NOBMI-NEXT:    popl %ebx
3665; X86-NOBMI-NEXT:    retl
3666;
3667; X86-BMI1-LABEL: bzhi64_d1_indexzext:
3668; X86-BMI1:       # %bb.0:
3669; X86-BMI1-NEXT:    pushl %ebx
3670; X86-BMI1-NEXT:    pushl %edi
3671; X86-BMI1-NEXT:    pushl %esi
3672; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
3673; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3674; X86-BMI1-NEXT:    movb $64, %cl
3675; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3676; X86-BMI1-NEXT:    movl %edx, %esi
3677; X86-BMI1-NEXT:    shll %cl, %esi
3678; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3679; X86-BMI1-NEXT:    testb $32, %cl
3680; X86-BMI1-NEXT:    movl %esi, %edi
3681; X86-BMI1-NEXT:    jne .LBB49_2
3682; X86-BMI1-NEXT:  # %bb.1:
3683; X86-BMI1-NEXT:    movl %eax, %edi
3684; X86-BMI1-NEXT:  .LBB49_2:
3685; X86-BMI1-NEXT:    movl %edi, %eax
3686; X86-BMI1-NEXT:    shrl %cl, %eax
3687; X86-BMI1-NEXT:    xorl %ebx, %ebx
3688; X86-BMI1-NEXT:    testb $32, %cl
3689; X86-BMI1-NEXT:    movl $0, %edx
3690; X86-BMI1-NEXT:    jne .LBB49_4
3691; X86-BMI1-NEXT:  # %bb.3:
3692; X86-BMI1-NEXT:    movl %esi, %ebx
3693; X86-BMI1-NEXT:    movl %eax, %edx
3694; X86-BMI1-NEXT:  .LBB49_4:
3695; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3696; X86-BMI1-NEXT:    testb $32, %cl
3697; X86-BMI1-NEXT:    jne .LBB49_6
3698; X86-BMI1-NEXT:  # %bb.5:
3699; X86-BMI1-NEXT:    movl %ebx, %eax
3700; X86-BMI1-NEXT:  .LBB49_6:
3701; X86-BMI1-NEXT:    popl %esi
3702; X86-BMI1-NEXT:    popl %edi
3703; X86-BMI1-NEXT:    popl %ebx
3704; X86-BMI1-NEXT:    retl
3705;
3706; X86-BMI2-LABEL: bzhi64_d1_indexzext:
3707; X86-BMI2:       # %bb.0:
3708; X86-BMI2-NEXT:    pushl %edi
3709; X86-BMI2-NEXT:    pushl %esi
3710; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3711; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
3712; X86-BMI2-NEXT:    movb $64, %cl
3713; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3714; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
3715; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
3716; X86-BMI2-NEXT:    xorl %edx, %edx
3717; X86-BMI2-NEXT:    testb $32, %cl
3718; X86-BMI2-NEXT:    je .LBB49_2
3719; X86-BMI2-NEXT:  # %bb.1:
3720; X86-BMI2-NEXT:    movl %edi, %esi
3721; X86-BMI2-NEXT:    movl $0, %edi
3722; X86-BMI2-NEXT:  .LBB49_2:
3723; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3724; X86-BMI2-NEXT:    jne .LBB49_4
3725; X86-BMI2-NEXT:  # %bb.3:
3726; X86-BMI2-NEXT:    movl %eax, %edx
3727; X86-BMI2-NEXT:  .LBB49_4:
3728; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3729; X86-BMI2-NEXT:    testb $32, %cl
3730; X86-BMI2-NEXT:    jne .LBB49_6
3731; X86-BMI2-NEXT:  # %bb.5:
3732; X86-BMI2-NEXT:    movl %edi, %eax
3733; X86-BMI2-NEXT:  .LBB49_6:
3734; X86-BMI2-NEXT:    popl %esi
3735; X86-BMI2-NEXT:    popl %edi
3736; X86-BMI2-NEXT:    retl
3737;
3738; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
3739; X64-NOBMI:       # %bb.0:
3740; X64-NOBMI-NEXT:    movl %esi, %ecx
3741; X64-NOBMI-NEXT:    movq %rdi, %rax
3742; X64-NOBMI-NEXT:    negb %cl
3743; X64-NOBMI-NEXT:    shlq %cl, %rax
3744; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3745; X64-NOBMI-NEXT:    shrq %cl, %rax
3746; X64-NOBMI-NEXT:    retq
3747;
3748; X64-BMI1-LABEL: bzhi64_d1_indexzext:
3749; X64-BMI1:       # %bb.0:
3750; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
3751; X64-BMI1-NEXT:    shll $8, %esi
3752; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
3753; X64-BMI1-NEXT:    retq
3754;
3755; X64-BMI2-LABEL: bzhi64_d1_indexzext:
3756; X64-BMI2:       # %bb.0:
3757; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
3758; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
3759; X64-BMI2-NEXT:    retq
  ; The subtraction is done in i8 and only afterwards widened to the shift type.
3760  %numhighbits = sub i8 64, %numlowbits
3761  %sh_prom = zext i8 %numhighbits to i64
  ; Same amount for both shifts: the discarded high bits come back as zeros.
3762  %highbitscleared = shl i64 %val, %sh_prom
3763  %masked = lshr i64 %highbitscleared, %sh_prom
3764  ret i64 %masked
3765}
3766
; Pattern d, 64-bit: the value is loaded from %w and the bit count is an i64.
; Keeps the low %numlowbits bits of the loaded i64 via shl followed by lshr by
; (64 - %numlowbits).
; On the 32-bit triple the value is expected to be loaded as two 32-bit halves
; and shifted with shld/shrd sequences that branch on bit 5 of the amount.
; On the 64-bit triple the load is expected to fold into the instruction
; operand: bextrq from memory with BMI1 only, bzhiq from memory with BMI2.
3767define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
3768; X86-NOBMI-LABEL: bzhi64_d2_load:
3769; X86-NOBMI:       # %bb.0:
3770; X86-NOBMI-NEXT:    pushl %ebx
3771; X86-NOBMI-NEXT:    pushl %edi
3772; X86-NOBMI-NEXT:    pushl %esi
3773; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3774; X86-NOBMI-NEXT:    movl (%eax), %edx
3775; X86-NOBMI-NEXT:    movl 4(%eax), %eax
3776; X86-NOBMI-NEXT:    movb $64, %cl
3777; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3778; X86-NOBMI-NEXT:    movl %edx, %esi
3779; X86-NOBMI-NEXT:    shll %cl, %esi
3780; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3781; X86-NOBMI-NEXT:    testb $32, %cl
3782; X86-NOBMI-NEXT:    movl %esi, %edi
3783; X86-NOBMI-NEXT:    jne .LBB50_2
3784; X86-NOBMI-NEXT:  # %bb.1:
3785; X86-NOBMI-NEXT:    movl %eax, %edi
3786; X86-NOBMI-NEXT:  .LBB50_2:
3787; X86-NOBMI-NEXT:    movl %edi, %eax
3788; X86-NOBMI-NEXT:    shrl %cl, %eax
3789; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3790; X86-NOBMI-NEXT:    testb $32, %cl
3791; X86-NOBMI-NEXT:    movl $0, %edx
3792; X86-NOBMI-NEXT:    jne .LBB50_4
3793; X86-NOBMI-NEXT:  # %bb.3:
3794; X86-NOBMI-NEXT:    movl %esi, %ebx
3795; X86-NOBMI-NEXT:    movl %eax, %edx
3796; X86-NOBMI-NEXT:  .LBB50_4:
3797; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3798; X86-NOBMI-NEXT:    testb $32, %cl
3799; X86-NOBMI-NEXT:    jne .LBB50_6
3800; X86-NOBMI-NEXT:  # %bb.5:
3801; X86-NOBMI-NEXT:    movl %ebx, %eax
3802; X86-NOBMI-NEXT:  .LBB50_6:
3803; X86-NOBMI-NEXT:    popl %esi
3804; X86-NOBMI-NEXT:    popl %edi
3805; X86-NOBMI-NEXT:    popl %ebx
3806; X86-NOBMI-NEXT:    retl
3807;
3808; X86-BMI1-LABEL: bzhi64_d2_load:
3809; X86-BMI1:       # %bb.0:
3810; X86-BMI1-NEXT:    pushl %ebx
3811; X86-BMI1-NEXT:    pushl %edi
3812; X86-BMI1-NEXT:    pushl %esi
3813; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3814; X86-BMI1-NEXT:    movl (%eax), %edx
3815; X86-BMI1-NEXT:    movl 4(%eax), %eax
3816; X86-BMI1-NEXT:    movb $64, %cl
3817; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3818; X86-BMI1-NEXT:    movl %edx, %esi
3819; X86-BMI1-NEXT:    shll %cl, %esi
3820; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3821; X86-BMI1-NEXT:    testb $32, %cl
3822; X86-BMI1-NEXT:    movl %esi, %edi
3823; X86-BMI1-NEXT:    jne .LBB50_2
3824; X86-BMI1-NEXT:  # %bb.1:
3825; X86-BMI1-NEXT:    movl %eax, %edi
3826; X86-BMI1-NEXT:  .LBB50_2:
3827; X86-BMI1-NEXT:    movl %edi, %eax
3828; X86-BMI1-NEXT:    shrl %cl, %eax
3829; X86-BMI1-NEXT:    xorl %ebx, %ebx
3830; X86-BMI1-NEXT:    testb $32, %cl
3831; X86-BMI1-NEXT:    movl $0, %edx
3832; X86-BMI1-NEXT:    jne .LBB50_4
3833; X86-BMI1-NEXT:  # %bb.3:
3834; X86-BMI1-NEXT:    movl %esi, %ebx
3835; X86-BMI1-NEXT:    movl %eax, %edx
3836; X86-BMI1-NEXT:  .LBB50_4:
3837; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3838; X86-BMI1-NEXT:    testb $32, %cl
3839; X86-BMI1-NEXT:    jne .LBB50_6
3840; X86-BMI1-NEXT:  # %bb.5:
3841; X86-BMI1-NEXT:    movl %ebx, %eax
3842; X86-BMI1-NEXT:  .LBB50_6:
3843; X86-BMI1-NEXT:    popl %esi
3844; X86-BMI1-NEXT:    popl %edi
3845; X86-BMI1-NEXT:    popl %ebx
3846; X86-BMI1-NEXT:    retl
3847;
3848; X86-BMI2-LABEL: bzhi64_d2_load:
3849; X86-BMI2:       # %bb.0:
3850; X86-BMI2-NEXT:    pushl %edi
3851; X86-BMI2-NEXT:    pushl %esi
3852; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3853; X86-BMI2-NEXT:    movl (%eax), %edx
3854; X86-BMI2-NEXT:    movl 4(%eax), %esi
3855; X86-BMI2-NEXT:    movb $64, %cl
3856; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3857; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
3858; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
3859; X86-BMI2-NEXT:    xorl %edx, %edx
3860; X86-BMI2-NEXT:    testb $32, %cl
3861; X86-BMI2-NEXT:    je .LBB50_2
3862; X86-BMI2-NEXT:  # %bb.1:
3863; X86-BMI2-NEXT:    movl %edi, %esi
3864; X86-BMI2-NEXT:    movl $0, %edi
3865; X86-BMI2-NEXT:  .LBB50_2:
3866; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3867; X86-BMI2-NEXT:    jne .LBB50_4
3868; X86-BMI2-NEXT:  # %bb.3:
3869; X86-BMI2-NEXT:    movl %eax, %edx
3870; X86-BMI2-NEXT:  .LBB50_4:
3871; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3872; X86-BMI2-NEXT:    testb $32, %cl
3873; X86-BMI2-NEXT:    jne .LBB50_6
3874; X86-BMI2-NEXT:  # %bb.5:
3875; X86-BMI2-NEXT:    movl %edi, %eax
3876; X86-BMI2-NEXT:  .LBB50_6:
3877; X86-BMI2-NEXT:    popl %esi
3878; X86-BMI2-NEXT:    popl %edi
3879; X86-BMI2-NEXT:    retl
3880;
3881; X64-NOBMI-LABEL: bzhi64_d2_load:
3882; X64-NOBMI:       # %bb.0:
3883; X64-NOBMI-NEXT:    movq %rsi, %rcx
3884; X64-NOBMI-NEXT:    movq (%rdi), %rax
3885; X64-NOBMI-NEXT:    negb %cl
3886; X64-NOBMI-NEXT:    shlq %cl, %rax
3887; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3888; X64-NOBMI-NEXT:    shrq %cl, %rax
3889; X64-NOBMI-NEXT:    retq
3890;
3891; X64-BMI1-LABEL: bzhi64_d2_load:
3892; X64-BMI1:       # %bb.0:
3893; X64-BMI1-NEXT:    shll $8, %esi
3894; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
3895; X64-BMI1-NEXT:    retq
3896;
3897; X64-BMI2-LABEL: bzhi64_d2_load:
3898; X64-BMI2:       # %bb.0:
3899; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
3900; X64-BMI2-NEXT:    retq
3901  %val = load i64, ptr %w
  ; Number of high bits to discard.
3902  %numhighbits = sub i64 64, %numlowbits
  ; Same amount for both shifts: the discarded high bits come back as zeros.
3903  %highbitscleared = shl i64 %val, %numhighbits
3904  %masked = lshr i64 %highbitscleared, %numhighbits
3905  ret i64 %masked
3906}
3907
; Pattern d, 64-bit: the value is loaded from %w and the bit count is an i8
; that is zero-extended to i64 after the subtraction from 64.
; Keeps the low %numlowbits bits of the loaded i64 via shl followed by lshr by
; (64 - %numlowbits).
; On the 32-bit triple the value is expected to be loaded as two 32-bit halves
; and shifted with shld/shrd sequences that branch on bit 5 of the amount.
; On the 64-bit triple the load is expected to fold into the instruction
; operand: bextrq from memory with BMI1 only, bzhiq from memory with BMI2.
3908define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
3909; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
3910; X86-NOBMI:       # %bb.0:
3911; X86-NOBMI-NEXT:    pushl %ebx
3912; X86-NOBMI-NEXT:    pushl %edi
3913; X86-NOBMI-NEXT:    pushl %esi
3914; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3915; X86-NOBMI-NEXT:    movl (%eax), %edx
3916; X86-NOBMI-NEXT:    movl 4(%eax), %eax
3917; X86-NOBMI-NEXT:    movb $64, %cl
3918; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3919; X86-NOBMI-NEXT:    movl %edx, %esi
3920; X86-NOBMI-NEXT:    shll %cl, %esi
3921; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3922; X86-NOBMI-NEXT:    testb $32, %cl
3923; X86-NOBMI-NEXT:    movl %esi, %edi
3924; X86-NOBMI-NEXT:    jne .LBB51_2
3925; X86-NOBMI-NEXT:  # %bb.1:
3926; X86-NOBMI-NEXT:    movl %eax, %edi
3927; X86-NOBMI-NEXT:  .LBB51_2:
3928; X86-NOBMI-NEXT:    movl %edi, %eax
3929; X86-NOBMI-NEXT:    shrl %cl, %eax
3930; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3931; X86-NOBMI-NEXT:    testb $32, %cl
3932; X86-NOBMI-NEXT:    movl $0, %edx
3933; X86-NOBMI-NEXT:    jne .LBB51_4
3934; X86-NOBMI-NEXT:  # %bb.3:
3935; X86-NOBMI-NEXT:    movl %esi, %ebx
3936; X86-NOBMI-NEXT:    movl %eax, %edx
3937; X86-NOBMI-NEXT:  .LBB51_4:
3938; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3939; X86-NOBMI-NEXT:    testb $32, %cl
3940; X86-NOBMI-NEXT:    jne .LBB51_6
3941; X86-NOBMI-NEXT:  # %bb.5:
3942; X86-NOBMI-NEXT:    movl %ebx, %eax
3943; X86-NOBMI-NEXT:  .LBB51_6:
3944; X86-NOBMI-NEXT:    popl %esi
3945; X86-NOBMI-NEXT:    popl %edi
3946; X86-NOBMI-NEXT:    popl %ebx
3947; X86-NOBMI-NEXT:    retl
3948;
3949; X86-BMI1-LABEL: bzhi64_d3_load_indexzext:
3950; X86-BMI1:       # %bb.0:
3951; X86-BMI1-NEXT:    pushl %ebx
3952; X86-BMI1-NEXT:    pushl %edi
3953; X86-BMI1-NEXT:    pushl %esi
3954; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3955; X86-BMI1-NEXT:    movl (%eax), %edx
3956; X86-BMI1-NEXT:    movl 4(%eax), %eax
3957; X86-BMI1-NEXT:    movb $64, %cl
3958; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3959; X86-BMI1-NEXT:    movl %edx, %esi
3960; X86-BMI1-NEXT:    shll %cl, %esi
3961; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3962; X86-BMI1-NEXT:    testb $32, %cl
3963; X86-BMI1-NEXT:    movl %esi, %edi
3964; X86-BMI1-NEXT:    jne .LBB51_2
3965; X86-BMI1-NEXT:  # %bb.1:
3966; X86-BMI1-NEXT:    movl %eax, %edi
3967; X86-BMI1-NEXT:  .LBB51_2:
3968; X86-BMI1-NEXT:    movl %edi, %eax
3969; X86-BMI1-NEXT:    shrl %cl, %eax
3970; X86-BMI1-NEXT:    xorl %ebx, %ebx
3971; X86-BMI1-NEXT:    testb $32, %cl
3972; X86-BMI1-NEXT:    movl $0, %edx
3973; X86-BMI1-NEXT:    jne .LBB51_4
3974; X86-BMI1-NEXT:  # %bb.3:
3975; X86-BMI1-NEXT:    movl %esi, %ebx
3976; X86-BMI1-NEXT:    movl %eax, %edx
3977; X86-BMI1-NEXT:  .LBB51_4:
3978; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3979; X86-BMI1-NEXT:    testb $32, %cl
3980; X86-BMI1-NEXT:    jne .LBB51_6
3981; X86-BMI1-NEXT:  # %bb.5:
3982; X86-BMI1-NEXT:    movl %ebx, %eax
3983; X86-BMI1-NEXT:  .LBB51_6:
3984; X86-BMI1-NEXT:    popl %esi
3985; X86-BMI1-NEXT:    popl %edi
3986; X86-BMI1-NEXT:    popl %ebx
3987; X86-BMI1-NEXT:    retl
3988;
3989; X86-BMI2-LABEL: bzhi64_d3_load_indexzext:
3990; X86-BMI2:       # %bb.0:
3991; X86-BMI2-NEXT:    pushl %edi
3992; X86-BMI2-NEXT:    pushl %esi
3993; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3994; X86-BMI2-NEXT:    movl (%eax), %edx
3995; X86-BMI2-NEXT:    movl 4(%eax), %esi
3996; X86-BMI2-NEXT:    movb $64, %cl
3997; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3998; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
3999; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
4000; X86-BMI2-NEXT:    xorl %edx, %edx
4001; X86-BMI2-NEXT:    testb $32, %cl
4002; X86-BMI2-NEXT:    je .LBB51_2
4003; X86-BMI2-NEXT:  # %bb.1:
4004; X86-BMI2-NEXT:    movl %edi, %esi
4005; X86-BMI2-NEXT:    movl $0, %edi
4006; X86-BMI2-NEXT:  .LBB51_2:
4007; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
4008; X86-BMI2-NEXT:    jne .LBB51_4
4009; X86-BMI2-NEXT:  # %bb.3:
4010; X86-BMI2-NEXT:    movl %eax, %edx
4011; X86-BMI2-NEXT:  .LBB51_4:
4012; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
4013; X86-BMI2-NEXT:    testb $32, %cl
4014; X86-BMI2-NEXT:    jne .LBB51_6
4015; X86-BMI2-NEXT:  # %bb.5:
4016; X86-BMI2-NEXT:    movl %edi, %eax
4017; X86-BMI2-NEXT:  .LBB51_6:
4018; X86-BMI2-NEXT:    popl %esi
4019; X86-BMI2-NEXT:    popl %edi
4020; X86-BMI2-NEXT:    retl
4021;
4022; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
4023; X64-NOBMI:       # %bb.0:
4024; X64-NOBMI-NEXT:    movl %esi, %ecx
4025; X64-NOBMI-NEXT:    movq (%rdi), %rax
4026; X64-NOBMI-NEXT:    negb %cl
4027; X64-NOBMI-NEXT:    shlq %cl, %rax
4028; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4029; X64-NOBMI-NEXT:    shrq %cl, %rax
4030; X64-NOBMI-NEXT:    retq
4031;
4032; X64-BMI1-LABEL: bzhi64_d3_load_indexzext:
4033; X64-BMI1:       # %bb.0:
4034; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
4035; X64-BMI1-NEXT:    shll $8, %esi
4036; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
4037; X64-BMI1-NEXT:    retq
4038;
4039; X64-BMI2-LABEL: bzhi64_d3_load_indexzext:
4040; X64-BMI2:       # %bb.0:
4041; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
4042; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
4043; X64-BMI2-NEXT:    retq
4044  %val = load i64, ptr %w
  ; The subtraction is done in i8 and only afterwards widened to the shift type.
4045  %numhighbits = sub i8 64, %numlowbits
4046  %sh_prom = zext i8 %numhighbits to i64
  ; Same amount for both shifts: the discarded high bits come back as zeros.
4047  %highbitscleared = shl i64 %val, %sh_prom
4048  %masked = lshr i64 %highbitscleared, %sh_prom
4049  ret i64 %masked
4050}
4051
4052; 64-bit, but with 32-bit output
4053
4054; Everything done in 64-bit, truncation happens last.
; Pattern d on an i64 value with an i32 result: the shl/lshr pair by
; (64 - %numlowbits) is done in i64 and the result is truncated at the end.
; On the 32-bit triple the expected code still expands the i64 shifts with
; shld/shrd and branches on bit 5 of the amount, returning a single register.
; On the 64-bit triple the expected code uses the 64-bit instruction forms
; (shlq/shrq, bextrq, bzhiq) and returns the low half of the result in %eax.
4055define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
4056; X86-NOBMI-LABEL: bzhi64_32_d0:
4057; X86-NOBMI:       # %bb.0:
4058; X86-NOBMI-NEXT:    pushl %esi
4059; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
4060; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4061; X86-NOBMI-NEXT:    movb $64, %cl
4062; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4063; X86-NOBMI-NEXT:    movl %esi, %edx
4064; X86-NOBMI-NEXT:    shll %cl, %edx
4065; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
4066; X86-NOBMI-NEXT:    testb $32, %cl
4067; X86-NOBMI-NEXT:    je .LBB52_2
4068; X86-NOBMI-NEXT:  # %bb.1:
4069; X86-NOBMI-NEXT:    movl %edx, %eax
4070; X86-NOBMI-NEXT:    xorl %edx, %edx
4071; X86-NOBMI-NEXT:  .LBB52_2:
4072; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
4073; X86-NOBMI-NEXT:    shrl %cl, %eax
4074; X86-NOBMI-NEXT:    testb $32, %cl
4075; X86-NOBMI-NEXT:    jne .LBB52_4
4076; X86-NOBMI-NEXT:  # %bb.3:
4077; X86-NOBMI-NEXT:    movl %edx, %eax
4078; X86-NOBMI-NEXT:  .LBB52_4:
4079; X86-NOBMI-NEXT:    popl %esi
4080; X86-NOBMI-NEXT:    retl
4081;
4082; X86-BMI1-LABEL: bzhi64_32_d0:
4083; X86-BMI1:       # %bb.0:
4084; X86-BMI1-NEXT:    pushl %esi
4085; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
4086; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
4087; X86-BMI1-NEXT:    movb $64, %cl
4088; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
4089; X86-BMI1-NEXT:    movl %esi, %edx
4090; X86-BMI1-NEXT:    shll %cl, %edx
4091; X86-BMI1-NEXT:    shldl %cl, %esi, %eax
4092; X86-BMI1-NEXT:    testb $32, %cl
4093; X86-BMI1-NEXT:    je .LBB52_2
4094; X86-BMI1-NEXT:  # %bb.1:
4095; X86-BMI1-NEXT:    movl %edx, %eax
4096; X86-BMI1-NEXT:    xorl %edx, %edx
4097; X86-BMI1-NEXT:  .LBB52_2:
4098; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
4099; X86-BMI1-NEXT:    shrl %cl, %eax
4100; X86-BMI1-NEXT:    testb $32, %cl
4101; X86-BMI1-NEXT:    jne .LBB52_4
4102; X86-BMI1-NEXT:  # %bb.3:
4103; X86-BMI1-NEXT:    movl %edx, %eax
4104; X86-BMI1-NEXT:  .LBB52_4:
4105; X86-BMI1-NEXT:    popl %esi
4106; X86-BMI1-NEXT:    retl
4107;
4108; X86-BMI2-LABEL: bzhi64_32_d0:
4109; X86-BMI2:       # %bb.0:
4110; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
4111; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
4112; X86-BMI2-NEXT:    movb $64, %cl
4113; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
4114; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
4115; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
4116; X86-BMI2-NEXT:    testb $32, %cl
4117; X86-BMI2-NEXT:    je .LBB52_2
4118; X86-BMI2-NEXT:  # %bb.1:
4119; X86-BMI2-NEXT:    movl %eax, %edx
4120; X86-BMI2-NEXT:    xorl %eax, %eax
4121; X86-BMI2-NEXT:  .LBB52_2:
4122; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
4123; X86-BMI2-NEXT:    testb $32, %cl
4124; X86-BMI2-NEXT:    je .LBB52_4
4125; X86-BMI2-NEXT:  # %bb.3:
4126; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
4127; X86-BMI2-NEXT:  .LBB52_4:
4128; X86-BMI2-NEXT:    retl
4129;
4130; X64-NOBMI-LABEL: bzhi64_32_d0:
4131; X64-NOBMI:       # %bb.0:
4132; X64-NOBMI-NEXT:    movq %rsi, %rcx
4133; X64-NOBMI-NEXT:    movq %rdi, %rax
4134; X64-NOBMI-NEXT:    negb %cl
4135; X64-NOBMI-NEXT:    shlq %cl, %rax
4136; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
4137; X64-NOBMI-NEXT:    shrq %cl, %rax
4138; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
4139; X64-NOBMI-NEXT:    retq
4140;
4141; X64-BMI1-LABEL: bzhi64_32_d0:
4142; X64-BMI1:       # %bb.0:
4143; X64-BMI1-NEXT:    shll $8, %esi
4144; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
4145; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
4146; X64-BMI1-NEXT:    retq
4147;
4148; X64-BMI2-LABEL: bzhi64_32_d0:
4149; X64-BMI2:       # %bb.0:
4150; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
4151; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
4152; X64-BMI2-NEXT:    retq
  ; Number of high bits to discard, computed in i64.
4153  %numhighbits = sub i64 64, %numlowbits
4154  %highbitscleared = shl i64 %val, %numhighbits
4155  %masked = lshr i64 %highbitscleared, %numhighbits
  ; Only the low 32 bits of the masked value are returned.
4156  %res = trunc i64 %masked to i32
4157  ret i32 %res
4158}
4159
4160; Truncation to 32-bit happens first; both shifts are then done in 32-bit.
; Pattern d on an i64 value with an i32 result and an i32 bit count: %val is
; truncated to i32 first, then the shl/lshr pair by (32 - %numlowbits) is done
; entirely in i32.
; The expected code uses only 32-bit instruction forms on both triples: a
; shll/shrl pair without BMI, bextrl with BMI1 only, and bzhil with BMI2.
4161define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
4162; X86-NOBMI-LABEL: bzhi64_32_d1:
4163; X86-NOBMI:       # %bb.0:
4164; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4165; X86-NOBMI-NEXT:    xorl %ecx, %ecx
4166; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4167; X86-NOBMI-NEXT:    shll %cl, %eax
4168; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4169; X86-NOBMI-NEXT:    shrl %cl, %eax
4170; X86-NOBMI-NEXT:    retl
4171;
4172; X86-BMI1-LABEL: bzhi64_32_d1:
4173; X86-BMI1:       # %bb.0:
4174; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
4175; X86-BMI1-NEXT:    shll $8, %eax
4176; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
4177; X86-BMI1-NEXT:    retl
4178;
4179; X86-BMI2-LABEL: bzhi64_32_d1:
4180; X86-BMI2:       # %bb.0:
4181; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
4182; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
4183; X86-BMI2-NEXT:    retl
4184;
4185; X64-NOBMI-LABEL: bzhi64_32_d1:
4186; X64-NOBMI:       # %bb.0:
4187; X64-NOBMI-NEXT:    movl %esi, %ecx
4188; X64-NOBMI-NEXT:    movq %rdi, %rax
4189; X64-NOBMI-NEXT:    negb %cl
4190; X64-NOBMI-NEXT:    shll %cl, %eax
4191; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4192; X64-NOBMI-NEXT:    shrl %cl, %eax
4193; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
4194; X64-NOBMI-NEXT:    retq
4195;
4196; X64-BMI1-LABEL: bzhi64_32_d1:
4197; X64-BMI1:       # %bb.0:
4198; X64-BMI1-NEXT:    shll $8, %esi
4199; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
4200; X64-BMI1-NEXT:    retq
4201;
4202; X64-BMI2-LABEL: bzhi64_32_d1:
4203; X64-BMI2:       # %bb.0:
4204; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
4205; X64-BMI2-NEXT:    retq
  ; The upper half of %val is dropped before any shifting takes place.
4206  %truncval = trunc i64 %val to i32
4207  %numhighbits = sub i32 32, %numlowbits
  ; Same amount for both shifts: the discarded high bits come back as zeros.
4208  %highbitscleared = shl i32 %truncval, %numhighbits
4209  %masked = lshr i32 %highbitscleared, %numhighbits
4210  ret i32 %masked
4211}
4212
4213; ---------------------------------------------------------------------------- ;
4214; Constant mask
4215; ---------------------------------------------------------------------------- ;
4216
4217; 32-bit
4218
; Constant mask on a register value: and with 0x7FFFFFFF, which keeps the low
; 31 bits of %val.
; The expected code is a plain andl with the constant on both triples; the
; checks are shared by every feature configuration.
4219define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
4220; X86-LABEL: bzhi32_constant_mask32:
4221; X86:       # %bb.0:
4222; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4223; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4224; X86-NEXT:    retl
4225;
4226; X64-LABEL: bzhi32_constant_mask32:
4227; X64:       # %bb.0:
4228; X64-NEXT:    movl %edi, %eax
4229; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4230; X64-NEXT:    retq
4231  %masked = and i32 %val, 2147483647
4232  ret i32 %masked
4233}
4234
; Constant mask on a loaded value: the i32 at %val is and-ed with 0x7FFFFFFF,
; which keeps its low 31 bits.
; The expected code materializes the constant and folds the load into andl on
; both triples; the checks are shared by every feature configuration.
4235define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind {
4236; X86-LABEL: bzhi32_constant_mask32_load:
4237; X86:       # %bb.0:
4238; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4239; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4240; X86-NEXT:    andl (%ecx), %eax
4241; X86-NEXT:    retl
4242;
4243; X64-LABEL: bzhi32_constant_mask32_load:
4244; X64:       # %bb.0:
4245; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4246; X64-NEXT:    andl (%rdi), %eax
4247; X64-NEXT:    retq
4248  %val1 = load i32, ptr %val
4249  %masked = and i32 %val1, 2147483647
4250  ret i32 %masked
4251}
4252
; Constant mask on a register value: and with 0x7FFF, which keeps the low
; 15 bits of %val.
; The expected code is a plain andl with the constant on both triples; the
; checks are shared by every feature configuration.
4253define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
4254; X86-LABEL: bzhi32_constant_mask16:
4255; X86:       # %bb.0:
4256; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4257; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4258; X86-NEXT:    retl
4259;
4260; X64-LABEL: bzhi32_constant_mask16:
4261; X64:       # %bb.0:
4262; X64-NEXT:    movl %edi, %eax
4263; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4264; X64-NEXT:    retq
4265  %masked = and i32 %val, 32767
4266  ret i32 %masked
4267}
4268
; Constant mask on a loaded value: the i32 at %val is and-ed with 0x7FFF,
; which keeps its low 15 bits.
; The expected code materializes the constant and folds the load into andl on
; both triples; the checks are shared by every feature configuration.
4269define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind {
4270; X86-LABEL: bzhi32_constant_mask16_load:
4271; X86:       # %bb.0:
4272; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4273; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4274; X86-NEXT:    andl (%ecx), %eax
4275; X86-NEXT:    retl
4276;
4277; X64-LABEL: bzhi32_constant_mask16_load:
4278; X64:       # %bb.0:
4279; X64-NEXT:    movl $32767, %eax # imm = 0x7FFF
4280; X64-NEXT:    andl (%rdi), %eax
4281; X64-NEXT:    retq
4282  %val1 = load i32, ptr %val
4283  %masked = and i32 %val1, 32767
4284  ret i32 %masked
4285}
4286
; Constant mask on a register value: and with 127, which keeps the low 7 bits
; of %val.
; The expected code is andl with the immediate 127 applied to the value on both
; triples; the checks are shared by every feature configuration.
4287define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
4288; X86-LABEL: bzhi32_constant_mask8:
4289; X86:       # %bb.0:
4290; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4291; X86-NEXT:    andl $127, %eax
4292; X86-NEXT:    retl
4293;
4294; X64-LABEL: bzhi32_constant_mask8:
4295; X64:       # %bb.0:
4296; X64-NEXT:    movl %edi, %eax
4297; X64-NEXT:    andl $127, %eax
4298; X64-NEXT:    retq
4299  %masked = and i32 %val, 127
4300  ret i32 %masked
4301}
4302
; Constant mask on a loaded value: the i32 at %val is and-ed with 127, which
; keeps its low 7 bits.
; The expected code loads the value first and then applies andl with the
; immediate 127 on both triples; the checks are shared by every feature
; configuration.
4303define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind {
4304; X86-LABEL: bzhi32_constant_mask8_load:
4305; X86:       # %bb.0:
4306; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4307; X86-NEXT:    movl (%eax), %eax
4308; X86-NEXT:    andl $127, %eax
4309; X86-NEXT:    retl
4310;
4311; X64-LABEL: bzhi32_constant_mask8_load:
4312; X64:       # %bb.0:
4313; X64-NEXT:    movl (%rdi), %eax
4314; X64-NEXT:    andl $127, %eax
4315; X64-NEXT:    retq
4316  %val1 = load i32, ptr %val
4317  %masked = and i32 %val1, 127
4318  ret i32 %masked
4319}
4320
4321; 64-bit
4322
; Keeps the low 62 bits of an i64 argument (and with 0x3FFFFFFFFFFFFFFF).
; Expected lowering differs per configuration:
;   - 32-bit target: low half returned unchanged in %eax, high half masked
;     with 0x3FFFFFFF in %edx.
;   - 64-bit, no BMI: movabsq of the full mask followed by andq.
;   - 64-bit, BMI1 without TBM: bextrq with the control word 0x3E00 held in a
;     register (BEXTR control: start in bits 7:0, length in bits 15:8, i.e.
;     start 0, length 62).
;   - 64-bit, TBM (with or without BMI2): bextrq with 0x3E00 as an immediate.
;   - 64-bit, BMI2 without TBM: bzhiq with the bit index 62 loaded into %al.
4323define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
4324; X86-LABEL: bzhi64_constant_mask64:
4325; X86:       # %bb.0:
4326; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4327; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
4328; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
4329; X86-NEXT:    retl
4330;
4331; X64-NOBMI-LABEL: bzhi64_constant_mask64:
4332; X64-NOBMI:       # %bb.0:
4333; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
4334; X64-NOBMI-NEXT:    andq %rdi, %rax
4335; X64-NOBMI-NEXT:    retq
4336;
4337; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64:
4338; X64-BMI1NOTBM:       # %bb.0:
4339; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
4340; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
4341; X64-BMI1NOTBM-NEXT:    retq
4342;
4343; X64-BMI1TBM-LABEL: bzhi64_constant_mask64:
4344; X64-BMI1TBM:       # %bb.0:
4345; X64-BMI1TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
4346; X64-BMI1TBM-NEXT:    retq
4347;
4348; X64-BMI2TBM-LABEL: bzhi64_constant_mask64:
4349; X64-BMI2TBM:       # %bb.0:
4350; X64-BMI2TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
4351; X64-BMI2TBM-NEXT:    retq
4352;
4353; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64:
4354; X64-BMI2NOTBM:       # %bb.0:
4355; X64-BMI2NOTBM-NEXT:    movb $62, %al
4356; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rdi, %rax
4357; X64-BMI2NOTBM-NEXT:    retq
4358  %masked = and i64 %val, 4611686018427387903
4359  ret i64 %masked
4360}
4361
; Memory-operand variant: loads an i64 through %val and keeps its low 62 bits
; (and with 0x3FFFFFFFFFFFFFFF). In every expected sequence the load is folded
; into the masking instruction as a memory source operand:
;   - 32-bit target: low word loaded as-is, high word masked via andl 4(%ecx).
;   - 64-bit, no BMI: movabsq of the mask, then andq from memory.
;   - 64-bit, BMI1 without TBM: bextrq from memory, control 0x3E00 in %rax.
;   - 64-bit, TBM (with or without BMI2): bextrq from memory, immediate 0x3E00.
;   - 64-bit, BMI2 without TBM: bzhiq from memory with index 62 in %al.
4362define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind {
4363; X86-LABEL: bzhi64_constant_mask64_load:
4364; X86:       # %bb.0:
4365; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4366; X86-NEXT:    movl (%ecx), %eax
4367; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
4368; X86-NEXT:    andl 4(%ecx), %edx
4369; X86-NEXT:    retl
4370;
4371; X64-NOBMI-LABEL: bzhi64_constant_mask64_load:
4372; X64-NOBMI:       # %bb.0:
4373; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
4374; X64-NOBMI-NEXT:    andq (%rdi), %rax
4375; X64-NOBMI-NEXT:    retq
4376;
4377; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64_load:
4378; X64-BMI1NOTBM:       # %bb.0:
4379; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
4380; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
4381; X64-BMI1NOTBM-NEXT:    retq
4382;
4383; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load:
4384; X64-BMI1TBM:       # %bb.0:
4385; X64-BMI1TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
4386; X64-BMI1TBM-NEXT:    retq
4387;
4388; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load:
4389; X64-BMI2TBM:       # %bb.0:
4390; X64-BMI2TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
4391; X64-BMI2TBM-NEXT:    retq
4392;
4393; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load:
4394; X64-BMI2NOTBM:       # %bb.0:
4395; X64-BMI2NOTBM-NEXT:    movb $62, %al
4396; X64-BMI2NOTBM-NEXT:    bzhiq %rax, (%rdi), %rax
4397; X64-BMI2NOTBM-NEXT:    retq
4398  %val1 = load i64, ptr %val
4399  %masked = and i64 %val1, 4611686018427387903
4400  ret i64 %masked
4401}
4402
; Keeps the low 31 bits of an i64 argument (and with 2147483647, 0x7FFFFFFF).
; The mask fits in the low 32 bits, so the 32-bit target masks only the low
; half and zeroes %edx, while the 64-bit target uses a 32-bit andl on %eax
; after copying the argument into %rax. All feature sets share these checks.
4403define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
4404; X86-LABEL: bzhi64_constant_mask32:
4405; X86:       # %bb.0:
4406; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4407; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4408; X86-NEXT:    xorl %edx, %edx
4409; X86-NEXT:    retl
4410;
4411; X64-LABEL: bzhi64_constant_mask32:
4412; X64:       # %bb.0:
4413; X64-NEXT:    movq %rdi, %rax
4414; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4415; X64-NEXT:    retq
4416  %masked = and i64 %val, 2147483647
4417  ret i64 %masked
4418}
4419
; Memory-operand variant: loads an i64 through %val and keeps its low 31 bits
; (and with 2147483647, 0x7FFFFFFF). The 32-bit target folds the load of the
; low word into andl and zeroes %edx; the 64-bit target loads the full
; quadword into %rax and then masks with a 32-bit andl on %eax.
4420define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind {
4421; X86-LABEL: bzhi64_constant_mask32_load:
4422; X86:       # %bb.0:
4423; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4424; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4425; X86-NEXT:    andl (%ecx), %eax
4426; X86-NEXT:    xorl %edx, %edx
4427; X86-NEXT:    retl
4428;
4429; X64-LABEL: bzhi64_constant_mask32_load:
4430; X64:       # %bb.0:
4431; X64-NEXT:    movq (%rdi), %rax
4432; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4433; X64-NEXT:    retq
4434  %val1 = load i64, ptr %val
4435  %masked = and i64 %val1, 2147483647
4436  ret i64 %masked
4437}
4438
; Keeps the low 15 bits of an i64 argument (and with 32767, 0x7FFF).
; The 32-bit target masks only the low half and zeroes %edx; the 64-bit
; target copies the argument into %rax and masks with a 32-bit andl on %eax.
; All feature sets share these checks.
4439define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
4440; X86-LABEL: bzhi64_constant_mask16:
4441; X86:       # %bb.0:
4442; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4443; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4444; X86-NEXT:    xorl %edx, %edx
4445; X86-NEXT:    retl
4446;
4447; X64-LABEL: bzhi64_constant_mask16:
4448; X64:       # %bb.0:
4449; X64-NEXT:    movq %rdi, %rax
4450; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4451; X64-NEXT:    retq
4452  %masked = and i64 %val, 32767
4453  ret i64 %masked
4454}
4455
; Memory-operand variant: loads an i64 through %val and keeps its low 15 bits
; (and with 32767, 0x7FFF). The 32-bit target folds the low-word load into
; andl and zeroes %edx; the 64-bit target loads the quadword into %rax and
; masks with a 32-bit andl on %eax.
4456define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind {
4457; X86-LABEL: bzhi64_constant_mask16_load:
4458; X86:       # %bb.0:
4459; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4460; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4461; X86-NEXT:    andl (%ecx), %eax
4462; X86-NEXT:    xorl %edx, %edx
4463; X86-NEXT:    retl
4464;
4465; X64-LABEL: bzhi64_constant_mask16_load:
4466; X64:       # %bb.0:
4467; X64-NEXT:    movq (%rdi), %rax
4468; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4469; X64-NEXT:    retq
4470  %val1 = load i64, ptr %val
4471  %masked = and i64 %val1, 32767
4472  ret i64 %masked
4473}
4474
; Keeps the low 7 bits of an i64 argument (and with 127).
; The 32-bit target loads the low half, masks it with andl $127 and zeroes
; %edx; the 64-bit target copies the argument into %rax and masks with a
; 32-bit andl on %eax. All feature sets share these checks.
4475define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
4476; X86-LABEL: bzhi64_constant_mask8:
4477; X86:       # %bb.0:
4478; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4479; X86-NEXT:    andl $127, %eax
4480; X86-NEXT:    xorl %edx, %edx
4481; X86-NEXT:    retl
4482;
4483; X64-LABEL: bzhi64_constant_mask8:
4484; X64:       # %bb.0:
4485; X64-NEXT:    movq %rdi, %rax
4486; X64-NEXT:    andl $127, %eax
4487; X64-NEXT:    retq
4488  %masked = and i64 %val, 127
4489  ret i64 %masked
4490}
4491
; Memory-operand variant: loads an i64 through %val and keeps its low 7 bits
; (and with 127). Both targets perform the load first and then mask with an
; immediate andl $127; the 32-bit target additionally zeroes %edx for the
; high half of the result.
4492define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind {
4493; X86-LABEL: bzhi64_constant_mask8_load:
4494; X86:       # %bb.0:
4495; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4496; X86-NEXT:    movl (%eax), %eax
4497; X86-NEXT:    andl $127, %eax
4498; X86-NEXT:    xorl %edx, %edx
4499; X86-NEXT:    retl
4500;
4501; X64-LABEL: bzhi64_constant_mask8_load:
4502; X64:       # %bb.0:
4503; X64-NEXT:    movq (%rdi), %rax
4504; X64-NEXT:    andl $127, %eax
4505; X64-NEXT:    retq
4506  %val1 = load i64, ptr %val
4507  %masked = and i64 %val1, 127
4508  ret i64 %masked
4509}
4510
4511; Ensure constant hoisting doesn't prevent BEXTR/BZHI instructions in both paths.
; The same 48-bit mask (281474976710655, 0xFFFFFFFFFFFF) is applied in two
; different basic blocks: to %x in the entry block (stored at %use), and to %y
; in if.end (stored at %use + 8), which is reached only when %y is nonzero.
; Expected 64-bit lowering per configuration:
;   - no BMI: one movabsq of the mask in %rax, reused by the andq in each block.
;   - BMI1 without TBM: the bextrq control 0x3000 is materialized with movl
;     separately in each block.
;   - TBM (with or without BMI2): bextrq with immediate 0x3000 in each block.
;   - BMI2 without TBM: index 48 is loaded into %al once and reused by the
;     bzhiq in each block.
; The 32-bit target has no 64-bit extract; it masks the high words to 16 bits
; (movzwl / andl $65535) and stores each i64 as two 32-bit halves.
4512define void @PR111323(ptr nocapture noundef writeonly %use, i64 noundef %x, i64 noundef %y) nounwind {
4513; X86-LABEL: PR111323:
4514; X86:       # %bb.0: # %entry
4515; X86-NEXT:    pushl %edi
4516; X86-NEXT:    pushl %esi
4517; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4518; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
4519; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4520; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
4521; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edi
4522; X86-NEXT:    movl %edi, 4(%ecx)
4523; X86-NEXT:    movl %esi, (%ecx)
4524; X86-NEXT:    movl %edx, %esi
4525; X86-NEXT:    orl %eax, %esi
4526; X86-NEXT:    je .LBB68_2
4527; X86-NEXT:  # %bb.1: # %if.end
4528; X86-NEXT:    andl $65535, %eax # imm = 0xFFFF
4529; X86-NEXT:    andl $-1, %edx
4530; X86-NEXT:    movl %edx, 8(%ecx)
4531; X86-NEXT:    movl %eax, 12(%ecx)
4532; X86-NEXT:  .LBB68_2: # %return
4533; X86-NEXT:    popl %esi
4534; X86-NEXT:    popl %edi
4535; X86-NEXT:    retl
4536;
4537; X64-NOBMI-LABEL: PR111323:
4538; X64-NOBMI:       # %bb.0: # %entry
4539; X64-NOBMI-NEXT:    movabsq $281474976710655, %rax # imm = 0xFFFFFFFFFFFF
4540; X64-NOBMI-NEXT:    andq %rax, %rsi
4541; X64-NOBMI-NEXT:    movq %rsi, (%rdi)
4542; X64-NOBMI-NEXT:    testq %rdx, %rdx
4543; X64-NOBMI-NEXT:    je .LBB68_2
4544; X64-NOBMI-NEXT:  # %bb.1: # %if.end
4545; X64-NOBMI-NEXT:    andq %rax, %rdx
4546; X64-NOBMI-NEXT:    movq %rdx, 8(%rdi)
4547; X64-NOBMI-NEXT:  .LBB68_2: # %return
4548; X64-NOBMI-NEXT:    retq
4549;
4550; X64-BMI1NOTBM-LABEL: PR111323:
4551; X64-BMI1NOTBM:       # %bb.0: # %entry
4552; X64-BMI1NOTBM-NEXT:    movl $12288, %eax # imm = 0x3000
4553; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rsi, %rax
4554; X64-BMI1NOTBM-NEXT:    movq %rax, (%rdi)
4555; X64-BMI1NOTBM-NEXT:    testq %rdx, %rdx
4556; X64-BMI1NOTBM-NEXT:    je .LBB68_2
4557; X64-BMI1NOTBM-NEXT:  # %bb.1: # %if.end
4558; X64-BMI1NOTBM-NEXT:    movl $12288, %eax # imm = 0x3000
4559; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdx, %rax
4560; X64-BMI1NOTBM-NEXT:    movq %rax, 8(%rdi)
4561; X64-BMI1NOTBM-NEXT:  .LBB68_2: # %return
4562; X64-BMI1NOTBM-NEXT:    retq
4563;
4564; X64-BMI1TBM-LABEL: PR111323:
4565; X64-BMI1TBM:       # %bb.0: # %entry
4566; X64-BMI1TBM-NEXT:    bextrq $12288, %rsi, %rax # imm = 0x3000
4567; X64-BMI1TBM-NEXT:    movq %rax, (%rdi)
4568; X64-BMI1TBM-NEXT:    testq %rdx, %rdx
4569; X64-BMI1TBM-NEXT:    je .LBB68_2
4570; X64-BMI1TBM-NEXT:  # %bb.1: # %if.end
4571; X64-BMI1TBM-NEXT:    bextrq $12288, %rdx, %rax # imm = 0x3000
4572; X64-BMI1TBM-NEXT:    movq %rax, 8(%rdi)
4573; X64-BMI1TBM-NEXT:  .LBB68_2: # %return
4574; X64-BMI1TBM-NEXT:    retq
4575;
4576; X64-BMI2TBM-LABEL: PR111323:
4577; X64-BMI2TBM:       # %bb.0: # %entry
4578; X64-BMI2TBM-NEXT:    bextrq $12288, %rsi, %rax # imm = 0x3000
4579; X64-BMI2TBM-NEXT:    movq %rax, (%rdi)
4580; X64-BMI2TBM-NEXT:    testq %rdx, %rdx
4581; X64-BMI2TBM-NEXT:    je .LBB68_2
4582; X64-BMI2TBM-NEXT:  # %bb.1: # %if.end
4583; X64-BMI2TBM-NEXT:    bextrq $12288, %rdx, %rax # imm = 0x3000
4584; X64-BMI2TBM-NEXT:    movq %rax, 8(%rdi)
4585; X64-BMI2TBM-NEXT:  .LBB68_2: # %return
4586; X64-BMI2TBM-NEXT:    retq
4587;
4588; X64-BMI2NOTBM-LABEL: PR111323:
4589; X64-BMI2NOTBM:       # %bb.0: # %entry
4590; X64-BMI2NOTBM-NEXT:    movb $48, %al
4591; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rsi, %rcx
4592; X64-BMI2NOTBM-NEXT:    movq %rcx, (%rdi)
4593; X64-BMI2NOTBM-NEXT:    testq %rdx, %rdx
4594; X64-BMI2NOTBM-NEXT:    je .LBB68_2
4595; X64-BMI2NOTBM-NEXT:  # %bb.1: # %if.end
4596; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rdx, %rax
4597; X64-BMI2NOTBM-NEXT:    movq %rax, 8(%rdi)
4598; X64-BMI2NOTBM-NEXT:  .LBB68_2: # %return
4599; X64-BMI2NOTBM-NEXT:    retq
4600entry:
4601  %and = and i64 %x, 281474976710655
4602  store i64 %and, ptr %use, align 8
4603  %cmp = icmp eq i64 %y, 0
4604  br i1 %cmp, label %return, label %if.end
4605
4606if.end:
4607  %and1 = and i64 %y, 281474976710655
4608  %arrayidx2 = getelementptr inbounds i8, ptr %use, i64 8
4609  store i64 %and1, ptr %arrayidx2, align 8
4610  br label %return
4611
4612return:
4613  ret void
4614}
4615