xref: /llvm-project/llvm/test/CodeGen/X86/clear-highbits.ll (revision f0dd12ec5c0169ba5b4363b62d59511181cf954a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BASELINE
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BMI1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BMI1
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
12
13; Patterns:
14;    c) x &  (-1 >> y)
15;    d) x << y >> y
16; are equivalent, but we prefer the second variant if we have BMI2.
17
18; We do not test the variant where y = (32 - z), because that is BMI2's BZHI.
19
20; ---------------------------------------------------------------------------- ;
21; 8-bit
22; ---------------------------------------------------------------------------- ;
23
; Base 8-bit case: clear the top %numhighbits bits of %val.
; The IR is written as pattern c) (and with a right-shifted all-ones mask).
; The checks expect the same lowering for every RUN line of a given triple:
; a shlb followed by a shrb by the same count, i.e. pattern d).
24define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
25; X86-LABEL: clear_highbits8_c0:
26; X86:       # %bb.0:
27; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
28; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
29; X86-NEXT:    shlb %cl, %al
30; X86-NEXT:    shrb %cl, %al
31; X86-NEXT:    retl
32;
33; X64-LABEL: clear_highbits8_c0:
34; X64:       # %bb.0:
35; X64-NEXT:    movl %esi, %ecx
36; X64-NEXT:    movl %edi, %eax
37; X64-NEXT:    shlb %cl, %al
38; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
39; X64-NEXT:    shrb %cl, %al
40; X64-NEXT:    # kill: def $al killed $al killed $eax
41; X64-NEXT:    retq
42  %mask = lshr i8 -1, %numhighbits ; all-ones shifted right; a count >= 8 is poison per the LangRef
43  %masked = and i8 %mask, %val ; mask is the first operand here
44  ret i8 %masked
45}
46
; 8-bit case where the value is loaded through %w instead of being passed
; directly. The checks expect a zero-extending byte load followed by the same
; shlb/shrb pair as the non-load variant.
47define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind {
48; X86-LABEL: clear_highbits8_c2_load:
49; X86:       # %bb.0:
50; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
51; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
52; X86-NEXT:    movzbl (%eax), %eax
53; X86-NEXT:    shlb %cl, %al
54; X86-NEXT:    shrb %cl, %al
55; X86-NEXT:    retl
56;
57; X64-LABEL: clear_highbits8_c2_load:
58; X64:       # %bb.0:
59; X64-NEXT:    movl %esi, %ecx
60; X64-NEXT:    movzbl (%rdi), %eax
61; X64-NEXT:    shlb %cl, %al
62; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
63; X64-NEXT:    shrb %cl, %al
64; X64-NEXT:    retq
65  %val = load i8, ptr %w ; value to be masked comes from memory
66  %mask = lshr i8 -1, %numhighbits ; all-ones shifted right by the count
67  %masked = and i8 %mask, %val
68  ret i8 %masked
69}
70
; 8-bit case with the operands of the 'and' swapped relative to the c0
; variant. The expected instructions are the same shlb/shrb pair, so the
; operand order of the 'and' is not expected to affect the lowering.
71define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
72; X86-LABEL: clear_highbits8_c4_commutative:
73; X86:       # %bb.0:
74; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
75; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
76; X86-NEXT:    shlb %cl, %al
77; X86-NEXT:    shrb %cl, %al
78; X86-NEXT:    retl
79;
80; X64-LABEL: clear_highbits8_c4_commutative:
81; X64:       # %bb.0:
82; X64-NEXT:    movl %esi, %ecx
83; X64-NEXT:    movl %edi, %eax
84; X64-NEXT:    shlb %cl, %al
85; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
86; X64-NEXT:    shrb %cl, %al
87; X64-NEXT:    # kill: def $al killed $al killed $eax
88; X64-NEXT:    retq
89  %mask = lshr i8 -1, %numhighbits ; all-ones shifted right by the count
90  %masked = and i8 %val, %mask ; swapped order: value first, mask second
91  ret i8 %masked
92}
93
94; ---------------------------------------------------------------------------- ;
95; 16-bit
96; ---------------------------------------------------------------------------- ;
97
; Base 16-bit case: clear the top %numhighbits bits of %val.
; The checks expect the shifts to be done in a 32-bit register, with a movzwl
; between the left and right shift to discard the bits that were shifted
; above bit 15. Without BMI2 this is shll/shrl by %cl; with BMI2 it is
; shlxl/shrxl.
98define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
99; X86-NOBMI2-LABEL: clear_highbits16_c0:
100; X86-NOBMI2:       # %bb.0:
101; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
102; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
103; X86-NOBMI2-NEXT:    shll %cl, %eax
104; X86-NOBMI2-NEXT:    movzwl %ax, %eax
105; X86-NOBMI2-NEXT:    shrl %cl, %eax
106; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
107; X86-NOBMI2-NEXT:    retl
108;
109; X86-BMI2-LABEL: clear_highbits16_c0:
110; X86-BMI2:       # %bb.0:
111; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
112; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
113; X86-BMI2-NEXT:    movzwl %cx, %ecx
114; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
115; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
116; X86-BMI2-NEXT:    retl
117;
118; X64-NOBMI2-LABEL: clear_highbits16_c0:
119; X64-NOBMI2:       # %bb.0:
120; X64-NOBMI2-NEXT:    movl %esi, %ecx
121; X64-NOBMI2-NEXT:    shll %cl, %edi
122; X64-NOBMI2-NEXT:    movzwl %di, %eax
123; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
124; X64-NOBMI2-NEXT:    shrl %cl, %eax
125; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
126; X64-NOBMI2-NEXT:    retq
127;
128; X64-BMI2-LABEL: clear_highbits16_c0:
129; X64-BMI2:       # %bb.0:
130; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
131; X64-BMI2-NEXT:    movzwl %ax, %eax
132; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
133; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
134; X64-BMI2-NEXT:    retq
135  %mask = lshr i16 -1, %numhighbits ; all-ones shifted right; a count >= 16 is poison per the LangRef
136  %masked = and i16 %mask, %val ; mask is the first operand here
137  ret i16 %masked
138}
139
; 16-bit case where the bit count is passed as i8 and zero-extended to i16
; before the shift. The expected instructions in every check group are the
; same as for the c0 variant, so the zext is not expected to cost anything.
140define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
141; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
142; X86-NOBMI2:       # %bb.0:
143; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
144; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
145; X86-NOBMI2-NEXT:    shll %cl, %eax
146; X86-NOBMI2-NEXT:    movzwl %ax, %eax
147; X86-NOBMI2-NEXT:    shrl %cl, %eax
148; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
149; X86-NOBMI2-NEXT:    retl
150;
151; X86-BMI2-LABEL: clear_highbits16_c1_indexzext:
152; X86-BMI2:       # %bb.0:
153; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
154; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
155; X86-BMI2-NEXT:    movzwl %cx, %ecx
156; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
157; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
158; X86-BMI2-NEXT:    retl
159;
160; X64-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
161; X64-NOBMI2:       # %bb.0:
162; X64-NOBMI2-NEXT:    movl %esi, %ecx
163; X64-NOBMI2-NEXT:    shll %cl, %edi
164; X64-NOBMI2-NEXT:    movzwl %di, %eax
165; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
166; X64-NOBMI2-NEXT:    shrl %cl, %eax
167; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
168; X64-NOBMI2-NEXT:    retq
169;
170; X64-BMI2-LABEL: clear_highbits16_c1_indexzext:
171; X64-BMI2:       # %bb.0:
172; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
173; X64-BMI2-NEXT:    movzwl %ax, %eax
174; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
175; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
176; X64-BMI2-NEXT:    retq
177  %sh_prom = zext i8 %numhighbits to i16 ; widen the count to the width of the shifted value
178  %mask = lshr i16 -1, %sh_prom ; all-ones shifted right by the widened count
179  %masked = and i16 %mask, %val
180  ret i16 %masked
181}
182
; 16-bit case where the value is loaded through %w. The checks expect a
; zero-extending 16-bit load (movzwl from memory) into a register, followed by
; the same shift / movzwl / shift sequence as the non-load variant. With BMI2
; the load is expected as a separate instruction rather than as a memory
; operand of shlxl.
183define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind {
184; X86-NOBMI2-LABEL: clear_highbits16_c2_load:
185; X86-NOBMI2:       # %bb.0:
186; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
187; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
188; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
189; X86-NOBMI2-NEXT:    shll %cl, %eax
190; X86-NOBMI2-NEXT:    movzwl %ax, %eax
191; X86-NOBMI2-NEXT:    shrl %cl, %eax
192; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
193; X86-NOBMI2-NEXT:    retl
194;
195; X86-BMI2-LABEL: clear_highbits16_c2_load:
196; X86-BMI2:       # %bb.0:
197; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
198; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
199; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
200; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
201; X86-BMI2-NEXT:    movzwl %cx, %ecx
202; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
203; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
204; X86-BMI2-NEXT:    retl
205;
206; X64-NOBMI2-LABEL: clear_highbits16_c2_load:
207; X64-NOBMI2:       # %bb.0:
208; X64-NOBMI2-NEXT:    movl %esi, %ecx
209; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
210; X64-NOBMI2-NEXT:    shll %cl, %eax
211; X64-NOBMI2-NEXT:    movzwl %ax, %eax
212; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
213; X64-NOBMI2-NEXT:    shrl %cl, %eax
214; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
215; X64-NOBMI2-NEXT:    retq
216;
217; X64-BMI2-LABEL: clear_highbits16_c2_load:
218; X64-BMI2:       # %bb.0:
219; X64-BMI2-NEXT:    movzwl (%rdi), %eax
220; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
221; X64-BMI2-NEXT:    movzwl %ax, %eax
222; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
223; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
224; X64-BMI2-NEXT:    retq
225  %val = load i16, ptr %w ; value to be masked comes from memory
226  %mask = lshr i16 -1, %numhighbits ; all-ones shifted right by the count
227  %masked = and i16 %mask, %val
228  ret i16 %masked
229}
230
; 16-bit case combining both variations: the value is loaded through %w and
; the i8 bit count is zero-extended to i16. The expected instructions in every
; check group are the same as for the c2_load variant.
231define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
232; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
233; X86-NOBMI2:       # %bb.0:
234; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
235; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
236; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
237; X86-NOBMI2-NEXT:    shll %cl, %eax
238; X86-NOBMI2-NEXT:    movzwl %ax, %eax
239; X86-NOBMI2-NEXT:    shrl %cl, %eax
240; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
241; X86-NOBMI2-NEXT:    retl
242;
243; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
244; X86-BMI2:       # %bb.0:
245; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
246; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
247; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
248; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
249; X86-BMI2-NEXT:    movzwl %cx, %ecx
250; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
251; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
252; X86-BMI2-NEXT:    retl
253;
254; X64-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
255; X64-NOBMI2:       # %bb.0:
256; X64-NOBMI2-NEXT:    movl %esi, %ecx
257; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
258; X64-NOBMI2-NEXT:    shll %cl, %eax
259; X64-NOBMI2-NEXT:    movzwl %ax, %eax
260; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
261; X64-NOBMI2-NEXT:    shrl %cl, %eax
262; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
263; X64-NOBMI2-NEXT:    retq
264;
265; X64-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
266; X64-BMI2:       # %bb.0:
267; X64-BMI2-NEXT:    movzwl (%rdi), %eax
268; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
269; X64-BMI2-NEXT:    movzwl %ax, %eax
270; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
271; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
272; X64-BMI2-NEXT:    retq
273  %val = load i16, ptr %w ; value to be masked comes from memory
274  %sh_prom = zext i8 %numhighbits to i16 ; widen the count to the width of the shifted value
275  %mask = lshr i16 -1, %sh_prom ; all-ones shifted right by the widened count
276  %masked = and i16 %mask, %val
277  ret i16 %masked
278}
279
; 16-bit case with the operands of the 'and' swapped relative to the c0
; variant. The expected instructions in every check group are the same as for
; c0, so the operand order of the 'and' is not expected to affect the lowering.
280define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind {
281; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative:
282; X86-NOBMI2:       # %bb.0:
283; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
284; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
285; X86-NOBMI2-NEXT:    shll %cl, %eax
286; X86-NOBMI2-NEXT:    movzwl %ax, %eax
287; X86-NOBMI2-NEXT:    shrl %cl, %eax
288; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
289; X86-NOBMI2-NEXT:    retl
290;
291; X86-BMI2-LABEL: clear_highbits16_c4_commutative:
292; X86-BMI2:       # %bb.0:
293; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
294; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
295; X86-BMI2-NEXT:    movzwl %cx, %ecx
296; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
297; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
298; X86-BMI2-NEXT:    retl
299;
300; X64-NOBMI2-LABEL: clear_highbits16_c4_commutative:
301; X64-NOBMI2:       # %bb.0:
302; X64-NOBMI2-NEXT:    movl %esi, %ecx
303; X64-NOBMI2-NEXT:    shll %cl, %edi
304; X64-NOBMI2-NEXT:    movzwl %di, %eax
305; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
306; X64-NOBMI2-NEXT:    shrl %cl, %eax
307; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
308; X64-NOBMI2-NEXT:    retq
309;
310; X64-BMI2-LABEL: clear_highbits16_c4_commutative:
311; X64-BMI2:       # %bb.0:
312; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
313; X64-BMI2-NEXT:    movzwl %ax, %eax
314; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
315; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
316; X64-BMI2-NEXT:    retq
317  %mask = lshr i16 -1, %numhighbits ; all-ones shifted right by the count
318  %masked = and i16 %val, %mask ; swapped order: value first, mask second
319  ret i16 %masked
320}
321
322; ---------------------------------------------------------------------------- ;
323; 32-bit
324; ---------------------------------------------------------------------------- ;
325
; Base 32-bit case: clear the top %numhighbits bits of %val.
; Without BMI2 the checks expect a shll/shrl pair by the same count
; (pattern d). With BMI2 they expect the count to be turned into
; (32 - %numhighbits) via movl $32 / subl and then used as the index operand
; of a single bzhil.
326define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
327; X86-NOBMI2-LABEL: clear_highbits32_c0:
328; X86-NOBMI2:       # %bb.0:
329; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
330; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
331; X86-NOBMI2-NEXT:    shll %cl, %eax
332; X86-NOBMI2-NEXT:    shrl %cl, %eax
333; X86-NOBMI2-NEXT:    retl
334;
335; X86-BMI2-LABEL: clear_highbits32_c0:
336; X86-BMI2:       # %bb.0:
337; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
338; X86-BMI2-NEXT:    movl $32, %ecx
339; X86-BMI2-NEXT:    subl %eax, %ecx
340; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
341; X86-BMI2-NEXT:    retl
342;
343; X64-NOBMI2-LABEL: clear_highbits32_c0:
344; X64-NOBMI2:       # %bb.0:
345; X64-NOBMI2-NEXT:    movl %esi, %ecx
346; X64-NOBMI2-NEXT:    movl %edi, %eax
347; X64-NOBMI2-NEXT:    shll %cl, %eax
348; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
349; X64-NOBMI2-NEXT:    shrl %cl, %eax
350; X64-NOBMI2-NEXT:    retq
351;
352; X64-BMI2-LABEL: clear_highbits32_c0:
353; X64-BMI2:       # %bb.0:
354; X64-BMI2-NEXT:    movl $32, %eax
355; X64-BMI2-NEXT:    subl %esi, %eax
356; X64-BMI2-NEXT:    bzhil %eax, %edi, %eax
357; X64-BMI2-NEXT:    retq
358  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right; a count >= 32 is poison per the LangRef
359  %masked = and i32 %mask, %val ; mask is the first operand here
360  ret i32 %masked
361}
362
; 32-bit case where the bit count is passed as i8 and zero-extended to i32
; before the shift. The expected instructions in every check group are the
; same as for the c0 variant.
363define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
364; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
365; X86-NOBMI2:       # %bb.0:
366; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
367; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
368; X86-NOBMI2-NEXT:    shll %cl, %eax
369; X86-NOBMI2-NEXT:    shrl %cl, %eax
370; X86-NOBMI2-NEXT:    retl
371;
372; X86-BMI2-LABEL: clear_highbits32_c1_indexzext:
373; X86-BMI2:       # %bb.0:
374; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
375; X86-BMI2-NEXT:    movl $32, %ecx
376; X86-BMI2-NEXT:    subl %eax, %ecx
377; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
378; X86-BMI2-NEXT:    retl
379;
380; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
381; X64-NOBMI2:       # %bb.0:
382; X64-NOBMI2-NEXT:    movl %esi, %ecx
383; X64-NOBMI2-NEXT:    movl %edi, %eax
384; X64-NOBMI2-NEXT:    shll %cl, %eax
385; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
386; X64-NOBMI2-NEXT:    shrl %cl, %eax
387; X64-NOBMI2-NEXT:    retq
388;
389; X64-BMI2-LABEL: clear_highbits32_c1_indexzext:
390; X64-BMI2:       # %bb.0:
391; X64-BMI2-NEXT:    movl $32, %eax
392; X64-BMI2-NEXT:    subl %esi, %eax
393; X64-BMI2-NEXT:    bzhil %eax, %edi, %eax
394; X64-BMI2-NEXT:    retq
395  %sh_prom = zext i8 %numhighbits to i32 ; widen the count to the width of the shifted value
396  %mask = lshr i32 -1, %sh_prom ; all-ones shifted right by the widened count
397  %masked = and i32 %mask, %val
398  ret i32 %masked
399}
400
; 32-bit case where the value is loaded through %w. Without BMI2 the checks
; expect an explicit 32-bit load followed by the shll/shrl pair. With BMI2
; they expect the load to appear as the memory operand of bzhil, with the
; index computed as (32 - %numhighbits).
401define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind {
402; X86-NOBMI2-LABEL: clear_highbits32_c2_load:
403; X86-NOBMI2:       # %bb.0:
404; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
405; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
406; X86-NOBMI2-NEXT:    movl (%eax), %eax
407; X86-NOBMI2-NEXT:    shll %cl, %eax
408; X86-NOBMI2-NEXT:    shrl %cl, %eax
409; X86-NOBMI2-NEXT:    retl
410;
411; X86-BMI2-LABEL: clear_highbits32_c2_load:
412; X86-BMI2:       # %bb.0:
413; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
414; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
415; X86-BMI2-NEXT:    movl $32, %edx
416; X86-BMI2-NEXT:    subl %ecx, %edx
417; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
418; X86-BMI2-NEXT:    retl
419;
420; X64-NOBMI2-LABEL: clear_highbits32_c2_load:
421; X64-NOBMI2:       # %bb.0:
422; X64-NOBMI2-NEXT:    movl %esi, %ecx
423; X64-NOBMI2-NEXT:    movl (%rdi), %eax
424; X64-NOBMI2-NEXT:    shll %cl, %eax
425; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
426; X64-NOBMI2-NEXT:    shrl %cl, %eax
427; X64-NOBMI2-NEXT:    retq
428;
429; X64-BMI2-LABEL: clear_highbits32_c2_load:
430; X64-BMI2:       # %bb.0:
431; X64-BMI2-NEXT:    movl $32, %eax
432; X64-BMI2-NEXT:    subl %esi, %eax
433; X64-BMI2-NEXT:    bzhil %eax, (%rdi), %eax
434; X64-BMI2-NEXT:    retq
435  %val = load i32, ptr %w ; value to be masked comes from memory
436  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by the count
437  %masked = and i32 %mask, %val
438  ret i32 %masked
439}
440
; 32-bit case combining both variations: the value is loaded through %w and
; the i8 bit count is zero-extended to i32. The expected instructions in every
; check group are the same as for the c2_load variant.
441define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
442; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
443; X86-NOBMI2:       # %bb.0:
444; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
445; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
446; X86-NOBMI2-NEXT:    movl (%eax), %eax
447; X86-NOBMI2-NEXT:    shll %cl, %eax
448; X86-NOBMI2-NEXT:    shrl %cl, %eax
449; X86-NOBMI2-NEXT:    retl
450;
451; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
452; X86-BMI2:       # %bb.0:
453; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
454; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
455; X86-BMI2-NEXT:    movl $32, %edx
456; X86-BMI2-NEXT:    subl %ecx, %edx
457; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
458; X86-BMI2-NEXT:    retl
459;
460; X64-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
461; X64-NOBMI2:       # %bb.0:
462; X64-NOBMI2-NEXT:    movl %esi, %ecx
463; X64-NOBMI2-NEXT:    movl (%rdi), %eax
464; X64-NOBMI2-NEXT:    shll %cl, %eax
465; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
466; X64-NOBMI2-NEXT:    shrl %cl, %eax
467; X64-NOBMI2-NEXT:    retq
468;
469; X64-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
470; X64-BMI2:       # %bb.0:
471; X64-BMI2-NEXT:    movl $32, %eax
472; X64-BMI2-NEXT:    subl %esi, %eax
473; X64-BMI2-NEXT:    bzhil %eax, (%rdi), %eax
474; X64-BMI2-NEXT:    retq
475  %val = load i32, ptr %w ; value to be masked comes from memory
476  %sh_prom = zext i8 %numhighbits to i32 ; widen the count to the width of the shifted value
477  %mask = lshr i32 -1, %sh_prom ; all-ones shifted right by the widened count
478  %masked = and i32 %mask, %val
479  ret i32 %masked
480}
481
; 32-bit case with the operands of the 'and' swapped relative to the c0
; variant. The expected instructions in every check group are the same as for
; c0, so the operand order of the 'and' is not expected to affect the lowering.
482define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind {
483; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative:
484; X86-NOBMI2:       # %bb.0:
485; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
486; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
487; X86-NOBMI2-NEXT:    shll %cl, %eax
488; X86-NOBMI2-NEXT:    shrl %cl, %eax
489; X86-NOBMI2-NEXT:    retl
490;
491; X86-BMI2-LABEL: clear_highbits32_c4_commutative:
492; X86-BMI2:       # %bb.0:
493; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
494; X86-BMI2-NEXT:    movl $32, %ecx
495; X86-BMI2-NEXT:    subl %eax, %ecx
496; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
497; X86-BMI2-NEXT:    retl
498;
499; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative:
500; X64-NOBMI2:       # %bb.0:
501; X64-NOBMI2-NEXT:    movl %esi, %ecx
502; X64-NOBMI2-NEXT:    movl %edi, %eax
503; X64-NOBMI2-NEXT:    shll %cl, %eax
504; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
505; X64-NOBMI2-NEXT:    shrl %cl, %eax
506; X64-NOBMI2-NEXT:    retq
507;
508; X64-BMI2-LABEL: clear_highbits32_c4_commutative:
509; X64-BMI2:       # %bb.0:
510; X64-BMI2-NEXT:    movl $32, %eax
511; X64-BMI2-NEXT:    subl %esi, %eax
512; X64-BMI2-NEXT:    bzhil %eax, %edi, %eax
513; X64-BMI2-NEXT:    retq
514  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by the count
515  %masked = and i32 %val, %mask ; swapped order: value first, mask second
516  ret i32 %masked
517}
518
519; ---------------------------------------------------------------------------- ;
520; 64-bit
521; ---------------------------------------------------------------------------- ;
522
; Base 64-bit case: clear the top %numhighbits bits of %val.
; On i686 the checks expect the 64-bit mask to be built as two 32-bit halves
; (%eax = low, %edx = high): -1 is shifted right by the count, and bit 5 of
; the count (testb $32) decides which half receives the shifted value while
; the other half is -1 (low) or 0 (high). Both halves are then and'ed with the
; value. The three i686 check groups differ only in how that selection is
; done: the baseline RUN line does not enable cmov and expects branches, the
; BMI1 group expects cmov, and the BMI2 group expects shrxl plus cmov.
; On x86_64 the checks expect shlq/shrq without BMI2, and bzhiq with an index
; of (64 - %numhighbits) with BMI2.
523define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
524; X86-BASELINE-LABEL: clear_highbits64_c0:
525; X86-BASELINE:       # %bb.0:
526; X86-BASELINE-NEXT:    pushl %esi
527; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
528; X86-BASELINE-NEXT:    movl $-1, %eax
529; X86-BASELINE-NEXT:    movl $-1, %esi
530; X86-BASELINE-NEXT:    shrl %cl, %esi
531; X86-BASELINE-NEXT:    xorl %edx, %edx
532; X86-BASELINE-NEXT:    testb $32, %cl
533; X86-BASELINE-NEXT:    jne .LBB13_1
534; X86-BASELINE-NEXT:  # %bb.2:
535; X86-BASELINE-NEXT:    movl %esi, %edx
536; X86-BASELINE-NEXT:    jmp .LBB13_3
537; X86-BASELINE-NEXT:  .LBB13_1:
538; X86-BASELINE-NEXT:    movl %esi, %eax
539; X86-BASELINE-NEXT:  .LBB13_3:
540; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %eax
541; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %edx
542; X86-BASELINE-NEXT:    popl %esi
543; X86-BASELINE-NEXT:    retl
544;
545; X86-BMI1-LABEL: clear_highbits64_c0:
546; X86-BMI1:       # %bb.0:
547; X86-BMI1-NEXT:    pushl %esi
548; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
549; X86-BMI1-NEXT:    movl $-1, %esi
550; X86-BMI1-NEXT:    movl $-1, %eax
551; X86-BMI1-NEXT:    shrl %cl, %eax
552; X86-BMI1-NEXT:    xorl %edx, %edx
553; X86-BMI1-NEXT:    testb $32, %cl
554; X86-BMI1-NEXT:    cmovel %eax, %edx
555; X86-BMI1-NEXT:    cmovel %esi, %eax
556; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
557; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
558; X86-BMI1-NEXT:    popl %esi
559; X86-BMI1-NEXT:    retl
560;
561; X86-BMI2-LABEL: clear_highbits64_c0:
562; X86-BMI2:       # %bb.0:
563; X86-BMI2-NEXT:    pushl %esi
564; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
565; X86-BMI2-NEXT:    movl $-1, %eax
566; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
567; X86-BMI2-NEXT:    xorl %edx, %edx
568; X86-BMI2-NEXT:    testb $32, %cl
569; X86-BMI2-NEXT:    cmovel %esi, %edx
570; X86-BMI2-NEXT:    cmovnel %esi, %eax
571; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
572; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
573; X86-BMI2-NEXT:    popl %esi
574; X86-BMI2-NEXT:    retl
575;
576; X64-NOBMI2-LABEL: clear_highbits64_c0:
577; X64-NOBMI2:       # %bb.0:
578; X64-NOBMI2-NEXT:    movq %rsi, %rcx
579; X64-NOBMI2-NEXT:    movq %rdi, %rax
580; X64-NOBMI2-NEXT:    shlq %cl, %rax
581; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
582; X64-NOBMI2-NEXT:    shrq %cl, %rax
583; X64-NOBMI2-NEXT:    retq
584;
585; X64-BMI2-LABEL: clear_highbits64_c0:
586; X64-BMI2:       # %bb.0:
587; X64-BMI2-NEXT:    movl $64, %eax
588; X64-BMI2-NEXT:    subl %esi, %eax
589; X64-BMI2-NEXT:    bzhiq %rax, %rdi, %rax
590; X64-BMI2-NEXT:    retq
591  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right; a count >= 64 is poison per the LangRef
592  %masked = and i64 %mask, %val ; mask is the first operand here
593  ret i64 %masked
594}
595
; 64-bit case where the bit count is passed as i8 and zero-extended to i64
; before the shift. The i686 check groups and the x86_64 BMI2 group expect the
; same instructions as the c0 variant. The x86_64 group without BMI2 differs
; only in copying the count with a 32-bit movl instead of a 64-bit movq.
596define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
597; X86-BASELINE-LABEL: clear_highbits64_c1_indexzext:
598; X86-BASELINE:       # %bb.0:
599; X86-BASELINE-NEXT:    pushl %esi
600; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
601; X86-BASELINE-NEXT:    movl $-1, %eax
602; X86-BASELINE-NEXT:    movl $-1, %esi
603; X86-BASELINE-NEXT:    shrl %cl, %esi
604; X86-BASELINE-NEXT:    xorl %edx, %edx
605; X86-BASELINE-NEXT:    testb $32, %cl
606; X86-BASELINE-NEXT:    jne .LBB14_1
607; X86-BASELINE-NEXT:  # %bb.2:
608; X86-BASELINE-NEXT:    movl %esi, %edx
609; X86-BASELINE-NEXT:    jmp .LBB14_3
610; X86-BASELINE-NEXT:  .LBB14_1:
611; X86-BASELINE-NEXT:    movl %esi, %eax
612; X86-BASELINE-NEXT:  .LBB14_3:
613; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %eax
614; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %edx
615; X86-BASELINE-NEXT:    popl %esi
616; X86-BASELINE-NEXT:    retl
617;
618; X86-BMI1-LABEL: clear_highbits64_c1_indexzext:
619; X86-BMI1:       # %bb.0:
620; X86-BMI1-NEXT:    pushl %esi
621; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
622; X86-BMI1-NEXT:    movl $-1, %esi
623; X86-BMI1-NEXT:    movl $-1, %eax
624; X86-BMI1-NEXT:    shrl %cl, %eax
625; X86-BMI1-NEXT:    xorl %edx, %edx
626; X86-BMI1-NEXT:    testb $32, %cl
627; X86-BMI1-NEXT:    cmovel %eax, %edx
628; X86-BMI1-NEXT:    cmovel %esi, %eax
629; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
630; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
631; X86-BMI1-NEXT:    popl %esi
632; X86-BMI1-NEXT:    retl
633;
634; X86-BMI2-LABEL: clear_highbits64_c1_indexzext:
635; X86-BMI2:       # %bb.0:
636; X86-BMI2-NEXT:    pushl %esi
637; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
638; X86-BMI2-NEXT:    movl $-1, %eax
639; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
640; X86-BMI2-NEXT:    xorl %edx, %edx
641; X86-BMI2-NEXT:    testb $32, %cl
642; X86-BMI2-NEXT:    cmovel %esi, %edx
643; X86-BMI2-NEXT:    cmovnel %esi, %eax
644; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
645; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
646; X86-BMI2-NEXT:    popl %esi
647; X86-BMI2-NEXT:    retl
648;
649; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext:
650; X64-NOBMI2:       # %bb.0:
651; X64-NOBMI2-NEXT:    movl %esi, %ecx
652; X64-NOBMI2-NEXT:    movq %rdi, %rax
653; X64-NOBMI2-NEXT:    shlq %cl, %rax
654; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
655; X64-NOBMI2-NEXT:    shrq %cl, %rax
656; X64-NOBMI2-NEXT:    retq
657;
658; X64-BMI2-LABEL: clear_highbits64_c1_indexzext:
659; X64-BMI2:       # %bb.0:
660; X64-BMI2-NEXT:    movl $64, %eax
661; X64-BMI2-NEXT:    subl %esi, %eax
662; X64-BMI2-NEXT:    bzhiq %rax, %rdi, %rax
663; X64-BMI2-NEXT:    retq
664  %sh_prom = zext i8 %numhighbits to i64 ; widen the count to the width of the shifted value
665  %mask = lshr i64 -1, %sh_prom ; all-ones shifted right by the widened count
666  %masked = and i64 %mask, %val
667  ret i64 %masked
668}
669
; 64-bit case where the value is loaded through %w.
; On i686 the checks expect the mask halves to be built as in the c0 variant
; (shift of -1, selection on testb $32) and then and'ed directly against the
; two 32-bit words in memory, at offset 0 and offset 4 from the pointer. An
; extra callee-saved register is pushed/popped compared with c0 because the
; pointer also has to be kept in a register.
; On x86_64 the checks expect a 64-bit load plus shlq/shrq without BMI2, and
; bzhiq with a memory operand and an index of (64 - %numhighbits) with BMI2.
670define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind {
671; X86-BASELINE-LABEL: clear_highbits64_c2_load:
672; X86-BASELINE:       # %bb.0:
673; X86-BASELINE-NEXT:    pushl %edi
674; X86-BASELINE-NEXT:    pushl %esi
675; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %esi
676; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
677; X86-BASELINE-NEXT:    movl $-1, %eax
678; X86-BASELINE-NEXT:    movl $-1, %edi
679; X86-BASELINE-NEXT:    shrl %cl, %edi
680; X86-BASELINE-NEXT:    xorl %edx, %edx
681; X86-BASELINE-NEXT:    testb $32, %cl
682; X86-BASELINE-NEXT:    jne .LBB15_1
683; X86-BASELINE-NEXT:  # %bb.2:
684; X86-BASELINE-NEXT:    movl %edi, %edx
685; X86-BASELINE-NEXT:    jmp .LBB15_3
686; X86-BASELINE-NEXT:  .LBB15_1:
687; X86-BASELINE-NEXT:    movl %edi, %eax
688; X86-BASELINE-NEXT:  .LBB15_3:
689; X86-BASELINE-NEXT:    andl (%esi), %eax
690; X86-BASELINE-NEXT:    andl 4(%esi), %edx
691; X86-BASELINE-NEXT:    popl %esi
692; X86-BASELINE-NEXT:    popl %edi
693; X86-BASELINE-NEXT:    retl
694;
695; X86-BMI1-LABEL: clear_highbits64_c2_load:
696; X86-BMI1:       # %bb.0:
697; X86-BMI1-NEXT:    pushl %edi
698; X86-BMI1-NEXT:    pushl %esi
699; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
700; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
701; X86-BMI1-NEXT:    movl $-1, %edi
702; X86-BMI1-NEXT:    movl $-1, %eax
703; X86-BMI1-NEXT:    shrl %cl, %eax
704; X86-BMI1-NEXT:    xorl %edx, %edx
705; X86-BMI1-NEXT:    testb $32, %cl
706; X86-BMI1-NEXT:    cmovel %eax, %edx
707; X86-BMI1-NEXT:    cmovel %edi, %eax
708; X86-BMI1-NEXT:    andl (%esi), %eax
709; X86-BMI1-NEXT:    andl 4(%esi), %edx
710; X86-BMI1-NEXT:    popl %esi
711; X86-BMI1-NEXT:    popl %edi
712; X86-BMI1-NEXT:    retl
713;
714; X86-BMI2-LABEL: clear_highbits64_c2_load:
715; X86-BMI2:       # %bb.0:
716; X86-BMI2-NEXT:    pushl %ebx
717; X86-BMI2-NEXT:    pushl %esi
718; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
719; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
720; X86-BMI2-NEXT:    movl $-1, %eax
721; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
722; X86-BMI2-NEXT:    xorl %edx, %edx
723; X86-BMI2-NEXT:    testb $32, %bl
724; X86-BMI2-NEXT:    cmovel %esi, %edx
725; X86-BMI2-NEXT:    cmovnel %esi, %eax
726; X86-BMI2-NEXT:    andl (%ecx), %eax
727; X86-BMI2-NEXT:    andl 4(%ecx), %edx
728; X86-BMI2-NEXT:    popl %esi
729; X86-BMI2-NEXT:    popl %ebx
730; X86-BMI2-NEXT:    retl
731;
732; X64-NOBMI2-LABEL: clear_highbits64_c2_load:
733; X64-NOBMI2:       # %bb.0:
734; X64-NOBMI2-NEXT:    movq %rsi, %rcx
735; X64-NOBMI2-NEXT:    movq (%rdi), %rax
736; X64-NOBMI2-NEXT:    shlq %cl, %rax
737; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
738; X64-NOBMI2-NEXT:    shrq %cl, %rax
739; X64-NOBMI2-NEXT:    retq
740;
741; X64-BMI2-LABEL: clear_highbits64_c2_load:
742; X64-BMI2:       # %bb.0:
743; X64-BMI2-NEXT:    movl $64, %eax
744; X64-BMI2-NEXT:    subl %esi, %eax
745; X64-BMI2-NEXT:    bzhiq %rax, (%rdi), %rax
746; X64-BMI2-NEXT:    retq
747  %val = load i64, ptr %w ; value to be masked comes from memory
748  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right by the count
749  %masked = and i64 %mask, %val
750  ret i64 %masked
751}
752
; 64-bit case combining both variations: the value is loaded through %w and
; the i8 bit count is zero-extended to i64. The i686 check groups and the
; x86_64 BMI2 group expect the same instructions as the c2_load variant. The
; x86_64 group without BMI2 differs only in copying the count with a 32-bit
; movl instead of a 64-bit movq.
753define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
754; X86-BASELINE-LABEL: clear_highbits64_c3_load_indexzext:
755; X86-BASELINE:       # %bb.0:
756; X86-BASELINE-NEXT:    pushl %edi
757; X86-BASELINE-NEXT:    pushl %esi
758; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %esi
759; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
760; X86-BASELINE-NEXT:    movl $-1, %eax
761; X86-BASELINE-NEXT:    movl $-1, %edi
762; X86-BASELINE-NEXT:    shrl %cl, %edi
763; X86-BASELINE-NEXT:    xorl %edx, %edx
764; X86-BASELINE-NEXT:    testb $32, %cl
765; X86-BASELINE-NEXT:    jne .LBB16_1
766; X86-BASELINE-NEXT:  # %bb.2:
767; X86-BASELINE-NEXT:    movl %edi, %edx
768; X86-BASELINE-NEXT:    jmp .LBB16_3
769; X86-BASELINE-NEXT:  .LBB16_1:
770; X86-BASELINE-NEXT:    movl %edi, %eax
771; X86-BASELINE-NEXT:  .LBB16_3:
772; X86-BASELINE-NEXT:    andl (%esi), %eax
773; X86-BASELINE-NEXT:    andl 4(%esi), %edx
774; X86-BASELINE-NEXT:    popl %esi
775; X86-BASELINE-NEXT:    popl %edi
776; X86-BASELINE-NEXT:    retl
777;
778; X86-BMI1-LABEL: clear_highbits64_c3_load_indexzext:
779; X86-BMI1:       # %bb.0:
780; X86-BMI1-NEXT:    pushl %edi
781; X86-BMI1-NEXT:    pushl %esi
782; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
783; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
784; X86-BMI1-NEXT:    movl $-1, %edi
785; X86-BMI1-NEXT:    movl $-1, %eax
786; X86-BMI1-NEXT:    shrl %cl, %eax
787; X86-BMI1-NEXT:    xorl %edx, %edx
788; X86-BMI1-NEXT:    testb $32, %cl
789; X86-BMI1-NEXT:    cmovel %eax, %edx
790; X86-BMI1-NEXT:    cmovel %edi, %eax
791; X86-BMI1-NEXT:    andl (%esi), %eax
792; X86-BMI1-NEXT:    andl 4(%esi), %edx
793; X86-BMI1-NEXT:    popl %esi
794; X86-BMI1-NEXT:    popl %edi
795; X86-BMI1-NEXT:    retl
796;
797; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
798; X86-BMI2:       # %bb.0:
799; X86-BMI2-NEXT:    pushl %ebx
800; X86-BMI2-NEXT:    pushl %esi
801; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
802; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
803; X86-BMI2-NEXT:    movl $-1, %eax
804; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
805; X86-BMI2-NEXT:    xorl %edx, %edx
806; X86-BMI2-NEXT:    testb $32, %bl
807; X86-BMI2-NEXT:    cmovel %esi, %edx
808; X86-BMI2-NEXT:    cmovnel %esi, %eax
809; X86-BMI2-NEXT:    andl (%ecx), %eax
810; X86-BMI2-NEXT:    andl 4(%ecx), %edx
811; X86-BMI2-NEXT:    popl %esi
812; X86-BMI2-NEXT:    popl %ebx
813; X86-BMI2-NEXT:    retl
814;
815; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
816; X64-NOBMI2:       # %bb.0:
817; X64-NOBMI2-NEXT:    movl %esi, %ecx
818; X64-NOBMI2-NEXT:    movq (%rdi), %rax
819; X64-NOBMI2-NEXT:    shlq %cl, %rax
820; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
821; X64-NOBMI2-NEXT:    shrq %cl, %rax
822; X64-NOBMI2-NEXT:    retq
823;
824; X64-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
825; X64-BMI2:       # %bb.0:
826; X64-BMI2-NEXT:    movl $64, %eax
827; X64-BMI2-NEXT:    subl %esi, %eax
828; X64-BMI2-NEXT:    bzhiq %rax, (%rdi), %rax
829; X64-BMI2-NEXT:    retq
830  %val = load i64, ptr %w ; value to be masked comes from memory
831  %sh_prom = zext i8 %numhighbits to i64 ; widen the count to the width of the shifted value
832  %mask = lshr i64 -1, %sh_prom ; all-ones shifted right by the widened count
833  %masked = and i64 %mask, %val
834  ret i64 %masked
835}
836
; Pattern c) on i64 with the operands of the 'and' swapped: the value is the
; first operand and the shifted all-ones mask is the second.
837define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
838; X86-BASELINE-LABEL: clear_highbits64_c4_commutative:
839; X86-BASELINE:       # %bb.0:
840; X86-BASELINE-NEXT:    pushl %esi
841; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
842; X86-BASELINE-NEXT:    movl $-1, %eax
843; X86-BASELINE-NEXT:    movl $-1, %esi
844; X86-BASELINE-NEXT:    shrl %cl, %esi
845; X86-BASELINE-NEXT:    xorl %edx, %edx
846; X86-BASELINE-NEXT:    testb $32, %cl
847; X86-BASELINE-NEXT:    jne .LBB17_1
848; X86-BASELINE-NEXT:  # %bb.2:
849; X86-BASELINE-NEXT:    movl %esi, %edx
850; X86-BASELINE-NEXT:    jmp .LBB17_3
851; X86-BASELINE-NEXT:  .LBB17_1:
852; X86-BASELINE-NEXT:    movl %esi, %eax
853; X86-BASELINE-NEXT:  .LBB17_3:
854; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %eax
855; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %edx
856; X86-BASELINE-NEXT:    popl %esi
857; X86-BASELINE-NEXT:    retl
858;
859; X86-BMI1-LABEL: clear_highbits64_c4_commutative:
860; X86-BMI1:       # %bb.0:
861; X86-BMI1-NEXT:    pushl %esi
862; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
863; X86-BMI1-NEXT:    movl $-1, %esi
864; X86-BMI1-NEXT:    movl $-1, %eax
865; X86-BMI1-NEXT:    shrl %cl, %eax
866; X86-BMI1-NEXT:    xorl %edx, %edx
867; X86-BMI1-NEXT:    testb $32, %cl
868; X86-BMI1-NEXT:    cmovel %eax, %edx
869; X86-BMI1-NEXT:    cmovel %esi, %eax
870; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
871; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
872; X86-BMI1-NEXT:    popl %esi
873; X86-BMI1-NEXT:    retl
874;
875; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
876; X86-BMI2:       # %bb.0:
877; X86-BMI2-NEXT:    pushl %esi
878; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
879; X86-BMI2-NEXT:    movl $-1, %eax
880; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
881; X86-BMI2-NEXT:    xorl %edx, %edx
882; X86-BMI2-NEXT:    testb $32, %cl
883; X86-BMI2-NEXT:    cmovel %esi, %edx
884; X86-BMI2-NEXT:    cmovnel %esi, %eax
885; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
886; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
887; X86-BMI2-NEXT:    popl %esi
888; X86-BMI2-NEXT:    retl
889;
890; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
891; X64-NOBMI2:       # %bb.0:
892; X64-NOBMI2-NEXT:    movq %rsi, %rcx
893; X64-NOBMI2-NEXT:    movq %rdi, %rax
894; X64-NOBMI2-NEXT:    shlq %cl, %rax
895; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
896; X64-NOBMI2-NEXT:    shrq %cl, %rax
897; X64-NOBMI2-NEXT:    retq
898;
899; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
900; X64-BMI2:       # %bb.0:
901; X64-BMI2-NEXT:    movl $64, %eax
902; X64-BMI2-NEXT:    subl %esi, %eax
903; X64-BMI2-NEXT:    bzhiq %rax, %rdi, %rax
904; X64-BMI2-NEXT:    retq
905  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right by the number of high bits to clear
906  %masked = and i64 %val, %mask ; swapped order: %val is the first operand, %mask the second
907  ret i64 %masked
908}
909
910; ---------------------------------------------------------------------------- ;
911; Multi-use tests
912; ---------------------------------------------------------------------------- ;
913
; Pattern c) on i32 where the mask has a second use: besides feeding the 'and',
; it is stored through %escape. The assertions below expect the mask to be
; materialized (a right shift of -1), stored, and then and-ed with the value.
914define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
915; X86-NOBMI2-LABEL: oneuse32_c:
916; X86-NOBMI2:       # %bb.0:
917; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
918; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
919; X86-NOBMI2-NEXT:    movl $-1, %eax
920; X86-NOBMI2-NEXT:    shrl %cl, %eax
921; X86-NOBMI2-NEXT:    movl %eax, (%edx)
922; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
923; X86-NOBMI2-NEXT:    retl
924;
925; X86-BMI2-LABEL: oneuse32_c:
926; X86-BMI2:       # %bb.0:
927; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
928; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
929; X86-BMI2-NEXT:    movl $-1, %edx
930; X86-BMI2-NEXT:    shrxl %eax, %edx, %eax
931; X86-BMI2-NEXT:    movl %eax, (%ecx)
932; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
933; X86-BMI2-NEXT:    retl
934;
935; X64-NOBMI2-LABEL: oneuse32_c:
936; X64-NOBMI2:       # %bb.0:
937; X64-NOBMI2-NEXT:    movl %esi, %ecx
938; X64-NOBMI2-NEXT:    movl $-1, %eax
939; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
940; X64-NOBMI2-NEXT:    shrl %cl, %eax
941; X64-NOBMI2-NEXT:    movl %eax, (%rdx)
942; X64-NOBMI2-NEXT:    andl %edi, %eax
943; X64-NOBMI2-NEXT:    retq
944;
945; X64-BMI2-LABEL: oneuse32_c:
946; X64-BMI2:       # %bb.0:
947; X64-BMI2-NEXT:    movl $-1, %eax
948; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
949; X64-BMI2-NEXT:    movl %eax, (%rdx)
950; X64-BMI2-NEXT:    andl %edi, %eax
951; X64-BMI2-NEXT:    retq
952  %mask = lshr i32 -1, %numhighbits
953  store i32 %mask, ptr %escape ; extra use of the mask
954  %masked = and i32 %mask, %val
955  ret i32 %masked
956}
957
; Pattern c) on i64 where the mask has a second use: it is stored through
; %escape in addition to feeding the 'and'. In the 32-bit assertions below the
; mask is built as two 32-bit halves and both halves are stored before the
; 'and' instructions.
958define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
959; X86-BASELINE-LABEL: oneuse64_c:
960; X86-BASELINE:       # %bb.0:
961; X86-BASELINE-NEXT:    pushl %esi
962; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %esi
963; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
964; X86-BASELINE-NEXT:    movl $-1, %eax
965; X86-BASELINE-NEXT:    movl $-1, %edx
966; X86-BASELINE-NEXT:    shrl %cl, %edx
967; X86-BASELINE-NEXT:    testb $32, %cl
968; X86-BASELINE-NEXT:    je .LBB19_2
969; X86-BASELINE-NEXT:  # %bb.1:
970; X86-BASELINE-NEXT:    movl %edx, %eax
971; X86-BASELINE-NEXT:    xorl %edx, %edx
972; X86-BASELINE-NEXT:  .LBB19_2:
973; X86-BASELINE-NEXT:    movl %edx, 4(%esi)
974; X86-BASELINE-NEXT:    movl %eax, (%esi)
975; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %eax
976; X86-BASELINE-NEXT:    andl {{[0-9]+}}(%esp), %edx
977; X86-BASELINE-NEXT:    popl %esi
978; X86-BASELINE-NEXT:    retl
979;
980; X86-BMI1-LABEL: oneuse64_c:
981; X86-BMI1:       # %bb.0:
982; X86-BMI1-NEXT:    pushl %edi
983; X86-BMI1-NEXT:    pushl %esi
984; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
985; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
986; X86-BMI1-NEXT:    movl $-1, %eax
987; X86-BMI1-NEXT:    movl $-1, %edi
988; X86-BMI1-NEXT:    shrl %cl, %edi
989; X86-BMI1-NEXT:    xorl %edx, %edx
990; X86-BMI1-NEXT:    testb $32, %cl
991; X86-BMI1-NEXT:    cmovnel %edi, %eax
992; X86-BMI1-NEXT:    cmovel %edi, %edx
993; X86-BMI1-NEXT:    movl %edx, 4(%esi)
994; X86-BMI1-NEXT:    movl %eax, (%esi)
995; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
996; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
997; X86-BMI1-NEXT:    popl %esi
998; X86-BMI1-NEXT:    popl %edi
999; X86-BMI1-NEXT:    retl
1000;
1001; X86-BMI2-LABEL: oneuse64_c:
1002; X86-BMI2:       # %bb.0:
1003; X86-BMI2-NEXT:    pushl %ebx
1004; X86-BMI2-NEXT:    pushl %esi
1005; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1006; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
1007; X86-BMI2-NEXT:    movl $-1, %eax
1008; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
1009; X86-BMI2-NEXT:    xorl %edx, %edx
1010; X86-BMI2-NEXT:    testb $32, %bl
1011; X86-BMI2-NEXT:    cmovnel %esi, %eax
1012; X86-BMI2-NEXT:    cmovel %esi, %edx
1013; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
1014; X86-BMI2-NEXT:    movl %eax, (%ecx)
1015; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1016; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1017; X86-BMI2-NEXT:    popl %esi
1018; X86-BMI2-NEXT:    popl %ebx
1019; X86-BMI2-NEXT:    retl
1020;
1021; X64-NOBMI2-LABEL: oneuse64_c:
1022; X64-NOBMI2:       # %bb.0:
1023; X64-NOBMI2-NEXT:    movq %rsi, %rcx
1024; X64-NOBMI2-NEXT:    movq $-1, %rax
1025; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
1026; X64-NOBMI2-NEXT:    shrq %cl, %rax
1027; X64-NOBMI2-NEXT:    movq %rax, (%rdx)
1028; X64-NOBMI2-NEXT:    andq %rdi, %rax
1029; X64-NOBMI2-NEXT:    retq
1030;
1031; X64-BMI2-LABEL: oneuse64_c:
1032; X64-BMI2:       # %bb.0:
1033; X64-BMI2-NEXT:    movq $-1, %rax
1034; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
1035; X64-BMI2-NEXT:    movq %rax, (%rdx)
1036; X64-BMI2-NEXT:    andq %rdi, %rax
1037; X64-BMI2-NEXT:    retq
1038  %mask = lshr i64 -1, %numhighbits
1039  store i64 %mask, ptr %escape ; extra use of the mask
1040  %masked = and i64 %mask, %val
1041  ret i64 %masked
1042}
1043
; Pattern d) on i32 where the intermediate left shift has a second use: it is
; stored through %escape before being shifted back right. The assertions below
; expect a shift-left, store, shift-right sequence in every configuration.
1044define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
1045; X86-NOBMI2-LABEL: oneuse32_d:
1046; X86-NOBMI2:       # %bb.0:
1047; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1048; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1049; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1050; X86-NOBMI2-NEXT:    shll %cl, %eax
1051; X86-NOBMI2-NEXT:    movl %eax, (%edx)
1052; X86-NOBMI2-NEXT:    shrl %cl, %eax
1053; X86-NOBMI2-NEXT:    retl
1054;
1055; X86-BMI2-LABEL: oneuse32_d:
1056; X86-BMI2:       # %bb.0:
1057; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1058; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1059; X86-BMI2-NEXT:    shlxl %ecx, {{[0-9]+}}(%esp), %edx
1060; X86-BMI2-NEXT:    movl %edx, (%eax)
1061; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
1062; X86-BMI2-NEXT:    retl
1063;
1064; X64-NOBMI2-LABEL: oneuse32_d:
1065; X64-NOBMI2:       # %bb.0:
1066; X64-NOBMI2-NEXT:    movl %esi, %ecx
1067; X64-NOBMI2-NEXT:    movl %edi, %eax
1068; X64-NOBMI2-NEXT:    shll %cl, %eax
1069; X64-NOBMI2-NEXT:    movl %eax, (%rdx)
1070; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1071; X64-NOBMI2-NEXT:    shrl %cl, %eax
1072; X64-NOBMI2-NEXT:    retq
1073;
1074; X64-BMI2-LABEL: oneuse32_d:
1075; X64-BMI2:       # %bb.0:
1076; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
1077; X64-BMI2-NEXT:    movl %eax, (%rdx)
1078; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
1079; X64-BMI2-NEXT:    retq
1080  %sh1 = shl i32 %val, %numhighbits
1081  store i32 %sh1, ptr %escape ; extra use of the left-shifted value
1082  %masked = lshr i32 %sh1, %numhighbits
1083  ret i32 %masked
1084}
1085
; Pattern d) on i64 where the intermediate left shift has a second use: it is
; stored through %escape before being shifted back right. In the 32-bit
; assertions below the 64-bit shifts are expanded into shld/shrd sequences
; that test bit 5 of the shift amount (testb $32); the 64-bit assertions
; expect a plain shift-left, store, shift-right sequence.
1086define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
1087; X86-BASELINE-LABEL: oneusei64_d:
1088; X86-BASELINE:       # %bb.0:
1089; X86-BASELINE-NEXT:    pushl %ebx
1090; X86-BASELINE-NEXT:    pushl %edi
1091; X86-BASELINE-NEXT:    pushl %esi
1092; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1093; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %edx
1094; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1095; X86-BASELINE-NEXT:    movl %edx, %edi
1096; X86-BASELINE-NEXT:    shll %cl, %edi
1097; X86-BASELINE-NEXT:    shldl %cl, %edx, %eax
1098; X86-BASELINE-NEXT:    testb $32, %cl
1099; X86-BASELINE-NEXT:    movl %edi, %esi
1100; X86-BASELINE-NEXT:    jne .LBB21_2
1101; X86-BASELINE-NEXT:  # %bb.1:
1102; X86-BASELINE-NEXT:    movl %eax, %esi
1103; X86-BASELINE-NEXT:  .LBB21_2:
1104; X86-BASELINE-NEXT:    movl %esi, %eax
1105; X86-BASELINE-NEXT:    shrl %cl, %eax
1106; X86-BASELINE-NEXT:    xorl %ebx, %ebx
1107; X86-BASELINE-NEXT:    testb $32, %cl
1108; X86-BASELINE-NEXT:    movl $0, %edx
1109; X86-BASELINE-NEXT:    jne .LBB21_4
1110; X86-BASELINE-NEXT:  # %bb.3:
1111; X86-BASELINE-NEXT:    movl %edi, %ebx
1112; X86-BASELINE-NEXT:    movl %eax, %edx
1113; X86-BASELINE-NEXT:  .LBB21_4:
1114; X86-BASELINE-NEXT:    movl %ebx, %edi
1115; X86-BASELINE-NEXT:    shrdl %cl, %esi, %edi
1116; X86-BASELINE-NEXT:    testb $32, %cl
1117; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1118; X86-BASELINE-NEXT:    movl %ebx, (%ecx)
1119; X86-BASELINE-NEXT:    movl %esi, 4(%ecx)
1120; X86-BASELINE-NEXT:    jne .LBB21_6
1121; X86-BASELINE-NEXT:  # %bb.5:
1122; X86-BASELINE-NEXT:    movl %edi, %eax
1123; X86-BASELINE-NEXT:  .LBB21_6:
1124; X86-BASELINE-NEXT:    popl %esi
1125; X86-BASELINE-NEXT:    popl %edi
1126; X86-BASELINE-NEXT:    popl %ebx
1127; X86-BASELINE-NEXT:    retl
1128;
1129; X86-BMI1-LABEL: oneusei64_d:
1130; X86-BMI1:       # %bb.0:
1131; X86-BMI1-NEXT:    pushl %ebx
1132; X86-BMI1-NEXT:    pushl %edi
1133; X86-BMI1-NEXT:    pushl %esi
1134; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1135; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1136; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
1137; X86-BMI1-NEXT:    movl %edx, %eax
1138; X86-BMI1-NEXT:    shll %cl, %eax
1139; X86-BMI1-NEXT:    shldl %cl, %edx, %esi
1140; X86-BMI1-NEXT:    testb $32, %cl
1141; X86-BMI1-NEXT:    cmovnel %eax, %esi
1142; X86-BMI1-NEXT:    movl %esi, %edi
1143; X86-BMI1-NEXT:    shrl %cl, %edi
1144; X86-BMI1-NEXT:    xorl %edx, %edx
1145; X86-BMI1-NEXT:    testb $32, %cl
1146; X86-BMI1-NEXT:    cmovnel %edx, %eax
1147; X86-BMI1-NEXT:    cmovel %edi, %edx
1148; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1149; X86-BMI1-NEXT:    movl %eax, (%ebx)
1150; X86-BMI1-NEXT:    shrdl %cl, %esi, %eax
1151; X86-BMI1-NEXT:    testb $32, %cl
1152; X86-BMI1-NEXT:    movl %esi, 4(%ebx)
1153; X86-BMI1-NEXT:    cmovnel %edi, %eax
1154; X86-BMI1-NEXT:    popl %esi
1155; X86-BMI1-NEXT:    popl %edi
1156; X86-BMI1-NEXT:    popl %ebx
1157; X86-BMI1-NEXT:    retl
1158;
1159; X86-BMI2-LABEL: oneusei64_d:
1160; X86-BMI2:       # %bb.0:
1161; X86-BMI2-NEXT:    pushl %ebx
1162; X86-BMI2-NEXT:    pushl %edi
1163; X86-BMI2-NEXT:    pushl %esi
1164; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1165; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1166; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1167; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
1168; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1169; X86-BMI2-NEXT:    xorl %edx, %edx
1170; X86-BMI2-NEXT:    testb $32, %cl
1171; X86-BMI2-NEXT:    cmovnel %eax, %esi
1172; X86-BMI2-NEXT:    cmovnel %edx, %eax
1173; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edi
1174; X86-BMI2-NEXT:    cmovel %edi, %edx
1175; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1176; X86-BMI2-NEXT:    movl %eax, (%ebx)
1177; X86-BMI2-NEXT:    shrdl %cl, %esi, %eax
1178; X86-BMI2-NEXT:    testb $32, %cl
1179; X86-BMI2-NEXT:    movl %esi, 4(%ebx)
1180; X86-BMI2-NEXT:    cmovnel %edi, %eax
1181; X86-BMI2-NEXT:    popl %esi
1182; X86-BMI2-NEXT:    popl %edi
1183; X86-BMI2-NEXT:    popl %ebx
1184; X86-BMI2-NEXT:    retl
1185;
1186; X64-NOBMI2-LABEL: oneusei64_d:
1187; X64-NOBMI2:       # %bb.0:
1188; X64-NOBMI2-NEXT:    movq %rsi, %rcx
1189; X64-NOBMI2-NEXT:    movq %rdi, %rax
1190; X64-NOBMI2-NEXT:    shlq %cl, %rax
1191; X64-NOBMI2-NEXT:    movq %rax, (%rdx)
1192; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
1193; X64-NOBMI2-NEXT:    shrq %cl, %rax
1194; X64-NOBMI2-NEXT:    retq
1195;
1196; X64-BMI2-LABEL: oneusei64_d:
1197; X64-BMI2:       # %bb.0:
1198; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
1199; X64-BMI2-NEXT:    movq %rax, (%rdx)
1200; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
1201; X64-BMI2-NEXT:    retq
1202  %sh1 = shl i64 %val, %numhighbits
1203  store i64 %sh1, ptr %escape ; extra use of the left-shifted value
1204  %masked = lshr i64 %sh1, %numhighbits
1205  ret i64 %masked
1206}
1207
1208; ---------------------------------------------------------------------------- ;
1209; Misc.
1210;
1211; Variation of pattern
1212;   c) x &  (-1 >> (C - y))
1213; but with C != bitwidth(x)
1214; ---------------------------------------------------------------------------- ;
1215
; Pattern c) on i32 where the shift amount is (16 - %numlowbits), i.e. the
; constant is smaller than the bit width. The assertions below expect a
; shl/shr pair by that amount without BMI2, and with BMI2 a BZHI whose index
; is 32 minus that amount.
1216define i32 @clear_highbits32_16(i32 %val, i32 %numlowbits) nounwind {
1217; X86-NOBMI2-LABEL: clear_highbits32_16:
1218; X86-NOBMI2:       # %bb.0:
1219; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1220; X86-NOBMI2-NEXT:    movb $16, %cl
1221; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1222; X86-NOBMI2-NEXT:    shll %cl, %eax
1223; X86-NOBMI2-NEXT:    shrl %cl, %eax
1224; X86-NOBMI2-NEXT:    retl
1225;
1226; X86-BMI2-LABEL: clear_highbits32_16:
1227; X86-BMI2:       # %bb.0:
1228; X86-BMI2-NEXT:    movb $16, %al
1229; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
1230; X86-BMI2-NEXT:    movl $32, %ecx
1231; X86-BMI2-NEXT:    subl %eax, %ecx
1232; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
1233; X86-BMI2-NEXT:    retl
1234;
1235; X64-NOBMI2-LABEL: clear_highbits32_16:
1236; X64-NOBMI2:       # %bb.0:
1237; X64-NOBMI2-NEXT:    movl %edi, %eax
1238; X64-NOBMI2-NEXT:    movb $16, %cl
1239; X64-NOBMI2-NEXT:    subb %sil, %cl
1240; X64-NOBMI2-NEXT:    shll %cl, %eax
1241; X64-NOBMI2-NEXT:    shrl %cl, %eax
1242; X64-NOBMI2-NEXT:    retq
1243;
1244; X64-BMI2-LABEL: clear_highbits32_16:
1245; X64-BMI2:       # %bb.0:
1246; X64-BMI2-NEXT:    movb $16, %al
1247; X64-BMI2-NEXT:    subb %sil, %al
1248; X64-BMI2-NEXT:    movl $32, %ecx
1249; X64-BMI2-NEXT:    subl %eax, %ecx
1250; X64-BMI2-NEXT:    bzhil %ecx, %edi, %eax
1251; X64-BMI2-NEXT:    retq
1252  %numhighbits = sub i32 16, %numlowbits ; C = 16, not the bit width (32)
1253  %mask = lshr i32 -1, %numhighbits
1254  %masked = and i32 %mask, %val
1255  ret i32 %masked
1256}
; Pattern c) on i32 where the shift amount is (48 - %numlowbits), i.e. the
; constant is larger than the bit width. The assertions below expect a
; shl/shr pair by that amount without BMI2, and with BMI2 a BZHI whose index
; is 32 minus that amount.
1257define i32 @clear_highbits32_48(i32 %val, i32 %numlowbits) nounwind {
1258; X86-NOBMI2-LABEL: clear_highbits32_48:
1259; X86-NOBMI2:       # %bb.0:
1260; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1261; X86-NOBMI2-NEXT:    movb $48, %cl
1262; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1263; X86-NOBMI2-NEXT:    shll %cl, %eax
1264; X86-NOBMI2-NEXT:    shrl %cl, %eax
1265; X86-NOBMI2-NEXT:    retl
1266;
1267; X86-BMI2-LABEL: clear_highbits32_48:
1268; X86-BMI2:       # %bb.0:
1269; X86-BMI2-NEXT:    movb $48, %al
1270; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
1271; X86-BMI2-NEXT:    movl $32, %ecx
1272; X86-BMI2-NEXT:    subl %eax, %ecx
1273; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
1274; X86-BMI2-NEXT:    retl
1275;
1276; X64-NOBMI2-LABEL: clear_highbits32_48:
1277; X64-NOBMI2:       # %bb.0:
1278; X64-NOBMI2-NEXT:    movl %edi, %eax
1279; X64-NOBMI2-NEXT:    movb $48, %cl
1280; X64-NOBMI2-NEXT:    subb %sil, %cl
1281; X64-NOBMI2-NEXT:    shll %cl, %eax
1282; X64-NOBMI2-NEXT:    shrl %cl, %eax
1283; X64-NOBMI2-NEXT:    retq
1284;
1285; X64-BMI2-LABEL: clear_highbits32_48:
1286; X64-BMI2:       # %bb.0:
1287; X64-BMI2-NEXT:    movb $48, %al
1288; X64-BMI2-NEXT:    subb %sil, %al
1289; X64-BMI2-NEXT:    movl $32, %ecx
1290; X64-BMI2-NEXT:    subl %eax, %ecx
1291; X64-BMI2-NEXT:    bzhil %ecx, %edi, %eax
1292; X64-BMI2-NEXT:    retq
1293  %numhighbits = sub i32 48, %numlowbits ; C = 48, not the bit width (32)
1294  %mask = lshr i32 -1, %numhighbits
1295  %masked = and i32 %mask, %val
1296  ret i32 %masked
1297}
1298
; Same as the (16 - %numlowbits) variant of pattern c), but the mask has a
; second use: it is stored through %escape. The assertions below expect the
; mask to be materialized with a right shift of -1, stored, and then and-ed
; with the value in every configuration.
1299define i32 @clear_highbits32_16_extrause(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
1300; X86-NOBMI2-LABEL: clear_highbits32_16_extrause:
1301; X86-NOBMI2:       # %bb.0:
1302; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1303; X86-NOBMI2-NEXT:    movb $16, %cl
1304; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1305; X86-NOBMI2-NEXT:    movl $-1, %eax
1306; X86-NOBMI2-NEXT:    shrl %cl, %eax
1307; X86-NOBMI2-NEXT:    movl %eax, (%edx)
1308; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1309; X86-NOBMI2-NEXT:    retl
1310;
1311; X86-BMI2-LABEL: clear_highbits32_16_extrause:
1312; X86-BMI2:       # %bb.0:
1313; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1314; X86-BMI2-NEXT:    movb $16, %al
1315; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
1316; X86-BMI2-NEXT:    movl $-1, %edx
1317; X86-BMI2-NEXT:    shrxl %eax, %edx, %eax
1318; X86-BMI2-NEXT:    movl %eax, (%ecx)
1319; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1320; X86-BMI2-NEXT:    retl
1321;
1322; X64-NOBMI2-LABEL: clear_highbits32_16_extrause:
1323; X64-NOBMI2:       # %bb.0:
1324; X64-NOBMI2-NEXT:    movb $16, %cl
1325; X64-NOBMI2-NEXT:    subb %sil, %cl
1326; X64-NOBMI2-NEXT:    movl $-1, %eax
1327; X64-NOBMI2-NEXT:    shrl %cl, %eax
1328; X64-NOBMI2-NEXT:    movl %eax, (%rdx)
1329; X64-NOBMI2-NEXT:    andl %edi, %eax
1330; X64-NOBMI2-NEXT:    retq
1331;
1332; X64-BMI2-LABEL: clear_highbits32_16_extrause:
1333; X64-BMI2:       # %bb.0:
1334; X64-BMI2-NEXT:    movb $16, %al
1335; X64-BMI2-NEXT:    subb %sil, %al
1336; X64-BMI2-NEXT:    movl $-1, %ecx
1337; X64-BMI2-NEXT:    shrxl %eax, %ecx, %eax
1338; X64-BMI2-NEXT:    movl %eax, (%rdx)
1339; X64-BMI2-NEXT:    andl %edi, %eax
1340; X64-BMI2-NEXT:    retq
1341  %numhighbits = sub i32 16, %numlowbits ; C = 16, not the bit width (32)
1342  %mask = lshr i32 -1, %numhighbits
1343  store i32 %mask, ptr %escape ; extra use of the mask
1344  %masked = and i32 %mask, %val
1345  ret i32 %masked
1346}
; Same as the (48 - %numlowbits) variant of pattern c), but the mask has a
; second use: it is stored through %escape. The assertions below expect the
; mask to be materialized with a right shift of -1, stored, and then and-ed
; with the value in every configuration.
1347define i32 @clear_highbits32_48_extrause(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
1348; X86-NOBMI2-LABEL: clear_highbits32_48_extrause:
1349; X86-NOBMI2:       # %bb.0:
1350; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
1351; X86-NOBMI2-NEXT:    movb $48, %cl
1352; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1353; X86-NOBMI2-NEXT:    movl $-1, %eax
1354; X86-NOBMI2-NEXT:    shrl %cl, %eax
1355; X86-NOBMI2-NEXT:    movl %eax, (%edx)
1356; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1357; X86-NOBMI2-NEXT:    retl
1358;
1359; X86-BMI2-LABEL: clear_highbits32_48_extrause:
1360; X86-BMI2:       # %bb.0:
1361; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1362; X86-BMI2-NEXT:    movb $48, %al
1363; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
1364; X86-BMI2-NEXT:    movl $-1, %edx
1365; X86-BMI2-NEXT:    shrxl %eax, %edx, %eax
1366; X86-BMI2-NEXT:    movl %eax, (%ecx)
1367; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1368; X86-BMI2-NEXT:    retl
1369;
1370; X64-NOBMI2-LABEL: clear_highbits32_48_extrause:
1371; X64-NOBMI2:       # %bb.0:
1372; X64-NOBMI2-NEXT:    movb $48, %cl
1373; X64-NOBMI2-NEXT:    subb %sil, %cl
1374; X64-NOBMI2-NEXT:    movl $-1, %eax
1375; X64-NOBMI2-NEXT:    shrl %cl, %eax
1376; X64-NOBMI2-NEXT:    movl %eax, (%rdx)
1377; X64-NOBMI2-NEXT:    andl %edi, %eax
1378; X64-NOBMI2-NEXT:    retq
1379;
1380; X64-BMI2-LABEL: clear_highbits32_48_extrause:
1381; X64-BMI2:       # %bb.0:
1382; X64-BMI2-NEXT:    movb $48, %al
1383; X64-BMI2-NEXT:    subb %sil, %al
1384; X64-BMI2-NEXT:    movl $-1, %ecx
1385; X64-BMI2-NEXT:    shrxl %eax, %ecx, %eax
1386; X64-BMI2-NEXT:    movl %eax, (%rdx)
1387; X64-BMI2-NEXT:    andl %edi, %eax
1388; X64-BMI2-NEXT:    retq
1389  %numhighbits = sub i32 48, %numlowbits ; C = 48, not the bit width (32)
1390  %mask = lshr i32 -1, %numhighbits
1391  store i32 %mask, ptr %escape ; extra use of the mask
1392  %masked = and i32 %mask, %val
1393  ret i32 %masked
1394}
1395