xref: /llvm-project/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll (revision 11276563c81987791a2326950dbc3315a32dd709)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-pc-win32       -mattr=+avx512bw  | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -mtriple=x86_64-win32        -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
4; RUN: llc < %s -mtriple=x86_64-linux-gnu    -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64
5
6; Test regcall when receiving arguments of v64i1 type
7define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12)  {
8; X32-LABEL: test_argv64i1:
9; X32:       # %bb.0:
10; X32-NEXT:    addl %edx, %eax
11; X32-NEXT:    adcl %edi, %ecx
12; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
13; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
14; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
15; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
16; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
17; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
18; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
19; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
20; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
21; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
22; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
23; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
24; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
25; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
26; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
27; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
28; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
29; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
30; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
31; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
32; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
33; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
34; X32-NEXT:    retl
35;
36; WIN64-LABEL: test_argv64i1:
37; WIN64:       # %bb.0:
38; WIN64-NEXT:    addq %rcx, %rax
39; WIN64-NEXT:    addq %rdx, %rax
40; WIN64-NEXT:    addq %rdi, %rax
41; WIN64-NEXT:    leaq (%rsi,%r8), %rcx
42; WIN64-NEXT:    addq %r9, %rcx
43; WIN64-NEXT:    addq %rcx, %rax
44; WIN64-NEXT:    leaq (%r10,%r11), %rcx
45; WIN64-NEXT:    addq %r12, %rcx
46; WIN64-NEXT:    addq %r14, %rcx
47; WIN64-NEXT:    addq %rcx, %rax
48; WIN64-NEXT:    addq %r15, %rax
49; WIN64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
50; WIN64-NEXT:    retq
51;
52; LINUXOSX64-LABEL: test_argv64i1:
53; LINUXOSX64:       # %bb.0:
54; LINUXOSX64-NEXT:    addq %rcx, %rax
55; LINUXOSX64-NEXT:    addq %rdx, %rax
56; LINUXOSX64-NEXT:    addq %rdi, %rax
57; LINUXOSX64-NEXT:    leaq (%rsi,%r8), %rcx
58; LINUXOSX64-NEXT:    addq %r9, %rcx
59; LINUXOSX64-NEXT:    addq %rcx, %rax
60; LINUXOSX64-NEXT:    leaq (%r12,%r13), %rcx
61; LINUXOSX64-NEXT:    addq %r14, %rcx
62; LINUXOSX64-NEXT:    addq %r15, %rcx
63; LINUXOSX64-NEXT:    addq %rcx, %rax
64; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
65; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
66; LINUXOSX64-NEXT:    retq
; NOTE(review): IR below bitcasts each of the 13 v64i1 args to i64 and chains
; 12 adds; the autogenerated checks above pin the regcall register/stack
; assignment of those args for each target (do not edit checks by hand —
; regenerate with update_llc_test_checks.py).
67  %y0 = bitcast <64 x i1> %x0 to i64
68  %y1 = bitcast <64 x i1> %x1 to i64
69  %y2 = bitcast <64 x i1> %x2 to i64
70  %y3 = bitcast <64 x i1> %x3 to i64
71  %y4 = bitcast <64 x i1> %x4 to i64
72  %y5 = bitcast <64 x i1> %x5 to i64
73  %y6 = bitcast <64 x i1> %x6 to i64
74  %y7 = bitcast <64 x i1> %x7 to i64
75  %y8 = bitcast <64 x i1> %x8 to i64
76  %y9 = bitcast <64 x i1> %x9 to i64
77  %y10 = bitcast <64 x i1> %x10 to i64
78  %y11 = bitcast <64 x i1> %x11 to i64
79  %y12 = bitcast <64 x i1> %x12 to i64
80  %add1 = add i64 %y0, %y1
81  %add2 = add i64 %add1, %y2
82  %add3 = add i64 %add2, %y3
83  %add4 = add i64 %add3, %y4
84  %add5 = add i64 %add4, %y5
85  %add6 = add i64 %add5, %y6
86  %add7 = add i64 %add6, %y7
87  %add8 = add i64 %add7, %y8
88  %add9 = add i64 %add8, %y9
89  %add10 = add i64 %add9, %y10
90  %add11 = add i64 %add10, %y11
91  %add12 = add i64 %add11, %y12
92  ret i64 %add12
93}
94
95; Test regcall when passing arguments of v64i1 type
96define dso_local i64 @caller_argv64i1() #0 {
97; X32-LABEL: caller_argv64i1:
98; X32:       # %bb.0: # %entry
99; X32-NEXT:    pushl %edi
100; X32-NEXT:    subl $88, %esp
101; X32-NEXT:    vbroadcastsd {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
102; X32-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
103; X32-NEXT:    vmovups %zmm0, (%esp)
104; X32-NEXT:    movl $1, {{[0-9]+}}(%esp)
105; X32-NEXT:    movl $2, {{[0-9]+}}(%esp)
106; X32-NEXT:    movl $2, %eax
107; X32-NEXT:    movl $1, %ecx
108; X32-NEXT:    movl $2, %edx
109; X32-NEXT:    movl $1, %edi
110; X32-NEXT:    vzeroupper
111; X32-NEXT:    calll _test_argv64i1
112; X32-NEXT:    movl %ecx, %edx
113; X32-NEXT:    addl $88, %esp
114; X32-NEXT:    popl %edi
115; X32-NEXT:    retl
116;
117; WIN64-LABEL: caller_argv64i1:
118; WIN64:       # %bb.0: # %entry
119; WIN64-NEXT:    pushq %r15
120; WIN64-NEXT:    .seh_pushreg %r15
121; WIN64-NEXT:    pushq %r14
122; WIN64-NEXT:    .seh_pushreg %r14
123; WIN64-NEXT:    pushq %r12
124; WIN64-NEXT:    .seh_pushreg %r12
125; WIN64-NEXT:    pushq %rsi
126; WIN64-NEXT:    .seh_pushreg %rsi
127; WIN64-NEXT:    pushq %rdi
128; WIN64-NEXT:    .seh_pushreg %rdi
129; WIN64-NEXT:    subq $48, %rsp
130; WIN64-NEXT:    .seh_stackalloc 48
131; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
132; WIN64-NEXT:    .seh_savexmm %xmm7, 32
133; WIN64-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
134; WIN64-NEXT:    .seh_savexmm %xmm6, 16
135; WIN64-NEXT:    .seh_endprologue
136; WIN64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
137; WIN64-NEXT:    movq %rax, (%rsp)
138; WIN64-NEXT:    movq %rax, %rcx
139; WIN64-NEXT:    movq %rax, %rdx
140; WIN64-NEXT:    movq %rax, %rdi
141; WIN64-NEXT:    movq %rax, %r8
142; WIN64-NEXT:    movq %rax, %r9
143; WIN64-NEXT:    movq %rax, %r10
144; WIN64-NEXT:    movq %rax, %r11
145; WIN64-NEXT:    movq %rax, %r12
146; WIN64-NEXT:    movq %rax, %r14
147; WIN64-NEXT:    movq %rax, %r15
148; WIN64-NEXT:    movq %rax, %rsi
149; WIN64-NEXT:    callq test_argv64i1
150; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
151; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
152; WIN64-NEXT:    addq $48, %rsp
153; WIN64-NEXT:    popq %rdi
154; WIN64-NEXT:    popq %rsi
155; WIN64-NEXT:    popq %r12
156; WIN64-NEXT:    popq %r14
157; WIN64-NEXT:    popq %r15
158; WIN64-NEXT:    retq
159; WIN64-NEXT:    .seh_endproc
160;
161; LINUXOSX64-LABEL: caller_argv64i1:
162; LINUXOSX64:       # %bb.0: # %entry
163; LINUXOSX64-NEXT:    pushq %r15
164; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
165; LINUXOSX64-NEXT:    pushq %r14
166; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
167; LINUXOSX64-NEXT:    pushq %r13
168; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
169; LINUXOSX64-NEXT:    pushq %r12
170; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 40
171; LINUXOSX64-NEXT:    pushq %rax
172; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
173; LINUXOSX64-NEXT:    .cfi_offset %r12, -40
174; LINUXOSX64-NEXT:    .cfi_offset %r13, -32
175; LINUXOSX64-NEXT:    .cfi_offset %r14, -24
176; LINUXOSX64-NEXT:    .cfi_offset %r15, -16
177; LINUXOSX64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
178; LINUXOSX64-NEXT:    movq %rax, %rcx
179; LINUXOSX64-NEXT:    movq %rax, %rdx
180; LINUXOSX64-NEXT:    movq %rax, %rdi
181; LINUXOSX64-NEXT:    movq %rax, %r8
182; LINUXOSX64-NEXT:    movq %rax, %r9
183; LINUXOSX64-NEXT:    movq %rax, %r12
184; LINUXOSX64-NEXT:    movq %rax, %r13
185; LINUXOSX64-NEXT:    movq %rax, %r14
186; LINUXOSX64-NEXT:    movq %rax, %r15
187; LINUXOSX64-NEXT:    movq %rax, %rsi
188; LINUXOSX64-NEXT:    pushq %rax
189; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
190; LINUXOSX64-NEXT:    pushq %rax
191; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
192; LINUXOSX64-NEXT:    callq test_argv64i1
193; LINUXOSX64-NEXT:    addq $24, %rsp
194; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -24
195; LINUXOSX64-NEXT:    popq %r12
196; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
197; LINUXOSX64-NEXT:    popq %r13
198; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
199; LINUXOSX64-NEXT:    popq %r14
200; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
201; LINUXOSX64-NEXT:    popq %r15
202; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
203; LINUXOSX64-NEXT:    retq
204entry:
; NOTE(review): all 13 v64i1 arguments are the same constant mask
; 4294967298 (0x100000002) bitcast to <64 x i1>; the checks above verify it
; is materialized once and fanned out to the regcall argument registers and
; stack slots.
205  %v0 = bitcast i64 4294967298 to <64 x i1>
206  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
207                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
208                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
209                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
210                                                <64 x i1> %v0)
211  ret i64 %call
212}
213
214; Test regcall when returning v64i1 type
215define dso_local x86_regcallcc <64 x i1> @test_retv64i1()  {
216; X32-LABEL: test_retv64i1:
217; X32:       # %bb.0:
218; X32-NEXT:    movl $2, %eax
219; X32-NEXT:    movl $1, %ecx
220; X32-NEXT:    retl
221;
222; CHECK64-LABEL: test_retv64i1:
223; CHECK64:       # %bb.0:
224; CHECK64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
225; CHECK64-NEXT:    retq
; NOTE(review): returns the constant mask 0x100000002 as <64 x i1>; per the
; checks it is returned split across eax/ecx on 32-bit and whole in rax on
; 64-bit targets.
226  %a = bitcast i64 4294967298 to <64 x i1>
227 ret <64 x i1> %a
228}
229
230; Test regcall when processing result of v64i1 type
231define dso_local <64 x i1> @caller_retv64i1() #0 {
232; X32-LABEL: caller_retv64i1:
233; X32:       # %bb.0: # %entry
234; X32-NEXT:    calll _test_retv64i1
235; X32-NEXT:    kmovd %eax, %k0
236; X32-NEXT:    kmovd %ecx, %k1
237; X32-NEXT:    kunpckdq %k0, %k1, %k0
238; X32-NEXT:    vpmovm2b %k0, %zmm0
239; X32-NEXT:    retl
240;
241; WIN64-LABEL: caller_retv64i1:
242; WIN64:       # %bb.0: # %entry
243; WIN64-NEXT:    pushq %rsi
244; WIN64-NEXT:    .seh_pushreg %rsi
245; WIN64-NEXT:    pushq %rdi
246; WIN64-NEXT:    .seh_pushreg %rdi
247; WIN64-NEXT:    subq $40, %rsp
248; WIN64-NEXT:    .seh_stackalloc 40
249; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
250; WIN64-NEXT:    .seh_savexmm %xmm7, 16
251; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
252; WIN64-NEXT:    .seh_savexmm %xmm6, 0
253; WIN64-NEXT:    .seh_endprologue
254; WIN64-NEXT:    callq test_retv64i1
255; WIN64-NEXT:    kmovq %rax, %k0
256; WIN64-NEXT:    vpmovm2b %k0, %zmm0
257; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
258; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
259; WIN64-NEXT:    addq $40, %rsp
260; WIN64-NEXT:    popq %rdi
261; WIN64-NEXT:    popq %rsi
262; WIN64-NEXT:    retq
263; WIN64-NEXT:    .seh_endproc
264;
265; LINUXOSX64-LABEL: caller_retv64i1:
266; LINUXOSX64:       # %bb.0: # %entry
267; LINUXOSX64-NEXT:    pushq %rax
268; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
269; LINUXOSX64-NEXT:    callq test_retv64i1
270; LINUXOSX64-NEXT:    kmovq %rax, %k0
271; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm0
272; LINUXOSX64-NEXT:    popq %rax
273; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
274; LINUXOSX64-NEXT:    retq
275entry:
; NOTE(review): forwards the regcall v64i1 result unchanged; the checks show
; the GPR return value being moved into a k-register (kunpckdq joins the two
; 32-bit halves on X32) and expanded with vpmovm2b for the non-regcall return.
276  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
277  ret <64 x i1> %call
278}
279
280; Test regcall when receiving arguments of v32i1 type
281declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
282define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)  {
283; X32-LABEL: test_argv32i1:
284; X32:       # %bb.0: # %entry
285; X32-NEXT:    subl $76, %esp
286; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
287; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
288; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
289; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
290; X32-NEXT:    kmovd %edx, %k0
291; X32-NEXT:    kmovd %ecx, %k1
292; X32-NEXT:    kmovd %eax, %k2
293; X32-NEXT:    vpmovm2b %k2, %zmm0
294; X32-NEXT:    vpmovm2b %k1, %zmm1
295; X32-NEXT:    vpmovm2b %k0, %zmm2
296; X32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
297; X32-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
298; X32-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
299; X32-NEXT:    calll _test_argv32i1helper
300; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
301; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
302; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
303; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
304; X32-NEXT:    addl $76, %esp
305; X32-NEXT:    vzeroupper
306; X32-NEXT:    retl
307;
308; WIN64-LABEL: test_argv32i1:
309; WIN64:       # %bb.0: # %entry
310; WIN64-NEXT:    pushq %rbp
311; WIN64-NEXT:    .seh_pushreg %rbp
312; WIN64-NEXT:    pushq %r11
313; WIN64-NEXT:    .seh_pushreg %r11
314; WIN64-NEXT:    pushq %r10
315; WIN64-NEXT:    .seh_pushreg %r10
316; WIN64-NEXT:    subq $128, %rsp
317; WIN64-NEXT:    .seh_stackalloc 128
318; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
319; WIN64-NEXT:    .seh_setframe %rbp, 128
320; WIN64-NEXT:    .seh_endprologue
321; WIN64-NEXT:    andq $-32, %rsp
322; WIN64-NEXT:    kmovd %edx, %k0
323; WIN64-NEXT:    kmovd %eax, %k1
324; WIN64-NEXT:    kmovd %ecx, %k2
325; WIN64-NEXT:    vpmovm2b %k2, %zmm0
326; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
327; WIN64-NEXT:    vpmovm2b %k1, %zmm0
328; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
329; WIN64-NEXT:    vpmovm2b %k0, %zmm0
330; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
331; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
332; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
333; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
334; WIN64-NEXT:    vzeroupper
335; WIN64-NEXT:    callq test_argv32i1helper
336; WIN64-NEXT:    nop
337; WIN64-NEXT:    movq %rbp, %rsp
338; WIN64-NEXT:    popq %r10
339; WIN64-NEXT:    popq %r11
340; WIN64-NEXT:    popq %rbp
341; WIN64-NEXT:    retq
342; WIN64-NEXT:    .seh_endproc
343;
344; LINUXOSX64-LABEL: test_argv32i1:
345; LINUXOSX64:       # %bb.0: # %entry
346; LINUXOSX64-NEXT:    subq $136, %rsp
347; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
348; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
349; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
350; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
351; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
352; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
353; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
354; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
355; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
356; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
357; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
358; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
359; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
360; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
361; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
362; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
363; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
364; LINUXOSX64-NEXT:    kmovd %edx, %k0
365; LINUXOSX64-NEXT:    kmovd %ecx, %k1
366; LINUXOSX64-NEXT:    kmovd %eax, %k2
367; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
368; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
369; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
370; LINUXOSX64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
371; LINUXOSX64-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
372; LINUXOSX64-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
373; LINUXOSX64-NEXT:    callq test_argv32i1helper@PLT
374; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
375; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
376; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
377; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
378; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
379; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
380; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
381; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
382; LINUXOSX64-NEXT:    addq $136, %rsp
383; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
384; LINUXOSX64-NEXT:    vzeroupper
385; LINUXOSX64-NEXT:    retq
386entry:
; NOTE(review): forwards the three regcall v32i1 args to a non-regcall
; helper; the checks show the GPR-held masks being moved into k-registers
; and expanded with vpmovm2b to the helper's vector-argument form.
387  %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
388  ret i32 %res
389}
390
391; Test regcall when passing arguments of v32i1 type
392define dso_local i32 @caller_argv32i1() #0 {
393; X32-LABEL: caller_argv32i1:
394; X32:       # %bb.0: # %entry
395; X32-NEXT:    movl $1, %eax
396; X32-NEXT:    movl $1, %ecx
397; X32-NEXT:    movl $1, %edx
398; X32-NEXT:    calll _test_argv32i1
399; X32-NEXT:    retl
400;
401; WIN64-LABEL: caller_argv32i1:
402; WIN64:       # %bb.0: # %entry
403; WIN64-NEXT:    pushq %rsi
404; WIN64-NEXT:    .seh_pushreg %rsi
405; WIN64-NEXT:    pushq %rdi
406; WIN64-NEXT:    .seh_pushreg %rdi
407; WIN64-NEXT:    subq $40, %rsp
408; WIN64-NEXT:    .seh_stackalloc 40
409; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
410; WIN64-NEXT:    .seh_savexmm %xmm7, 16
411; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
412; WIN64-NEXT:    .seh_savexmm %xmm6, 0
413; WIN64-NEXT:    .seh_endprologue
414; WIN64-NEXT:    movl $1, %eax
415; WIN64-NEXT:    movl $1, %ecx
416; WIN64-NEXT:    movl $1, %edx
417; WIN64-NEXT:    callq test_argv32i1
418; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
419; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
420; WIN64-NEXT:    addq $40, %rsp
421; WIN64-NEXT:    popq %rdi
422; WIN64-NEXT:    popq %rsi
423; WIN64-NEXT:    retq
424; WIN64-NEXT:    .seh_endproc
425;
426; LINUXOSX64-LABEL: caller_argv32i1:
427; LINUXOSX64:       # %bb.0: # %entry
428; LINUXOSX64-NEXT:    pushq %rax
429; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
430; LINUXOSX64-NEXT:    movl $1, %eax
431; LINUXOSX64-NEXT:    movl $1, %ecx
432; LINUXOSX64-NEXT:    movl $1, %edx
433; LINUXOSX64-NEXT:    callq test_argv32i1
434; LINUXOSX64-NEXT:    popq %rcx
435; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
436; LINUXOSX64-NEXT:    retq
437entry:
; NOTE(review): passes the constant mask 1 (bitcast to <32 x i1>) in all
; three regcall arguments; the checks pin them to eax/ecx/edx on every target.
438  %v0 = bitcast i32 1 to <32 x i1>
439  %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
440  ret i32 %call
441}
442
443; Test regcall when returning v32i1 type
444define dso_local x86_regcallcc <32 x i1> @test_retv32i1()  {
445; X32-LABEL: test_retv32i1:
446; X32:       # %bb.0:
447; X32-NEXT:    movl $1, %eax
448; X32-NEXT:    retl
449;
450; CHECK64-LABEL: test_retv32i1:
451; CHECK64:       # %bb.0:
452; CHECK64-NEXT:    movl $1, %eax
453; CHECK64-NEXT:    retq
; NOTE(review): returns constant mask 1 as <32 x i1>; the checks show the
; regcall return is a plain 32-bit GPR value in eax on all targets.
454  %a = bitcast i32 1 to <32 x i1>
455  ret <32 x i1> %a
456}
457
458; Test regcall when processing result of v32i1 type
459define dso_local i32 @caller_retv32i1() #0 {
460; X32-LABEL: caller_retv32i1:
461; X32:       # %bb.0: # %entry
462; X32-NEXT:    calll _test_retv32i1
463; X32-NEXT:    incl %eax
464; X32-NEXT:    retl
465;
466; WIN64-LABEL: caller_retv32i1:
467; WIN64:       # %bb.0: # %entry
468; WIN64-NEXT:    pushq %rsi
469; WIN64-NEXT:    .seh_pushreg %rsi
470; WIN64-NEXT:    pushq %rdi
471; WIN64-NEXT:    .seh_pushreg %rdi
472; WIN64-NEXT:    subq $40, %rsp
473; WIN64-NEXT:    .seh_stackalloc 40
474; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
475; WIN64-NEXT:    .seh_savexmm %xmm7, 16
476; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
477; WIN64-NEXT:    .seh_savexmm %xmm6, 0
478; WIN64-NEXT:    .seh_endprologue
479; WIN64-NEXT:    callq test_retv32i1
480; WIN64-NEXT:    incl %eax
481; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
482; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
483; WIN64-NEXT:    addq $40, %rsp
484; WIN64-NEXT:    popq %rdi
485; WIN64-NEXT:    popq %rsi
486; WIN64-NEXT:    retq
487; WIN64-NEXT:    .seh_endproc
488;
489; LINUXOSX64-LABEL: caller_retv32i1:
490; LINUXOSX64:       # %bb.0: # %entry
491; LINUXOSX64-NEXT:    pushq %rax
492; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
493; LINUXOSX64-NEXT:    callq test_retv32i1
494; LINUXOSX64-NEXT:    incl %eax
495; LINUXOSX64-NEXT:    popq %rcx
496; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
497; LINUXOSX64-NEXT:    retq
498entry:
; NOTE(review): bitcasts the v32i1 regcall result to i32 and adds 1; the
; checks confirm this lowers to a single incl on the returned eax.
499  %call = call x86_regcallcc <32 x i1> @test_retv32i1()
500  %c = bitcast <32 x i1> %call to i32
501  %add = add i32 %c, 1
502  ret i32 %add
503}
504
505; Test regcall when receiving arguments of v16i1 type
506declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
507define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)  {
508; X32-LABEL: test_argv16i1:
509; X32:       # %bb.0:
510; X32-NEXT:    subl $76, %esp
511; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
512; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
513; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
514; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
515; X32-NEXT:    kmovd %edx, %k0
516; X32-NEXT:    kmovd %ecx, %k1
517; X32-NEXT:    kmovd %eax, %k2
518; X32-NEXT:    vpmovm2b %k2, %zmm0
519; X32-NEXT:    vpmovm2b %k1, %zmm1
520; X32-NEXT:    vpmovm2b %k0, %zmm2
521; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
522; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
523; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
524; X32-NEXT:    vzeroupper
525; X32-NEXT:    calll _test_argv16i1helper
526; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
527; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
528; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
529; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
530; X32-NEXT:    addl $76, %esp
531; X32-NEXT:    retl
532;
533; WIN64-LABEL: test_argv16i1:
534; WIN64:       # %bb.0:
535; WIN64-NEXT:    pushq %r11
536; WIN64-NEXT:    .seh_pushreg %r11
537; WIN64-NEXT:    pushq %r10
538; WIN64-NEXT:    .seh_pushreg %r10
539; WIN64-NEXT:    subq $88, %rsp
540; WIN64-NEXT:    .seh_stackalloc 88
541; WIN64-NEXT:    .seh_endprologue
542; WIN64-NEXT:    kmovd %edx, %k0
543; WIN64-NEXT:    kmovd %eax, %k1
544; WIN64-NEXT:    kmovd %ecx, %k2
545; WIN64-NEXT:    vpmovm2b %k2, %zmm0
546; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
547; WIN64-NEXT:    vpmovm2b %k1, %zmm0
548; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
549; WIN64-NEXT:    vpmovm2b %k0, %zmm0
550; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
551; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
552; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
553; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
554; WIN64-NEXT:    vzeroupper
555; WIN64-NEXT:    callq test_argv16i1helper
556; WIN64-NEXT:    nop
557; WIN64-NEXT:    addq $88, %rsp
558; WIN64-NEXT:    popq %r10
559; WIN64-NEXT:    popq %r11
560; WIN64-NEXT:    retq
561; WIN64-NEXT:    .seh_endproc
562;
563; LINUXOSX64-LABEL: test_argv16i1:
564; LINUXOSX64:       # %bb.0:
565; LINUXOSX64-NEXT:    subq $136, %rsp
566; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
567; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
568; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
569; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
570; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
571; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
572; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
573; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
574; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
575; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
576; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
577; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
578; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
579; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
580; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
581; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
582; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
583; LINUXOSX64-NEXT:    kmovd %edx, %k0
584; LINUXOSX64-NEXT:    kmovd %ecx, %k1
585; LINUXOSX64-NEXT:    kmovd %eax, %k2
586; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
587; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
588; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
589; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
590; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
591; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
592; LINUXOSX64-NEXT:    vzeroupper
593; LINUXOSX64-NEXT:    callq test_argv16i1helper@PLT
594; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
595; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
596; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
597; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
598; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
599; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
600; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
601; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
602; LINUXOSX64-NEXT:    addq $136, %rsp
603; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
604; LINUXOSX64-NEXT:    retq
; NOTE(review): forwards the three regcall v16i1 args to a non-regcall
; helper; the checks show the GPR-held masks moved into k-registers and
; expanded with vpmovm2b to xmm-sized vector arguments for the helper.
605  %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
606  ret i16 %res
607}
608
609; Test regcall when passing arguments of v16i1 type
610define dso_local i16 @caller_argv16i1() #0 {
611; X32-LABEL: caller_argv16i1:
612; X32:       # %bb.0: # %entry
613; X32-NEXT:    movl $1, %eax
614; X32-NEXT:    movl $1, %ecx
615; X32-NEXT:    movl $1, %edx
616; X32-NEXT:    calll _test_argv16i1
617; X32-NEXT:    retl
618;
619; WIN64-LABEL: caller_argv16i1:
620; WIN64:       # %bb.0: # %entry
621; WIN64-NEXT:    pushq %rsi
622; WIN64-NEXT:    .seh_pushreg %rsi
623; WIN64-NEXT:    pushq %rdi
624; WIN64-NEXT:    .seh_pushreg %rdi
625; WIN64-NEXT:    subq $40, %rsp
626; WIN64-NEXT:    .seh_stackalloc 40
627; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
628; WIN64-NEXT:    .seh_savexmm %xmm7, 16
629; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
630; WIN64-NEXT:    .seh_savexmm %xmm6, 0
631; WIN64-NEXT:    .seh_endprologue
632; WIN64-NEXT:    movl $1, %eax
633; WIN64-NEXT:    movl $1, %ecx
634; WIN64-NEXT:    movl $1, %edx
635; WIN64-NEXT:    callq test_argv16i1
636; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
637; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
638; WIN64-NEXT:    addq $40, %rsp
639; WIN64-NEXT:    popq %rdi
640; WIN64-NEXT:    popq %rsi
641; WIN64-NEXT:    retq
642; WIN64-NEXT:    .seh_endproc
643;
644; LINUXOSX64-LABEL: caller_argv16i1:
645; LINUXOSX64:       # %bb.0: # %entry
646; LINUXOSX64-NEXT:    pushq %rax
647; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
648; LINUXOSX64-NEXT:    movl $1, %eax
649; LINUXOSX64-NEXT:    movl $1, %ecx
650; LINUXOSX64-NEXT:    movl $1, %edx
651; LINUXOSX64-NEXT:    callq test_argv16i1
652; LINUXOSX64-NEXT:    popq %rcx
653; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
654; LINUXOSX64-NEXT:    retq
655entry:
; NOTE(review): passes the constant mask 1 (bitcast to <16 x i1>) in all
; three regcall arguments; the checks pin them to eax/ecx/edx on every target.
656  %v0 = bitcast i16 1 to <16 x i1>
657  %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
658  ret i16 %call
659}
660
661; Test regcall when returning v16i1 type
662define dso_local x86_regcallcc <16 x i1> @test_retv16i1()  {
663; X32-LABEL: test_retv16i1:
664; X32:       # %bb.0:
665; X32-NEXT:    movw $1, %ax
666; X32-NEXT:    retl
667;
668; CHECK64-LABEL: test_retv16i1:
669; CHECK64:       # %bb.0:
670; CHECK64-NEXT:    movw $1, %ax
671; CHECK64-NEXT:    retq
; NOTE(review): returns constant mask 1 as <16 x i1>; the checks show the
; regcall return is a 16-bit GPR value in ax on all targets.
672  %a = bitcast i16 1 to <16 x i1>
673  ret <16 x i1> %a
674}
675
676; Test regcall when processing result of v16i1 type
677define dso_local i16 @caller_retv16i1() #0 {
678; X32-LABEL: caller_retv16i1:
679; X32:       # %bb.0: # %entry
680; X32-NEXT:    calll _test_retv16i1
681; X32-NEXT:    # kill: def $ax killed $ax def $eax
682; X32-NEXT:    incl %eax
683; X32-NEXT:    # kill: def $ax killed $ax killed $eax
684; X32-NEXT:    retl
685;
686; WIN64-LABEL: caller_retv16i1:
687; WIN64:       # %bb.0: # %entry
688; WIN64-NEXT:    pushq %rsi
689; WIN64-NEXT:    .seh_pushreg %rsi
690; WIN64-NEXT:    pushq %rdi
691; WIN64-NEXT:    .seh_pushreg %rdi
692; WIN64-NEXT:    subq $40, %rsp
693; WIN64-NEXT:    .seh_stackalloc 40
694; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
695; WIN64-NEXT:    .seh_savexmm %xmm7, 16
696; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
697; WIN64-NEXT:    .seh_savexmm %xmm6, 0
698; WIN64-NEXT:    .seh_endprologue
699; WIN64-NEXT:    callq test_retv16i1
700; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
701; WIN64-NEXT:    incl %eax
702; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
703; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
704; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
705; WIN64-NEXT:    addq $40, %rsp
706; WIN64-NEXT:    popq %rdi
707; WIN64-NEXT:    popq %rsi
708; WIN64-NEXT:    retq
709; WIN64-NEXT:    .seh_endproc
710;
711; LINUXOSX64-LABEL: caller_retv16i1:
712; LINUXOSX64:       # %bb.0: # %entry
713; LINUXOSX64-NEXT:    pushq %rax
714; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
715; LINUXOSX64-NEXT:    callq test_retv16i1
716; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
717; LINUXOSX64-NEXT:    incl %eax
718; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
719; LINUXOSX64-NEXT:    popq %rcx
720; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
721; LINUXOSX64-NEXT:    retq
722entry:
; NOTE(review): bitcasts the v16i1 regcall result to i16 and adds 1; the
; checks confirm this lowers to incl on the widened eax (ax kill/def markers).
723  %call = call x86_regcallcc <16 x i1> @test_retv16i1()
724  %c = bitcast <16 x i1> %call to i16
725  %add = add i16 %c, 1
726  ret i16 %add
727}
728
729; Test regcall when receiving arguments of v8i1 type
; The helper uses the default C calling convention, so test_argv8i1 must
; re-expand each v8i1 mask argument into a vector register before the call.
730declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
; The checks below show regcall receiving the three <8 x i1> arguments as
; masks in GPRs (read via kmovd from eax/ecx/edx), moving them into
; k-registers, widening with vpmovm2w, and then calling the C-convention
; helper.  The checks also pin the spill/reload of xmm4-xmm7 (X32) and
; xmm8-xmm15 (LINUXOSX64) around that call.
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate them instead of editing by hand.
731define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)  {
732; X32-LABEL: test_argv8i1:
733; X32:       # %bb.0:
734; X32-NEXT:    subl $76, %esp
735; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
736; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
737; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
738; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
739; X32-NEXT:    kmovd %edx, %k0
740; X32-NEXT:    kmovd %ecx, %k1
741; X32-NEXT:    kmovd %eax, %k2
742; X32-NEXT:    vpmovm2w %k2, %zmm0
743; X32-NEXT:    vpmovm2w %k1, %zmm1
744; X32-NEXT:    vpmovm2w %k0, %zmm2
745; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
746; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
747; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
748; X32-NEXT:    vzeroupper
749; X32-NEXT:    calll _test_argv8i1helper
750; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
751; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
752; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
753; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
754; X32-NEXT:    addl $76, %esp
755; X32-NEXT:    retl
756;
757; WIN64-LABEL: test_argv8i1:
758; WIN64:       # %bb.0:
759; WIN64-NEXT:    pushq %r11
760; WIN64-NEXT:    .seh_pushreg %r11
761; WIN64-NEXT:    pushq %r10
762; WIN64-NEXT:    .seh_pushreg %r10
763; WIN64-NEXT:    subq $88, %rsp
764; WIN64-NEXT:    .seh_stackalloc 88
765; WIN64-NEXT:    .seh_endprologue
766; WIN64-NEXT:    kmovd %edx, %k0
767; WIN64-NEXT:    kmovd %eax, %k1
768; WIN64-NEXT:    kmovd %ecx, %k2
769; WIN64-NEXT:    vpmovm2w %k2, %zmm0
770; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
771; WIN64-NEXT:    vpmovm2w %k1, %zmm0
772; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
773; WIN64-NEXT:    vpmovm2w %k0, %zmm0
774; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
775; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
776; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
777; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
778; WIN64-NEXT:    vzeroupper
779; WIN64-NEXT:    callq test_argv8i1helper
780; WIN64-NEXT:    nop
781; WIN64-NEXT:    addq $88, %rsp
782; WIN64-NEXT:    popq %r10
783; WIN64-NEXT:    popq %r11
784; WIN64-NEXT:    retq
785; WIN64-NEXT:    .seh_endproc
786;
787; LINUXOSX64-LABEL: test_argv8i1:
788; LINUXOSX64:       # %bb.0:
789; LINUXOSX64-NEXT:    subq $136, %rsp
790; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
791; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
792; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
793; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
794; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
795; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
796; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
797; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
798; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
799; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
800; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
801; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
802; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
803; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
804; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
805; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
806; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
807; LINUXOSX64-NEXT:    kmovd %edx, %k0
808; LINUXOSX64-NEXT:    kmovd %ecx, %k1
809; LINUXOSX64-NEXT:    kmovd %eax, %k2
810; LINUXOSX64-NEXT:    vpmovm2w %k2, %zmm0
811; LINUXOSX64-NEXT:    vpmovm2w %k1, %zmm1
812; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm2
813; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
814; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
815; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
816; LINUXOSX64-NEXT:    vzeroupper
817; LINUXOSX64-NEXT:    callq test_argv8i1helper@PLT
818; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
819; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
820; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
821; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
822; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
823; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
824; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
825; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
826; LINUXOSX64-NEXT:    addq $136, %rsp
827; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
828; LINUXOSX64-NEXT:    retq
829  %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
830  ret i8 %res
831}
832
833; Test regcall when passing arguments of v8i1 type
; Each <8 x i1> argument is the constant i8 1 bitcast to a mask, so the checks
; expect the regcall caller to materialize all three arguments as immediate $1
; in eax/ecx/edx (no vector registers involved on the caller side).
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate them instead of editing by hand.
834define dso_local i8 @caller_argv8i1() #0 {
835; X32-LABEL: caller_argv8i1:
836; X32:       # %bb.0: # %entry
837; X32-NEXT:    movl $1, %eax
838; X32-NEXT:    movl $1, %ecx
839; X32-NEXT:    movl $1, %edx
840; X32-NEXT:    calll _test_argv8i1
841; X32-NEXT:    retl
842;
843; WIN64-LABEL: caller_argv8i1:
844; WIN64:       # %bb.0: # %entry
845; WIN64-NEXT:    pushq %rsi
846; WIN64-NEXT:    .seh_pushreg %rsi
847; WIN64-NEXT:    pushq %rdi
848; WIN64-NEXT:    .seh_pushreg %rdi
849; WIN64-NEXT:    subq $40, %rsp
850; WIN64-NEXT:    .seh_stackalloc 40
851; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
852; WIN64-NEXT:    .seh_savexmm %xmm7, 16
853; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
854; WIN64-NEXT:    .seh_savexmm %xmm6, 0
855; WIN64-NEXT:    .seh_endprologue
856; WIN64-NEXT:    movl $1, %eax
857; WIN64-NEXT:    movl $1, %ecx
858; WIN64-NEXT:    movl $1, %edx
859; WIN64-NEXT:    callq test_argv8i1
860; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
861; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
862; WIN64-NEXT:    addq $40, %rsp
863; WIN64-NEXT:    popq %rdi
864; WIN64-NEXT:    popq %rsi
865; WIN64-NEXT:    retq
866; WIN64-NEXT:    .seh_endproc
867;
868; LINUXOSX64-LABEL: caller_argv8i1:
869; LINUXOSX64:       # %bb.0: # %entry
870; LINUXOSX64-NEXT:    pushq %rax
871; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
872; LINUXOSX64-NEXT:    movl $1, %eax
873; LINUXOSX64-NEXT:    movl $1, %ecx
874; LINUXOSX64-NEXT:    movl $1, %edx
875; LINUXOSX64-NEXT:    callq test_argv8i1
876; LINUXOSX64-NEXT:    popq %rcx
877; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
878; LINUXOSX64-NEXT:    retq
879entry:
880  %v0 = bitcast i8 1 to <8 x i1>
881  %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
882  ret i8 %call
883}
884
885; Test regcall when returning v8i1 type
; A regcall <8 x i1> return is passed back as a mask in an 8-bit GPR, so the
; checks expect just `movb $1, %al` on all targets.
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate them instead of editing by hand.
886define dso_local x86_regcallcc <8 x i1> @test_retv8i1()  {
887; X32-LABEL: test_retv8i1:
888; X32:       # %bb.0:
889; X32-NEXT:    movb $1, %al
890; X32-NEXT:    retl
891;
892; CHECK64-LABEL: test_retv8i1:
893; CHECK64:       # %bb.0:
894; CHECK64-NEXT:    movb $1, %al
895; CHECK64-NEXT:    retq
896  %a = bitcast i8 1 to <8 x i1>
897  ret <8 x i1> %a
898}
899
900; Test regcall when processing result of v8i1 type
; The regcall callee returns the mask in al; this function itself uses the
; default calling convention and returns <8 x i1>, so the checks expect the
; result to be widened via kmovd + vpmovm2w into xmm0, followed by vzeroupper
; (the vpmovm2w destination is a zmm register).
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate them instead of editing by hand.
901define dso_local <8 x i1> @caller_retv8i1() #0 {
902; X32-LABEL: caller_retv8i1:
903; X32:       # %bb.0: # %entry
904; X32-NEXT:    calll _test_retv8i1
905; X32-NEXT:    # kill: def $al killed $al def $eax
906; X32-NEXT:    kmovd %eax, %k0
907; X32-NEXT:    vpmovm2w %k0, %zmm0
908; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
909; X32-NEXT:    vzeroupper
910; X32-NEXT:    retl
911;
912; WIN64-LABEL: caller_retv8i1:
913; WIN64:       # %bb.0: # %entry
914; WIN64-NEXT:    pushq %rsi
915; WIN64-NEXT:    .seh_pushreg %rsi
916; WIN64-NEXT:    pushq %rdi
917; WIN64-NEXT:    .seh_pushreg %rdi
918; WIN64-NEXT:    subq $40, %rsp
919; WIN64-NEXT:    .seh_stackalloc 40
920; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
921; WIN64-NEXT:    .seh_savexmm %xmm7, 16
922; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
923; WIN64-NEXT:    .seh_savexmm %xmm6, 0
924; WIN64-NEXT:    .seh_endprologue
925; WIN64-NEXT:    callq test_retv8i1
926; WIN64-NEXT:    # kill: def $al killed $al def $eax
927; WIN64-NEXT:    kmovd %eax, %k0
928; WIN64-NEXT:    vpmovm2w %k0, %zmm0
929; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
930; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
931; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
932; WIN64-NEXT:    addq $40, %rsp
933; WIN64-NEXT:    popq %rdi
934; WIN64-NEXT:    popq %rsi
935; WIN64-NEXT:    vzeroupper
936; WIN64-NEXT:    retq
937; WIN64-NEXT:    .seh_endproc
938;
939; LINUXOSX64-LABEL: caller_retv8i1:
940; LINUXOSX64:       # %bb.0: # %entry
941; LINUXOSX64-NEXT:    pushq %rax
942; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
943; LINUXOSX64-NEXT:    callq test_retv8i1
944; LINUXOSX64-NEXT:    # kill: def $al killed $al def $eax
945; LINUXOSX64-NEXT:    kmovd %eax, %k0
946; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm0
947; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
948; LINUXOSX64-NEXT:    popq %rax
949; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
950; LINUXOSX64-NEXT:    vzeroupper
951; LINUXOSX64-NEXT:    retq
952entry:
953  %call = call x86_regcallcc <8 x i1> @test_retv8i1()
954  ret <8 x i1> %call
955}
956
957