; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64

; Test regcall when receiving arguments of v64i1 type
define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32:       # %bb.0:
; X32-NEXT:    addl %edx, %eax
; X32-NEXT:    adcl %edi, %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    addq %rcx, %rax
; WIN64-NEXT:    addq %rdx, %rax
; WIN64-NEXT:    addq %rdi, %rax
; WIN64-NEXT:    leaq (%rsi,%r8), %rcx
; WIN64-NEXT:    addq %r9, %rcx
; WIN64-NEXT:    addq %rcx, %rax
; WIN64-NEXT:    leaq (%r10,%r11), %rcx
; WIN64-NEXT:    addq %r12, %rcx
; WIN64-NEXT:    addq %r14, %rcx
; WIN64-NEXT:    addq %rcx, %rax
; WIN64-NEXT:    addq %r15, %rax
; WIN64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT:    retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    addq %rcx, %rax
; LINUXOSX64-NEXT:    addq %rdx, %rax
; LINUXOSX64-NEXT:    addq %rdi, %rax
; LINUXOSX64-NEXT:    leaq (%rsi,%r8), %rcx
; LINUXOSX64-NEXT:    addq %r9, %rcx
; LINUXOSX64-NEXT:    addq %rcx, %rax
; LINUXOSX64-NEXT:    leaq (%r12,%r13), %rcx
; LINUXOSX64-NEXT:    addq %r14, %rcx
; LINUXOSX64-NEXT:    addq %r15, %rcx
; LINUXOSX64-NEXT:    addq %rcx, %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    retq
  %y0 = bitcast <64 x i1> %x0 to i64
  %y1 = bitcast <64 x i1> %x1 to i64
  %y2 = bitcast <64 x i1> %x2 to i64
  %y3 = bitcast <64 x i1> %x3 to i64
  %y4 = bitcast <64 x i1> %x4 to i64
  %y5 = bitcast <64 x i1> %x5 to i64
  %y6 = bitcast <64 x i1> %x6 to i64
  %y7 = bitcast <64 x i1> %x7 to i64
  %y8 = bitcast <64 x i1> %x8 to i64
  %y9 = bitcast <64 x i1> %x9 to i64
  %y10 = bitcast <64 x i1> %x10 to i64
  %y11 = bitcast <64 x i1> %x11 to i64
  %y12 = bitcast <64 x i1> %x12 to i64
  %add1 = add i64 %y0, %y1
  %add2 = add i64 %add1, %y2
  %add3 = add i64 %add2, %y3
  %add4 = add i64 %add3, %y4
  %add5 = add i64 %add4, %y5
  %add6 = add i64 %add5, %y6
  %add7 = add i64 %add6, %y7
  %add8 = add i64 %add7, %y8
  %add9 = add i64 %add8, %y9
  %add10 = add i64 %add9, %y10
  %add11 = add i64 %add10, %y11
  %add12 = add i64 %add11, %y12
  ret i64 %add12
}

; Test regcall when passing arguments of v64i1 type
define dso_local i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %edi
; X32-NEXT:    subl $88, %esp
; X32-NEXT:    vbroadcastsd {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovups %zmm0, (%esp)
; X32-NEXT:    movl $1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $2, %edx
; X32-NEXT:    movl $1, %edi
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv64i1
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl $88, %esp
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %r15
; WIN64-NEXT:    .seh_pushreg %r15
; WIN64-NEXT:    pushq %r14
; WIN64-NEXT:    .seh_pushreg %r14
; WIN64-NEXT:    pushq %r12
; WIN64-NEXT:    .seh_pushreg %r12
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $48, %rsp
; WIN64-NEXT:    .seh_stackalloc 48
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 32
; WIN64-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 16
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT:    movq %rax, (%rsp)
; WIN64-NEXT:    movq %rax, %rcx
; WIN64-NEXT:    movq %rax, %rdx
; WIN64-NEXT:    movq %rax, %rdi
; WIN64-NEXT:    movq %rax, %r8
; WIN64-NEXT:    movq %rax, %r9
; WIN64-NEXT:    movq %rax, %r10
; WIN64-NEXT:    movq %rax, %r11
; WIN64-NEXT:    movq %rax, %r12
; WIN64-NEXT:    movq %rax, %r14
; WIN64-NEXT:    movq %rax, %r15
; WIN64-NEXT:    movq %rax, %rsi
; WIN64-NEXT:    callq test_argv64i1
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $48, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    popq %r12
; WIN64-NEXT:    popq %r14
; WIN64-NEXT:    popq %r15
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    pushq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    pushq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    pushq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT:    .cfi_offset %r12, -40
; LINUXOSX64-NEXT:    .cfi_offset %r13, -32
; LINUXOSX64-NEXT:    .cfi_offset %r14, -24
; LINUXOSX64-NEXT:    .cfi_offset %r15, -16
; LINUXOSX64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT:    movq %rax, %rcx
; LINUXOSX64-NEXT:    movq %rax, %rdx
; LINUXOSX64-NEXT:    movq %rax, %rdi
; LINUXOSX64-NEXT:    movq %rax, %r8
; LINUXOSX64-NEXT:    movq %rax, %r9
; LINUXOSX64-NEXT:    movq %rax, %r12
; LINUXOSX64-NEXT:    movq %rax, %r13
; LINUXOSX64-NEXT:    movq %rax, %r14
; LINUXOSX64-NEXT:    movq %rax, %r15
; LINUXOSX64-NEXT:    movq %rax, %rsi
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    callq test_argv64i1
; LINUXOSX64-NEXT:    addq $24, %rsp
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT:    popq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    popq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    popq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i64 4294967298 to <64 x i1>
  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0)
  ret i64 %call
}

; Test regcall when returning v64i1 type
define dso_local x86_regcallcc <64 x i1> @test_retv64i1() {
; X32-LABEL: test_retv64i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT:    retq
  %a = bitcast i64 4294967298 to <64 x i1>
  ret <64 x i1> %a
}

; Test regcall when processing result of v64i1 type
define dso_local <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv64i1
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kunpckdq %k0, %k1, %k0
; X32-NEXT:    vpmovm2b %k0, %zmm0
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv64i1
; WIN64-NEXT:    kmovq %rax, %k0
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv64i1
; LINUXOSX64-NEXT:    kmovq %rax, %k0
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
  ret <64 x i1> %call
}

; Test regcall when receiving arguments of v32i1 type
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
; X32-LABEL: test_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT:    calll _test_argv32i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rbp
; WIN64-NEXT:    .seh_pushreg %rbp
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $128, %rsp
; WIN64-NEXT:    .seh_stackalloc 128
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT:    .seh_setframe %rbp, 128
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    andq $-32, %rsp
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k1, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv32i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    movq %rbp, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    popq %rbp
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT:    callq test_argv32i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
  ret i32 %res
}

; Test regcall when passing arguments of v32i1 type
define dso_local i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv32i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv32i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv32i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i32 1 to <32 x i1>
  %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
  ret i32 %call
}

; Test regcall when returning v32i1 type
define dso_local x86_regcallcc <32 x i1> @test_retv32i1() {
; X32-LABEL: test_retv32i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl $1, %eax
; CHECK64-NEXT:    retq
  %a = bitcast i32 1 to <32 x i1>
  ret <32 x i1> %a
}

; Test regcall when processing result of v32i1 type
define dso_local i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv32i1
; X32-NEXT:    incl %eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv32i1
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv32i1
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <32 x i1> @test_retv32i1()
  %c = bitcast <32 x i1> %call to i32
  %add = add i32 %c, 1
  ret i32 %add
}

; Test regcall when receiving arguments of v16i1 type
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
; X32-LABEL: test_argv16i1:
; X32:       # %bb.0:
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv16i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $88, %rsp
; WIN64-NEXT:    .seh_stackalloc 88
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k1, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv16i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $88, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv16i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
  %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
  ret i16 %res
}

; Test regcall when passing arguments of v16i1 type
define dso_local i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv16i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv16i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv16i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i16 1 to <16 x i1>
  %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
  ret i16 %call
}

; Test regcall when returning v16i1 type
define dso_local x86_regcallcc <16 x i1> @test_retv16i1() {
; X32-LABEL: test_retv16i1:
; X32:       # %bb.0:
; X32-NEXT:    movw $1, %ax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movw $1, %ax
; CHECK64-NEXT:    retq
  %a = bitcast i16 1 to <16 x i1>
  ret <16 x i1> %a
}

; Test regcall when processing result of v16i1 type
define dso_local i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv16i1
; X32-NEXT:    # kill: def $ax killed $ax def $eax
; X32-NEXT:    incl %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv16i1
; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv16i1
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <16 x i1> @test_retv16i1()
  %c = bitcast <16 x i1> %call to i16
  %add = add i16 %c, 1
  ret i16 %add
}

; Test regcall when receiving arguments of v8i1 type
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
; X32-LABEL: test_argv8i1:
; X32:       # %bb.0:
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2w %k2, %zmm0
; X32-NEXT:    vpmovm2w %k1, %zmm1
; X32-NEXT:    vpmovm2w %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv8i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $88, %rsp
; WIN64-NEXT:    .seh_stackalloc 88
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2w %k2, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2w %k1, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv8i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $88, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv8i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
  %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
  ret i8 %res
}

; Test regcall when passing arguments of v8i1 type
define dso_local i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv8i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv8i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv8i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i8 1 to <8 x i1>
  %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
  ret i8 %call
}

; Test regcall when returning v8i1 type
define dso_local x86_regcallcc <8 x i1> @test_retv8i1() {
; X32-LABEL: test_retv8i1:
; X32:       # %bb.0:
; X32-NEXT:    movb $1, %al
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
  %a = bitcast i8 1 to <8 x i1>
  ret <8 x i1> %a
}

; Test regcall when processing result of v8i1 type
define dso_local <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv8i1
; X32-NEXT:    # kill: def $al killed $al def $eax
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    vpmovm2w %k0, %zmm0
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv8i1
; WIN64-NEXT:    # kill: def $al killed $al def $eax
; WIN64-NEXT:    kmovd %eax, %k0
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv8i1
; LINUXOSX64-NEXT:    # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT:    kmovd %eax, %k0
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <8 x i1> @test_retv8i1()
  ret <8 x i1> %call
}