; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-gnu < %s -mattr=fma | FileCheck %s

; Basic test coverage for FREM
;
; Every function below applies the IR frem instruction to one floating-point
; type (scalar or vector), stores the result through %p3 and returns void.
; The assertion lines are matched against the assembly produced by the llc
; invocation above (x86_64-linux-gnu, -mattr=fma).
;
; The assertion lines are autogenerated (see the NOTE on the first line):
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.

; frem on half. Expected lowering: each operand is extended to float with
; __extendhfsf2, the remainder is computed by fmodf, the result is truncated
; with __truncsfhf2, and word 0 of xmm0 is stored through the saved pointer.
define void @frem_f16(half %a0, half %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f16:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $16, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: vpextrw $0, %xmm0, (%rbx)
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem half %a0, %a1
  store half %frem, ptr%p3
  ret void
}

; frem on float. Expected lowering: a single fmodf call whose result is
; stored with vmovss.
define void @frem_f32(float %a0, float %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovss %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem float %a0, %a1
  store float %frem, ptr%p3
  ret void
}

; frem on double. Expected lowering: a single fmod call whose result is
; stored with vmovsd.
define void @frem_f64(double %a0, double %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovsd %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem double %a0, %a1
  store double %frem, ptr%p3
  ret void
}

; frem on x86_fp80. Expected lowering: both operands are moved through the
; x87 stack (fldt/fstpt) into stack slots, fmodl is called, and the result
; is stored with fstpt.
define void @frem_f80(x86_fp80 %a0, x86_fp80 %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f80:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq fmodl@PLT
; CHECK-NEXT: fstpt (%rbx)
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem x86_fp80 %a0, %a1
  store x86_fp80 %frem, ptr%p3
  ret void
}

; frem on fp128. Expected lowering: a single call to fmodf128 whose result
; is stored with vmovaps.
; NOTE(review): the call pattern here has no @PLT suffix, unlike the other
; libcalls in this file; confirm against freshly regenerated output.
define void @frem_f128(fp128 %a0, fp128 %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f128:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq fmodf128
; CHECK-NEXT: vmovaps %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem fp128 %a0, %a1
  store fp128 %frem, ptr%p3
  ret void
}

; frem on <16 x float>. Expected lowering: the operation is scalarized into
; 16 fmodf calls; lanes are extracted with vmovshdup / vpermilpd / vpermilps /
; vextractf128, results are reassembled with vinsertps, and the vector is
; written back as four 16-byte stores.
define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $160, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vmovaps %xmm3, %xmm1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, 16(%rbx)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, (%rbx)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 48(%rbx)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 32(%rbx)
; CHECK-NEXT: addq $160, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <16 x float> %a0, %a1
  store <16 x float> %frem, ptr%p3
  ret void
}

; frem on <8 x float>. Expected lowering: scalarized into 8 fmodf calls,
; reassembled with vinsertps and written back as two 16-byte stores.
define void @frem_v8f32(<8 x float> %a0, <8 x float> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $96, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, 16(%rbx)
; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, (%rbx)
; CHECK-NEXT: addq $96, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <8 x float> %a0, %a1
  store <8 x float> %frem, ptr%p3
  ret void
}

; frem on <4 x float>. Expected lowering: scalarized into 4 fmodf calls,
; reassembled with vinsertps and written back as one 16-byte store.
define void @frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,1,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovaps %xmm0, (%rbx)
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <4 x float> %a0, %a1
  store <4 x float> %frem, ptr%p3
  ret void
}

; frem on <8 x double>. Expected lowering: scalarized into 8 fmod calls;
; element pairs are recombined with vunpcklpd and the vector is written back
; as four 16-byte stores.
define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $160, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm1, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vmovaps %xmm3, %xmm1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, 16(%rbx)
; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, (%rbx)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 48(%rbx)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 32(%rbx)
; CHECK-NEXT: addq $160, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <8 x double> %a0, %a1
  store <8 x double> %frem, ptr%p3
  ret void
}

; frem on <4 x double>. Expected lowering: scalarized into 4 fmod calls,
; recombined with vunpcklpd and written back as two 16-byte stores.
define void @frem_v4f64(<4 x double> %a0, <4 x double> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $96, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, 16(%rbx)
; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, (%rbx)
; CHECK-NEXT: addq $96, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <4 x double> %a0, %a1
  store <4 x double> %frem, ptr%p3
  ret void
}

; frem on <2 x double>. Expected lowering: scalarized into 2 fmod calls,
; recombined with vunpcklpd and written back as one 16-byte store.
define void @frem_v2f64(<2 x double> %a0, <2 x double> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = mem[1,0]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm0, (%rbx)
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <2 x double> %a0, %a1
  store <2 x double> %frem, ptr%p3
  ret void
}

; NOTE(review): the frem_v32f16 definition that starts here runs past the end of the reviewed range, so its text is left exactly as found.
532define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3) nounwind { 533; CHECK-LABEL: frem_v32f16: 534; CHECK: # %bb.0: 535; CHECK-NEXT: pushq %rbx 536; CHECK-NEXT: subq $224, %rsp 537; CHECK-NEXT: movq %rdi, %rbx 538; CHECK-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 539; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 540; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 541; CHECK-NEXT: vmovups %ymm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 542; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm0 543; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 544; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 545; CHECK-NEXT: vzeroupper 546; CHECK-NEXT: callq __extendhfsf2@PLT 547; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 548; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 549; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 550; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 551; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 552; CHECK-NEXT: vzeroupper 553; CHECK-NEXT: callq __extendhfsf2@PLT 554; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 555; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 556; CHECK-NEXT: callq fmodf@PLT 557; CHECK-NEXT: callq __truncsfhf2@PLT 558; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 559; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 560; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 561; CHECK-NEXT: callq __extendhfsf2@PLT 562; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 563; CHECK-NEXT: vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 564; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 565; CHECK-NEXT: callq __extendhfsf2@PLT 566; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 567; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 568; CHECK-NEXT: callq fmodf@PLT 569; CHECK-NEXT: callq __truncsfhf2@PLT 570; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 571; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 572; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 573; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 574; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 575; CHECK-NEXT: callq __extendhfsf2@PLT 576; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 577; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 578; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 579; CHECK-NEXT: callq __extendhfsf2@PLT 580; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 581; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 582; CHECK-NEXT: callq fmodf@PLT 583; CHECK-NEXT: callq __truncsfhf2@PLT 584; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 585; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 586; CHECK-NEXT: # xmm0 = mem[1,0] 587; CHECK-NEXT: callq __extendhfsf2@PLT 588; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 589; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 590; CHECK-NEXT: # xmm0 = mem[1,0] 591; CHECK-NEXT: callq __extendhfsf2@PLT 592; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 593; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 594; CHECK-NEXT: callq fmodf@PLT 595; CHECK-NEXT: callq __truncsfhf2@PLT 596; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 597; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 598; CHECK-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 599; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 600; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 601; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 602; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 603; CHECK-NEXT: callq __extendhfsf2@PLT 604; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 605; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 606; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 607; 
CHECK-NEXT: callq __extendhfsf2@PLT 608; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 609; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 610; CHECK-NEXT: callq fmodf@PLT 611; CHECK-NEXT: callq __truncsfhf2@PLT 612; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 613; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 614; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 615; CHECK-NEXT: callq __extendhfsf2@PLT 616; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 617; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 618; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 619; CHECK-NEXT: callq __extendhfsf2@PLT 620; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 621; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 622; CHECK-NEXT: callq fmodf@PLT 623; CHECK-NEXT: callq __truncsfhf2@PLT 624; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 625; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 626; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 627; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 628; CHECK-NEXT: callq __extendhfsf2@PLT 629; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 630; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 631; CHECK-NEXT: callq __extendhfsf2@PLT 632; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 633; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 634; CHECK-NEXT: callq fmodf@PLT 635; CHECK-NEXT: callq __truncsfhf2@PLT 636; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 637; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 638; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 639; CHECK-NEXT: callq __extendhfsf2@PLT 640; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 641; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 
# 16-byte Reload 642; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 643; CHECK-NEXT: callq __extendhfsf2@PLT 644; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 645; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 646; CHECK-NEXT: callq fmodf@PLT 647; CHECK-NEXT: callq __truncsfhf2@PLT 648; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 649; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 650; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 651; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 652; CHECK-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 653; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 654; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 655; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 656; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 657; CHECK-NEXT: vzeroupper 658; CHECK-NEXT: callq __extendhfsf2@PLT 659; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 660; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 661; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 662; CHECK-NEXT: vzeroupper 663; CHECK-NEXT: callq __extendhfsf2@PLT 664; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 665; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 666; CHECK-NEXT: callq fmodf@PLT 667; CHECK-NEXT: callq __truncsfhf2@PLT 668; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 669; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 670; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 671; CHECK-NEXT: callq __extendhfsf2@PLT 672; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 673; CHECK-NEXT: vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 674; 
CHECK-NEXT: # xmm0 = mem[3,3,3,3] 675; CHECK-NEXT: callq __extendhfsf2@PLT 676; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 677; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 678; CHECK-NEXT: callq fmodf@PLT 679; CHECK-NEXT: callq __truncsfhf2@PLT 680; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 681; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 682; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 683; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 684; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 685; CHECK-NEXT: vzeroupper 686; CHECK-NEXT: callq __extendhfsf2@PLT 687; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 688; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 689; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 690; CHECK-NEXT: vzeroupper 691; CHECK-NEXT: callq __extendhfsf2@PLT 692; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 693; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 694; CHECK-NEXT: callq fmodf@PLT 695; CHECK-NEXT: callq __truncsfhf2@PLT 696; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 697; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 698; CHECK-NEXT: # xmm0 = mem[1,0] 699; CHECK-NEXT: callq __extendhfsf2@PLT 700; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 701; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 702; CHECK-NEXT: # xmm0 = mem[1,0] 703; CHECK-NEXT: callq __extendhfsf2@PLT 704; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 705; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 706; CHECK-NEXT: callq fmodf@PLT 707; CHECK-NEXT: callq __truncsfhf2@PLT 708; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 709; CHECK-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 710; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 711; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 712; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 713; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 714; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 715; CHECK-NEXT: vzeroupper 716; CHECK-NEXT: callq __extendhfsf2@PLT 717; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 718; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 719; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 720; CHECK-NEXT: vzeroupper 721; CHECK-NEXT: callq __extendhfsf2@PLT 722; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 723; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 724; CHECK-NEXT: callq fmodf@PLT 725; CHECK-NEXT: callq __truncsfhf2@PLT 726; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 727; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 728; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 729; CHECK-NEXT: callq __extendhfsf2@PLT 730; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 731; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 732; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 733; CHECK-NEXT: callq __extendhfsf2@PLT 734; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 735; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 736; CHECK-NEXT: callq fmodf@PLT 737; CHECK-NEXT: callq __truncsfhf2@PLT 738; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 739; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 740; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 741; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 742; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 743; CHECK-NEXT: vzeroupper 744; CHECK-NEXT: callq __extendhfsf2@PLT 745; CHECK-NEXT: vmovss %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 746; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 747; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 748; CHECK-NEXT: vzeroupper 749; CHECK-NEXT: callq __extendhfsf2@PLT 750; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 751; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 752; CHECK-NEXT: callq fmodf@PLT 753; CHECK-NEXT: callq __truncsfhf2@PLT 754; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 755; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 756; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 757; CHECK-NEXT: vzeroupper 758; CHECK-NEXT: callq __extendhfsf2@PLT 759; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 760; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 761; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 762; CHECK-NEXT: vzeroupper 763; CHECK-NEXT: callq __extendhfsf2@PLT 764; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 765; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 766; CHECK-NEXT: callq fmodf@PLT 767; CHECK-NEXT: callq __truncsfhf2@PLT 768; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 769; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 770; CHECK-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 771; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 772; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 773; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 774; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 775; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 776; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 777; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 778; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 779; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 780; CHECK-NEXT: vzeroupper 781; CHECK-NEXT: callq __extendhfsf2@PLT 782; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 783; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 784; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 785; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 786; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 787; CHECK-NEXT: vzeroupper 788; CHECK-NEXT: callq __extendhfsf2@PLT 789; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 790; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 791; CHECK-NEXT: callq fmodf@PLT 792; CHECK-NEXT: callq __truncsfhf2@PLT 793; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 794; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 795; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 796; CHECK-NEXT: callq __extendhfsf2@PLT 797; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 798; CHECK-NEXT: vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 799; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 800; CHECK-NEXT: callq __extendhfsf2@PLT 801; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 802; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 803; CHECK-NEXT: callq fmodf@PLT 804; CHECK-NEXT: callq __truncsfhf2@PLT 805; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 806; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 807; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 808; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 809; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 810; CHECK-NEXT: callq __extendhfsf2@PLT 811; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 812; CHECK-NEXT: 
vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 813; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 814; CHECK-NEXT: callq __extendhfsf2@PLT 815; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 816; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 817; CHECK-NEXT: callq fmodf@PLT 818; CHECK-NEXT: callq __truncsfhf2@PLT 819; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 820; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 821; CHECK-NEXT: # xmm0 = mem[1,0] 822; CHECK-NEXT: callq __extendhfsf2@PLT 823; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 824; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 825; CHECK-NEXT: # xmm0 = mem[1,0] 826; CHECK-NEXT: callq __extendhfsf2@PLT 827; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 828; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 829; CHECK-NEXT: callq fmodf@PLT 830; CHECK-NEXT: callq __truncsfhf2@PLT 831; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 832; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 833; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 834; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 835; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 836; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 837; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 838; CHECK-NEXT: callq __extendhfsf2@PLT 839; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 840; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 841; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 842; CHECK-NEXT: callq __extendhfsf2@PLT 843; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 844; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 845; CHECK-NEXT: callq fmodf@PLT 846; CHECK-NEXT: callq __truncsfhf2@PLT 847; CHECK-NEXT: vmovdqa %xmm0, (%rsp) 
# 16-byte Spill 848; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 849; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 850; CHECK-NEXT: callq __extendhfsf2@PLT 851; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 852; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 853; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 854; CHECK-NEXT: callq __extendhfsf2@PLT 855; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 856; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 857; CHECK-NEXT: callq fmodf@PLT 858; CHECK-NEXT: callq __truncsfhf2@PLT 859; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 860; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 861; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 862; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 863; CHECK-NEXT: callq __extendhfsf2@PLT 864; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 865; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 866; CHECK-NEXT: callq __extendhfsf2@PLT 867; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 868; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 869; CHECK-NEXT: callq fmodf@PLT 870; CHECK-NEXT: callq __truncsfhf2@PLT 871; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 872; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 873; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 874; CHECK-NEXT: callq __extendhfsf2@PLT 875; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 876; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 877; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 878; CHECK-NEXT: callq __extendhfsf2@PLT 879; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 880; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 881; CHECK-NEXT: callq fmodf@PLT 882; CHECK-NEXT: callq __truncsfhf2@PLT 883; CHECK-NEXT: vmovdqa 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 884; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 885; CHECK-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 886; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 887; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 888; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 889; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 890; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 891; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 892; CHECK-NEXT: vzeroupper 893; CHECK-NEXT: callq __extendhfsf2@PLT 894; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 895; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 896; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 897; CHECK-NEXT: vzeroupper 898; CHECK-NEXT: callq __extendhfsf2@PLT 899; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 900; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 901; CHECK-NEXT: callq fmodf@PLT 902; CHECK-NEXT: callq __truncsfhf2@PLT 903; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 904; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 905; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 906; CHECK-NEXT: callq __extendhfsf2@PLT 907; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 908; CHECK-NEXT: vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 909; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 910; CHECK-NEXT: callq __extendhfsf2@PLT 911; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 912; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 913; CHECK-NEXT: callq fmodf@PLT 914; CHECK-NEXT: callq __truncsfhf2@PLT 915; CHECK-NEXT: vpunpcklwd 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 916; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 917; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 918; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 919; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 920; CHECK-NEXT: vzeroupper 921; CHECK-NEXT: callq __extendhfsf2@PLT 922; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 923; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 924; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 925; CHECK-NEXT: vzeroupper 926; CHECK-NEXT: callq __extendhfsf2@PLT 927; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 928; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 929; CHECK-NEXT: callq fmodf@PLT 930; CHECK-NEXT: callq __truncsfhf2@PLT 931; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 932; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 933; CHECK-NEXT: # xmm0 = mem[1,0] 934; CHECK-NEXT: callq __extendhfsf2@PLT 935; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 936; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 937; CHECK-NEXT: # xmm0 = mem[1,0] 938; CHECK-NEXT: callq __extendhfsf2@PLT 939; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 940; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 941; CHECK-NEXT: callq fmodf@PLT 942; CHECK-NEXT: callq __truncsfhf2@PLT 943; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 944; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 945; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 946; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 947; CHECK-NEXT: vmovdqa %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 948; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 949; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 950; CHECK-NEXT: vzeroupper 951; CHECK-NEXT: callq __extendhfsf2@PLT 952; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 953; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 954; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 955; CHECK-NEXT: vzeroupper 956; CHECK-NEXT: callq __extendhfsf2@PLT 957; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 958; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 959; CHECK-NEXT: callq fmodf@PLT 960; CHECK-NEXT: callq __truncsfhf2@PLT 961; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 962; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 963; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 964; CHECK-NEXT: callq __extendhfsf2@PLT 965; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 966; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 967; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 968; CHECK-NEXT: callq __extendhfsf2@PLT 969; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 970; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 971; CHECK-NEXT: callq fmodf@PLT 972; CHECK-NEXT: callq __truncsfhf2@PLT 973; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 974; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 975; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 976; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 977; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 978; CHECK-NEXT: vzeroupper 979; CHECK-NEXT: callq __extendhfsf2@PLT 980; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 981; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 982; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 983; CHECK-NEXT: vzeroupper 984; CHECK-NEXT: 
callq __extendhfsf2@PLT 985; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 986; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 987; CHECK-NEXT: callq fmodf@PLT 988; CHECK-NEXT: callq __truncsfhf2@PLT 989; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 990; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 991; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 992; CHECK-NEXT: vzeroupper 993; CHECK-NEXT: callq __extendhfsf2@PLT 994; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 995; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 996; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 997; CHECK-NEXT: vzeroupper 998; CHECK-NEXT: callq __extendhfsf2@PLT 999; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1000; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1001; CHECK-NEXT: callq fmodf@PLT 1002; CHECK-NEXT: callq __truncsfhf2@PLT 1003; CHECK-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 1004; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1005; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1006; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1007; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1008; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1009; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1010; CHECK-NEXT: vmovaps %ymm0, 32(%rbx) 1011; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1012; CHECK-NEXT: vmovaps %ymm0, (%rbx) 1013; CHECK-NEXT: addq $224, %rsp 1014; CHECK-NEXT: popq %rbx 1015; CHECK-NEXT: vzeroupper 1016; CHECK-NEXT: retq 1017 %frem = frem <32 x half> %a0, %a1 1018 store <32 x half> %frem, ptr%p3 1019 ret void 1020} 1021 1022define void @frem_v16f16(<16 x half> %a0, <16 x half> %a1, ptr%p3) nounwind { 1023; CHECK-LABEL: frem_v16f16: 1024; CHECK: # %bb.0: 1025; CHECK-NEXT: pushq %rbx 1026; CHECK-NEXT: 
subq $144, %rsp 1027; CHECK-NEXT: movq %rdi, %rbx 1028; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1029; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1030; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm0 1031; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1032; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1033; CHECK-NEXT: vzeroupper 1034; CHECK-NEXT: callq __extendhfsf2@PLT 1035; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1036; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1037; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1038; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1039; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1040; CHECK-NEXT: vzeroupper 1041; CHECK-NEXT: callq __extendhfsf2@PLT 1042; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1043; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1044; CHECK-NEXT: callq fmodf@PLT 1045; CHECK-NEXT: callq __truncsfhf2@PLT 1046; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1047; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1048; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1049; CHECK-NEXT: callq __extendhfsf2@PLT 1050; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 1051; CHECK-NEXT: vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1052; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1053; CHECK-NEXT: callq __extendhfsf2@PLT 1054; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 1055; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1056; CHECK-NEXT: callq fmodf@PLT 1057; CHECK-NEXT: callq __truncsfhf2@PLT 1058; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1059; CHECK-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1060; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1061; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1062; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1063; CHECK-NEXT: callq __extendhfsf2@PLT 1064; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 1065; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1066; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1067; CHECK-NEXT: callq __extendhfsf2@PLT 1068; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 1069; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1070; CHECK-NEXT: callq fmodf@PLT 1071; CHECK-NEXT: callq __truncsfhf2@PLT 1072; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 1073; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1074; CHECK-NEXT: # xmm0 = mem[1,0] 1075; CHECK-NEXT: callq __extendhfsf2@PLT 1076; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1077; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1078; CHECK-NEXT: # xmm0 = mem[1,0] 1079; CHECK-NEXT: callq __extendhfsf2@PLT 1080; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1081; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1082; CHECK-NEXT: callq fmodf@PLT 1083; CHECK-NEXT: callq __truncsfhf2@PLT 1084; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1085; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1086; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1087; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1088; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1089; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1090; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 
1091; CHECK-NEXT: callq __extendhfsf2@PLT 1092; CHECK-NEXT: vmovd %xmm0, (%rsp) # 4-byte Folded Spill 1093; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1094; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 1095; CHECK-NEXT: callq __extendhfsf2@PLT 1096; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 1097; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1098; CHECK-NEXT: callq fmodf@PLT 1099; CHECK-NEXT: callq __truncsfhf2@PLT 1100; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 1101; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1102; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1103; CHECK-NEXT: callq __extendhfsf2@PLT 1104; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1105; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1106; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1107; CHECK-NEXT: callq __extendhfsf2@PLT 1108; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1109; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1110; CHECK-NEXT: callq fmodf@PLT 1111; CHECK-NEXT: callq __truncsfhf2@PLT 1112; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1113; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1114; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 1115; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1116; CHECK-NEXT: callq __extendhfsf2@PLT 1117; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1118; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1119; CHECK-NEXT: callq __extendhfsf2@PLT 1120; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1121; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1122; CHECK-NEXT: callq fmodf@PLT 1123; CHECK-NEXT: callq __truncsfhf2@PLT 1124; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1125; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1126; CHECK-NEXT: vpsrld $16, 
%xmm0, %xmm0 1127; CHECK-NEXT: callq __extendhfsf2@PLT 1128; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1129; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1130; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 1131; CHECK-NEXT: callq __extendhfsf2@PLT 1132; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1133; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1134; CHECK-NEXT: callq fmodf@PLT 1135; CHECK-NEXT: callq __truncsfhf2@PLT 1136; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1137; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1138; CHECK-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1139; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1140; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1141; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1142; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1143; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1144; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1145; CHECK-NEXT: vzeroupper 1146; CHECK-NEXT: callq __extendhfsf2@PLT 1147; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1148; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1149; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1150; CHECK-NEXT: vzeroupper 1151; CHECK-NEXT: callq __extendhfsf2@PLT 1152; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1153; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1154; CHECK-NEXT: callq fmodf@PLT 1155; CHECK-NEXT: callq __truncsfhf2@PLT 1156; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1157; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1158; 
CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1159; CHECK-NEXT: callq __extendhfsf2@PLT 1160; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1161; CHECK-NEXT: vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1162; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1163; CHECK-NEXT: callq __extendhfsf2@PLT 1164; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1165; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1166; CHECK-NEXT: callq fmodf@PLT 1167; CHECK-NEXT: callq __truncsfhf2@PLT 1168; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1169; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1170; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1171; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1172; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1173; CHECK-NEXT: vzeroupper 1174; CHECK-NEXT: callq __extendhfsf2@PLT 1175; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1176; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1177; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1178; CHECK-NEXT: vzeroupper 1179; CHECK-NEXT: callq __extendhfsf2@PLT 1180; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1181; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1182; CHECK-NEXT: callq fmodf@PLT 1183; CHECK-NEXT: callq __truncsfhf2@PLT 1184; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1185; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1186; CHECK-NEXT: # xmm0 = mem[1,0] 1187; CHECK-NEXT: callq __extendhfsf2@PLT 1188; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 1189; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1190; CHECK-NEXT: # xmm0 = mem[1,0] 1191; 
CHECK-NEXT: callq __extendhfsf2@PLT 1192; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 1193; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1194; CHECK-NEXT: callq fmodf@PLT 1195; CHECK-NEXT: callq __truncsfhf2@PLT 1196; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1197; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1198; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1199; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1200; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1201; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1202; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 1203; CHECK-NEXT: vzeroupper 1204; CHECK-NEXT: callq __extendhfsf2@PLT 1205; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1206; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1207; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm0 1208; CHECK-NEXT: vzeroupper 1209; CHECK-NEXT: callq __extendhfsf2@PLT 1210; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1211; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1212; CHECK-NEXT: callq fmodf@PLT 1213; CHECK-NEXT: callq __truncsfhf2@PLT 1214; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1215; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1216; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1217; CHECK-NEXT: callq __extendhfsf2@PLT 1218; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 1219; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1220; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1221; CHECK-NEXT: callq __extendhfsf2@PLT 1222; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 1223; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1224; CHECK-NEXT: callq fmodf@PLT 1225; CHECK-NEXT: callq __truncsfhf2@PLT 1226; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded 
Reload 1227; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1228; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1229; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1230; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1231; CHECK-NEXT: vzeroupper 1232; CHECK-NEXT: callq __extendhfsf2@PLT 1233; CHECK-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill 1234; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1235; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1236; CHECK-NEXT: vzeroupper 1237; CHECK-NEXT: callq __extendhfsf2@PLT 1238; CHECK-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload 1239; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1240; CHECK-NEXT: callq fmodf@PLT 1241; CHECK-NEXT: callq __truncsfhf2@PLT 1242; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1243; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1244; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 1245; CHECK-NEXT: vzeroupper 1246; CHECK-NEXT: callq __extendhfsf2@PLT 1247; CHECK-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1248; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1249; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0 1250; CHECK-NEXT: vzeroupper 1251; CHECK-NEXT: callq __extendhfsf2@PLT 1252; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1253; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1254; CHECK-NEXT: callq fmodf@PLT 1255; CHECK-NEXT: callq __truncsfhf2@PLT 1256; CHECK-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 1257; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1258; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1259; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1260; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1261; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1262; CHECK-NEXT: 
vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1263; CHECK-NEXT: vmovaps %ymm0, (%rbx) 1264; CHECK-NEXT: addq $144, %rsp 1265; CHECK-NEXT: popq %rbx 1266; CHECK-NEXT: vzeroupper 1267; CHECK-NEXT: retq 1268 %frem = frem <16 x half> %a0, %a1 1269 store <16 x half> %frem, ptr%p3 1270 ret void 1271}

; frem on <8 x half>, result stored through %p3.
; The expected code below scalarizes the operation: for each of the eight
; lanes both operands are widened with __extendhfsf2, the remainder is taken
; with fmodf, and the result is narrowed with __truncsfhf2. The eight halves
; are then re-interleaved (vpunpcklwd / vpunpckldq / vpunpcklqdq) and written
; with a single 16-byte store to (%rbx), which holds the incoming %rdi.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing by hand.
define void @frem_v8f16(<8 x half> %a0, <8 x half> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    subq $80, %rsp
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss %xmm0, (%rsp) # 4-byte Spill
; CHECK-NEXT:    vpshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss (%rsp), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovd %xmm0, (%rsp) # 4-byte Folded Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss (%rsp), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT:    vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrlq $48, %xmm0, %xmm0
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovd %xmm0, (%rsp) # 4-byte Folded Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrlq $48, %xmm0, %xmm0
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss (%rsp), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrld $16, %xmm0, %xmm0
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vpsrld $16, %xmm0, %xmm0
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf@PLT
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK-NEXT:    vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    vmovdqa %xmm0, (%rbx)
; CHECK-NEXT:    addq $80, %rsp
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %frem = frem <8 x half> %a0, %a1
  store <8 x half> %frem, ptr%p3
  ret void
}

; frem on <4 x x86_fp80>, result stored through %p3.
; The expected code below issues one fmodl call per element (four in total).
; Each call's two x87 operands are written to the outgoing stack area with
; fstpt before the call, and the four results end up at byte offsets
; 30, 20, 10 and 0 from %rbx, which holds the incoming %rdi.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing by hand.
define void @frem_v4f80(<4 x x86_fp80> %a0, <4 x x86_fp80> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v4f80:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmodl@PLT
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmodl@PLT
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmodl@PLT
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmodl@PLT
; CHECK-NEXT:    fstpt 30(%rbx)
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt 20(%rbx)
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt 10(%rbx)
; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    fstpt (%rbx)
; CHECK-NEXT:    addq $128, %rsp
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %frem = frem <4 x x86_fp80> %a0, %a1
  store <4 x x86_fp80> %frem, ptr%p3
  ret void
}