1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 | FileCheck %s 3 4define half @frem(half %x, half %y) nounwind { 5; CHECK-LABEL: frem: 6; CHECK: # %bb.0: 7; CHECK-NEXT: pushq %rax 8; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 9; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 10; CHECK-NEXT: callq fmodf@PLT 11; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 12; CHECK-NEXT: popq %rax 13; CHECK-NEXT: retq 14 %r = frem half %x, %y 15 ret half %r 16} 17 18define <2 x half> @frem_vec2(<2 x half> %x, <2 x half> %y) nounwind { 19; CHECK-LABEL: frem_vec2: 20; CHECK: # %bb.0: 21; CHECK-NEXT: subq $88, %rsp 22; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 23; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 24; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 25; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm0 26; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 27; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm1 28; CHECK-NEXT: callq fmodf@PLT 29; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 30; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 31; CHECK-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload 32; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 33; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 34; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 35; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 36; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 37; CHECK-NEXT: callq fmodf@PLT 38; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 39; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 40; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 41; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 42; CHECK-NEXT: vpsrldq $10, (%rsp), %xmm0 # 16-byte Folded Reload 43; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 44; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 45; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 46; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 47; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 48; CHECK-NEXT: callq fmodf@PLT 49; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 50; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 51; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 52; CHECK-NEXT: # xmm0 = mem[1,0] 53; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 54; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 55; CHECK-NEXT: # xmm1 = mem[1,0] 56; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 57; CHECK-NEXT: callq fmodf@PLT 58; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 59; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 60; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 61; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 62; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 63; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 64; CHECK-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte Folded Reload 65; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 66; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 67; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 68; CHECK-NEXT: callq fmodf@PLT 69; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 70; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 71; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 72; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 73; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 74; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 75; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 76; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 77; CHECK-NEXT: callq fmodf@PLT 78; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 79; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 80; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 81; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 82; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 83; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 84; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 85; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 86; CHECK-NEXT: callq fmodf@PLT 87; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 88; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 89; CHECK-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload 90; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 91; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 92; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 93; CHECK-NEXT: callq fmodf@PLT 94; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 95; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 96; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 97; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 98; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 99; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 100; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 101; CHECK-NEXT: addq $88, %rsp 102; CHECK-NEXT: retq 103 %r = frem <2 x half> %x, %y 104 ret <2 x half> %r 105} 106 107define <4 x half> @frem_vec4(<4 x half> %x, <4 x half> %y) nounwind { 108; CHECK-LABEL: frem_vec4: 109; CHECK: # %bb.0: 110; CHECK-NEXT: subq $88, %rsp 111; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 112; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 113; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 114; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm0 115; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 116; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm1 117; CHECK-NEXT: callq fmodf@PLT 118; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 119; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 120; CHECK-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload 121; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 122; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 123; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 124; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 125; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 126; CHECK-NEXT: callq fmodf@PLT 127; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 128; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 129; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 130; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 131; CHECK-NEXT: vpsrldq $10, (%rsp), %xmm0 # 16-byte Folded Reload 132; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 133; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 134; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 135; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 136; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 137; CHECK-NEXT: callq fmodf@PLT 138; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 139; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 140; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 141; CHECK-NEXT: # xmm0 = mem[1,0] 142; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 143; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 144; CHECK-NEXT: # xmm1 = mem[1,0] 145; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 146; CHECK-NEXT: callq fmodf@PLT 147; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 148; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 149; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 150; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 151; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 152; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 153; CHECK-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte Folded Reload 154; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 155; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 156; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 157; CHECK-NEXT: callq fmodf@PLT 158; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 159; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 160; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 161; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 162; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 163; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 164; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 165; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 166; CHECK-NEXT: callq fmodf@PLT 167; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 168; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 169; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 170; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 171; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 172; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 173; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 174; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 175; CHECK-NEXT: callq fmodf@PLT 176; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 177; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 178; CHECK-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload 179; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 180; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 181; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 182; CHECK-NEXT: callq fmodf@PLT 183; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 184; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 185; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 186; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 187; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 188; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 189; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 190; CHECK-NEXT: addq $88, %rsp 191; CHECK-NEXT: retq 192 %r = frem <4 x half> %x, %y 193 ret <4 x half> %r 194} 195 196define <8 x half> @frem_vec8(<8 x half> %x, <8 x half> %y) nounwind { 197; CHECK-LABEL: frem_vec8: 198; CHECK: # %bb.0: 199; CHECK-NEXT: subq $88, %rsp 200; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 201; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 202; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 203; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm0 204; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 205; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm1 206; CHECK-NEXT: callq fmodf@PLT 207; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 208; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 209; CHECK-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload 210; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 211; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 212; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 213; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 214; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 215; CHECK-NEXT: callq fmodf@PLT 216; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 217; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 218; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 219; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 220; CHECK-NEXT: vpsrldq $10, (%rsp), %xmm0 # 16-byte Folded Reload 221; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 222; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 223; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 224; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 225; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 226; CHECK-NEXT: callq fmodf@PLT 227; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 228; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 229; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 230; CHECK-NEXT: # xmm0 = mem[1,0] 231; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 232; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 233; CHECK-NEXT: # xmm1 = mem[1,0] 234; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 235; CHECK-NEXT: callq fmodf@PLT 236; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 237; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 238; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 239; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 240; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 241; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 242; CHECK-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte Folded Reload 243; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 244; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 245; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 246; CHECK-NEXT: callq fmodf@PLT 247; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 248; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 249; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 250; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 251; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 252; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 253; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 254; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 255; CHECK-NEXT: callq fmodf@PLT 256; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 257; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 258; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 259; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 260; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 261; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 262; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 263; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 264; CHECK-NEXT: callq fmodf@PLT 265; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 266; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 267; CHECK-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload 268; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 269; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 270; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 271; CHECK-NEXT: callq fmodf@PLT 272; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 273; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 274; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 275; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 276; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 277; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 278; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 279; CHECK-NEXT: addq $88, %rsp 280; CHECK-NEXT: retq 281 %r = frem <8 x half> %x, %y 282 ret <8 x half> %r 283} 284 285define <16 x half> @frem_vec16(<16 x half> %x, <16 x half> %y) nounwind { 286; CHECK-LABEL: frem_vec16: 287; CHECK: # %bb.0: 288; CHECK-NEXT: subq $184, %rsp 289; CHECK-NEXT: vmovupd %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 290; CHECK-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 291; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 292; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 293; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 294; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 295; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1 296; CHECK-NEXT: vmovapd %xmm1, (%rsp) # 16-byte Spill 297; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 298; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 299; CHECK-NEXT: vzeroupper 300; CHECK-NEXT: callq fmodf@PLT 301; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 302; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 303; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 304; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 305; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 306; CHECK-NEXT: vpermilps $255, (%rsp), %xmm1 # 16-byte Folded Reload 307; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 308; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 309; CHECK-NEXT: callq fmodf@PLT 310; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 311; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 312; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 313; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 314; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 315; CHECK-NEXT: # xmm0 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 316; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 317; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 318; CHECK-NEXT: # xmm1 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 319; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 320; CHECK-NEXT: callq fmodf@PLT 321; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 322; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 323; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 324; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 325; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 326; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 327; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 328; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 329; CHECK-NEXT: callq fmodf@PLT 330; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 331; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 332; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 333; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 334; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 335; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 336; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 337; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 338; CHECK-NEXT: vpsrldq $10, (%rsp), %xmm1 # 16-byte Folded Reload 339; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 340; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 341; CHECK-NEXT: vzeroupper 342; CHECK-NEXT: callq fmodf@PLT 343; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 344; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 345; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 346; CHECK-NEXT: # xmm0 = mem[1,0] 347; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 348; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm1 # 16-byte Folded Reload 349; CHECK-NEXT: # xmm1 = mem[1,0] 350; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 351; CHECK-NEXT: callq fmodf@PLT 352; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 353; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 354; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 355; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 356; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 357; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 358; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 359; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 360; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 361; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 362; CHECK-NEXT: callq fmodf@PLT 363; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 364; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 365; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 366; CHECK-NEXT: # xmm0 = mem[1,0] 367; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 368; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 369; CHECK-NEXT: # xmm1 = mem[1,0] 370; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 371; CHECK-NEXT: callq fmodf@PLT 372; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 373; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 374; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 375; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 376; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload 377; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] 378; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 379; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 380; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 381; CHECK-NEXT: vpsrlq $48, (%rsp), %xmm1 # 16-byte Folded Reload 382; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 383; CHECK-NEXT: vzeroupper 384; CHECK-NEXT: callq fmodf@PLT 385; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 386; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 387; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 388; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 389; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 390; CHECK-NEXT: vmovshdup (%rsp), %xmm1 # 16-byte Folded Reload 391; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 392; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 393; CHECK-NEXT: callq fmodf@PLT 394; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 395; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 396; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 397; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 398; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 399; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 400; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 401; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 402; CHECK-NEXT: callq fmodf@PLT 403; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 404; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 405; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 406; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 407; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 408; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 409; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 410; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 411; CHECK-NEXT: callq fmodf@PLT 412; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 413; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 414; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 415; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 416; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 417; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 418; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 419; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 420; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 421; CHECK-NEXT: vzeroupper 422; CHECK-NEXT: callq fmodf@PLT 423; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 424; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 425; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 426; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 427; CHECK-NEXT: vpsrld $16, (%rsp), %xmm1 # 16-byte Folded Reload 428; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 429; CHECK-NEXT: callq fmodf@PLT 430; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 431; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 432; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 433; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 434; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 435; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 436; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload 437; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 438; CHECK-NEXT: vzeroupper 439; CHECK-NEXT: callq fmodf@PLT 440; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 441; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 442; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 443; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 444; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 445; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 446; CHECK-NEXT: callq fmodf@PLT 447; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 448; CHECK-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 449; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 450; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 451; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload 452; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] 453; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload 454; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] 455; CHECK-NEXT: addq $184, %rsp 456; CHECK-NEXT: retq 457 %r = frem <16 x half> %x, %y 458 ret <16 x half> %r 459} 460 461define <32 x half> @frem_vec32(<32 x half> %x, <32 x half> %y) nounwind { 462; CHECK-LABEL: frem_vec32: 463; CHECK: # %bb.0: 464; CHECK-NEXT: subq $408, %rsp # imm = 0x198 465; CHECK-NEXT: vmovupd %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 466; CHECK-NEXT: vmovupd %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 467; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0 468; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 469; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 470; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 471; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm1 472; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 473; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 474; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 475; CHECK-NEXT: vzeroupper 476; CHECK-NEXT: callq fmodf@PLT 477; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 478; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 479; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 480; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 481; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 482; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 483; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 484; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 485; CHECK-NEXT: callq fmodf@PLT 486; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 487; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 488; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 489; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 490; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 491; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm0 492; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 493; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 494; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 495; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 496; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 497; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 498; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 499; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 500; CHECK-NEXT: vzeroupper 501; CHECK-NEXT: callq fmodf@PLT 502; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 503; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 504; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 505; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 506; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 507; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 508; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 509; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 510; CHECK-NEXT: callq fmodf@PLT 511; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 512; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 513; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 514; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 515; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 516; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 517; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 518; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 519; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 520; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 521; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 522; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1 523; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 524; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 525; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 526; CHECK-NEXT: vzeroupper 527; CHECK-NEXT: callq fmodf@PLT 528; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 529; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 530; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 531; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 532; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 533; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 534; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 535; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 536; CHECK-NEXT: callq fmodf@PLT 537; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 538; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 539; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 540; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 541; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 542; CHECK-NEXT: # xmm0 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 543; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 544; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 545; CHECK-NEXT: # xmm1 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 546; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 547; CHECK-NEXT: callq fmodf@PLT 548; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 549; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 550; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 551; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 552; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 553; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 554; CHECK-NEXT: # xmm1 = mem[3,3,3,3] 555; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 556; CHECK-NEXT: callq fmodf@PLT 557; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 558; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 559; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 560; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 561; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 562; CHECK-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 563; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 564; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 565; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 566; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 567; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 568; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 569; CHECK-NEXT: vzeroupper 570; CHECK-NEXT: callq fmodf@PLT 571; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 572; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 573; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 574; CHECK-NEXT: # xmm0 = mem[1,0] 575; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 576; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 577; CHECK-NEXT: # xmm1 = mem[1,0] 578; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 579; CHECK-NEXT: callq fmodf@PLT 580; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 581; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 582; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 583; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 584; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 585; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 586; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 587; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 588; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 589; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 590; CHECK-NEXT: callq fmodf@PLT 591; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 592; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 593; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 594; CHECK-NEXT: # xmm0 = mem[1,0] 595; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 596; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 597; CHECK-NEXT: # xmm1 = mem[1,0] 598; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 599; CHECK-NEXT: callq fmodf@PLT 600; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 601; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 602; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 603; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 604; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 605; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 606; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 607; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 608; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 609; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 610; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 611; CHECK-NEXT: vzeroupper 612; CHECK-NEXT: callq fmodf@PLT 613; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 614; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 615; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 616; CHECK-NEXT: # xmm0 = mem[1,0] 617; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 618; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 619; CHECK-NEXT: # xmm1 = mem[1,0] 620; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 621; CHECK-NEXT: callq fmodf@PLT 622; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 623; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 624; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 625; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 626; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 627; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 628; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 629; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 630; CHECK-NEXT: # xmm1 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 631; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 632; CHECK-NEXT: callq fmodf@PLT 633; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 634; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 635; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 636; CHECK-NEXT: # xmm0 = mem[1,0] 637; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 638; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 639; CHECK-NEXT: # xmm1 = mem[1,0] 640; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 641; CHECK-NEXT: callq fmodf@PLT 642; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 643; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 644; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 645; CHECK-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 646; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 647; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload 648; CHECK-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 649; CHECK-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 650; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 651; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 652; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 653; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 654; CHECK-NEXT: vzeroupper 655; CHECK-NEXT: callq fmodf@PLT 656; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 657; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 658; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 659; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 660; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 661; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 662; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 663; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 664; CHECK-NEXT: callq fmodf@PLT 665; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 666; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 667; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 668; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 669; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 670; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 671; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 672; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 673; CHECK-NEXT: callq fmodf@PLT 674; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 675; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 676; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 677; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 678; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 679; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 680; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 681; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 682; CHECK-NEXT: callq fmodf@PLT 683; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 684; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 685; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 686; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 687; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 688; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 689; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 690; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 691; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 692; CHECK-NEXT: vzeroupper 693; CHECK-NEXT: callq fmodf@PLT 694; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 695; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 696; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 697; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 698; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 699; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 700; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 701; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 702; CHECK-NEXT: callq fmodf@PLT 703; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 704; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 705; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 706; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 707; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 708; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 709; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 710; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 711; CHECK-NEXT: callq fmodf@PLT 712; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 713; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 714; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 715; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 716; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 717; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 718; CHECK-NEXT: # xmm1 = mem[1,1,3,3] 719; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 720; CHECK-NEXT: callq fmodf@PLT 721; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 722; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 723; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 724; CHECK-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 725; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 726; CHECK-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 727; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 728; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 729; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 730; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 731; CHECK-NEXT: vzeroupper 732; CHECK-NEXT: callq fmodf@PLT 733; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 734; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 735; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 736; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 737; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 738; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 739; CHECK-NEXT: callq fmodf@PLT 740; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 741; CHECK-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 742; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 743; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 744; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 745; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 746; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 747; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 748; CHECK-NEXT: callq fmodf@PLT 749; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 750; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 751; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 752; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 753; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 754; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 755; CHECK-NEXT: callq fmodf@PLT 756; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 757; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 758; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 759; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 760; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 761; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 762; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 763; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 764; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 765; CHECK-NEXT: vzeroupper 766; CHECK-NEXT: callq fmodf@PLT 767; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 768; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 769; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 770; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 771; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 772; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 773; CHECK-NEXT: callq fmodf@PLT 774; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 775; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 776; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 777; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 778; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 779; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 780; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 781; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 782; CHECK-NEXT: vzeroupper 783; CHECK-NEXT: callq fmodf@PLT 784; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 785; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 786; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 787; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 788; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 789; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 790; CHECK-NEXT: callq fmodf@PLT 791; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 792; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 793; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 794; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 795; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 796; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload 797; CHECK-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 798; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload 799; CHECK-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 800; CHECK-NEXT: addq $408, %rsp # imm = 0x198 801; CHECK-NEXT: retq 802 %r = frem <32 x half> %x, %y 803 ret <32 x half> %r 804} 805 806define half @frem_strict(half %x, half %y) nounwind #0 { 807; CHECK-LABEL: frem_strict: 808; CHECK: # %bb.0: 809; CHECK-NEXT: pushq %rax 810; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 811; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 812; CHECK-NEXT: callq fmodf@PLT 813; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 814; CHECK-NEXT: popq %rax 815; CHECK-NEXT: retq 816 %result = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 817 ret half %result 818} 819 820define <2 x half> @frem_strict_vec2(<2 x half> %x, <2 x half> %y) nounwind #0 { 821; CHECK-LABEL: frem_strict_vec2: 822; CHECK: # %bb.0: 823; CHECK-NEXT: subq $56, %rsp 824; CHECK-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill 825; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 826; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 827; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 828; CHECK-NEXT: callq fmodf@PLT 829; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 830; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 831; CHECK-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload 832; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 833; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 834; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 835; CHECK-NEXT: callq fmodf@PLT 836; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 837; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 838; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 839; CHECK-NEXT: addq $56, %rsp 840; CHECK-NEXT: retq 841 %result = call <2 x half> @llvm.experimental.constrained.frem.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 842 ret <2 x half> %result 843} 844 845define <4 x half> @frem_strict_vec4(<4 x half> %x, <4 x half> %y) nounwind #0 { 846; CHECK-LABEL: frem_strict_vec4: 847; CHECK: # %bb.0: 848; CHECK-NEXT: subq $72, %rsp 849; CHECK-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 850; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 851; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm2 852; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm1 853; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm2 854; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm0 855; CHECK-NEXT: callq fmodf@PLT 856; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 857; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 858; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 859; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 860; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 861; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 862; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 863; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 864; CHECK-NEXT: callq fmodf@PLT 865; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 866; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 867; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 868; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 869; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 870; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 871; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 872; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 873; CHECK-NEXT: callq fmodf@PLT 874; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 875; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 876; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 877; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 878; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 879; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 880; CHECK-NEXT: callq fmodf@PLT 881; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 882; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 883; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 884; CHECK-NEXT: vinsertps $28, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 885; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],zero,zero 886; CHECK-NEXT: addq $72, %rsp 887; CHECK-NEXT: retq 888 %result = call <4 x half> @llvm.experimental.constrained.frem.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 889 ret <4 x half> %result 890} 891 892define <8 x half> @frem_strict_vec8(<8 x half> %x, <8 x half> %y) nounwind #0 { 893; CHECK-LABEL: frem_strict_vec8: 894; CHECK: # %bb.0: 895; CHECK-NEXT: subq $88, %rsp 896; CHECK-NEXT: vmovapd %xmm1, (%rsp) # 16-byte Spill 897; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 898; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 899; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm1 900; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 901; CHECK-NEXT: vcvtsh2ss %xmm2, %xmm2, %xmm0 902; CHECK-NEXT: callq fmodf@PLT 903; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 904; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 905; CHECK-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload 906; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 907; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 908; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 909; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 910; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 911; CHECK-NEXT: callq fmodf@PLT 912; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 913; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 914; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 915; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 916; CHECK-NEXT: vpsrldq $10, (%rsp), %xmm0 # 16-byte Folded Reload 917; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 918; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 919; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 920; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 921; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 922; CHECK-NEXT: callq fmodf@PLT 923; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 924; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 925; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 926; CHECK-NEXT: # xmm0 = mem[1,0] 927; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 928; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 929; CHECK-NEXT: # xmm0 = mem[1,0] 930; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 931; CHECK-NEXT: callq fmodf@PLT 932; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 933; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 934; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 935; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 936; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 937; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 938; CHECK-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte Folded Reload 939; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 940; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 941; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 942; CHECK-NEXT: callq fmodf@PLT 943; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 944; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 945; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 946; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 947; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 948; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 949; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 950; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 951; CHECK-NEXT: callq fmodf@PLT 952; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 953; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 954; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 955; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 956; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 957; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 958; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 959; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 960; CHECK-NEXT: callq fmodf@PLT 961; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 962; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 963; CHECK-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload 964; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 965; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 966; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 967; CHECK-NEXT: callq fmodf@PLT 968; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 969; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 970; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 971; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 972; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 973; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 974; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 975; CHECK-NEXT: addq $88, %rsp 976; CHECK-NEXT: retq 977 %result = call <8 x half> @llvm.experimental.constrained.frem.v8f16(<8 x half> %x, <8 x half> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 978 ret <8 x half> %result 979} 980 981define <16 x half> @frem_strict_vec16(<16 x half> %x, <16 x half> %y) nounwind #0 { 982; CHECK-LABEL: frem_strict_vec16: 983; CHECK: # %bb.0: 984; CHECK-NEXT: subq $184, %rsp 985; CHECK-NEXT: vmovupd %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 986; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 987; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1 988; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 989; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 990; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 991; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 992; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 993; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 994; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 995; CHECK-NEXT: vzeroupper 996; CHECK-NEXT: callq fmodf@PLT 997; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 998; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 999; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1000; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1001; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1002; CHECK-NEXT: vpermilps $255, (%rsp), %xmm0 # 16-byte Folded Reload 1003; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1004; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1005; CHECK-NEXT: callq fmodf@PLT 1006; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1007; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1008; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1009; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1010; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1011; CHECK-NEXT: # xmm0 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1012; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1013; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1014; CHECK-NEXT: # xmm0 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1015; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1016; CHECK-NEXT: callq fmodf@PLT 1017; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1018; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1019; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1020; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1021; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1022; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1023; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1024; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1025; CHECK-NEXT: callq fmodf@PLT 1026; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1027; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1028; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1029; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1030; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1031; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1032; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1033; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1034; CHECK-NEXT: vpsrldq $10, (%rsp), %xmm0 # 16-byte Folded Reload 1035; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1036; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1037; CHECK-NEXT: vzeroupper 1038; CHECK-NEXT: callq fmodf@PLT 1039; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1040; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1041; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1042; CHECK-NEXT: # xmm0 = mem[1,0] 1043; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1044; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 1045; CHECK-NEXT: # xmm0 = mem[1,0] 1046; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1047; CHECK-NEXT: callq fmodf@PLT 1048; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1049; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1050; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1051; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1052; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1053; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1054; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1055; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1056; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1057; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1058; CHECK-NEXT: callq fmodf@PLT 1059; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1060; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1061; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1062; CHECK-NEXT: # xmm0 = mem[1,0] 1063; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1064; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1065; CHECK-NEXT: # xmm0 = mem[1,0] 1066; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1067; CHECK-NEXT: callq fmodf@PLT 1068; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1069; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1070; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1071; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1072; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload 1073; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] 1074; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1075; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1076; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1077; CHECK-NEXT: vpsrlq $48, (%rsp), %xmm0 # 16-byte Folded Reload 1078; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1079; CHECK-NEXT: vzeroupper 1080; CHECK-NEXT: callq fmodf@PLT 1081; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1082; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1083; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1084; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1085; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1086; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 1087; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1088; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1089; CHECK-NEXT: callq fmodf@PLT 1090; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1091; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1092; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1093; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1094; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1095; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1096; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1097; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1098; CHECK-NEXT: callq fmodf@PLT 1099; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1100; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1101; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1102; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1103; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1104; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1105; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1106; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1107; CHECK-NEXT: callq fmodf@PLT 1108; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1109; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1110; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1111; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1112; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1113; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1114; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1115; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 1116; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1117; CHECK-NEXT: vzeroupper 1118; CHECK-NEXT: callq fmodf@PLT 1119; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1120; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1121; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1122; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1123; CHECK-NEXT: vpsrld $16, (%rsp), %xmm0 # 16-byte Folded Reload 1124; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1125; CHECK-NEXT: callq fmodf@PLT 1126; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1127; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1128; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1129; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1130; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1131; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1132; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1133; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1134; CHECK-NEXT: vzeroupper 1135; CHECK-NEXT: callq fmodf@PLT 1136; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1137; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1138; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1139; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1140; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1141; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1142; CHECK-NEXT: callq fmodf@PLT 1143; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1144; CHECK-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 1145; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1146; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1147; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload 1148; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] 1149; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload 1150; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] 1151; CHECK-NEXT: addq $184, %rsp 1152; CHECK-NEXT: retq 1153 %result = call <16 x half> @llvm.experimental.constrained.frem.v16f16(<16 x half> %x, <16 x half> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 1154 ret <16 x half> %result 1155} 1156 1157define <32 x half> @frem_strict_vec32(<32 x half> %x, <32 x half> %y) nounwind #0 { 1158; CHECK-LABEL: frem_strict_vec32: 1159; CHECK: # %bb.0: 1160; CHECK-NEXT: subq $408, %rsp # imm = 0x198 1161; CHECK-NEXT: vmovupd %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 1162; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 1163; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm1 1164; CHECK-NEXT: vmovapd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1165; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1166; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1 1167; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0 1168; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1169; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1170; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1171; CHECK-NEXT: vzeroupper 1172; CHECK-NEXT: callq fmodf@PLT 1173; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1174; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1175; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1176; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1177; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1178; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1179; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1180; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1181; CHECK-NEXT: callq fmodf@PLT 1182; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1183; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1184; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1185; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1186; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 1187; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm0 1188; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1189; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1190; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1191; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 1192; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm0 1193; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1194; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1195; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1196; CHECK-NEXT: vzeroupper 1197; CHECK-NEXT: callq fmodf@PLT 1198; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1199; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1200; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1201; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1202; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1203; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1204; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1205; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1206; CHECK-NEXT: callq fmodf@PLT 1207; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1208; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1209; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1210; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1211; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1212; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 1213; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1214; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1215; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1216; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1217; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 1218; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1219; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1220; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1221; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1222; CHECK-NEXT: vzeroupper 1223; CHECK-NEXT: callq fmodf@PLT 1224; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1225; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1226; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1227; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1228; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1229; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1230; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1231; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1232; CHECK-NEXT: callq fmodf@PLT 1233; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1234; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1235; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1236; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1237; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1238; CHECK-NEXT: # xmm0 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1239; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1240; CHECK-NEXT: vpsrldq $14, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1241; CHECK-NEXT: # xmm0 = mem[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1242; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1243; CHECK-NEXT: callq fmodf@PLT 1244; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1245; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1246; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1247; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1248; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1249; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1250; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1251; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1252; CHECK-NEXT: callq fmodf@PLT 1253; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1254; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1255; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1256; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1257; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 1258; CHECK-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 1259; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1260; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1261; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1262; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1263; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1264; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1265; CHECK-NEXT: vzeroupper 1266; CHECK-NEXT: callq fmodf@PLT 1267; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1268; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1269; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1270; CHECK-NEXT: # xmm0 = mem[1,0] 1271; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1272; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1273; CHECK-NEXT: # xmm0 = mem[1,0] 1274; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1275; CHECK-NEXT: callq fmodf@PLT 1276; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1277; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1278; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1279; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1280; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1281; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1282; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1283; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1284; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1285; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1286; CHECK-NEXT: callq fmodf@PLT 1287; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1288; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1289; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1290; CHECK-NEXT: # xmm0 = mem[1,0] 1291; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1292; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1293; CHECK-NEXT: # xmm0 = mem[1,0] 1294; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1295; CHECK-NEXT: callq fmodf@PLT 1296; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1297; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1298; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1299; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1300; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1301; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1302; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1303; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1304; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1305; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1306; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1307; CHECK-NEXT: vzeroupper 1308; CHECK-NEXT: callq fmodf@PLT 1309; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1310; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1311; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1312; CHECK-NEXT: # xmm0 = mem[1,0] 1313; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1314; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1315; CHECK-NEXT: # xmm0 = mem[1,0] 1316; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1317; CHECK-NEXT: callq fmodf@PLT 1318; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1319; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1320; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1321; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 1322; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1323; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1324; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1325; CHECK-NEXT: vpsrldq $10, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1326; CHECK-NEXT: # xmm0 = mem[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1327; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1328; CHECK-NEXT: callq fmodf@PLT 1329; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1330; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1331; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1332; CHECK-NEXT: # xmm0 = mem[1,0] 1333; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1334; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1335; CHECK-NEXT: # xmm0 = mem[1,0] 1336; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1337; CHECK-NEXT: callq fmodf@PLT 1338; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1339; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1340; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1341; CHECK-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1342; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 1343; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload 1344; CHECK-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 1345; CHECK-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 1346; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1347; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1348; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1349; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1350; CHECK-NEXT: vzeroupper 1351; CHECK-NEXT: callq fmodf@PLT 1352; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1353; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1354; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1355; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1356; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1357; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1358; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1359; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1360; CHECK-NEXT: callq fmodf@PLT 1361; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1362; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1363; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1364; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1365; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1366; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1367; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1368; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1369; CHECK-NEXT: callq fmodf@PLT 1370; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1371; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1372; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1373; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1374; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1375; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1376; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1377; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1378; CHECK-NEXT: callq fmodf@PLT 1379; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1380; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1381; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1382; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1383; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1384; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1385; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1386; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1387; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1388; CHECK-NEXT: vzeroupper 1389; CHECK-NEXT: callq fmodf@PLT 1390; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1391; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1392; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1393; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1394; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1395; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1396; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1397; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1398; CHECK-NEXT: callq fmodf@PLT 1399; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1400; CHECK-NEXT: vpunpcklwd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1401; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1402; CHECK-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill 1403; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1404; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1405; CHECK-NEXT: vpsrlq $48, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1406; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1407; CHECK-NEXT: callq fmodf@PLT 1408; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1409; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1410; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1411; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1412; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1413; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1414; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1415; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1416; CHECK-NEXT: callq fmodf@PLT 1417; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1418; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1419; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1420; CHECK-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1421; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 1422; CHECK-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 1423; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1424; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1425; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1426; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1427; CHECK-NEXT: vzeroupper 1428; CHECK-NEXT: callq fmodf@PLT 1429; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1430; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1431; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1432; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1433; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1434; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1435; CHECK-NEXT: callq fmodf@PLT 1436; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1437; CHECK-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 1438; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1439; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1440; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1441; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1442; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1443; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1444; CHECK-NEXT: callq fmodf@PLT 1445; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1446; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1447; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1448; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1449; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1450; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1451; CHECK-NEXT: callq fmodf@PLT 1452; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1453; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1454; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1455; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1456; CHECK-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1457; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1458; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1459; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1460; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1461; CHECK-NEXT: vzeroupper 1462; CHECK-NEXT: callq fmodf@PLT 1463; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1464; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1465; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1466; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1467; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1468; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1469; CHECK-NEXT: callq fmodf@PLT 1470; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1471; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1472; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1473; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1474; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 1475; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1476; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload 1477; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1478; CHECK-NEXT: vzeroupper 1479; CHECK-NEXT: callq fmodf@PLT 1480; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1481; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1482; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1483; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm1 1484; CHECK-NEXT: vpsrld $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1485; CHECK-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 1486; CHECK-NEXT: callq fmodf@PLT 1487; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 1488; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1489; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1490; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1491; CHECK-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload 1492; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload 1493; CHECK-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 1494; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload 1495; CHECK-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] 1496; CHECK-NEXT: addq $408, %rsp # imm = 0x198 1497; CHECK-NEXT: retq 1498 %result = call <32 x half> @llvm.experimental.constrained.frem.v32f16(<32 x half> %x, <32 x half> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 1499 ret <32 x half> %result 1500} 1501 1502attributes #0 = { strictfp } 1503declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata) 1504declare <2 x half> @llvm.experimental.constrained.frem.v2f16(<2 x half>, <2 x half>, metadata, metadata) 1505declare <4 x half> @llvm.experimental.constrained.frem.v4f16(<4 x half>, <4 x half>, metadata, metadata) 1506declare <8 x half> @llvm.experimental.constrained.frem.v8f16(<8 x half>, <8 x half>, metadata, metadata) 1507declare <16 x half> @llvm.experimental.constrained.frem.v16f16(<16 x half>, <16 x half>, metadata, metadata) 1508declare <32 x half> @llvm.experimental.constrained.frem.v32f16(<32 x half>, <32 x half>, metadata, metadata) 1509