; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

; The inline asm clobbers xmm2-xmm15 (and flags), so %a1 must be spilled across
; it; the reload is expected to fold straight into the addpd.
define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_addpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_addps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

; Scalar form: only 8 bytes are spilled/reloaded for the double operand.
define double @stack_fold_addsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_addsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

; Intrinsic-style scalar add: the full 16-byte vector is spilled, but the
; scalar addsd still folds the reload.
define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_addsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fadd double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}

define float @stack_fold_addss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_addss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_addss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fadd float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_addsubpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_addsubps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    addsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

; Logic ops have no execution domain of their own; the trailing fadd pins the
; domain so the folded instruction is the pd/ps form being tested.
define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_andnpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    andnpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    addpd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_andnps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    andnps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    addps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_andpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    andpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    addpd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_andps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    addps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

; The constant-mask select should lower to an immediate blendpd with a folded
; memory operand.
define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_blendpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blendpd $2, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[1]
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    addpd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_blendps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blendps $6, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[1,2],xmm0[3]
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    addps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

; blendv uses xmm0 as an implicit mask register, so the clobber list starts at
; xmm3 and the %c operand is the one spilled/folded.
define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
; CHECK-LABEL: stack_fold_blendvpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movapd %xmm1, %xmm2
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blendvpd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; CHECK-NEXT:    movapd %xmm2, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
; CHECK-LABEL: stack_fold_blendvps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm1, %xmm2
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blendvps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; CHECK-NEXT:    movaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_cmppd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_cmpsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_cmpsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_cmpss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_cmpss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_comisd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    comisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    setnp %al
; CHECK-NEXT:    sete %cl
; CHECK-NEXT:    andb %al, %cl
; CHECK-NEXT:    movzbl %cl, %eax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_comiss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    comiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    setnp %al
; CHECK-NEXT:    sete %cl
; CHECK-NEXT:    andb %al, %cl
; CHECK-NEXT:    movzbl %cl, %eax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

; Conversions take a single source operand, so the clobber list starts at xmm1
; and the source itself (%a0 in xmm0) is spilled around the asm.
define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2pd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %cvt
}

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_cvtdq2ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2dq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpd2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpd2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2dq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %3 = fpext <2 x float> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %cvtps2pd = fpext <2 x float> %2 to <2 x double>
  ret <2 x double> %cvtps2pd
}

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtsd2si_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtsd2si64_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

; NOTE(review): the minsize/optsize attributes on the cvtsd2ss tests appear to
; be what allows folding the partial-register-update conversion — confirm
; against the folding tables in X86InstrInfo.
define float @stack_fold_cvtsd2ss(double %a0) minsize {
; CHECK-LABEL: stack_fold_cvtsd2ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc double %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
; CHECK-LABEL: stack_fold_cvtsd2ss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

; GPR->XMM conversions: here the asm clobbers every GPR instead, forcing the
; integer argument to be spilled; the reload folds into cvtsi2sd.
define double @stack_fold_cvtsi2sd(i32 %a0) {
; CHECK-LABEL: stack_fold_cvtsi2sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
; CHECK-LABEL: stack_fold_cvtsi2sd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  %3 = insertelement <2 x double> %b0, double %2, i64 0
  ret <2 x double> %3
}

define double @stack_fold_cvtsi642sd(i64 %a0) {
; CHECK-LABEL: stack_fold_cvtsi642sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
; CHECK-LABEL: stack_fold_cvtsi642sd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  %3 = insertelement <2 x double> %b0, double %2, i64 0
  ret <2 x double> %3
}

define float @stack_fold_cvtsi2ss(i32 %a0) {
; CHECK-LABEL: stack_fold_cvtsi2ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
CHECK-NEXT: .cfi_offset %r14, -32 809; CHECK-NEXT: .cfi_offset %r15, -24 810; CHECK-NEXT: .cfi_offset %rbp, -16 811; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 812; CHECK-NEXT: #APP 813; CHECK-NEXT: nop 814; CHECK-NEXT: #NO_APP 815; CHECK-NEXT: xorps %xmm0, %xmm0 816; CHECK-NEXT: cvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 817; CHECK-NEXT: popq %rbx 818; CHECK-NEXT: .cfi_def_cfa_offset 48 819; CHECK-NEXT: popq %r12 820; CHECK-NEXT: .cfi_def_cfa_offset 40 821; CHECK-NEXT: popq %r13 822; CHECK-NEXT: .cfi_def_cfa_offset 32 823; CHECK-NEXT: popq %r14 824; CHECK-NEXT: .cfi_def_cfa_offset 24 825; CHECK-NEXT: popq %r15 826; CHECK-NEXT: .cfi_def_cfa_offset 16 827; CHECK-NEXT: popq %rbp 828; CHECK-NEXT: .cfi_def_cfa_offset 8 829; CHECK-NEXT: retq 830 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 831 %2 = sitofp i32 %a0 to float 832 ret float %2 833} 834 835define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) { 836; CHECK-LABEL: stack_fold_cvtsi2ss_int: 837; CHECK: # %bb.0: 838; CHECK-NEXT: pushq %rbp 839; CHECK-NEXT: .cfi_def_cfa_offset 16 840; CHECK-NEXT: pushq %r15 841; CHECK-NEXT: .cfi_def_cfa_offset 24 842; CHECK-NEXT: pushq %r14 843; CHECK-NEXT: .cfi_def_cfa_offset 32 844; CHECK-NEXT: pushq %r13 845; CHECK-NEXT: .cfi_def_cfa_offset 40 846; CHECK-NEXT: pushq %r12 847; CHECK-NEXT: .cfi_def_cfa_offset 48 848; CHECK-NEXT: pushq %rbx 849; CHECK-NEXT: .cfi_def_cfa_offset 56 850; CHECK-NEXT: .cfi_offset %rbx, -56 851; CHECK-NEXT: .cfi_offset %r12, -48 852; CHECK-NEXT: .cfi_offset %r13, -40 853; CHECK-NEXT: .cfi_offset %r14, -32 854; CHECK-NEXT: .cfi_offset %r15, -24 855; CHECK-NEXT: .cfi_offset %rbp, -16 856; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 857; CHECK-NEXT: #APP 858; CHECK-NEXT: nop 859; CHECK-NEXT: #NO_APP 860; CHECK-NEXT: cvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte 
Folded Reload 861; CHECK-NEXT: popq %rbx 862; CHECK-NEXT: .cfi_def_cfa_offset 48 863; CHECK-NEXT: popq %r12 864; CHECK-NEXT: .cfi_def_cfa_offset 40 865; CHECK-NEXT: popq %r13 866; CHECK-NEXT: .cfi_def_cfa_offset 32 867; CHECK-NEXT: popq %r14 868; CHECK-NEXT: .cfi_def_cfa_offset 24 869; CHECK-NEXT: popq %r15 870; CHECK-NEXT: .cfi_def_cfa_offset 16 871; CHECK-NEXT: popq %rbp 872; CHECK-NEXT: .cfi_def_cfa_offset 8 873; CHECK-NEXT: retq 874 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 875 %2 = sitofp i32 %a0 to float 876 %3 = insertelement <4 x float> %b0, float %2, i64 0 877 ret <4 x float> %3 878} 879 880define float @stack_fold_cvtsi642ss(i64 %a0) { 881; CHECK-LABEL: stack_fold_cvtsi642ss: 882; CHECK: # %bb.0: 883; CHECK-NEXT: pushq %rbp 884; CHECK-NEXT: .cfi_def_cfa_offset 16 885; CHECK-NEXT: pushq %r15 886; CHECK-NEXT: .cfi_def_cfa_offset 24 887; CHECK-NEXT: pushq %r14 888; CHECK-NEXT: .cfi_def_cfa_offset 32 889; CHECK-NEXT: pushq %r13 890; CHECK-NEXT: .cfi_def_cfa_offset 40 891; CHECK-NEXT: pushq %r12 892; CHECK-NEXT: .cfi_def_cfa_offset 48 893; CHECK-NEXT: pushq %rbx 894; CHECK-NEXT: .cfi_def_cfa_offset 56 895; CHECK-NEXT: .cfi_offset %rbx, -56 896; CHECK-NEXT: .cfi_offset %r12, -48 897; CHECK-NEXT: .cfi_offset %r13, -40 898; CHECK-NEXT: .cfi_offset %r14, -32 899; CHECK-NEXT: .cfi_offset %r15, -24 900; CHECK-NEXT: .cfi_offset %rbp, -16 901; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 902; CHECK-NEXT: #APP 903; CHECK-NEXT: nop 904; CHECK-NEXT: #NO_APP 905; CHECK-NEXT: xorps %xmm0, %xmm0 906; CHECK-NEXT: cvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload 907; CHECK-NEXT: popq %rbx 908; CHECK-NEXT: .cfi_def_cfa_offset 48 909; CHECK-NEXT: popq %r12 910; CHECK-NEXT: .cfi_def_cfa_offset 40 911; CHECK-NEXT: popq %r13 912; CHECK-NEXT: .cfi_def_cfa_offset 32 913; CHECK-NEXT: popq %r14 914; CHECK-NEXT: .cfi_def_cfa_offset 24 915; 
CHECK-NEXT: popq %r15 916; CHECK-NEXT: .cfi_def_cfa_offset 16 917; CHECK-NEXT: popq %rbp 918; CHECK-NEXT: .cfi_def_cfa_offset 8 919; CHECK-NEXT: retq 920 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 921 %2 = sitofp i64 %a0 to float 922 ret float %2 923} 924 925define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0, <4 x float> %b0) { 926; CHECK-LABEL: stack_fold_cvtsi642ss_int: 927; CHECK: # %bb.0: 928; CHECK-NEXT: pushq %rbp 929; CHECK-NEXT: .cfi_def_cfa_offset 16 930; CHECK-NEXT: pushq %r15 931; CHECK-NEXT: .cfi_def_cfa_offset 24 932; CHECK-NEXT: pushq %r14 933; CHECK-NEXT: .cfi_def_cfa_offset 32 934; CHECK-NEXT: pushq %r13 935; CHECK-NEXT: .cfi_def_cfa_offset 40 936; CHECK-NEXT: pushq %r12 937; CHECK-NEXT: .cfi_def_cfa_offset 48 938; CHECK-NEXT: pushq %rbx 939; CHECK-NEXT: .cfi_def_cfa_offset 56 940; CHECK-NEXT: .cfi_offset %rbx, -56 941; CHECK-NEXT: .cfi_offset %r12, -48 942; CHECK-NEXT: .cfi_offset %r13, -40 943; CHECK-NEXT: .cfi_offset %r14, -32 944; CHECK-NEXT: .cfi_offset %r15, -24 945; CHECK-NEXT: .cfi_offset %rbp, -16 946; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 947; CHECK-NEXT: #APP 948; CHECK-NEXT: nop 949; CHECK-NEXT: #NO_APP 950; CHECK-NEXT: cvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload 951; CHECK-NEXT: popq %rbx 952; CHECK-NEXT: .cfi_def_cfa_offset 48 953; CHECK-NEXT: popq %r12 954; CHECK-NEXT: .cfi_def_cfa_offset 40 955; CHECK-NEXT: popq %r13 956; CHECK-NEXT: .cfi_def_cfa_offset 32 957; CHECK-NEXT: popq %r14 958; CHECK-NEXT: .cfi_def_cfa_offset 24 959; CHECK-NEXT: popq %r15 960; CHECK-NEXT: .cfi_def_cfa_offset 16 961; CHECK-NEXT: popq %rbp 962; CHECK-NEXT: .cfi_def_cfa_offset 8 963; CHECK-NEXT: retq 964 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 965 %2 = sitofp i64 %a0 to float 966 %3 = 
insertelement <4 x float> %b0, float %2, i64 0 967 ret <4 x float> %3 968} 969 970define double @stack_fold_cvtss2sd(float %a0) minsize { 971; CHECK-LABEL: stack_fold_cvtss2sd: 972; CHECK: # %bb.0: 973; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 974; CHECK-NEXT: #APP 975; CHECK-NEXT: nop 976; CHECK-NEXT: #NO_APP 977; CHECK-NEXT: cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 978; CHECK-NEXT: retq 979 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 980 %2 = fpext float %a0 to double 981 ret double %2 982} 983 984define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize { 985; CHECK-LABEL: stack_fold_cvtss2sd_int: 986; CHECK: # %bb.0: 987; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 988; CHECK-NEXT: #APP 989; CHECK-NEXT: nop 990; CHECK-NEXT: #NO_APP 991; CHECK-NEXT: xorps %xmm0, %xmm0 992; CHECK-NEXT: cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 993; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 994; CHECK-NEXT: retq 995 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 996 %2 = extractelement <4 x float> %a0, i64 0 997 %3 = fpext float %2 to double 998 %4 = insertelement <2 x double> zeroinitializer, double %3, i64 0 999 ret <2 x double> %4 1000} 1001 1002; TODO stack_fold_cvtss2si 1003 1004define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) { 1005; CHECK-LABEL: stack_fold_cvtss2si_int: 1006; CHECK: # %bb.0: 1007; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1008; CHECK-NEXT: #APP 1009; CHECK-NEXT: nop 1010; CHECK-NEXT: #NO_APP 1011; CHECK-NEXT: cvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload 1012; CHECK-NEXT: retq 1013 %1 = tail call <2 x i64> asm sideeffect 
"nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1014 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 1015 ret i32 %2 1016} 1017declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 1018 1019; TODO stack_fold_cvtss2si64 1020 1021define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) { 1022; CHECK-LABEL: stack_fold_cvtss2si64_int: 1023; CHECK: # %bb.0: 1024; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1025; CHECK-NEXT: #APP 1026; CHECK-NEXT: nop 1027; CHECK-NEXT: #NO_APP 1028; CHECK-NEXT: cvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload 1029; CHECK-NEXT: retq 1030 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1031 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) 1032 ret i64 %2 1033} 1034declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 1035 1036define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) { 1037; CHECK-LABEL: stack_fold_cvttpd2dq: 1038; CHECK: # %bb.0: 1039; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1040; CHECK-NEXT: #APP 1041; CHECK-NEXT: nop 1042; CHECK-NEXT: #NO_APP 1043; CHECK-NEXT: cvttpd2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1044; CHECK-NEXT: retq 1045 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1046 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) 1047 ret <4 x i32> %2 1048} 1049declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone 1050 1051define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) { 1052; CHECK-LABEL: stack_fold_cvttps2dq: 1053; CHECK: # %bb.0: 1054; CHECK-NEXT: movaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1055; CHECK-NEXT: #APP 1056; CHECK-NEXT: nop 1057; CHECK-NEXT: #NO_APP 1058; CHECK-NEXT: cvttps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1059; CHECK-NEXT: retq 1060 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1061 %2 = fptosi <4 x float> %a0 to <4 x i32> 1062 ret <4 x i32> %2 1063} 1064 1065define i32 @stack_fold_cvttsd2si(double %a0) { 1066; CHECK-LABEL: stack_fold_cvttsd2si: 1067; CHECK: # %bb.0: 1068; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1069; CHECK-NEXT: #APP 1070; CHECK-NEXT: nop 1071; CHECK-NEXT: #NO_APP 1072; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 8-byte Folded Reload 1073; CHECK-NEXT: retq 1074 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1075 %2 = fptosi double %a0 to i32 1076 ret i32 %2 1077} 1078 1079define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) { 1080; CHECK-LABEL: stack_fold_cvttsd2si_int: 1081; CHECK: # %bb.0: 1082; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1083; CHECK-NEXT: #APP 1084; CHECK-NEXT: nop 1085; CHECK-NEXT: #NO_APP 1086; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload 1087; CHECK-NEXT: retq 1088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1089 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) 1090 ret i32 %2 1091} 1092declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone 1093 1094define i64 @stack_fold_cvttsd2si64(double %a0) { 1095; CHECK-LABEL: stack_fold_cvttsd2si64: 1096; CHECK: # %bb.0: 1097; CHECK-NEXT: movsd %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1098; CHECK-NEXT: #APP 1099; CHECK-NEXT: nop 1100; CHECK-NEXT: #NO_APP 1101; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload 1102; CHECK-NEXT: retq 1103 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1104 %2 = fptosi double %a0 to i64 1105 ret i64 %2 1106} 1107 1108define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) { 1109; CHECK-LABEL: stack_fold_cvttsd2si64_int: 1110; CHECK: # %bb.0: 1111; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1112; CHECK-NEXT: #APP 1113; CHECK-NEXT: nop 1114; CHECK-NEXT: #NO_APP 1115; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload 1116; CHECK-NEXT: retq 1117 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1118 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) 1119 ret i64 %2 1120} 1121declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone 1122 1123define i32 @stack_fold_cvttss2si(float %a0) { 1124; CHECK-LABEL: stack_fold_cvttss2si: 1125; CHECK: # %bb.0: 1126; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1127; CHECK-NEXT: #APP 1128; CHECK-NEXT: nop 1129; CHECK-NEXT: #NO_APP 1130; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 1131; CHECK-NEXT: retq 1132 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1133 %2 = fptosi float %a0 to i32 1134 ret i32 %2 1135} 1136 1137define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) { 1138; CHECK-LABEL: stack_fold_cvttss2si_int: 1139; CHECK: # %bb.0: 1140; CHECK-NEXT: movaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1141; CHECK-NEXT: #APP 1142; CHECK-NEXT: nop 1143; CHECK-NEXT: #NO_APP 1144; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload 1145; CHECK-NEXT: retq 1146 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1147 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 1148 ret i32 %2 1149} 1150declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 1151 1152define i64 @stack_fold_cvttss2si64(float %a0) { 1153; CHECK-LABEL: stack_fold_cvttss2si64: 1154; CHECK: # %bb.0: 1155; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1156; CHECK-NEXT: #APP 1157; CHECK-NEXT: nop 1158; CHECK-NEXT: #NO_APP 1159; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload 1160; CHECK-NEXT: retq 1161 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1162 %2 = fptosi float %a0 to i64 1163 ret i64 %2 1164} 1165 1166define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) { 1167; CHECK-LABEL: stack_fold_cvttss2si64_int: 1168; CHECK: # %bb.0: 1169; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1170; CHECK-NEXT: #APP 1171; CHECK-NEXT: nop 1172; CHECK-NEXT: #NO_APP 1173; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload 1174; CHECK-NEXT: retq 1175 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1176 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) 1177 ret i64 %2 1178} 1179declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone 1180 1181define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> 
%a1) { 1182; CHECK-LABEL: stack_fold_divpd: 1183; CHECK: # %bb.0: 1184; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1185; CHECK-NEXT: #APP 1186; CHECK-NEXT: nop 1187; CHECK-NEXT: #NO_APP 1188; CHECK-NEXT: divpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1189; CHECK-NEXT: retq 1190 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1191 %2 = fdiv <2 x double> %a0, %a1 1192 ret <2 x double> %2 1193} 1194 1195define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) { 1196; CHECK-LABEL: stack_fold_divps: 1197; CHECK: # %bb.0: 1198; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1199; CHECK-NEXT: #APP 1200; CHECK-NEXT: nop 1201; CHECK-NEXT: #NO_APP 1202; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1203; CHECK-NEXT: retq 1204 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1205 %2 = fdiv <4 x float> %a0, %a1 1206 ret <4 x float> %2 1207} 1208 1209define double @stack_fold_divsd(double %a0, double %a1) { 1210; CHECK-LABEL: stack_fold_divsd: 1211; CHECK: # %bb.0: 1212; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1213; CHECK-NEXT: #APP 1214; CHECK-NEXT: nop 1215; CHECK-NEXT: #NO_APP 1216; CHECK-NEXT: divsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload 1217; CHECK-NEXT: retq 1218 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1219 %2 = fdiv double %a0, %a1 1220 ret double %2 1221} 1222 1223define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) { 1224; CHECK-LABEL: stack_fold_divsd_int: 1225; CHECK: # %bb.0: 1226; CHECK-NEXT: movaps 
%xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1227; CHECK-NEXT: #APP 1228; CHECK-NEXT: nop 1229; CHECK-NEXT: #NO_APP 1230; CHECK-NEXT: divsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1231; CHECK-NEXT: retq 1232 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1233 %2 = extractelement <2 x double> %a0, i32 0 1234 %3 = extractelement <2 x double> %a1, i32 0 1235 %4 = fdiv double %2, %3 1236 %5 = insertelement <2 x double> %a0, double %4, i32 0 1237 ret <2 x double> %5 1238} 1239declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone 1240 1241define float @stack_fold_divss(float %a0, float %a1) { 1242; CHECK-LABEL: stack_fold_divss: 1243; CHECK: # %bb.0: 1244; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1245; CHECK-NEXT: #APP 1246; CHECK-NEXT: nop 1247; CHECK-NEXT: #NO_APP 1248; CHECK-NEXT: divss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1249; CHECK-NEXT: retq 1250 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1251 %2 = fdiv float %a0, %a1 1252 ret float %2 1253} 1254 1255define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) { 1256; CHECK-LABEL: stack_fold_divss_int: 1257; CHECK: # %bb.0: 1258; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1259; CHECK-NEXT: #APP 1260; CHECK-NEXT: nop 1261; CHECK-NEXT: #NO_APP 1262; CHECK-NEXT: divss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1263; CHECK-NEXT: retq 1264 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1265 %2 = extractelement <4 x float> %a0, i32 0 1266 %3 = extractelement <4 x float> %a1, 
i32 0 1267 %4 = fdiv float %2, %3 1268 %5 = insertelement <4 x float> %a0, float %4, i32 0 1269 ret <4 x float> %5 1270} 1271declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone 1272 1273define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) { 1274; CHECK-LABEL: stack_fold_dppd: 1275; CHECK: # %bb.0: 1276; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1277; CHECK-NEXT: #APP 1278; CHECK-NEXT: nop 1279; CHECK-NEXT: #NO_APP 1280; CHECK-NEXT: dppd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1281; CHECK-NEXT: retq 1282 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1283 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) 1284 ret <2 x double> %2 1285} 1286declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 1287 1288define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) { 1289; CHECK-LABEL: stack_fold_dpps: 1290; CHECK: # %bb.0: 1291; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1292; CHECK-NEXT: #APP 1293; CHECK-NEXT: nop 1294; CHECK-NEXT: #NO_APP 1295; CHECK-NEXT: dpps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1296; CHECK-NEXT: retq 1297 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1298 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) 1299 ret <4 x float> %2 1300} 1301declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 1302 1303define i32 @stack_fold_extractps(<4 x float> %a0, <4 x float> %a1) { 1304; CHECK-LABEL: stack_fold_extractps: 1305; CHECK: # %bb.0: 1306; CHECK-NEXT: pushq %rbp 1307; CHECK-NEXT: .cfi_def_cfa_offset 
16 1308; CHECK-NEXT: pushq %r15 1309; CHECK-NEXT: .cfi_def_cfa_offset 24 1310; CHECK-NEXT: pushq %r14 1311; CHECK-NEXT: .cfi_def_cfa_offset 32 1312; CHECK-NEXT: pushq %r13 1313; CHECK-NEXT: .cfi_def_cfa_offset 40 1314; CHECK-NEXT: pushq %r12 1315; CHECK-NEXT: .cfi_def_cfa_offset 48 1316; CHECK-NEXT: pushq %rbx 1317; CHECK-NEXT: .cfi_def_cfa_offset 56 1318; CHECK-NEXT: .cfi_offset %rbx, -56 1319; CHECK-NEXT: .cfi_offset %r12, -48 1320; CHECK-NEXT: .cfi_offset %r13, -40 1321; CHECK-NEXT: .cfi_offset %r14, -32 1322; CHECK-NEXT: .cfi_offset %r15, -24 1323; CHECK-NEXT: .cfi_offset %rbp, -16 1324; CHECK-NEXT: addps %xmm1, %xmm0 1325; CHECK-NEXT: extractps $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1326; CHECK-NEXT: #APP 1327; CHECK-NEXT: nop 1328; CHECK-NEXT: #NO_APP 1329; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1330; CHECK-NEXT: popq %rbx 1331; CHECK-NEXT: .cfi_def_cfa_offset 48 1332; CHECK-NEXT: popq %r12 1333; CHECK-NEXT: .cfi_def_cfa_offset 40 1334; CHECK-NEXT: popq %r13 1335; CHECK-NEXT: .cfi_def_cfa_offset 32 1336; CHECK-NEXT: popq %r14 1337; CHECK-NEXT: .cfi_def_cfa_offset 24 1338; CHECK-NEXT: popq %r15 1339; CHECK-NEXT: .cfi_def_cfa_offset 16 1340; CHECK-NEXT: popq %rbp 1341; CHECK-NEXT: .cfi_def_cfa_offset 8 1342; CHECK-NEXT: retq 1343 ; fadd forces execution domain 1344 %1 = fadd <4 x float> %a0, %a1 1345 %2 = extractelement <4 x float> %1, i32 1 1346 %3 = bitcast float %2 to i32 1347 %4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 1348 ret i32 %3 1349} 1350 1351define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) { 1352; CHECK-LABEL: stack_fold_haddpd: 1353; CHECK: # %bb.0: 1354; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1355; CHECK-NEXT: #APP 1356; CHECK-NEXT: nop 1357; CHECK-NEXT: #NO_APP 1358; CHECK-NEXT: haddpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded 
Reload 1359; CHECK-NEXT: retq 1360 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1361 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) 1362 ret <2 x double> %2 1363} 1364declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 1365 1366define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) { 1367; CHECK-LABEL: stack_fold_haddps: 1368; CHECK: # %bb.0: 1369; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1370; CHECK-NEXT: #APP 1371; CHECK-NEXT: nop 1372; CHECK-NEXT: #NO_APP 1373; CHECK-NEXT: haddps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1374; CHECK-NEXT: retq 1375 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1376 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 1377 ret <4 x float> %2 1378} 1379declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 1380 1381define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) { 1382; CHECK-LABEL: stack_fold_hsubpd: 1383; CHECK: # %bb.0: 1384; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1385; CHECK-NEXT: #APP 1386; CHECK-NEXT: nop 1387; CHECK-NEXT: #NO_APP 1388; CHECK-NEXT: hsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1389; CHECK-NEXT: retq 1390 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1391 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 1392 ret <2 x double> %2 1393} 1394declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind 
readnone 1395 1396define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 1397; CHECK-LABEL: stack_fold_hsubps: 1398; CHECK: # %bb.0: 1399; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1400; CHECK-NEXT: #APP 1401; CHECK-NEXT: nop 1402; CHECK-NEXT: #NO_APP 1403; CHECK-NEXT: hsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1404; CHECK-NEXT: retq 1405 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1406 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 1407 ret <4 x float> %2 1408} 1409declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 1410 1411define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 1412; CHECK-LABEL: stack_fold_insertps: 1413; CHECK: # %bb.0: 1414; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1415; CHECK-NEXT: #APP 1416; CHECK-NEXT: nop 1417; CHECK-NEXT: #NO_APP 1418; CHECK-NEXT: insertps $17, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1419; CHECK-NEXT: # xmm0 = zero,mem[0],xmm0[2,3] 1420; CHECK-NEXT: retq 1421 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1422 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 1423 ret <4 x float> %2 1424} 1425declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 1426 1427define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 { 1428; CHECK-LABEL: stack_fold_maxpd: 1429; CHECK: # %bb.0: 1430; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1431; CHECK-NEXT: #APP 1432; CHECK-NEXT: nop 1433; CHECK-NEXT: #NO_APP 1434; CHECK-NEXT: maxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
; CHECK-LABEL: stack_fold_maxpd_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}

define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_maxps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
; CHECK-LABEL: stack_fold_maxps_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}

define double @stack_fold_maxsd(double %a0, double %a1) #0 {
; CHECK-LABEL: stack_fold_maxsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
; CHECK-LABEL: stack_fold_maxsd_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_maxsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_maxss(float %a0, float %a1) #0 {
; CHECK-LABEL: stack_fold_maxss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
; CHECK-LABEL: stack_fold_maxss_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_maxss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_minpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
; CHECK-LABEL: stack_fold_minpd_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}

define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_minps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
; CHECK-LABEL: stack_fold_minps_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}

define double @stack_fold_minsd(double %a0, double %a1) #0 {
; CHECK-LABEL: stack_fold_minsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp olt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
; CHECK-LABEL: stack_fold_minsd_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp olt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: stack_fold_minsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_minss(float %a0, float %a1) #0 {
; CHECK-LABEL: stack_fold_minss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp olt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
; CHECK-LABEL: stack_fold_minss_commutable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp olt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: stack_fold_minss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_movddup:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movddup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %2
}
; TODO stack_fold_movhpd (load / store)
; TODO stack_fold_movhps (load / store)

; TODO stack_fold_movlpd (load / store)
; TODO stack_fold_movlps (load / store)

define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_movshdup:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %2
}

define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_movsldup:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movsldup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0,0,2,2]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %2
}

define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_mulpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mulpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_mulps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mulps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_mulsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_mulsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_mulsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fmul double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}

define float @stack_fold_mulss(float %a0, float %a1) {
; CHECK-LABEL: stack_fold_mulss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_mulss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fmul float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}

define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_orpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    orpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    addpd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = or <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_orps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    addps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = or <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

; TODO stack_fold_rcpps

define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_rcpps_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    rcpps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rcpss

define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0, <4 x float> %a1) optsize {
; CHECK-LABEL: stack_fold_rcpss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    rcpss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a1)
  %3 = extractelement <4 x float> %2, i32 0
  %4 = insertelement <4 x float> %a0, float %3, i32 0
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)

define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_roundpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    roundpd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_roundps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    roundps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define double @stack_fold_roundsd(double %a0) optsize {
; CHECK-LABEL: stack_fold_roundsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    roundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.floor.f64(double %a0)
  ret double %2
}
declare double @llvm.floor.f64(double) nounwind readnone

define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
; CHECK-LABEL: stack_fold_roundsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    roundsd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define float @stack_fold_roundss(float %a0) minsize {
; CHECK-LABEL: stack_fold_roundss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    roundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.floor.f32(float %a0)
  ret float %2
}
declare float @llvm.floor.f32(float) nounwind readnone

define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
; CHECK-LABEL: stack_fold_roundss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    roundss $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

; TODO stack_fold_rsqrtps

define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_rsqrtps_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    rsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rsqrtss

define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
; CHECK-LABEL: stack_fold_rsqrtss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    rsqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a1)
  %3 = extractelement <4 x float> %2, i32 0
  %4 = insertelement <4 x float> %a0, float %3, i32 0
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)

define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_shufpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    shufpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[1],mem[0]
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    addpd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_shufps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    shufps $200, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0,2],mem[0,3]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
  ret <4 x float> %2
}

define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_sqrtpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    sqrtpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
  ret <2 x double> %2
}

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_sqrtps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    sqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
  ret <4 x float> %2
}

define double @stack_fold_sqrtsd(double %a0) optsize {
; CHECK-LABEL: stack_fold_sqrtsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    sqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.sqrt.f64(double %a0)
  ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
; CHECK-LABEL: stack_fold_sqrtsd_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    sqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = call double @llvm.sqrt.f64(double %2)
  %4 = insertelement <2 x double> %a1, double %3, i64 0
  %5 = extractelement <2 x double> %4, i32 0
  %6 = insertelement <2 x double> %a0, double %5, i32 0
  ret <2 x double> %6
}

define float @stack_fold_sqrtss(float %a0) minsize {
; CHECK-LABEL: stack_fold_sqrtss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.sqrt.f32(float %a0)
  ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
; CHECK-LABEL: stack_fold_sqrtss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = call float @llvm.sqrt.f32(float %2)
  %4 = insertelement <4 x float> %a1, float %3, i64 0
  %5 = extractelement <4 x float> %4, i32 0
  %6 = insertelement <4 x float> %a0, float %5, i32 0
  ret <4 x float> %6
}

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_subpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    subpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: stack_fold_subps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    subps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
; CHECK-LABEL: stack_fold_subsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    subsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: stack_fold_subsd_int:
; CHECK:       # %bb.0:
2254; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2255; CHECK-NEXT: #APP 2256; CHECK-NEXT: nop 2257; CHECK-NEXT: #NO_APP 2258; CHECK-NEXT: subsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2259; CHECK-NEXT: retq 2260 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2261 %2 = extractelement <2 x double> %a0, i32 0 2262 %3 = extractelement <2 x double> %a1, i32 0 2263 %4 = fsub double %2, %3 2264 %5 = insertelement <2 x double> %a0, double %4, i32 0 2265 ret <2 x double> %5 2266} 2267 2268define float @stack_fold_subss(float %a0, float %a1) { 2269; CHECK-LABEL: stack_fold_subss: 2270; CHECK: # %bb.0: 2271; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2272; CHECK-NEXT: #APP 2273; CHECK-NEXT: nop 2274; CHECK-NEXT: #NO_APP 2275; CHECK-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 2276; CHECK-NEXT: retq 2277 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2278 %2 = fsub float %a0, %a1 2279 ret float %2 2280} 2281 2282define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) { 2283; CHECK-LABEL: stack_fold_subss_int: 2284; CHECK: # %bb.0: 2285; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2286; CHECK-NEXT: #APP 2287; CHECK-NEXT: nop 2288; CHECK-NEXT: #NO_APP 2289; CHECK-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2290; CHECK-NEXT: retq 2291 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2292 %2 = extractelement <4 x float> %a0, i32 0 2293 %3 = extractelement <4 x float> %a1, i32 0 2294 %4 = fsub float %2, %3 2295 %5 = insertelement <4 x float> 
%a0, float %4, i32 0 2296 ret <4 x float> %5 2297} 2298 2299define i32 @stack_fold_ucomisd(double %a0, double %a1) { 2300; CHECK-LABEL: stack_fold_ucomisd: 2301; CHECK: # %bb.0: 2302; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2303; CHECK-NEXT: #APP 2304; CHECK-NEXT: nop 2305; CHECK-NEXT: #NO_APP 2306; CHECK-NEXT: xorl %eax, %eax 2307; CHECK-NEXT: ucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload 2308; CHECK-NEXT: sete %al 2309; CHECK-NEXT: leal -1(%rax,%rax), %eax 2310; CHECK-NEXT: retq 2311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2312 %2 = fcmp ueq double %a0, %a1 2313 %3 = select i1 %2, i32 1, i32 -1 2314 ret i32 %3 2315} 2316 2317define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) { 2318; CHECK-LABEL: stack_fold_ucomisd_int: 2319; CHECK: # %bb.0: 2320; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2321; CHECK-NEXT: #APP 2322; CHECK-NEXT: nop 2323; CHECK-NEXT: #NO_APP 2324; CHECK-NEXT: ucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2325; CHECK-NEXT: setnp %al 2326; CHECK-NEXT: sete %cl 2327; CHECK-NEXT: andb %al, %cl 2328; CHECK-NEXT: movzbl %cl, %eax 2329; CHECK-NEXT: retq 2330 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2331 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 2332 ret i32 %2 2333} 2334declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 2335 2336define i32 @stack_fold_ucomiss(float %a0, float %a1) { 2337; CHECK-LABEL: stack_fold_ucomiss: 2338; CHECK: # %bb.0: 2339; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2340; CHECK-NEXT: #APP 2341; CHECK-NEXT: nop 2342; CHECK-NEXT: #NO_APP 2343; CHECK-NEXT: xorl %eax, 
%eax 2344; CHECK-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 2345; CHECK-NEXT: sete %al 2346; CHECK-NEXT: leal -1(%rax,%rax), %eax 2347; CHECK-NEXT: retq 2348 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2349 %2 = fcmp ueq float %a0, %a1 2350 %3 = select i1 %2, i32 1, i32 -1 2351 ret i32 %3 2352} 2353 2354define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) { 2355; CHECK-LABEL: stack_fold_ucomiss_int: 2356; CHECK: # %bb.0: 2357; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2358; CHECK-NEXT: #APP 2359; CHECK-NEXT: nop 2360; CHECK-NEXT: #NO_APP 2361; CHECK-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2362; CHECK-NEXT: setnp %al 2363; CHECK-NEXT: sete %cl 2364; CHECK-NEXT: andb %al, %cl 2365; CHECK-NEXT: movzbl %cl, %eax 2366; CHECK-NEXT: retq 2367 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2368 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 2369 ret i32 %2 2370} 2371declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 2372 2373define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) { 2374; CHECK-LABEL: stack_fold_unpckhpd: 2375; CHECK: # %bb.0: 2376; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2377; CHECK-NEXT: #APP 2378; CHECK-NEXT: nop 2379; CHECK-NEXT: #NO_APP 2380; CHECK-NEXT: unpckhpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2381; CHECK-NEXT: # xmm0 = xmm0[1],mem[1] 2382; CHECK-NEXT: xorpd %xmm1, %xmm1 2383; CHECK-NEXT: addpd %xmm1, %xmm0 2384; CHECK-NEXT: retq 2385 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2386 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 2387 ; fadd forces execution domain 2388 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 2389 ret <2 x double> %3 2390} 2391 2392define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) { 2393; CHECK-LABEL: stack_fold_unpckhps: 2394; CHECK: # %bb.0: 2395; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2396; CHECK-NEXT: #APP 2397; CHECK-NEXT: nop 2398; CHECK-NEXT: #NO_APP 2399; CHECK-NEXT: unpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2400; CHECK-NEXT: # xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] 2401; CHECK-NEXT: xorps %xmm1, %xmm1 2402; CHECK-NEXT: addps %xmm1, %xmm0 2403; CHECK-NEXT: retq 2404 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2405 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 2406 ; fadd forces execution domain 2407 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 2408 ret <4 x float> %3 2409} 2410 2411define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) { 2412; CHECK-LABEL: stack_fold_unpcklpd: 2413; CHECK: # %bb.0: 2414; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2415; CHECK-NEXT: #APP 2416; CHECK-NEXT: nop 2417; CHECK-NEXT: #NO_APP 2418; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2419; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2420; CHECK-NEXT: xorpd %xmm1, %xmm1 2421; CHECK-NEXT: addpd %xmm1, %xmm0 2422; CHECK-NEXT: retq 2423 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 
2424 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 2425 ; fadd forces execution domain 2426 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 2427 ret <2 x double> %3 2428} 2429 2430define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) { 2431; CHECK-LABEL: stack_fold_unpcklps: 2432; CHECK: # %bb.0: 2433; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2434; CHECK-NEXT: #APP 2435; CHECK-NEXT: nop 2436; CHECK-NEXT: #NO_APP 2437; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2438; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 2439; CHECK-NEXT: xorps %xmm1, %xmm1 2440; CHECK-NEXT: addps %xmm1, %xmm0 2441; CHECK-NEXT: retq 2442 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2443 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 2444 ; fadd forces execution domain 2445 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 2446 ret <4 x float> %3 2447} 2448 2449define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) { 2450; CHECK-LABEL: stack_fold_xorpd: 2451; CHECK: # %bb.0: 2452; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2453; CHECK-NEXT: #APP 2454; CHECK-NEXT: nop 2455; CHECK-NEXT: #NO_APP 2456; CHECK-NEXT: xorpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2457; CHECK-NEXT: xorpd %xmm1, %xmm1 2458; CHECK-NEXT: addpd %xmm1, %xmm0 2459; CHECK-NEXT: retq 2460 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2461 %2 = bitcast <2 x double> %a0 to <2 x i64> 2462 %3 = bitcast <2 x double> %a1 to <2 x i64> 2463 %4 = xor <2 x i64> %2, %3 2464 %5 = bitcast <2 x i64> %4 to <2 x double> 2465 ; fadd 
forces execution domain 2466 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 2467 ret <2 x double> %6 2468} 2469 2470define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) { 2471; CHECK-LABEL: stack_fold_xorps: 2472; CHECK: # %bb.0: 2473; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2474; CHECK-NEXT: #APP 2475; CHECK-NEXT: nop 2476; CHECK-NEXT: #NO_APP 2477; CHECK-NEXT: xorps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2478; CHECK-NEXT: xorps %xmm1, %xmm1 2479; CHECK-NEXT: addps %xmm1, %xmm0 2480; CHECK-NEXT: retq 2481 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 2482 %2 = bitcast <4 x float> %a0 to <2 x i64> 2483 %3 = bitcast <4 x float> %a1 to <2 x i64> 2484 %4 = xor <2 x i64> %2, %3 2485 %5 = bitcast <2 x i64> %4 to <4 x float> 2486 ; fadd forces execution domain 2487 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 2488 ret <4 x float> %6 2489} 2490 2491declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) 2492declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) 2493 2494attributes #0 = { "unsafe-fp-math"="false" } 2495attributes #1 = { "unsafe-fp-math"="true" } 2496