; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s

; Stack-folding tests for MMX-register instructions reached through the
; llvm.x86.mmx / llvm.x86.sse / llvm.x86.ssse3 intrinsics on <1 x i64>.
;
; Every test has the same shape: an inline-asm "nop" carrying a register
; clobber list, followed by a single intrinsic call. The autogenerated
; assertions record whether the operand is spilled around the asm block and
; then folded into the instruction as a memory operand ("Folded Reload").
;
; NOTE(review): for the two-operand tests (stack_fold_packssdw through
; stack_fold_pshufb) the recorded output is the register-register form, emitted
; before the asm block, with no spill and no folded reload. As written those
; tests do not exercise folding; confirm whether that is intended before
; regenerating the assertions.

; --- Conversions between XMM and MMX registers -------------------------------

define <1 x i64> @stack_fold_cvtpd2pi(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define <2 x double> @stack_fold_cvtpi2pd(<1 x i64> %a0) {
; CHECK-LABEL: stack_fold_cvtpi2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %a0) nounwind readnone
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone

define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, <1 x i64> %a1) {
; CHECK-LABEL: stack_fold_cvtpi2ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, <1 x i64> %a1) nounwind readnone
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_cvtps2pi(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone

define <1 x i64> @stack_fold_cvttpd2pi(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvttpd2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define <1 x i64> @stack_fold_cvttps2pi(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvttps2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone

; --- MMX -> GPR stores (all listed general-purpose registers clobbered) ------

; TODO stack_fold_movd_load

; padd forces execution on mmx
define i32 @stack_fold_movd_store(<1 x i64> %a0) nounwind {
; CHECK-LABEL: stack_fold_movd_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    paddb %mm0, %mm0
; CHECK-NEXT:    movd %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %1 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a0, <1 x i64> %a0)
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %3
}

; TODO stack_fold_movq_load

; padd forces execution on mmx
define i64 @stack_fold_movq_store(<1 x i64> %a0) nounwind {
; CHECK-LABEL: stack_fold_movq_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    paddb %mm0, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %1 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a0, <1 x i64> %a0)
  %2 = bitcast <1 x i64> %1 to i64
  %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %2
}

; --- One-operand MMX operations ----------------------------------------------

define <1 x i64> @stack_fold_pabsb(<1 x i64> %a0) {
; CHECK-LABEL: stack_fold_pabsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pabsd(<1 x i64> %a0) {
; CHECK-LABEL: stack_fold_pabsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pabsw(<1 x i64> %a0) {
; CHECK-LABEL: stack_fold_pabsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %a0) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone

; --- Two-operand MMX operations ----------------------------------------------

define <1 x i64> @stack_fold_packssdw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_packssdw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    packssdw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_packsswb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_packsswb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    packsswb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_packuswb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_packuswb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    packuswb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddd(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddd %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddq(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddq %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddsb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddsb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddusb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddusb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddusb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddusw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddusw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddusw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_paddw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_paddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    paddw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_palignr(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_palignr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    palignr $1, %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %a, <1 x i64> %b, i8 1) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone

define <1 x i64> @stack_fold_pand(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pand:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pand %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pandn(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pandn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pandn %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pavgb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pavgb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pavgb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pavgw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pavgw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pavgw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pcmpeqb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pcmpeqb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pcmpeqb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pcmpeqd(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pcmpeqd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pcmpeqd %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pcmpeqw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pcmpeqw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pcmpeqw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pcmpgtb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pcmpgtb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pcmpgtb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pcmpgtd(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pcmpgtd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pcmpgtd %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pcmpgtw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pcmpgtw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pcmpgtw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_phaddd(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_phaddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    phaddd %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_phaddsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_phaddsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    phaddsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_phaddw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_phaddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    phaddw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_phsubd(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_phsubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    phsubd %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_phsubsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_phsubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    phsubsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_phsubw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_phsubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    phsubw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone

; TODO stack_fold_pinsrw

define <1 x i64> @stack_fold_pmaddubsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmaddubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmaddubsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmaddwd(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmaddwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmaddwd %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmaxsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmaxsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmaxsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmaxub(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmaxub:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmaxub %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pminsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pminsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pminsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pminub(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pminub:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pminub %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmulhrsw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmulhrsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmulhrsw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmulhuw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmulhuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmulhuw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmulhw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmulhw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmulhw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmullw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmullw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmullw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pmuludq(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pmuludq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pmuludq %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_por(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_por:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    por %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_psadbw(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_psadbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    psadbw %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pshufb(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: stack_fold_pshufb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %mm0
; CHECK-NEXT:    movq %rdi, %mm1
; CHECK-NEXT:    pshufb %mm0, %mm1
; CHECK-NEXT:    movq %mm1, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone

define <1 x i64> @stack_fold_pshufw(<1 x i64> %a) {
; CHECK-LABEL: stack_fold_pshufw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mem[1,0,0,0]
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    retq
  %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %a, i8 1) nounwind readnone
  ret <1 x i64> %2
}
declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone

950define <1 x i64> @stack_fold_psignb(<1 x i64> %a0, <1 x i64> %a1) { 951; CHECK-LABEL: stack_fold_psignb: 952; CHECK: # %bb.0: 953; CHECK-NEXT: movq %rsi, %mm0 954; CHECK-NEXT: movq %rdi, %mm1 955; CHECK-NEXT: psignb %mm0, %mm1 956; CHECK-NEXT: movq %mm1, %rax 957; CHECK-NEXT: #APP 958; CHECK-NEXT: nop 959; CHECK-NEXT: 
#NO_APP 960; CHECK-NEXT: retq 961 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 962 %2 = call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone 963 ret <1 x i64> %2 964} 965declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone 966 967define <1 x i64> @stack_fold_psignd(<1 x i64> %a0, <1 x i64> %a1) { 968; CHECK-LABEL: stack_fold_psignd: 969; CHECK: # %bb.0: 970; CHECK-NEXT: movq %rsi, %mm0 971; CHECK-NEXT: movq %rdi, %mm1 972; CHECK-NEXT: psignd %mm0, %mm1 973; CHECK-NEXT: movq %mm1, %rax 974; CHECK-NEXT: #APP 975; CHECK-NEXT: nop 976; CHECK-NEXT: #NO_APP 977; CHECK-NEXT: retq 978 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 979 %2 = call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone 980 ret <1 x i64> %2 981} 982declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone 983 984define <1 x i64> @stack_fold_psignw(<1 x i64> %a0, <1 x i64> %a1) { 985; CHECK-LABEL: stack_fold_psignw: 986; CHECK: # %bb.0: 987; CHECK-NEXT: movq %rsi, %mm0 988; CHECK-NEXT: movq %rdi, %mm1 989; CHECK-NEXT: psignw %mm0, %mm1 990; CHECK-NEXT: movq %mm1, %rax 991; CHECK-NEXT: #APP 992; CHECK-NEXT: nop 993; CHECK-NEXT: #NO_APP 994; CHECK-NEXT: retq 995 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 996 %2 = call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %a0, <1 x i64> %a1) nounwind readnone 997 ret <1 x i64> %2 998} 999declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone 1000 1001define <1 x i64> @stack_fold_pslld(<1 x i64> %a, <1 x i64> %b) { 1002; CHECK-LABEL: stack_fold_pslld: 1003; CHECK: # %bb.0: 1004; CHECK-NEXT: movq %rsi, %mm0 1005; CHECK-NEXT: movq %rdi, %mm1 1006; CHECK-NEXT: pslld %mm0, %mm1 1007; CHECK-NEXT: movq %mm1, %rax 1008; CHECK-NEXT: #APP 1009; CHECK-NEXT: nop 1010; CHECK-NEXT: 
#NO_APP 1011; CHECK-NEXT: retq 1012 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1013 %2 = call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1014 ret <1 x i64> %2 1015} 1016declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone 1017 1018define <1 x i64> @stack_fold_psllq(<1 x i64> %a, <1 x i64> %b) { 1019; CHECK-LABEL: stack_fold_psllq: 1020; CHECK: # %bb.0: 1021; CHECK-NEXT: movq %rsi, %mm0 1022; CHECK-NEXT: movq %rdi, %mm1 1023; CHECK-NEXT: psllq %mm0, %mm1 1024; CHECK-NEXT: movq %mm1, %rax 1025; CHECK-NEXT: #APP 1026; CHECK-NEXT: nop 1027; CHECK-NEXT: #NO_APP 1028; CHECK-NEXT: retq 1029 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1030 %2 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1031 ret <1 x i64> %2 1032} 1033declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone 1034 1035define <1 x i64> @stack_fold_psllw(<1 x i64> %a, <1 x i64> %b) { 1036; CHECK-LABEL: stack_fold_psllw: 1037; CHECK: # %bb.0: 1038; CHECK-NEXT: movq %rsi, %mm0 1039; CHECK-NEXT: movq %rdi, %mm1 1040; CHECK-NEXT: psllw %mm0, %mm1 1041; CHECK-NEXT: movq %mm1, %rax 1042; CHECK-NEXT: #APP 1043; CHECK-NEXT: nop 1044; CHECK-NEXT: #NO_APP 1045; CHECK-NEXT: retq 1046 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1047 %2 = call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1048 ret <1 x i64> %2 1049} 1050declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone 1051 1052define <1 x i64> @stack_fold_psrad(<1 x i64> %a, <1 x i64> %b) { 1053; CHECK-LABEL: stack_fold_psrad: 1054; CHECK: # %bb.0: 1055; CHECK-NEXT: movq %rsi, %mm0 1056; CHECK-NEXT: movq %rdi, %mm1 1057; CHECK-NEXT: psrad %mm0, %mm1 1058; CHECK-NEXT: movq %mm1, %rax 1059; CHECK-NEXT: #APP 1060; CHECK-NEXT: nop 1061; 
CHECK-NEXT: #NO_APP 1062; CHECK-NEXT: retq 1063 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1064 %2 = call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1065 ret <1 x i64> %2 1066} 1067declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone 1068 1069define <1 x i64> @stack_fold_psraw(<1 x i64> %a, <1 x i64> %b) { 1070; CHECK-LABEL: stack_fold_psraw: 1071; CHECK: # %bb.0: 1072; CHECK-NEXT: movq %rsi, %mm0 1073; CHECK-NEXT: movq %rdi, %mm1 1074; CHECK-NEXT: psraw %mm0, %mm1 1075; CHECK-NEXT: movq %mm1, %rax 1076; CHECK-NEXT: #APP 1077; CHECK-NEXT: nop 1078; CHECK-NEXT: #NO_APP 1079; CHECK-NEXT: retq 1080 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1081 %2 = call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1082 ret <1 x i64> %2 1083} 1084declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone 1085 1086define <1 x i64> @stack_fold_psrld(<1 x i64> %a, <1 x i64> %b) { 1087; CHECK-LABEL: stack_fold_psrld: 1088; CHECK: # %bb.0: 1089; CHECK-NEXT: movq %rsi, %mm0 1090; CHECK-NEXT: movq %rdi, %mm1 1091; CHECK-NEXT: psrld %mm0, %mm1 1092; CHECK-NEXT: movq %mm1, %rax 1093; CHECK-NEXT: #APP 1094; CHECK-NEXT: nop 1095; CHECK-NEXT: #NO_APP 1096; CHECK-NEXT: retq 1097 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1098 %2 = call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1099 ret <1 x i64> %2 1100} 1101declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone 1102 1103define <1 x i64> @stack_fold_psrlq(<1 x i64> %a, <1 x i64> %b) { 1104; CHECK-LABEL: stack_fold_psrlq: 1105; CHECK: # %bb.0: 1106; CHECK-NEXT: movq %rsi, %mm0 1107; CHECK-NEXT: movq %rdi, %mm1 1108; CHECK-NEXT: psrlq %mm0, %mm1 1109; CHECK-NEXT: movq %mm1, %rax 1110; CHECK-NEXT: #APP 1111; CHECK-NEXT: nop 
1112; CHECK-NEXT: #NO_APP 1113; CHECK-NEXT: retq 1114 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1115 %2 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1116 ret <1 x i64> %2 1117} 1118declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone 1119 1120define <1 x i64> @stack_fold_psrlw(<1 x i64> %a, <1 x i64> %b) { 1121; CHECK-LABEL: stack_fold_psrlw: 1122; CHECK: # %bb.0: 1123; CHECK-NEXT: movq %rsi, %mm0 1124; CHECK-NEXT: movq %rdi, %mm1 1125; CHECK-NEXT: psrlw %mm0, %mm1 1126; CHECK-NEXT: movq %mm1, %rax 1127; CHECK-NEXT: #APP 1128; CHECK-NEXT: nop 1129; CHECK-NEXT: #NO_APP 1130; CHECK-NEXT: retq 1131 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1132 %2 = call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1133 ret <1 x i64> %2 1134} 1135declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone 1136 1137define <1 x i64> @stack_fold_psubb(<1 x i64> %a, <1 x i64> %b) { 1138; CHECK-LABEL: stack_fold_psubb: 1139; CHECK: # %bb.0: 1140; CHECK-NEXT: movq %rsi, %mm0 1141; CHECK-NEXT: movq %rdi, %mm1 1142; CHECK-NEXT: psubb %mm0, %mm1 1143; CHECK-NEXT: movq %mm1, %rax 1144; CHECK-NEXT: #APP 1145; CHECK-NEXT: nop 1146; CHECK-NEXT: #NO_APP 1147; CHECK-NEXT: retq 1148 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1149 %2 = call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1150 ret <1 x i64> %2 1151} 1152declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone 1153 1154define <1 x i64> @stack_fold_psubd(<1 x i64> %a, <1 x i64> %b) { 1155; CHECK-LABEL: stack_fold_psubd: 1156; CHECK: # %bb.0: 1157; CHECK-NEXT: movq %rsi, %mm0 1158; CHECK-NEXT: movq %rdi, %mm1 1159; CHECK-NEXT: psubd %mm0, %mm1 1160; CHECK-NEXT: movq %mm1, %rax 1161; CHECK-NEXT: #APP 1162; CHECK-NEXT: 
nop 1163; CHECK-NEXT: #NO_APP 1164; CHECK-NEXT: retq 1165 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1166 %2 = call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1167 ret <1 x i64> %2 1168} 1169declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone 1170 1171define <1 x i64> @stack_fold_psubq(<1 x i64> %a, <1 x i64> %b) { 1172; CHECK-LABEL: stack_fold_psubq: 1173; CHECK: # %bb.0: 1174; CHECK-NEXT: movq %rsi, %mm0 1175; CHECK-NEXT: movq %rdi, %mm1 1176; CHECK-NEXT: psubq %mm0, %mm1 1177; CHECK-NEXT: movq %mm1, %rax 1178; CHECK-NEXT: #APP 1179; CHECK-NEXT: nop 1180; CHECK-NEXT: #NO_APP 1181; CHECK-NEXT: retq 1182 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1183 %2 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1184 ret <1 x i64> %2 1185} 1186declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone 1187 1188define <1 x i64> @stack_fold_psubsb(<1 x i64> %a, <1 x i64> %b) { 1189; CHECK-LABEL: stack_fold_psubsb: 1190; CHECK: # %bb.0: 1191; CHECK-NEXT: movq %rsi, %mm0 1192; CHECK-NEXT: movq %rdi, %mm1 1193; CHECK-NEXT: psubsb %mm0, %mm1 1194; CHECK-NEXT: movq %mm1, %rax 1195; CHECK-NEXT: #APP 1196; CHECK-NEXT: nop 1197; CHECK-NEXT: #NO_APP 1198; CHECK-NEXT: retq 1199 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1200 %2 = call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1201 ret <1 x i64> %2 1202} 1203declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone 1204 1205define <1 x i64> @stack_fold_psubsw(<1 x i64> %a, <1 x i64> %b) { 1206; CHECK-LABEL: stack_fold_psubsw: 1207; CHECK: # %bb.0: 1208; CHECK-NEXT: movq %rsi, %mm0 1209; CHECK-NEXT: movq %rdi, %mm1 1210; CHECK-NEXT: psubsw %mm0, %mm1 1211; CHECK-NEXT: movq %mm1, %rax 1212; CHECK-NEXT: #APP 1213; 
CHECK-NEXT: nop 1214; CHECK-NEXT: #NO_APP 1215; CHECK-NEXT: retq 1216 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1217 %2 = call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1218 ret <1 x i64> %2 1219} 1220declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone 1221 1222define <1 x i64> @stack_fold_psubusb(<1 x i64> %a, <1 x i64> %b) { 1223; CHECK-LABEL: stack_fold_psubusb: 1224; CHECK: # %bb.0: 1225; CHECK-NEXT: movq %rsi, %mm0 1226; CHECK-NEXT: movq %rdi, %mm1 1227; CHECK-NEXT: psubusb %mm0, %mm1 1228; CHECK-NEXT: movq %mm1, %rax 1229; CHECK-NEXT: #APP 1230; CHECK-NEXT: nop 1231; CHECK-NEXT: #NO_APP 1232; CHECK-NEXT: retq 1233 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1234 %2 = call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1235 ret <1 x i64> %2 1236} 1237declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone 1238 1239define <1 x i64> @stack_fold_psubusw(<1 x i64> %a, <1 x i64> %b) { 1240; CHECK-LABEL: stack_fold_psubusw: 1241; CHECK: # %bb.0: 1242; CHECK-NEXT: movq %rsi, %mm0 1243; CHECK-NEXT: movq %rdi, %mm1 1244; CHECK-NEXT: psubusw %mm0, %mm1 1245; CHECK-NEXT: movq %mm1, %rax 1246; CHECK-NEXT: #APP 1247; CHECK-NEXT: nop 1248; CHECK-NEXT: #NO_APP 1249; CHECK-NEXT: retq 1250 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1251 %2 = call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1252 ret <1 x i64> %2 1253} 1254declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone 1255 1256define <1 x i64> @stack_fold_psubw(<1 x i64> %a, <1 x i64> %b) { 1257; CHECK-LABEL: stack_fold_psubw: 1258; CHECK: # %bb.0: 1259; CHECK-NEXT: movq %rsi, %mm0 1260; CHECK-NEXT: movq %rdi, %mm1 1261; CHECK-NEXT: psubw %mm0, %mm1 1262; CHECK-NEXT: movq %mm1, %rax 
1263; CHECK-NEXT: #APP 1264; CHECK-NEXT: nop 1265; CHECK-NEXT: #NO_APP 1266; CHECK-NEXT: retq 1267 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1268 %2 = call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1269 ret <1 x i64> %2 1270} 1271declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone 1272 1273define <1 x i64> @stack_fold_punpckhbw(<1 x i64> %a, <1 x i64> %b) { 1274; CHECK-LABEL: stack_fold_punpckhbw: 1275; CHECK: # %bb.0: 1276; CHECK-NEXT: movq %rsi, %mm0 1277; CHECK-NEXT: movq %rdi, %mm1 1278; CHECK-NEXT: punpckhbw %mm0, %mm1 # mm1 = mm1[4],mm0[4],mm1[5],mm0[5],mm1[6],mm0[6],mm1[7],mm0[7] 1279; CHECK-NEXT: movq %mm1, %rax 1280; CHECK-NEXT: #APP 1281; CHECK-NEXT: nop 1282; CHECK-NEXT: #NO_APP 1283; CHECK-NEXT: retq 1284 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1285 %2 = call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1286 ret <1 x i64> %2 1287} 1288declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone 1289 1290define <1 x i64> @stack_fold_punpckhdq(<1 x i64> %a, <1 x i64> %b) { 1291; CHECK-LABEL: stack_fold_punpckhdq: 1292; CHECK: # %bb.0: 1293; CHECK-NEXT: movq %rsi, %mm0 1294; CHECK-NEXT: movq %rdi, %mm1 1295; CHECK-NEXT: punpckhdq %mm0, %mm1 # mm1 = mm1[1],mm0[1] 1296; CHECK-NEXT: movq %mm1, %rax 1297; CHECK-NEXT: #APP 1298; CHECK-NEXT: nop 1299; CHECK-NEXT: #NO_APP 1300; CHECK-NEXT: retq 1301 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1302 %2 = call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1303 ret <1 x i64> %2 1304} 1305declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone 1306 1307define <1 x i64> @stack_fold_punpckhwd(<1 x i64> %a, <1 x i64> %b) { 1308; CHECK-LABEL: stack_fold_punpckhwd: 1309; CHECK: # %bb.0: 
1310; CHECK-NEXT: movq %rsi, %mm0 1311; CHECK-NEXT: movq %rdi, %mm1 1312; CHECK-NEXT: punpckhwd %mm0, %mm1 # mm1 = mm1[2],mm0[2],mm1[3],mm0[3] 1313; CHECK-NEXT: movq %mm1, %rax 1314; CHECK-NEXT: #APP 1315; CHECK-NEXT: nop 1316; CHECK-NEXT: #NO_APP 1317; CHECK-NEXT: retq 1318 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1319 %2 = call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1320 ret <1 x i64> %2 1321} 1322declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone 1323 1324define <1 x i64> @stack_fold_punpcklbw(<1 x i64> %a, <1 x i64> %b) { 1325; CHECK-LABEL: stack_fold_punpcklbw: 1326; CHECK: # %bb.0: 1327; CHECK-NEXT: movq %rsi, %mm0 1328; CHECK-NEXT: movq %rdi, %mm1 1329; CHECK-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3] 1330; CHECK-NEXT: movq %mm1, %rax 1331; CHECK-NEXT: #APP 1332; CHECK-NEXT: nop 1333; CHECK-NEXT: #NO_APP 1334; CHECK-NEXT: retq 1335 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1336 %2 = call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1337 ret <1 x i64> %2 1338} 1339declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone 1340 1341define <1 x i64> @stack_fold_punpckldq(<1 x i64> %a, <1 x i64> %b) { 1342; CHECK-LABEL: stack_fold_punpckldq: 1343; CHECK: # %bb.0: 1344; CHECK-NEXT: movq %rsi, %mm0 1345; CHECK-NEXT: movq %rdi, %mm1 1346; CHECK-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] 1347; CHECK-NEXT: movq %mm1, %rax 1348; CHECK-NEXT: #APP 1349; CHECK-NEXT: nop 1350; CHECK-NEXT: #NO_APP 1351; CHECK-NEXT: retq 1352 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1353 %2 = call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1354 ret <1 x i64> %2 1355} 1356declare <1 x i64> 
@llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone 1357 1358define <1 x i64> @stack_fold_punpcklwd(<1 x i64> %a, <1 x i64> %b) { 1359; CHECK-LABEL: stack_fold_punpcklwd: 1360; CHECK: # %bb.0: 1361; CHECK-NEXT: movq %rsi, %mm0 1362; CHECK-NEXT: movq %rdi, %mm1 1363; CHECK-NEXT: punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1] 1364; CHECK-NEXT: movq %mm1, %rax 1365; CHECK-NEXT: #APP 1366; CHECK-NEXT: nop 1367; CHECK-NEXT: #NO_APP 1368; CHECK-NEXT: retq 1369 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1370 %2 = call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1371 ret <1 x i64> %2 1372} 1373declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone 1374 1375define <1 x i64> @stack_fold_pxor(<1 x i64> %a, <1 x i64> %b) { 1376; CHECK-LABEL: stack_fold_pxor: 1377; CHECK: # %bb.0: 1378; CHECK-NEXT: movq %rsi, %mm0 1379; CHECK-NEXT: movq %rdi, %mm1 1380; CHECK-NEXT: pxor %mm0, %mm1 1381; CHECK-NEXT: movq %mm1, %rax 1382; CHECK-NEXT: #APP 1383; CHECK-NEXT: nop 1384; CHECK-NEXT: #NO_APP 1385; CHECK-NEXT: retq 1386 %1 = tail call <1 x i64> asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1387 %2 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %a, <1 x i64> %b) nounwind readnone 1388 ret <1 x i64> %2 1389} 1390declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone 1391