; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avxifma < %s | FileCheck %s
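
; These tests check that the memory operand of the {vex}-encoded (AVX-IFMA)
; VPMADD52HUQ/VPMADD52LUQ instructions can be folded from a stack slot. In
; each test the inline asm clobbers xmm3-xmm15 and defines an xmm result, so
; the register allocator is forced to spill %a2 across the asm; the reload is
; then expected to fold directly into the instruction's memory operand. The
; _commuted variants swap the two multiplicand operands to confirm the fold
; still occurs via operand commutation.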

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <2 x i64> @stack_fold_vpmadd52huq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}

define <2 x i64> @stack_fold_vpmadd52huq_commuted(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %a0, <2 x i64> %a2, <2 x i64> %a1)
  ret <2 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52huq_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret <4 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52huq_256_commuted(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_256_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %a0, <4 x i64> %a2, <4 x i64> %a1)
  ret <4 x i64> %2
}

define <2 x i64> @stack_fold_vpmadd52luq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}

define <2 x i64> @stack_fold_vpmadd52luq_commuted(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %a0, <2 x i64> %a2, <2 x i64> %a1)
  ret <2 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52luq_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret <4 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52luq_256_commuted(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_256_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %a0, <4 x i64> %a2, <4 x i64> %a1)
  ret <4 x i64> %2
}