; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
; Check that the default value enables the web folding and
; that it is bigger than 3.
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING

; Three half->float extensions shared by an fmul, an fadd and an fsub.
; With +zvfh and a max web size of 3 the extensions are expected to fold into
; vfwmul/vfwadd/vfwsub; with +zvfhmin, or with a max web size below 3, the
; explicit vfwcvt.f.f.v conversions are expected to remain.
define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
; NO_FOLDING-LABEL: vfwmul_v2f116_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfadd.vv v11, v11, v9
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse32.v v10, (a0)
; NO_FOLDING-NEXT:    vse32.v v11, (a1)
; NO_FOLDING-NEXT:    vse32.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; ZVFH-LABEL: vfwmul_v2f116_multiple_users:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmul.vv v11, v8, v9
; ZVFH-NEXT:    vfwadd.vv v12, v8, v10
; ZVFH-NEXT:    vfwsub.vv v8, v9, v10
; ZVFH-NEXT:    vse32.v v11, (a0)
; ZVFH-NEXT:    vse32.v v12, (a1)
; ZVFH-NEXT:    vse32.v v8, (a2)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfwmul_v2f116_multiple_users:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v10, v11, v8
; ZVFHMIN-NEXT:    vfadd.vv v11, v11, v9
; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v10, (a0)
; ZVFHMIN-NEXT:    vse32.v v11, (a1)
; ZVFHMIN-NEXT:    vse32.v v8, (a2)
; ZVFHMIN-NEXT:    ret
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %d2 = fpext <2 x half> %b2 to <2 x float>
  %e = fmul <2 x float> %c, %d
  %f = fadd <2 x float> %c, %d2
  %g = fsub <2 x float> %d, %d2
  store <2 x float> %e, ptr %x
  store <2 x float> %f, ptr %y
  store <2 x float> %g, ptr %z
  ret void
}

; Same web shape as above, but extending float->double. Folding into
; vfwmul/vfwadd/vfwsub is expected whenever web folding is enabled.
define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfadd.vv v11, v11, v9
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v11, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwmul.vv v11, v8, v9
; FOLDING-NEXT:    vfwadd.vv v12, v8, v10
; FOLDING-NEXT:    vfwsub.vv v8, v9, v10
; FOLDING-NEXT:    vse64.v v11, (a0)
; FOLDING-NEXT:    vse64.v v12, (a1)
; FOLDING-NEXT:    vse64.v v8, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = fadd <2 x double> %c, %d2
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}

; The extended values feed an fmul, an fma (as its two multiplicands, with an
; independent addend %w) and an fsub. When web folding is enabled the fma is
; expected to become vfwmacc.vv.
define void @vfwmacc_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2, <2 x double> %w) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v12, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v12, v8
; NO_FOLDING-NEXT:    vfmadd.vv v12, v9, v11
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v12, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwmul.vv v12, v8, v9
; FOLDING-NEXT:    vfwsub.vv v13, v9, v10
; FOLDING-NEXT:    vfwmacc.vv v11, v8, v10
; FOLDING-NEXT:    vse64.v v12, (a0)
; FOLDING-NEXT:    vse64.v v11, (a1)
; FOLDING-NEXT:    vse64.v v13, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d2, <2 x double> %w)
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}

; Negative test. We can't fold because the FMA addend is a user.
define void @vfwmacc_v2f32_multiple_users_addend_user(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_multiple_users_addend_user:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfmadd.vv v11, v9, v8
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v11, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_multiple_users_addend_user:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; FOLDING-NEXT:    vfmul.vv v10, v11, v8
; FOLDING-NEXT:    vfmadd.vv v11, v9, v8
; FOLDING-NEXT:    vfsub.vv v8, v8, v9
; FOLDING-NEXT:    vse64.v v10, (a0)
; FOLDING-NEXT:    vse64.v v11, (a1)
; FOLDING-NEXT:    vse64.v v8, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d2, <2 x double> %d)
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}

; Negative test. We can't fold because the FMA addend is a user.
define void @vfwmacc_v2f32_addend_user(ptr %x, <2 x float> %a, <2 x float> %b) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_addend_user:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v10, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmadd.vv v8, v10, v8
; NO_FOLDING-NEXT:    vse64.v v8, (a0)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_addend_user:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwcvt.f.f.v v10, v8
; FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; FOLDING-NEXT:    vfmadd.vv v8, v10, v8
; FOLDING-NEXT:    vse64.v v8, (a0)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d, <2 x double> %d)
  store <2 x double> %f, ptr %x
  ret void
}