; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
; Check that the default value of --riscv-lower-ext-max-web-size enables the
; web folding, i.e. that it is bigger than 3.
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING

define void @vfwmul_v2f16_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
; NO_FOLDING-LABEL: vfwmul_v2f16_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfadd.vv v11, v11, v9
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse32.v v10, (a0)
; NO_FOLDING-NEXT:    vse32.v v11, (a1)
; NO_FOLDING-NEXT:    vse32.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; ZVFH-LABEL: vfwmul_v2f16_multiple_users:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmul.vv v11, v8, v9
; ZVFH-NEXT:    vfwadd.vv v12, v8, v10
; ZVFH-NEXT:    vfwsub.vv v8, v9, v10
; ZVFH-NEXT:    vse32.v v11, (a0)
; ZVFH-NEXT:    vse32.v v12, (a1)
; ZVFH-NEXT:    vse32.v v8, (a2)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfwmul_v2f16_multiple_users:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v10, v11, v8
; ZVFHMIN-NEXT:    vfadd.vv v11, v11, v9
; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v10, (a0)
; ZVFHMIN-NEXT:    vse32.v v11, (a1)
; ZVFHMIN-NEXT:    vse32.v v8, (a2)
; ZVFHMIN-NEXT:    ret
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %d2 = fpext <2 x half> %b2 to <2 x float>
  %e = fmul <2 x float> %c, %d
  %f = fadd <2 x float> %c, %d2
  %g = fsub <2 x float> %d, %d2
  store <2 x float> %e, ptr %x
  store <2 x float> %f, ptr %y
  store <2 x float> %g, ptr %z
  ret void
}

define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfadd.vv v11, v11, v9
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v11, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwmul.vv v11, v8, v9
; FOLDING-NEXT:    vfwadd.vv v12, v8, v10
; FOLDING-NEXT:    vfwsub.vv v8, v9, v10
; FOLDING-NEXT:    vse64.v v11, (a0)
; FOLDING-NEXT:    vse64.v v12, (a1)
; FOLDING-NEXT:    vse64.v v8, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = fadd <2 x double> %c, %d2
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}

define void @vfwmacc_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2, <2 x double> %w) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v12, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v12, v8
; NO_FOLDING-NEXT:    vfmadd.vv v12, v9, v11
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v12, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwmul.vv v12, v8, v9
; FOLDING-NEXT:    vfwsub.vv v13, v9, v10
; FOLDING-NEXT:    vfwmacc.vv v11, v8, v10
; FOLDING-NEXT:    vse64.v v12, (a0)
; FOLDING-NEXT:    vse64.v v11, (a1)
; FOLDING-NEXT:    vse64.v v13, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d2, <2 x double> %w)
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}

; Negative test. We can't fold because the FMA addend is a user.
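; Here %d is both an extended value in the web and the wide addend of the
; llvm.fma call; vfwmacc.vv keeps its addend in the wide type, so the fpext
; feeding it must stay, and the FOLDING output below matches NO_FOLDING.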
define void @vfwmacc_v2f32_multiple_users_addend_user(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_multiple_users_addend_user:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmul.vv v10, v11, v8
; NO_FOLDING-NEXT:    vfmadd.vv v11, v9, v8
; NO_FOLDING-NEXT:    vfsub.vv v8, v8, v9
; NO_FOLDING-NEXT:    vse64.v v10, (a0)
; NO_FOLDING-NEXT:    vse64.v v11, (a1)
; NO_FOLDING-NEXT:    vse64.v v8, (a2)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_multiple_users_addend_user:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwcvt.f.f.v v11, v8
; FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; FOLDING-NEXT:    vfwcvt.f.f.v v9, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; FOLDING-NEXT:    vfmul.vv v10, v11, v8
; FOLDING-NEXT:    vfmadd.vv v11, v9, v8
; FOLDING-NEXT:    vfsub.vv v8, v8, v9
; FOLDING-NEXT:    vse64.v v10, (a0)
; FOLDING-NEXT:    vse64.v v11, (a1)
; FOLDING-NEXT:    vse64.v v8, (a2)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %d2 = fpext <2 x float> %b2 to <2 x double>
  %e = fmul <2 x double> %c, %d
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d2, <2 x double> %d)
  %g = fsub <2 x double> %d, %d2
  store <2 x double> %e, ptr %x
  store <2 x double> %f, ptr %y
  store <2 x double> %g, ptr %z
  ret void
}

; Negative test. We can't fold because the FMA addend is a user.
define void @vfwmacc_v2f32_addend_user(ptr %x, <2 x float> %a, <2 x float> %b) {
; NO_FOLDING-LABEL: vfwmacc_v2f32_addend_user:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v10, v8
; NO_FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; NO_FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; NO_FOLDING-NEXT:    vfmadd.vv v8, v10, v8
; NO_FOLDING-NEXT:    vse64.v v8, (a0)
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vfwmacc_v2f32_addend_user:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; FOLDING-NEXT:    vfwcvt.f.f.v v10, v8
; FOLDING-NEXT:    vfwcvt.f.f.v v8, v9
; FOLDING-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; FOLDING-NEXT:    vfmadd.vv v8, v10, v8
; FOLDING-NEXT:    vse64.v v8, (a0)
; FOLDING-NEXT:    ret
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %f = call <2 x double> @llvm.fma(<2 x double> %c, <2 x double> %d, <2 x double> %d)
  store <2 x double> %f, ptr %x
  ret void
}