; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,RV64
; Check that the default value enables the web folding and
; that it is at least 3.
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING,RV64

; Check that the scalable vector add/sub/mul operations are all promoted into their
; vw counterpart when the max web size for the folding is increased to 3.
; We need the web size to be at least 3 for the folding to happen, because
; %c has 3 uses.
; See https://github.com/llvm/llvm-project/pull/72340
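;
; For instance, in the first test below the ext web rooted at
;   %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i16>
; spans three users (mul, add, sub). With folding enabled, the whole web is
; kept at the narrow e8 element type and selected directly to
; vwmul.vv/vwadd.vv/vwsub.vv; without folding, each operand is first widened
; with vsext.vf2 and the arithmetic stays at e16 (vmul.vv/vadd.vv/vsub.vv).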

define <vscale x 2 x i16> @vwop_vscale_sext_i8i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i8i16_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vsext.vf2 v11, v8
; NO_FOLDING-NEXT:    vsext.vf2 v8, v9
; NO_FOLDING-NEXT:    vsext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i8i16_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vwmul.vv v11, v8, v9
; FOLDING-NEXT:    vwadd.vv v9, v8, v10
; FOLDING-NEXT:    vwsub.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i16>
  %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i16>
  %d2 = sext <vscale x 2 x i8> %b2 to <vscale x 2 x i16>
  %e = mul <vscale x 2 x i16> %c, %d
  %f = add <vscale x 2 x i16> %c, %d2
  %g = sub <vscale x 2 x i16> %c, %d2
  %h = or <vscale x 2 x i16> %e, %f
  %i = or <vscale x 2 x i16> %h, %g
  ret <vscale x 2 x i16> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i16i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i16i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle16.v v8, (a0)
; NO_FOLDING-NEXT:    vle16.v v9, (a1)
; NO_FOLDING-NEXT:    vle16.v v10, (a2)
; NO_FOLDING-NEXT:    vsext.vf2 v11, v8
; NO_FOLDING-NEXT:    vsext.vf2 v8, v9
; NO_FOLDING-NEXT:    vsext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i16i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle16.v v8, (a0)
; FOLDING-NEXT:    vle16.v v9, (a1)
; FOLDING-NEXT:    vle16.v v10, (a2)
; FOLDING-NEXT:    vwmul.vv v11, v8, v9
; FOLDING-NEXT:    vwadd.vv v9, v8, v10
; FOLDING-NEXT:    vwsub.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i16>, ptr %x
  %b = load <vscale x 2 x i16>, ptr %y
  %b2 = load <vscale x 2 x i16>, ptr %z
  %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i16> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vl1re32.v v8, (a0)
; NO_FOLDING-NEXT:    vl1re32.v v9, (a1)
; NO_FOLDING-NEXT:    vl1re32.v v10, (a2)
; NO_FOLDING-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; NO_FOLDING-NEXT:    vsext.vf2 v12, v8
; NO_FOLDING-NEXT:    vsext.vf2 v14, v9
; NO_FOLDING-NEXT:    vsext.vf2 v8, v10
; NO_FOLDING-NEXT:    vmul.vv v10, v12, v14
; NO_FOLDING-NEXT:    vadd.vv v14, v12, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v12, v8
; NO_FOLDING-NEXT:    vor.vv v10, v10, v14
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vl1re32.v v8, (a0)
; FOLDING-NEXT:    vl1re32.v v9, (a1)
; FOLDING-NEXT:    vl1re32.v v10, (a2)
; FOLDING-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vwmul.vv v12, v8, v9
; FOLDING-NEXT:    vwadd.vv v14, v8, v10
; FOLDING-NEXT:    vwsub.vv v16, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v12, v14
; FOLDING-NEXT:    vor.vv v8, v8, v16
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i32>, ptr %x
  %b = load <vscale x 2 x i32>, ptr %y
  %b2 = load <vscale x 2 x i32>, ptr %z
  %c = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %d = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %d2 = sext <vscale x 2 x i32> %b2 to <vscale x 2 x i64>
  %e = mul <vscale x 2 x i64> %c, %d
  %f = add <vscale x 2 x i64> %c, %d2
  %g = sub <vscale x 2 x i64> %c, %d2
  %h = or <vscale x 2 x i64> %e, %f
  %i = or <vscale x 2 x i64> %h, %g
  ret <vscale x 2 x i64> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT:    vlm.v v8, (a0)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vlm.v v10, (a2)
; NO_FOLDING-NEXT:    vmv.v.i v11, 0
; NO_FOLDING-NEXT:    li a0, 1
; NO_FOLDING-NEXT:    vmv.v.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; NO_FOLDING-NEXT:    vmv.v.v v0, v9
; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; NO_FOLDING-NEXT:    vmv.v.v v0, v10
; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
; NO_FOLDING-NEXT:    vmv.v.v v0, v8
; NO_FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT:    vlm.v v8, (a0)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vlm.v v10, (a2)
; FOLDING-NEXT:    vmv.v.i v11, 0
; FOLDING-NEXT:    li a0, 1
; FOLDING-NEXT:    vmv.v.v v0, v8
; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; FOLDING-NEXT:    vmv.v.v v0, v9
; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; FOLDING-NEXT:    vmv.v.v v0, v10
; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT:    vmul.vv v9, v12, v9
; FOLDING-NEXT:    vsub.vv v11, v12, v10
; FOLDING-NEXT:    vmv.v.v v0, v8
; FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; FOLDING-NEXT:    vor.vv v8, v9, v10
; FOLDING-NEXT:    vor.vv v8, v8, v11
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = sext <vscale x 2 x i1> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i1> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i1> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT:    vlm.v v8, (a0)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vlm.v v10, (a2)
; NO_FOLDING-NEXT:    vmv.v.i v11, 0
; NO_FOLDING-NEXT:    li a0, 1
; NO_FOLDING-NEXT:    vmv1r.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; NO_FOLDING-NEXT:    vmv1r.v v0, v9
; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; NO_FOLDING-NEXT:    vmv1r.v v0, v10
; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
; NO_FOLDING-NEXT:    vmv1r.v v0, v8
; NO_FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT:    vlm.v v8, (a0)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vlm.v v10, (a2)
; FOLDING-NEXT:    vmv.v.i v11, 0
; FOLDING-NEXT:    li a0, 1
; FOLDING-NEXT:    vmv1r.v v0, v8
; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
; FOLDING-NEXT:    vmv1r.v v0, v9
; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
; FOLDING-NEXT:    vmv1r.v v0, v10
; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT:    vmul.vv v9, v12, v9
; FOLDING-NEXT:    vsub.vv v11, v12, v10
; FOLDING-NEXT:    vmv1r.v v0, v8
; FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
; FOLDING-NEXT:    vor.vv v8, v9, v10
; FOLDING-NEXT:    vor.vv v8, v8, v11
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = sext <vscale x 2 x i1> %a to <vscale x 2 x i8>
  %d = sext <vscale x 2 x i1> %b to <vscale x 2 x i8>
  %d2 = sext <vscale x 2 x i1> %b2 to <vscale x 2 x i8>
  %e = mul <vscale x 2 x i8> %c, %d
  %f = add <vscale x 2 x i8> %c, %d2
  %g = sub <vscale x 2 x i8> %c, %d2
  %h = or <vscale x 2 x i8> %e, %f
  %i = or <vscale x 2 x i8> %h, %g
  ret <vscale x 2 x i8> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vsext.vf4 v11, v8
; NO_FOLDING-NEXT:    vsext.vf4 v8, v9
; NO_FOLDING-NEXT:    vsext.vf4 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vsext.vf2 v11, v8
; FOLDING-NEXT:    vsext.vf2 v8, v9
; FOLDING-NEXT:    vsext.vf2 v9, v10
; FOLDING-NEXT:    vwmul.vv v10, v11, v8
; FOLDING-NEXT:    vwadd.vv v8, v11, v9
; FOLDING-NEXT:    vwsub.vv v12, v11, v9
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i8> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i16> @vwop_vscale_zext_i8i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i8i16_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vzext.vf2 v11, v8
; NO_FOLDING-NEXT:    vzext.vf2 v8, v9
; NO_FOLDING-NEXT:    vzext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i8i16_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vwmulu.vv v11, v8, v9
; FOLDING-NEXT:    vwaddu.vv v9, v8, v10
; FOLDING-NEXT:    vwsubu.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = zext <vscale x 2 x i8> %a to <vscale x 2 x i16>
  %d = zext <vscale x 2 x i8> %b to <vscale x 2 x i16>
  %d2 = zext <vscale x 2 x i8> %b2 to <vscale x 2 x i16>
  %e = mul <vscale x 2 x i16> %c, %d
  %f = add <vscale x 2 x i16> %c, %d2
  %g = sub <vscale x 2 x i16> %c, %d2
  %h = or <vscale x 2 x i16> %e, %f
  %i = or <vscale x 2 x i16> %h, %g
  ret <vscale x 2 x i16> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i16i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i16i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle16.v v8, (a0)
; NO_FOLDING-NEXT:    vle16.v v9, (a1)
; NO_FOLDING-NEXT:    vle16.v v10, (a2)
; NO_FOLDING-NEXT:    vzext.vf2 v11, v8
; NO_FOLDING-NEXT:    vzext.vf2 v8, v9
; NO_FOLDING-NEXT:    vzext.vf2 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i16i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle16.v v8, (a0)
; FOLDING-NEXT:    vle16.v v9, (a1)
; FOLDING-NEXT:    vle16.v v10, (a2)
; FOLDING-NEXT:    vwmulu.vv v11, v8, v9
; FOLDING-NEXT:    vwaddu.vv v9, v8, v10
; FOLDING-NEXT:    vwsubu.vv v12, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v11, v9
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i16>, ptr %x
  %b = load <vscale x 2 x i16>, ptr %y
  %b2 = load <vscale x 2 x i16>, ptr %z
  %c = zext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i16> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vl1re32.v v8, (a0)
; NO_FOLDING-NEXT:    vl1re32.v v9, (a1)
; NO_FOLDING-NEXT:    vl1re32.v v10, (a2)
; NO_FOLDING-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; NO_FOLDING-NEXT:    vzext.vf2 v12, v8
; NO_FOLDING-NEXT:    vzext.vf2 v14, v9
; NO_FOLDING-NEXT:    vzext.vf2 v8, v10
; NO_FOLDING-NEXT:    vmul.vv v10, v12, v14
; NO_FOLDING-NEXT:    vadd.vv v14, v12, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v12, v8
; NO_FOLDING-NEXT:    vor.vv v10, v10, v14
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vl1re32.v v8, (a0)
; FOLDING-NEXT:    vl1re32.v v9, (a1)
; FOLDING-NEXT:    vl1re32.v v10, (a2)
; FOLDING-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vwmulu.vv v12, v8, v9
; FOLDING-NEXT:    vwaddu.vv v14, v8, v10
; FOLDING-NEXT:    vwsubu.vv v16, v8, v10
; FOLDING-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; FOLDING-NEXT:    vor.vv v8, v12, v14
; FOLDING-NEXT:    vor.vv v8, v8, v16
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i32>, ptr %x
  %b = load <vscale x 2 x i32>, ptr %y
  %b2 = load <vscale x 2 x i32>, ptr %z
  %c = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %d = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %d2 = zext <vscale x 2 x i32> %b2 to <vscale x 2 x i64>
  %e = mul <vscale x 2 x i64> %c, %d
  %f = add <vscale x 2 x i64> %c, %d2
  %g = sub <vscale x 2 x i64> %c, %d2
  %h = or <vscale x 2 x i64> %e, %f
  %i = or <vscale x 2 x i64> %h, %g
  ret <vscale x 2 x i64> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT:    vlm.v v0, (a0)
; NO_FOLDING-NEXT:    vlm.v v8, (a2)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vmv.v.i v10, 0
; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT:    vmv.v.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
; NO_FOLDING-NEXT:    vmv.v.v v0, v9
; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT:    vlm.v v0, (a0)
; FOLDING-NEXT:    vlm.v v8, (a2)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vmv.v.i v10, 0
; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT:    vmv.v.v v0, v8
; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; FOLDING-NEXT:    vadd.vv v10, v11, v8
; FOLDING-NEXT:    vsub.vv v8, v11, v8
; FOLDING-NEXT:    vmv.v.v v0, v9
; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i1> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i1> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT:    vlm.v v0, (a0)
; NO_FOLDING-NEXT:    vlm.v v8, (a2)
; NO_FOLDING-NEXT:    vlm.v v9, (a1)
; NO_FOLDING-NEXT:    vmv.v.i v10, 0
; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT:    vmv1r.v v0, v8
; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
; NO_FOLDING-NEXT:    vmv1r.v v0, v9
; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT:    vlm.v v0, (a0)
; FOLDING-NEXT:    vlm.v v8, (a2)
; FOLDING-NEXT:    vlm.v v9, (a1)
; FOLDING-NEXT:    vmv.v.i v10, 0
; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT:    vmv1r.v v0, v8
; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
; FOLDING-NEXT:    vadd.vv v10, v11, v8
; FOLDING-NEXT:    vsub.vv v8, v11, v8
; FOLDING-NEXT:    vmv1r.v v0, v9
; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = zext <vscale x 2 x i1> %a to <vscale x 2 x i8>
  %d = zext <vscale x 2 x i1> %b to <vscale x 2 x i8>
  %d2 = zext <vscale x 2 x i1> %b2 to <vscale x 2 x i8>
  %e = mul <vscale x 2 x i8> %c, %d
  %f = add <vscale x 2 x i8> %c, %d2
  %g = sub <vscale x 2 x i8> %c, %d2
  %h = or <vscale x 2 x i8> %e, %f
  %i = or <vscale x 2 x i8> %h, %g
  ret <vscale x 2 x i8> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
; NO_FOLDING:       # %bb.0:
; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT:    vle8.v v8, (a0)
; NO_FOLDING-NEXT:    vle8.v v9, (a1)
; NO_FOLDING-NEXT:    vle8.v v10, (a2)
; NO_FOLDING-NEXT:    vzext.vf4 v11, v8
; NO_FOLDING-NEXT:    vzext.vf4 v8, v9
; NO_FOLDING-NEXT:    vzext.vf4 v9, v10
; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
; NO_FOLDING-NEXT:    ret
;
; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
; FOLDING:       # %bb.0:
; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT:    vle8.v v8, (a0)
; FOLDING-NEXT:    vle8.v v9, (a1)
; FOLDING-NEXT:    vle8.v v10, (a2)
; FOLDING-NEXT:    vzext.vf2 v11, v8
; FOLDING-NEXT:    vzext.vf2 v8, v9
; FOLDING-NEXT:    vzext.vf2 v9, v10
; FOLDING-NEXT:    vwmulu.vv v10, v11, v8
; FOLDING-NEXT:    vwaddu.vv v8, v11, v9
; FOLDING-NEXT:    vwsubu.vv v12, v11, v9
; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT:    vor.vv v8, v10, v8
; FOLDING-NEXT:    vor.vv v8, v8, v12
; FOLDING-NEXT:    ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = zext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i8> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}



;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}