; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT

declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)

; Only 4 elements of %v are demanded by the second (tail-agnostic) vadd, so the
; VL optimizer can shrink the first vadd's VL from 5 to 4.
define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; NOVLOPT-LABEL: different_imm_vl_with_ta:
; NOVLOPT:       # %bb.0:
; NOVLOPT-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v10, v12
; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: different_imm_vl_with_ta:
; VLOPT:       # %bb.0:
; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; VLOPT-NEXT:    vadd.vv v8, v10, v12
; VLOPT-NEXT:    vadd.vv v8, v8, v10
; VLOPT-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; An AVL of -1 selects VLMAX, so the first vadd computes all elements; the VL
; optimizer can reduce it to the 4 elements its user demands.
define <vscale x 4 x i32> @vlmax_and_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; NOVLOPT-LABEL: vlmax_and_imm_vl_with_ta:
; NOVLOPT:       # %bb.0:
; NOVLOPT-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v10, v12
; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: vlmax_and_imm_vl_with_ta:
; VLOPT:       # %bb.0:
; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; VLOPT-NEXT:    vadd.vv v8, v10, v12
; VLOPT-NEXT:    vadd.vv v8, v8, v10
; VLOPT-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; Not beneficial to propagate VL since VL is larger on the use side.
define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta_larger_vl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 5)
  ret <vscale x 4 x i32> %w
}

; Not propagated: %vl1 is only known at runtime, so it cannot be proven to be
; no larger than the first vadd's immediate VL of 4.
define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_reg_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
  ret <vscale x 4 x i32> %w
}

; Not beneficial to propagate VL since VL is already one.
define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
  ret <vscale x 4 x i32> %w
}

; Propagate %vl2 from the last vadd back to the first, since %vl2 may be
; smaller than %vl1. This is still safe even if %vl2 is larger than %vl1,
; because the rest of the vector is undefined.
define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_tu:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v14, v10
; CHECK-NEXT:    vadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v8, v14, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; NOVLOPT-LABEL: different_imm_vl_with_tu:
; NOVLOPT:       # %bb.0:
; NOVLOPT-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; NOVLOPT-NEXT:    vmv2r.v v14, v10
; NOVLOPT-NEXT:    vadd.vv v14, v10, v12
; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; NOVLOPT-NEXT:    vadd.vv v8, v14, v10
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: different_imm_vl_with_tu:
; VLOPT:       # %bb.0:
; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; VLOPT-NEXT:    vmv2r.v v14, v10
; VLOPT-NEXT:    vadd.vv v14, v10, v12
; VLOPT-NEXT:    vadd.vv v8, v14, v10
; VLOPT-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_as_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v12, v8
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_as_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v12, v8
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; The first vwmacc's result is the tied passthru operand of the second, so its
; elements past VL are demanded and its VLMAX VL can't be reduced.
define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
; CHECK-LABEL: dont_optimize_tied_def:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
; CHECK-NEXT:    vwmacc.vv v8, v10, v11
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vwmacc.vv v8, v10, v11
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
  ret <vscale x 4 x i32> %2
}

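; A hypothetical extra case for illustration (function name is ours; assertions
; deliberately omitted and would need to be regenerated with
; utils/update_llc_test_checks.py): when both vadds already use the same
; immediate VL, there is nothing for the VL optimizer to shrink, so a single
; vsetivli would be expected with or without -riscv-enable-vl-optimizer.
define <vscale x 4 x i32> @same_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}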