; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT

define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) {
; CHECK-LABEL: vdot_lane_s32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v11, 0
; CHECK-NEXT:    vnsrl.wi v9, v11, 16
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vwadd.vv v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vnsrl.wx v9, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %a = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %c = sext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = add nsw <4 x i32> %c, %d
  %z10 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
  %z11 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
  %y12 = add <2 x i32> %z10, %z11
  ret <2 x i32> %y12
}

declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
  <vscale x 2 x i16>,
  <vscale x 2 x i32>,
  <vscale x 2 x i16>,
  iXLen);

define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
; NOVLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
; NOVLOPT:       # %bb.0: # %entry
; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; NOVLOPT-NEXT:    vnsrl.wv v8, v10, v12
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
; VLOPT:       # %bb.0: # %entry
; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; VLOPT-NEXT:    vwadd.vv v10, v8, v9
; VLOPT-NEXT:    vnsrl.wv v8, v10, v12
; VLOPT-NEXT:    ret
entry:
  %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %v1 = add <vscale x 2 x i32> %c, %d
  %x = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
    <vscale x 2 x i16> undef,
    <vscale x 2 x i32> %v1,
    <vscale x 2 x i16> %z,
    iXLen %2)

  ret <vscale x 2 x i16> %x
}

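; The vnclip test below repeats the vnsrl pattern above through a rounding
; narrowing intrinsic: without the VL optimizer, the widening add runs at
; VLMAX and a second vsetvli is needed before the narrowing op, while with
; the optimizer enabled the checks expect vwadd.vv to run directly at the
; requested VL in a0, leaving only the csrwi that sets vxrm between the two
; vector instructions.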
declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
  <vscale x 2 x i16>,
  <vscale x 2 x i32>,
  <vscale x 2 x i16>,
  iXLen, iXLen);

define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
; NOVLOPT-LABEL: vnclip:
; NOVLOPT:       # %bb.0: # %entry
; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
; NOVLOPT-NEXT:    csrwi vxrm, 0
; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; NOVLOPT-NEXT:    vnclip.wv v8, v10, v12
; NOVLOPT-NEXT:    ret
;
; VLOPT-LABEL: vnclip:
; VLOPT:       # %bb.0: # %entry
; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; VLOPT-NEXT:    vwadd.vv v10, v8, v9
; VLOPT-NEXT:    csrwi vxrm, 0
; VLOPT-NEXT:    vnclip.wv v8, v10, v12
; VLOPT-NEXT:    ret
entry:
  %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %v1 = add <vscale x 2 x i32> %c, %d
  %x = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
    <vscale x 2 x i16> undef,
    <vscale x 2 x i32> %v1,
    <vscale x 2 x i16> %z,
    iXLen 0, iXLen %2)

  ret <vscale x 2 x i16> %x
}