; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s

; Cost-model test for llvm.masked.load on AArch64 with +sve. It covers
; fixed-width vector types, scalable vector types, and zext/sext instructions
; whose operand is a scalable masked load.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; Masked loads of fixed-width vector types.
define void @fixed() {
; CHECK-LABEL: 'fixed'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
  ; Legal fixed-width integer types
  %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
  %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
  %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
  %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
  %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
  %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
  %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
  %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
  %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
  %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)

  ; Legal fixed-width floating point types
  %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
  %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
  %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
  %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
  %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
  %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)

  ; A couple of examples of illegal fixed-width types
  %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
  %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)

  ret void
}


; Masked loads of scalable vector types.
define void @scalable() {
; CHECK-LABEL: 'scalable'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i1 = call <vscale x 4 x i1> @llvm.masked.load.nxv4i1.p0(ptr undef, i32 16, <vscale x 4 x i1> undef, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
  ; Legal scalable integer types
  %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
  %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
  %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
  %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
  %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
  %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
  %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
  %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)

  ; Legal scalable floating point types
  %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
  %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
  %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
  %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
  %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
  %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)

  ; A couple of examples of illegal scalable types
  %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
  %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
  %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)

  ; Types that are legal, but for which we have no masked load/store lowering
  %nxv4i1 = call <vscale x 4 x i1> @llvm.masked.load.nxv4i1.p0(ptr undef, i32 16, <vscale x 4 x i1> undef, <vscale x 4 x i1> undef)

  ret void
}


; zext/sext instructions whose operand is a scalable masked load. Each extend
; gets its own load so that every load has exactly one user.
define void @scalable_ext_loads() {
; CHECK-LABEL: 'scalable_ext_loads'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %zext.nxv16i8to32 = zext <vscale x 16 x i8> %load.nxv16i8.2 to <vscale x 16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %zext.nxv16i8to64 = zext <vscale x 16 x i8> %load.nxv16i8.3 to <vscale x 16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv8i8to16 = zext <vscale x 8 x i8> %load.nxv8i8 to <vscale x 8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i8.2 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv8i8to32 = zext <vscale x 8 x i8> %load.nxv8i8.2 to <vscale x 8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i8.3 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %zext.nxv8i8to64 = zext <vscale x 8 x i8> %load.nxv8i8.3 to <vscale x 8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv4i8to32 = zext <vscale x 4 x i8> %load.nxv4i8 to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i8.2 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv4i8to64 = zext <vscale x 4 x i8> %load.nxv4i8.2 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i8to64 = zext <vscale x 2 x i8> %load.nxv2i8 to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %zext.nxv8i16to64 = zext <vscale x 8 x i16> %load.nxv8i16.2 to <vscale x 8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv4i16to32 = zext <vscale x 4 x i16> %load.nxv4i16 to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i16.2 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv4i16to64 = zext <vscale x 4 x i16> %load.nxv4i16.2 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i16to64 = zext <vscale x 2 x i16> %load.nxv2i16 to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i32to64 = zext <vscale x 2 x i32> %load.nxv2i32 to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext.nxv16i8to32 = sext <vscale x 16 x i8> %load2.nxv16i8.2 to <vscale x 16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext.nxv16i8to64 = sext <vscale x 16 x i8> %load2.nxv16i8.3 to <vscale x 16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv8i8to16 = sext <vscale x 8 x i8> %load2.nxv8i8 to <vscale x 8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i8.2 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv8i8to32 = sext <vscale x 8 x i8> %load2.nxv8i8.2 to <vscale x 8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i8.3 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext.nxv8i8to64 = sext <vscale x 8 x i8> %load2.nxv8i8.3 to <vscale x 8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv4i8to32 = sext <vscale x 4 x i8> %load2.nxv4i8 to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i8.2 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv4i8to64 = sext <vscale x 4 x i8> %load2.nxv4i8.2 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i8to64 = sext <vscale x 2 x i8> %load2.nxv2i8 to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext.nxv8i16to64 = sext <vscale x 8 x i16> %load2.nxv8i16.2 to <vscale x 8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv4i16to32 = sext <vscale x 4 x i16> %load2.nxv4i16 to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i16.2 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv4i16to64 = sext <vscale x 4 x i16> %load2.nxv4i16.2 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i16to64 = sext <vscale x 2 x i16> %load2.nxv2i16 to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i32to64 = sext <vscale x 2 x i32> %load2.nxv2i32 to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
  ; Zero-extends of masked loads
  %load.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16>
  %load.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %zext.nxv16i8to32 = zext <vscale x 16 x i8> %load.nxv16i8.2 to <vscale x 16 x i32>
  %load.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %zext.nxv16i8to64 = zext <vscale x 16 x i8> %load.nxv16i8.3 to <vscale x 16 x i64>
  %load.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %zext.nxv8i8to16 = zext <vscale x 8 x i8> %load.nxv8i8 to <vscale x 8 x i16>
  %load.nxv8i8.2 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %zext.nxv8i8to32 = zext <vscale x 8 x i8> %load.nxv8i8.2 to <vscale x 8 x i32>
  %load.nxv8i8.3 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %zext.nxv8i8to64 = zext <vscale x 8 x i8> %load.nxv8i8.3 to <vscale x 8 x i64>
  %load.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
  %zext.nxv4i8to32 = zext <vscale x 4 x i8> %load.nxv4i8 to <vscale x 4 x i32>
  %load.nxv4i8.2 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
  %zext.nxv4i8to64 = zext <vscale x 4 x i8> %load.nxv4i8.2 to <vscale x 4 x i64>
  %load.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
  %zext.nxv2i8to64 = zext <vscale x 2 x i8> %load.nxv2i8 to <vscale x 2 x i64>
  %load.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
  %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32>
  %load.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
  %zext.nxv8i16to64 = zext <vscale x 8 x i16> %load.nxv8i16.2 to <vscale x 8 x i64>
  %load.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
  %zext.nxv4i16to32 = zext <vscale x 4 x i16> %load.nxv4i16 to <vscale x 4 x i32>
  %load.nxv4i16.2 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
  %zext.nxv4i16to64 = zext <vscale x 4 x i16> %load.nxv4i16.2 to <vscale x 4 x i64>
  %load.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
  %zext.nxv2i16to64 = zext <vscale x 2 x i16> %load.nxv2i16 to <vscale x 2 x i64>
  %load.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
  %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64>
  %load.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
  %zext.nxv2i32to64 = zext <vscale x 2 x i32> %load.nxv2i32 to <vscale x 2 x i64>

  ; Sign-extends of masked loads
  %load2.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16>
  %load2.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %sext.nxv16i8to32 = sext <vscale x 16 x i8> %load2.nxv16i8.2 to <vscale x 16 x i32>
  %load2.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %sext.nxv16i8to64 = sext <vscale x 16 x i8> %load2.nxv16i8.3 to <vscale x 16 x i64>
  %load2.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %sext.nxv8i8to16 = sext <vscale x 8 x i8> %load2.nxv8i8 to <vscale x 8 x i16>
  %load2.nxv8i8.2 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %sext.nxv8i8to32 = sext <vscale x 8 x i8> %load2.nxv8i8.2 to <vscale x 8 x i32>
  %load2.nxv8i8.3 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
  %sext.nxv8i8to64 = sext <vscale x 8 x i8> %load2.nxv8i8.3 to <vscale x 8 x i64>
  %load2.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
  %sext.nxv4i8to32 = sext <vscale x 4 x i8> %load2.nxv4i8 to <vscale x 4 x i32>
  %load2.nxv4i8.2 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
  %sext.nxv4i8to64 = sext <vscale x 4 x i8> %load2.nxv4i8.2 to <vscale x 4 x i64>
  %load2.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
  %sext.nxv2i8to64 = sext <vscale x 2 x i8> %load2.nxv2i8 to <vscale x 2 x i64>
  %load2.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
  %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32>
  %load2.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
  %sext.nxv8i16to64 = sext <vscale x 8 x i16> %load2.nxv8i16.2 to <vscale x 8 x i64>
  %load2.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
  %sext.nxv4i16to32 = sext <vscale x 4 x i16> %load2.nxv4i16 to <vscale x 4 x i32>
  %load2.nxv4i16.2 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
  %sext.nxv4i16to64 = sext <vscale x 4 x i16> %load2.nxv4i16.2 to <vscale x 4 x i64>
  %load2.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
  %sext.nxv2i16to64 = sext <vscale x 2 x i16> %load2.nxv2i16 to <vscale x 2 x i64>
  %load2.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
  %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64>
  %load2.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
  %sext.nxv2i32to64 = sext <vscale x 2 x i32> %load2.nxv2i32 to <vscale x 2 x i64>

  ret void
}


; Fixed-width masked load intrinsics (plus the scalable i1 variant).
declare <2 x i8> @llvm.masked.load.v2i8.p0(ptr, i32, <2 x i1>, <2 x i8>)
declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
declare <2 x i16> @llvm.masked.load.v2i16.p0(ptr, i32, <2 x i1>, <2 x i16>)
declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>)
declare <2 x half> @llvm.masked.load.v2f16.p0(ptr, i32, <2 x i1>, <2 x half>)
declare <4 x half> @llvm.masked.load.v4f16.p0(ptr, i32, <4 x i1>, <4 x half>)
declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
declare <32 x half> @llvm.masked.load.v32f16.p0(ptr, i32, <32 x i1>, <32 x half>)
declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
declare <vscale x 4 x i1> @llvm.masked.load.nxv4i1.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i1>)


; Scalable masked load intrinsics.
declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x double>)