; Test that fixed-width vector loads feeding llvm.vector.insert into an undef
; scalable vector, then bitcast to a predicate type, are folded into a single
; predicate load when the vector sizes match exactly for the declared vscale.
; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; vscale_range(1,1): <2 x i8> (16 bits) matches <vscale x 16 x i1> exactly.
define <vscale x 16 x i1> @pred_load_v2i8(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_v2i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <2 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; vscale_range(2,2): <4 x i8> (32 bits) matches <vscale x 16 x i1> exactly.
define <vscale x 16 x i1> @pred_load_v4i8(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_v4i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; vscale_range(4,4): <8 x i8> (64 bits) matches <vscale x 16 x i1> exactly.
define <vscale x 16 x i1> @pred_load_v8i8(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_v8i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <8 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Ensure the insertion point is at the load
define <vscale x 16 x i1> @pred_load_insertion_point(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_insertion_point(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: br label %bb1
; CHECK: bb1:
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
entry:
  %load = load <2 x i8>, ptr %addr, align 4
  br label %bb1

bb1:
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that too small of a vscale prevents optimization
define <vscale x 16 x i1> @pred_load_neg1(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that too large of a vscale prevents optimization
define <vscale x 16 x i1> @pred_load_neg2(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that a non-zero index prevents optimization
define <vscale x 16 x i1> @pred_load_neg3(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that differing vscale min/max prevents optimization
define <vscale x 16 x i1> @pred_load_neg4(ptr %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that insertion into a non-undef vector prevents optimization
define <vscale x 16 x i1> @pred_load_neg5(ptr %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)

attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
attributes #3 = { "target-features"="+sve" vscale_range(2,4) }