; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"

; The cmpne/ptrue pair applied to the sign-extended extract of a cmpeq result
; is redundant and should fold away, leaving just a punpklo chain; in the
; negative tests below the fold must not fire, so an extra AND remains.

define <vscale x 8 x i1> @masked_load_sext_i8i16(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

; This negative test ensures the two ptrues have the same vl
define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

; This negative test enforces that both predicates are ptrues
define <vscale x 8 x i1> @masked_load_sext_i8i16_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

define <vscale x 4 x i1> @masked_load_sext_i8i32(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

; This negative test ensures the two ptrues have the same vl
define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

; This negative test enforces that both predicates are ptrues
define <vscale x 4 x i1> @masked_load_sext_i8i32_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

define <vscale x 2 x i1> @masked_load_sext_i8i64(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

; This negative test ensures the two ptrues have the same vl
define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

; This negative test enforces that both predicates are ptrues
define <vscale x 2 x i1> @masked_load_sext_i8i64_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

; This negative test enforces that the ptrues have a specified vl
define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

; This negative test enforces that the ptrues have a specified vl
define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

; This negative test enforces that the ptrues have a specified vl
define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)

declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

attributes #0 = { "target-features"="+sve" }