; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck --check-prefix OPT %s

target triple = "aarch64"

; Most of the testing is covered by the lastb cases, but here we ensure that
; lasta with a predicate having no active lanes is treated as an alias to
; extracting the first vector element.
define i8 @lasta_extractelement_0(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lasta_extractelement_0(
; OPT-NEXT:    [[E0:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 0
; OPT-NEXT:    ret i8 [[E0]]
;
  %e0 = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %v)
  ret i8 %e0
}

; Most of the testing is covered by the lastb cases, but here we check the
; resulting extraction index is one more than the lastb case because lasta
; extracts the element after the last active.
define i8 @lasta_extractelement_8(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lasta_extractelement_8(
; OPT-NEXT:    [[E1:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 8
; OPT-NEXT:    ret i8 [[E1]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 8)
  %e1 = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e1
}

define i8 @lastb_extractelement_0(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_0(
; OPT-NEXT:    [[E0:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 0
; OPT-NEXT:    ret i8 [[E0]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 1)
  %e0 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e0
}

define i8 @lastb_extractelement_1(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_1(
; OPT-NEXT:    [[E1:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 1
; OPT-NEXT:    ret i8 [[E1]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 2)
  %e1 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e1
}

define i8 @lastb_extractelement_2(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_2(
; OPT-NEXT:    [[E2:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 2
; OPT-NEXT:    ret i8 [[E2]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 3)
  %e2 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e2
}

define i8 @lastb_extractelement_3(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_3(
; OPT-NEXT:    [[E3:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 3
; OPT-NEXT:    ret i8 [[E3]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 4)
  %e3 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e3
}

define i8 @lastb_extractelement_4(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_4(
; OPT-NEXT:    [[E4:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 4
; OPT-NEXT:    ret i8 [[E4]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 5)
  %e4 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e4
}

define i8 @lastb_extractelement_5(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_5(
; OPT-NEXT:    [[E5:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 5
; OPT-NEXT:    ret i8 [[E5]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 6)
  %e5 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e5
}

define i8 @lastb_extractelement_6(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_6(
; OPT-NEXT:    [[E6:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 6
; OPT-NEXT:    ret i8 [[E6]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 7)
  %e6 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e6
}

define i8 @lastb_extractelement_7(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_7(
; OPT-NEXT:    [[E7:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 7
; OPT-NEXT:    ret i8 [[E7]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 8)
  %e7 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e7
}

define i8 @lastb_extractelement_15(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_15(
; OPT-NEXT:    [[E15:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 15
; OPT-NEXT:    ret i8 [[E15]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 9)
  %e15 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e15
}

; No transformation because the requested element is beyond the range of the
; known minimum element count so we maintain the user's intentions.
define i8 @lastb_extractelement_31(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_31(
; OPT-NEXT:    [[PG:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
; OPT-NEXT:    [[E31:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[V:%.*]])
; OPT-NEXT:    ret i8 [[E31]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %e31 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e31
}

; No transformation because the ptrue's predicate pattern is bogus and thus
; nothing can be inferred about the result.
define i8 @lastb_extractelement_invalid_predicate_pattern(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_invalid_predicate_pattern(
; OPT-NEXT:    [[PG:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 15)
; OPT-NEXT:    [[E:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[V:%.*]])
; OPT-NEXT:    ret i8 [[E]]
;
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 15)
  %e = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %v)
  ret i8 %e
}

; Return the splatted value irrespective of the predicate.
define i8 @lasta_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
; OPT-LABEL: @lasta_splat(
; OPT-NEXT:    ret i8 [[A:%.*]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %last = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %splat)
  ret i8 %last
}

define i8 @lastb_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
; OPT-LABEL: @lastb_splat(
; OPT-NEXT:    ret i8 [[A:%.*]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %splat)
  ret i8 %last
}

; Check that we move the lastb before the binary operation so that the new binary op is scalar.
define i8 @lastb_binop_RHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_sdiv(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret i8 [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv <vscale x 16 x i8> %vector, %splat
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  ret i8 %last
}

define i8 @lastb_binop_RHS_splat_sdiv_exact(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_sdiv_exact(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = sdiv exact i8 [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret i8 [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv exact <vscale x 16 x i8> %vector, %splat
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  ret i8 %last
}

define float @lastb_binop_RHS_splat_fdiv_float_fast(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float_fast(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = fdiv fast float [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret float [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %binop = fdiv fast <vscale x 4 x float> %vector, %splat
  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
  ret float %last
}

define float @lastb_binop_RHS_splat_fdiv_float(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = fdiv float [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret float [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %binop = fdiv <vscale x 4 x float> %vector, %splat
  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
  ret float %last
}

define i8 @lastb_binop_LHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_sdiv(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret i8 [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv <vscale x 16 x i8> %splat, %vector
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  ret i8 %last
}

define i8 @lastb_binop_LHS_splat_sdiv_exact(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_sdiv_exact(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = sdiv exact i8 [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret i8 [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv exact <vscale x 16 x i8> %splat, %vector
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  ret i8 %last
}

define float @lastb_binop_LHS_splat_fdiv_float_fast(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float_fast(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = fdiv fast float [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret float [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %binop = fdiv fast <vscale x 4 x float> %splat, %vector
  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
  ret float %last
}

define float @lastb_binop_LHS_splat_fdiv_float(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[BINOP1:%.*]] = fdiv float [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret float [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %splat = shufflevector <vscale x 4 x float> %splat_insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %binop = fdiv <vscale x 4 x float> %splat, %vector
  %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %binop)
  ret float %last
}

define i8 @lastb_binop_LHS_RHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar1, i8 %scalar2) #0 {
; OPT-LABEL: @lastb_binop_LHS_RHS_splat_sdiv(
; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[SCALAR1:%.*]], [[SCALAR2:%.*]]
; OPT-NEXT:    ret i8 [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar1, i8 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %splat_insert2 = insertelement <vscale x 16 x i8> poison, i8 %scalar2, i8 0
  %splat2 = shufflevector <vscale x 16 x i8> %splat_insert2, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv <vscale x 16 x i8> %splat, %splat2
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  ret i8 %last
}

; Check that we don't do anything as the binary op has multiple uses.
define i8 @lastb_binop_nochange(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_nochange(
; OPT-NEXT:    [[SPLAT_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[SCALAR:%.*]], i64 0
; OPT-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPT-NEXT:    [[BINOP:%.*]] = sdiv <vscale x 16 x i8> [[VECTOR:%.*]], [[SPLAT]]
; OPT-NEXT:    [[LAST:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[BINOP]])
; OPT-NEXT:    call void @use(<vscale x 16 x i8> [[BINOP]])
; OPT-NEXT:    ret i8 [[LAST]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv <vscale x 16 x i8> %vector, %splat
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  call void @use(<vscale x 16 x i8> %binop)
  ret i8 %last
}

declare void @use(<vscale x 16 x i8>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)

attributes #0 = { "target-features"="+sve" }