; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux|goo)|extractelement" --version 2
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -S | FileCheck %s --check-prefixes=NEON_INTERLEAVE
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_OR_NEON_INTERLEAVE
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF_INTERLEAVE

; This file exercises loop vectorization of calls whose vector variants are
; declared (via "vector-function-abi-variant") with linear parameters.
;
; REVIEW NOTE - the fourth and sixth opt invocations above (check prefixes
; SVE_OR_NEON_INTERLEAVE and SVE_TF_INTERLEAVE) pass identical flags. If the
; former was meant to run without tail folding, drop the
; -prefer-predicate-over-epilogue option there and regenerate the assertions.

target triple = "aarch64-unknown-linux-gnu"

; A call whose argument can remain a scalar because it's sequential and only the
; starting value is required.
define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
; NEON-LABEL: define void @test_linear8
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linear8
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_linear8
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_linear8
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
  %data = call i64 @foo(ptr %gepb) #0
  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %data, ptr %gepa
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; A call taking a loaded i32 value plus a pointer that advances by one i32
; element per iteration; attribute group #1 offers variants with a vector first
; parameter and a linear (step 4) second parameter.
define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
; NEON-LABEL: define void @test_vector_linear4
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_vector_linear4
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_vector_linear4
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_vector_linear4
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_vector_linear4
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_vector_linear4
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
  %input = load i32, ptr %gepc, align 8
  %gepb = getelementptr i32, ptr %b, i64 %indvars.iv
  %data = call i32 @baz(i32 %input, ptr %gepb) #1
  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %data, ptr %gepa, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; The pointer passed to @foo advances by one i64 element per iteration, but the
; call uses attribute group #2, whose variants are declared with a linear step
; of 16. The recorded assertions contain only the scalar call.
define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
; NEON-LABEL: define void @test_linear8_bad_stride
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_linear8_bad_stride
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
  %data = call i64 @foo(ptr %gepb) #2
  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %data, ptr %gepa
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; The index into %b is doubled, so the pointer passed to @foo advances by two
; i64 elements per iteration, matching the linear step of 16 declared in
; attribute group #2.
define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
; NEON-LABEL: define void @test_linear16_wide_stride
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
;
; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
;
; SVE_TF-LABEL: define void @test_linear16_wide_stride
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %double = mul i64 %indvars.iv, 2
  %gepb = getelementptr i64, ptr %b, i64 %double
  %data = call i64 @foo(ptr %gepb) #2
  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
  store i64 %data, ptr %gepa
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; A call with two pointer arguments, one advancing by an i32 element and one by
; an i64 element per iteration; attribute group #3 declares both as linear.
define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
; NEON-LABEL: define void @test_linear4_linear8
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_linear4_linear8
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear4_linear8
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
  %data = call i32 @quux(ptr %gepc, ptr %gepb) #3
  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %data, ptr %gepa, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; A non-pointer linear argument: the truncated induction variable multiplied by
; 3, matching the linear step of 3 declared in attribute group #4. The recorded
; assertions for the two configurations without the predicate option pass lane 0
; of the widened value to the unmasked vector variant.
define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
; NEON-LABEL: define void @test_linear3_non_ptr
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
; NEON: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1:%.*]], i32 0
; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP2]])
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0
; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]])
; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0
; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP6]])
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[TMP13:%.*]] = extractelement <vscale x 4 x i32> [[TMP12:%.*]], i32 0
; SVE_OR_NEON: [[TMP14:%.*]] = call <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32 [[TMP13]])
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_linear3_non_ptr
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %little.iv = trunc i64 %indvars.iv to i32
  %trebled = mul i32 %little.iv, 3
  %data = call i32 @bar(i32 %trebled) #4
  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %data, ptr %gepa
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; Same shape as the previous test but with a negative step: the argument is the
; truncated induction variable multiplied by -5, matching the "ln5" (linear,
; negative 5) encoding in attribute group #5.
define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
; NEON: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1:%.*]], i32 0
; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP2]])
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0
; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]])
; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0
; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP6]])
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[TMP13:%.*]] = extractelement <vscale x 4 x i32> [[TMP12:%.*]], i32 0
; SVE_OR_NEON: [[TMP14:%.*]] = call <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32 [[TMP13]])
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %little.iv = trunc i64 %indvars.iv to i32
  %negstride = mul i32 %little.iv, -5
  %data = call i32 @bar(i32 %negstride) #5
  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %data, ptr %gepa
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; A void-returning call taking a loaded i64 value and an output pointer that
; advances by one i64 element per iteration (attribute group #6).
define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) {
; NEON-LABEL: define void @test_linear8_return_void
; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
; NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
;
; NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void
; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
; NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
;
; SVE_OR_NEON-LABEL: define void @test_linear8_return_void
; SVE_OR_NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR8:[0-9]+]]
;
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
;
; SVE_TF-LABEL: define void @test_linear8_return_void
; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
;
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_return_void
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gep.in = getelementptr i64, ptr %in, i64 %indvars.iv
  %num = load i64, ptr %gep.in, align 8
  %gep.out = getelementptr i64, ptr %out, i64 %indvars.iv
  call void @goo(i64 %num, ptr %gep.out) #6
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; Note: Vectorizing pointer arguments is currently disabled as LAA cannot detect
; aliasing from output/input pointers.

; scalar functions called from the loops above
declare i64 @foo(ptr)
declare i32 @baz(i32, ptr)
declare i32 @quux(ptr, ptr)
declare i32 @bar(i32)
declare void @goo(i64, ptr)

; neon vector variants of the scalar functions
declare <2 x i64> @vec_foo_linear8_nomask_neon(ptr)
declare <2 x i64> @vec_foo_linear16_nomask_neon(ptr)
declare <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32>, ptr)
declare <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr, ptr)
declare <4 x i32> @vec_bar_linear3_nomask_neon(i32)
declare <4 x i32> @vec_bar_linearn5_nomask_neon(i32)
declare void @vec_goo_linear8_nomask_neon(<2 x i64>, ptr)

; scalable vector variants of the scalar functions
declare <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr, <vscale x 2 x i1>)
declare <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr)
declare <vscale x 2 x i64> @vec_foo_linear16_nomask_sve(ptr)
declare <vscale x 4 x i32> @vec_baz_vector_linear4_nomask_sve(<vscale x 4 x i32>, ptr)
declare <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr, ptr, <vscale x 4 x i1>)
declare <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32)
declare <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32)
declare void @vec_goo_linear8_nomask_sve(<vscale x 2 x i64>, ptr)
declare void @vec_goo_linear8_mask_sve(<vscale x 2 x i64>, ptr, <vscale x 2 x i1>)

; Mappings from each scalar call to its vector variants. In the mangled names
; the letter after "_ZGV" selects the ISA ('n' for Advanced SIMD, 's' for SVE),
; followed by the mask flag (N unmasked, M masked), the vector length (a number,
; or x for scalable) and one token per parameter (v vector, l<step> linear).
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(vec_foo_linear8_nomask_sve),_ZGVsMxl8_foo(vec_foo_linear8_mask_sve),_ZGVnN2l8_foo(vec_foo_linear8_nomask_neon)" }
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(vec_baz_vector_linear4_nomask_sve),_ZGVnN4vl4_baz(vec_baz_vector_linear4_nomask_neon)" }
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(vec_foo_linear16_nomask_sve),_ZGVnN2l16_foo(vec_foo_linear16_nomask_neon)" }
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(vec_quux_linear4_linear8_mask_sve),_ZGVnN4l4l8_quux(vec_quux_linear4_linear8_nomask_neon)" }
attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(vec_bar_linear3_nomask_sve),_ZGVnN4l3_bar(vec_bar_linear3_nomask_neon)" }
attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(vec_bar_linearn5_nomask_sve),_ZGVnN4ln5_bar(vec_bar_linearn5_nomask_neon)" }
; The fixed-width variant of goo is an Advanced SIMD function, so its mangled
; name carries the 'n' ISA letter like every other neon mapping in this file.
attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl8_goo(vec_goo_linear8_nomask_sve),_ZGVsMxvl8_goo(vec_goo_linear8_mask_sve),_ZGVnN2vl8_goo(vec_goo_linear8_nomask_neon)" }