; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve %s -S -loop-reduce -o - | FileCheck %s

; Runs the loop-reduce pass for a Thumb v8.1-M target with MVE enabled.
;
; Every function below has the same shape: a single-block loop whose i32
; counter (%12, stepped by -4 each iteration and compared "sgt 4") is the
; operand of an llvm.arm.mve.vctp* intrinsic. The predicate derived from that
; call guards a gather load with base write-back and a predicated add.
;
; In the expected output the counter is still an i32 phi feeding the vctp
; call. Relative to the input, the decrement appears after the icmp and no
; longer carries nsw; the preheader and exit blocks are unchanged.
;
; The assertions match the implicit numbering of unnamed values and blocks
; (11, 22, 24), so regenerate them with utils/update_test_checks.py after any
; edit to the IR instead of editing them by hand.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m-arm-none-eabi"

; vctp8: the <16 x i1> result of llvm.arm.mve.vctp8 is passed to the
; declaration-only helper @v16i1_to_v4i1 to obtain the <4 x i1> mask.
define float @vctp8(ptr %0, i32 %1) {
; CHECK-LABEL: @vctp8(
; CHECK-NEXT:    [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP0:%.*]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT:    br label [[TMP11:%.*]]
; CHECK:       11:
; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 [[TMP12]])
; CHECK-NEXT:    [[MASK:%.*]] = tail call <4 x i1> @v16i1_to_v4i1(<16 x i1> [[TMP15]])
; CHECK-NEXT:    [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[MASK]])
; CHECK-NEXT:    [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT:    [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[MASK]], <4 x float> [[TMP13]])
; CHECK-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT:    [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK:       22:
; CHECK-NEXT:    [[TMP23:%.*]] = tail call i32 @vecAddAcrossF32Mve(<4 x float> [[TMP19]])
; CHECK-NEXT:    [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT:    [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT:    ret float [[TMP25]]
;
  %3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
  %4 = extractvalue { <4 x i32>, i32 } %3, 0
  %5 = add nsw i32 %1, -1
  %6 = ptrtoint ptr %0 to i32
  %7 = insertelement <4 x i32> undef, i32 %6, i32 0
  %8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
  %9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
  %10 = add <4 x i32> %4, %9
  br label %11

11:                                               ; preds = %11, %2
  %12 = phi i32 [ %5, %2 ], [ %20, %11 ]
  %13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
  %14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
  %15 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %12)
  %mask = tail call <4 x i1> @v16i1_to_v4i1(<16 x i1> %15)
  %16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %mask)
  %17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
  %18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
  %19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %mask, <4 x float> %13)
  %20 = add nsw i32 %12, -4
  %21 = icmp sgt i32 %12, 4
  br i1 %21, label %11, label %22

22:                                               ; preds = %11
  %23 = tail call i32 @vecAddAcrossF32Mve(<4 x float> %19)
  %24 = sitofp i32 %23 to float
  %25 = tail call float @llvm.fabs.f32(float %24)
  ret float %25
}

; vctp16: same loop as @vctp8, but the predicate comes from
; llvm.arm.mve.vctp16 (<8 x i1>) and goes through @v8i1_to_v4i1.
define float @vctp16(ptr %0, i32 %1) {
; CHECK-LABEL: @vctp16(
; CHECK-NEXT:    [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP0:%.*]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT:    br label [[TMP11:%.*]]
; CHECK:       11:
; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP12]])
; CHECK-NEXT:    [[MASK:%.*]] = tail call <4 x i1> @v8i1_to_v4i1(<8 x i1> [[TMP15]])
; CHECK-NEXT:    [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[MASK]])
; CHECK-NEXT:    [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT:    [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[MASK]], <4 x float> [[TMP13]])
; CHECK-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT:    [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK:       22:
; CHECK-NEXT:    [[TMP23:%.*]] = tail call i32 @vecAddAcrossF32Mve(<4 x float> [[TMP19]])
; CHECK-NEXT:    [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT:    [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT:    ret float [[TMP25]]
;
  %3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
  %4 = extractvalue { <4 x i32>, i32 } %3, 0
  %5 = add nsw i32 %1, -1
  %6 = ptrtoint ptr %0 to i32
  %7 = insertelement <4 x i32> undef, i32 %6, i32 0
  %8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
  %9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
  %10 = add <4 x i32> %4, %9
  br label %11

11:                                               ; preds = %11, %2
  %12 = phi i32 [ %5, %2 ], [ %20, %11 ]
  %13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
  %14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
  %15 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %12)
  %mask = tail call <4 x i1> @v8i1_to_v4i1(<8 x i1> %15)
  %16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %mask)
  %17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
  %18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
  %19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %mask, <4 x float> %13)
  %20 = add nsw i32 %12, -4
  %21 = icmp sgt i32 %12, 4
  br i1 %21, label %11, label %22

22:                                               ; preds = %11
  %23 = tail call i32 @vecAddAcrossF32Mve(<4 x float> %19)
  %24 = sitofp i32 %23 to float
  %25 = tail call float @llvm.fabs.f32(float %24)
  ret float %25
}

; vctpi32: the <4 x i1> result of llvm.arm.mve.vctp32 is used directly as the
; predicate of the gather load and of the predicated add (no helper call).
define float @vctpi32(ptr %0, i32 %1) {
; CHECK-LABEL: @vctpi32(
; CHECK-NEXT:    [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP0:%.*]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT:    br label [[TMP11:%.*]]
; CHECK:       11:
; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP17:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP12]])
; CHECK-NEXT:    [[TMP16:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[TMP15]])
; CHECK-NEXT:    [[TMP17]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 1
; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP16]], 0
; CHECK-NEXT:    [[TMP19]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP18]], <4 x i1> [[TMP15]], <4 x float> [[TMP13]])
; CHECK-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT:    [[TMP21]] = add i32 [[TMP12]], -4
; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP11]], label [[TMP22:%.*]]
; CHECK:       22:
; CHECK-NEXT:    [[TMP23:%.*]] = tail call i32 @vecAddAcrossF32Mve(<4 x float> [[TMP19]])
; CHECK-NEXT:    [[TMP24:%.*]] = sitofp i32 [[TMP23]] to float
; CHECK-NEXT:    [[TMP25:%.*]] = tail call float @llvm.fabs.f32(float [[TMP24]])
; CHECK-NEXT:    ret float [[TMP25]]
;
  %3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
  %4 = extractvalue { <4 x i32>, i32 } %3, 0
  %5 = add nsw i32 %1, -1
  %6 = ptrtoint ptr %0 to i32
  %7 = insertelement <4 x i32> undef, i32 %6, i32 0
  %8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
  %9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
  %10 = add <4 x i32> %4, %9
  br label %11

11:                                               ; preds = %11, %2
  %12 = phi i32 [ %5, %2 ], [ %20, %11 ]
  %13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
  %14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
  %15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12)
  %16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %15)
  %17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
  %18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
  %19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %15, <4 x float> %13)
  %20 = add nsw i32 %12, -4
  %21 = icmp sgt i32 %12, 4
  br i1 %21, label %11, label %22

22:                                               ; preds = %11
  %23 = tail call i32 @vecAddAcrossF32Mve(<4 x float> %19)
  %24 = sitofp i32 %23 to float
  %25 = tail call float @llvm.fabs.f32(float %24)
  ret float %25
}


; vctpi64: the input spells llvm.arm.mve.vctp64 with a <4 x i1> result. The
; expected output instead calls it with a <2 x i1> result and rebuilds the
; <4 x i1> predicate through llvm.arm.mve.pred.v2i / llvm.arm.mve.pred.i2v,
; which shifts the numbering of later values (exit block 24 rather than 22).
; NOTE(review): this looks like the IR reader upgrading an older intrinsic
; signature rather than an effect of loop-reduce itself; confirm.
define float @vctpi64(ptr %0, i32 %1) {
; CHECK-LABEL: @vctpi64(
; CHECK-NEXT:    [[TMP3:%.*]] = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP1:%.*]], -1
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP0:%.*]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 -32, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[TMP4]], [[TMP9]]
; CHECK-NEXT:    br label [[TMP11:%.*]]
; CHECK:       11:
; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP5]], [[TMP2:%.*]] ], [ [[TMP23:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x float> [ zeroinitializer, [[TMP2]] ], [ [[TMP21:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[TMP2]] ], [ [[TMP19:%.*]], [[TMP11]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[TMP12]])
; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP15]])
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP16]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> [[TMP14]], i32 32, <4 x i1> [[TMP17]])
; CHECK-NEXT:    [[TMP19]] = extractvalue { <4 x float>, <4 x i32> } [[TMP18]], 1
; CHECK-NEXT:    [[TMP20:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP18]], 0
; CHECK-NEXT:    [[TMP21]] = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[TMP13]], <4 x float> [[TMP20]], <4 x i1> [[TMP17]], <4 x float> [[TMP13]])
; CHECK-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP12]], 4
; CHECK-NEXT:    [[TMP23]] = add i32 [[TMP12]], -4
; CHECK-NEXT:    br i1 [[TMP22]], label [[TMP11]], label [[TMP24:%.*]]
; CHECK:       24:
; CHECK-NEXT:    [[TMP25:%.*]] = tail call i32 @vecAddAcrossF32Mve(<4 x float> [[TMP21]])
; CHECK-NEXT:    [[TMP26:%.*]] = sitofp i32 [[TMP25]] to float
; CHECK-NEXT:    [[TMP27:%.*]] = tail call float @llvm.fabs.f32(float [[TMP26]])
; CHECK-NEXT:    ret float [[TMP27]]
;
  %3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8)
  %4 = extractvalue { <4 x i32>, i32 } %3, 0
  %5 = add nsw i32 %1, -1
  %6 = ptrtoint ptr %0 to i32
  %7 = insertelement <4 x i32> undef, i32 %6, i32 0
  %8 = add <4 x i32> %7, <i32 -32, i32 undef, i32 undef, i32 undef>
  %9 = shufflevector <4 x i32> %8, <4 x i32> undef, <4 x i32> zeroinitializer
  %10 = add <4 x i32> %4, %9
  br label %11

11:                                               ; preds = %11, %2
  %12 = phi i32 [ %5, %2 ], [ %20, %11 ]
  %13 = phi <4 x float> [ zeroinitializer, %2 ], [ %19, %11 ]
  %14 = phi <4 x i32> [ %10, %2 ], [ %17, %11 ]
  %15 = tail call <4 x i1> @llvm.arm.mve.vctp64(i32 %12)
  %16 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %14, i32 32, <4 x i1> %15)
  %17 = extractvalue { <4 x float>, <4 x i32> } %16, 1
  %18 = extractvalue { <4 x float>, <4 x i32> } %16, 0
  %19 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %13, <4 x float> %18, <4 x i1> %15, <4 x float> %13)
  %20 = add nsw i32 %12, -4
  %21 = icmp sgt i32 %12, 4
  br i1 %21, label %11, label %22

22:                                               ; preds = %11
  %23 = tail call i32 @vecAddAcrossF32Mve(<4 x float> %19)
  %24 = sitofp i32 %23 to float
  %25 = tail call float @llvm.fabs.f32(float %24)
  ret float %25
}

; External declarations referenced by the functions above. The vctp64
; declaration deliberately keeps the <4 x i1> spelling that @vctpi64 calls.
declare { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
declare i32 @vecAddAcrossF32Mve(...)
declare <4 x i1> @v8i1_to_v4i1(<8 x i1>)
declare <4 x i1> @v16i1_to_v4i1(<16 x i1>)
declare float @llvm.fabs.f32(float)