; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=NO-VP
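
; Check that calls to intrinsics that have a VP counterpart are widened to the
; corresponding @llvm.vp.* intrinsic when tail folding by EVL (IF-EVL), and
; that the loops are left scalar when tail folding is disabled (NO-VP).

; @llvm.smax becomes @llvm.vp.smax.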
define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_smax(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[DOT:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_smax(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT: [[DOT:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.smax.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
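
; @llvm.smin becomes @llvm.vp.smin.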
define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_smin(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[DOT:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_smin(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT: [[DOT:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.smin.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
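
; @llvm.umax becomes @llvm.vp.umax.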
define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_umax(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[DOT:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_umax(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT: [[DOT:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.umax.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
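
; @llvm.umin becomes @llvm.vp.umin.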
define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_umin(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[DOT:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_umin(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT: [[DOT:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT: [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.umin.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
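
; @llvm.ctlz becomes @llvm.vp.ctlz, keeping the i1 immarg.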
define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_ctlz(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP23]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP23]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[TMP19:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP18]], i1 true)
; IF-EVL-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[TMP19]], ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_ctlz(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[TMP1:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
; NO-VP-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[TMP1]], ptr [[GEP3]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %1 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %0, i1 true)
  %gep3 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %1, ptr %gep3, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
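
; @llvm.cttz becomes @llvm.vp.cttz, keeping the i1 immarg.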
define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_cttz(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP17:%.*]] = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP17]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[TMP23:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP22]], i1 true)
; IF-EVL-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[TMP23]], ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_cttz(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[TMP1:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
; NO-VP-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[TMP1]], ptr [[GEP3]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %1 = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %0, i1 true)
  %gep3 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %1, ptr %gep3, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
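
; The fpext + @llvm.lrint + trunc sequence becomes @llvm.vp.fpext +
; @llvm.vp.lrint + @llvm.vp.trunc.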
define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_lrint(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 9, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP26]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP26]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP20:%.*]] = load float, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[CONV2:%.*]] = fpext float [[TMP20]] to double
; IF-EVL-NEXT: [[TMP21:%.*]] = tail call i64 @llvm.lrint.i64.f64(double [[CONV2]])
; IF-EVL-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP21]] to i32
; IF-EVL-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_lrint(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; NO-VP-NEXT: [[CONV2:%.*]] = fpext float [[TMP0]] to double
; NO-VP-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.lrint.i64.f64(double [[CONV2]])
; NO-VP-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP1]] to i32
; NO-VP-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %gep, align 4
  %conv2 = fpext float %0 to double
  %1 = tail call i64 @llvm.lrint.i64.f64(double %conv2)
  %conv3 = trunc i64 %1 to i32
  %gep5 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %conv3, ptr %gep5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
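
; Likewise, @llvm.llrint becomes @llvm.vp.llrint.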
define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_llrint(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 9, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP26]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP26]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP20:%.*]] = load float, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[CONV2:%.*]] = fpext float [[TMP20]] to double
; IF-EVL-NEXT: [[TMP21:%.*]] = tail call i64 @llvm.llrint.i64.f64(double [[CONV2]])
; IF-EVL-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP21]] to i32
; IF-EVL-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_llrint(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; NO-VP-NEXT: [[CONV2:%.*]] = fpext float [[TMP0]] to double
; NO-VP-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.llrint.i64.f64(double [[CONV2]])
; NO-VP-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP1]] to i32
; NO-VP-NEXT: [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %gep, align 4
  %conv2 = fpext float %0 to double
  %1 = tail call i64 @llvm.llrint.i64.f64(double %conv2)
  %conv3 = trunc i64 %1 to i32
  %gep5 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %conv3, ptr %gep5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
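
; @llvm.abs becomes @llvm.vp.abs, keeping the i1 immarg.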
define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_abs(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP23]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP23]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.vp.abs.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[TMP18]], i1 true)
; IF-EVL-NEXT: [[GEP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store i32 [[COND]], ptr [[GEP9]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @vp_abs(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT: [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[TMP0]], i1 true)
; NO-VP-NEXT: [[GEP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[COND]], ptr [[GEP9]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %cond = tail call i32 @llvm.abs.i32(i32 %0, i1 true)
  %gep9 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %cond, ptr %gep9, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; There's no @llvm.vp.log10, so don't transform it.
define void @log10(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @log10(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; IF-EVL-NEXT: [[COND:%.*]] = tail call float @llvm.log10.f32(float [[TMP0]])
; IF-EVL-NEXT: [[GEP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: store float [[COND]], ptr [[GEP9]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @log10(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; NO-VP-NEXT: [[COND:%.*]] = tail call float @llvm.log10.f32(float [[TMP0]])
; NO-VP-NEXT: [[GEP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store float [[COND]], ptr [[GEP9]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %gep, align 4
  %cond = tail call float @llvm.log10.f32(float %0)
  %gep9 = getelementptr inbounds float, ptr %a, i64 %iv
  store float %cond, ptr %gep9, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}


declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
declare i32 @llvm.cttz.i32(i32, i1 immarg)
declare i64 @llvm.lrint.i64.f64(double)
declare i64 @llvm.llrint.i64.f64(double)
declare i32 @llvm.abs.i32(i32, i1 immarg)