1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 2; RUN: opt -p loop-vectorize -S %s | FileCheck %s 3 4target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" 5target triple = "x86_64-apple-macosx10.15.0" 6 7define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noalias %p.invar, ptr noalias %dst.1, ptr noalias %dst.2) { 8; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups( 9; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) { 10; CHECK-NEXT: [[ENTRY:.*]]: 11; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 12; CHECK: [[VECTOR_PH]]: 13; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 14; CHECK: [[VECTOR_BODY]]: 15; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 16; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[INDEX]], 0 17; CHECK-NEXT: [[TMP40:%.*]] = load float, ptr [[P_INVAR]], align 4 18; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP40]], i64 0 19; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 20; CHECK-NEXT: [[TMP41:%.*]] = shl i64 [[TMP39]], 2 21; CHECK-NEXT: [[TMP44:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[TMP41]] 22; CHECK-NEXT: [[TMP42:%.*]] = load float, ptr [[P_INVAR]], align 4 23; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <2 x float> poison, float [[TMP42]], i64 0 24; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT27]], <2 x float> poison, <2 x i32> zeroinitializer 25; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT28]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 26; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <4 x float> [[TMP46]], <4 x float> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 27; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP47]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 28; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP44]], align 4 29; CHECK-NEXT: [[TMP48:%.*]] = load float, ptr [[P_INVAR]], align 4 30; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 31; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT29]], <2 x float> poison, <2 x i32> zeroinitializer 32; CHECK-NEXT: [[TMP49:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[TMP41]] 33; CHECK-NEXT: [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT30]], <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 34; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLAT36]], <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 35; CHECK-NEXT: [[INTERLEAVED_VEC31:%.*]] = shufflevector <8 x float> [[TMP51]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 36; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP49]], align 4 37; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 38; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 39; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 40; CHECK: [[MIDDLE_BLOCK]]: 41; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 42; CHECK: [[SCALAR_PH]]: 43; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 44; CHECK-NEXT: br label %[[LOOP:.*]] 45; CHECK: [[LOOP]]: 46; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] 47; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[P_INVAR]], 
align 4 48; CHECK-NEXT: [[IV_MUL:%.*]] = shl i64 [[IV]], 2 49; CHECK-NEXT: [[GEP_DST_19:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[IV_MUL]] 50; CHECK-NEXT: store float [[L_0]], ptr [[GEP_DST_19]], align 4 51; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[P_INVAR]], align 4 52; CHECK-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[IV_MUL]], 1 53; CHECK-NEXT: [[GEP_DST_119:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_1]] 54; CHECK-NEXT: store float [[L_1]], ptr [[GEP_DST_119]], align 4 55; CHECK-NEXT: [[ADD_2:%.*]] = or disjoint i64 [[IV_MUL]], 2 56; CHECK-NEXT: [[GEP_DST_129:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_2]] 57; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_129]], align 4 58; CHECK-NEXT: [[ADD_3:%.*]] = or disjoint i64 [[IV_MUL]], 3 59; CHECK-NEXT: [[GEP_DST_140:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_3]] 60; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_140]], align 4 61; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[P_INVAR]], align 4 62; CHECK-NEXT: [[GEP_DST_247:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[IV_MUL]] 63; CHECK-NEXT: store float [[L_2]], ptr [[GEP_DST_247]], align 4 64; CHECK-NEXT: [[GEP_DST_255:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_1]] 65; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_255]], align 4 66; CHECK-NEXT: [[GEP_DST_265:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_2]] 67; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_265]], align 4 68; CHECK-NEXT: [[GEP_DST_276:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_3]] 69; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_276]], align 4 70; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 71; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 72; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] 73; CHECK: [[EXIT]]: 74; CHECK-NEXT: ret void 75; 76entry: 77 br label %loop 78 79loop: 80 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 81 %l.0 = load float, 
ptr %p.invar, align 4 82 %iv.mul = shl i64 %iv, 2 83 %gep.dst.19 = getelementptr float, ptr %dst.1, i64 %iv.mul 84 store float %l.0, ptr %gep.dst.19, align 4 85 %l.1 = load float, ptr %p.invar, align 4 86 %add.1 = or disjoint i64 %iv.mul, 1 87 %gep.dst.119 = getelementptr float, ptr %dst.1, i64 %add.1 88 store float %l.1, ptr %gep.dst.119, align 4 89 %add.2 = or disjoint i64 %iv.mul, 2 90 %gep.dst.129 = getelementptr float, ptr %dst.1, i64 %add.2 91 store float 0.000000e+00, ptr %gep.dst.129, align 4 92 %add.3 = or disjoint i64 %iv.mul, 3 93 %gep.dst.140 = getelementptr float, ptr %dst.1, i64 %add.3 94 store float 0.000000e+00, ptr %gep.dst.140, align 4 95 %l.2 = load float, ptr %p.invar, align 4 96 %gep.dst.247 = getelementptr float, ptr %dst.2, i64 %iv.mul 97 store float %l.2, ptr %gep.dst.247, align 4 98 %gep.dst.255 = getelementptr float, ptr %dst.2, i64 %add.1 99 store float 0.000000e+00, ptr %gep.dst.255, align 4 100 %gep.dst.265 = getelementptr float, ptr %dst.2, i64 %add.2 101 store float 0.000000e+00, ptr %gep.dst.265, align 4 102 %gep.dst.276 = getelementptr float, ptr %dst.2, i64 %add.3 103 store float 0.000000e+00, ptr %gep.dst.276, align 4 104 %iv.next = add i64 %iv, 1 105 %ec = icmp eq i64 %iv.next, 1024 106 br i1 %ec, label %exit, label %loop 107 108exit: 109 ret void 110} 111 112define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr %arg2) #0 { 113; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse( 114; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] { 115; CHECK-NEXT: [[ENTRY:.*]]: 116; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1 117; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54 118; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] 119; CHECK: [[VECTOR_SCEVCHECK]]: 120; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8 121; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } 
@llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) 122; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 123; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 124; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]] 125; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] 126; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] 127; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] 128; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12 129; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) 130; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 131; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 132; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]] 133; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] 134; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]] 135; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]] 136; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4 137; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) 138; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 139; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 140; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]] 141; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] 142; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]] 143; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]] 144; CHECK-NEXT: [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) 145; CHECK-NEXT: [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0 146; CHECK-NEXT: [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1 147; CHECK-NEXT: [[TMP13:%.*]] = sub i64 
0, [[MUL_RESULT10]] 148; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]] 149; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]] 150; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]] 151; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]] 152; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]] 153; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]] 154; CHECK-NEXT: br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] 155; CHECK: [[VECTOR_MEMCHECK]]: 156; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4 157; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16 158; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP21]] 159; CHECK-NEXT: [[TMP22:%.*]] = shl i64 [[ARG1]], 5 160; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 32 161; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP23]] 162; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP13]] 163; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[ARG]], [[SCEVGEP12]] 164; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 165; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] 166; CHECK: [[VECTOR_PH]]: 167; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 168; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] 169; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 170; CHECK: [[VECTOR_BODY]]: 171; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 172; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 0 173; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[TMP24]], 5 174; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]] 175; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP24]], 4 176; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]] 177; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4 178; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector 
<16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8> 179; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9> 180; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10> 181; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11> 182; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12> 183; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13> 184; CHECK-NEXT: [[STRIDED_VEC19:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14> 185; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15> 186; CHECK-NEXT: [[TMP30:%.*]] = fadd <2 x float> [[STRIDED_VEC]], [[STRIDED_VEC17]] 187; CHECK-NEXT: [[TMP31:%.*]] = fmul <2 x float> [[TMP30]], zeroinitializer 188; CHECK-NEXT: [[TMP32:%.*]] = fadd <2 x float> [[STRIDED_VEC14]], [[STRIDED_VEC18]] 189; CHECK-NEXT: [[TMP33:%.*]] = fmul <2 x float> [[TMP32]], zeroinitializer 190; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x float> [[STRIDED_VEC15]], [[STRIDED_VEC19]] 191; CHECK-NEXT: [[TMP35:%.*]] = fmul <2 x float> [[TMP34]], zeroinitializer 192; CHECK-NEXT: [[TMP36:%.*]] = fadd <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC20]] 193; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x float> [[TMP36]], zeroinitializer 194; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> [[TMP33]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 195; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 196; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 197; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 198; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 4 199; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 200; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 201; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 202; CHECK: [[MIDDLE_BLOCK]]: 203; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] 204; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] 205; CHECK: [[SCALAR_PH]]: 206; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] 207; CHECK-NEXT: br label %[[LOOP:.*]] 208; CHECK: [[LOOP]]: 209; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] 210; CHECK-NEXT: [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5 211; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[SHL_IV_5]] 212; CHECK-NEXT: [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16 213; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]] 214; CHECK-NEXT: [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4 215; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[SHL_IV_4]] 216; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4 217; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4 218; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]] 219; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00 220; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_3]], align 4 221; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 4 222; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4 223; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr 
i8, ptr [[GEP_2]], i64 4 224; CHECK-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4 225; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]] 226; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00 227; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 4 228; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_6]], align 4 229; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 8 230; CHECK-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4 231; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 8 232; CHECK-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4 233; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]] 234; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00 235; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 8 236; CHECK-NEXT: store float [[MUL_3]], ptr [[GEP_9]], align 4 237; CHECK-NEXT: [[I27:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 12 238; CHECK-NEXT: [[L_7:%.*]] = load float, ptr [[I27]], align 4 239; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 12 240; CHECK-NEXT: [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4 241; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]] 242; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[ADD_4]], 0.000000e+00 243; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 12 244; CHECK-NEXT: store float [[MUL_4]], ptr [[GEP_11]], align 4 245; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 246; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]] 247; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] 248; CHECK: [[EXIT]]: 249; CHECK-NEXT: ret void 250; 251entry: 252 br label %loop 253 254loop: 255 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 256 %shl.iv.5 = shl i64 %iv, 5 257 %gep.1 = getelementptr i8, ptr %arg, i64 %shl.iv.5 258 %add.5 = or disjoint i64 %shl.iv.5, 16 259 %gep.2 = getelementptr i8, ptr %arg, i64 %add.5 260 %shl.iv.4 = shl i64 %iv, 4 
261 %gep.3 = getelementptr i8, ptr %arg2, i64 %shl.iv.4 262 %l.1 = load float, ptr %gep.1, align 4 263 %l.2 = load float, ptr %gep.2, align 4 264 %add.1 = fadd float %l.1, %l.2 265 %mul.1 = fmul float %add.1, 0.000000e+00 266 store float %mul.1, ptr %gep.3, align 4 267 %gep.4 = getelementptr i8, ptr %gep.1, i64 4 268 %l.3 = load float, ptr %gep.4, align 4 269 %gep.5 = getelementptr i8, ptr %gep.2, i64 4 270 %l.4 = load float, ptr %gep.5, align 4 271 %add.2 = fadd float %l.3, %l.4 272 %mul.2 = fmul float %add.2, 0.000000e+00 273 %gep.6 = getelementptr i8, ptr %gep.3, i64 4 274 store float %mul.2, ptr %gep.6, align 4 275 %gep.7 = getelementptr i8, ptr %gep.1, i64 8 276 %l.5 = load float, ptr %gep.7, align 4 277 %gep.8 = getelementptr i8, ptr %gep.2, i64 8 278 %l.6 = load float, ptr %gep.8, align 4 279 %add.3 = fadd float %l.5, %l.6 280 %mul.3 = fmul float %add.3, 0.000000e+00 281 %gep.9 = getelementptr i8, ptr %gep.3, i64 8 282 store float %mul.3, ptr %gep.9, align 4 283 %i27 = getelementptr i8, ptr %gep.1, i64 12 284 %l.7 = load float, ptr %i27, align 4 285 %gep.10 = getelementptr i8, ptr %gep.2, i64 12 286 %l.8 = load float, ptr %gep.10, align 4 287 %add.4 = fadd float %l.7, %l.8 288 %mul.4 = fmul float %add.4, 0.000000e+00 289 %gep.11 = getelementptr i8, ptr %gep.3, i64 12 290 store float %mul.4, ptr %gep.11, align 4 291 %iv.next = add i64 %iv, 1 292 %ec = icmp eq i64 %iv, %arg1 293 br i1 %ec, label %exit, label %loop 294 295exit: 296 ret void 297} 298 299define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) #1 { 300; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse2( 301; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { 302; CHECK-NEXT: [[ENTRY:.*]]: 303; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3 304; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 305; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52 306; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
%[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] 307; CHECK: [[VECTOR_SCEVCHECK]]: 308; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3 309; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24 310; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 311; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 312; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 313; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]] 314; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] 315; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]] 316; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] 317; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28 318; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 319; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 320; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 321; CHECK-NEXT: [[TMP7:%.*]] = sub i64 0, [[MUL_RESULT3]] 322; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] 323; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]] 324; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]] 325; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20 326; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 327; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 328; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 329; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT7]] 330; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] 331; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]] 332; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]] 333; CHECK-NEXT: [[SCEVGEP9:%.*]] = 
getelementptr i8, ptr [[A]], i64 16 334; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 335; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0 336; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1 337; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT11]] 338; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]] 339; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]] 340; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]] 341; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12 342; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 343; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0 344; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1 345; CHECK-NEXT: [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT15]] 346; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]] 347; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]] 348; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]] 349; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8 350; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 351; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0 352; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1 353; CHECK-NEXT: [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT19]] 354; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]] 355; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]] 356; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]] 357; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4 358; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 
[[TMP2]]) 359; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0 360; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1 361; CHECK-NEXT: [[TMP27:%.*]] = sub i64 0, [[MUL_RESULT23]] 362; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]] 363; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]] 364; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]] 365; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) 366; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0 367; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1 368; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]] 369; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]] 370; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]] 371; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]] 372; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]] 373; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]] 374; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]] 375; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]] 376; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]] 377; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]] 378; CHECK-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]] 379; CHECK-NEXT: br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] 380; CHECK: [[VECTOR_MEMCHECK]]: 381; CHECK-NEXT: [[TMP42:%.*]] = lshr i64 [[N]], 3 382; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 5 383; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 32 384; CHECK-NEXT: [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP44]] 385; CHECK-NEXT: [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4 386; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]] 387; CHECK-NEXT: [[TMP46:%.*]] = shl i64 [[TMP42]], 4 388; CHECK-NEXT: [[TMP47:%.*]] 
= add nuw nsw i64 [[TMP46]], 8 389; CHECK-NEXT: [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]] 390; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]] 391; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] 392; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 393; CHECK-NEXT: [[BOUND031:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]] 394; CHECK-NEXT: [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] 395; CHECK-NEXT: [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND031]], [[BOUND132]] 396; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]] 397; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] 398; CHECK: [[VECTOR_PH]]: 399; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4 400; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 401; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i64 4, i64 [[N_MOD_VF]] 402; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP49]] 403; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8 404; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 405; CHECK: [[VECTOR_BODY]]: 406; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 407; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 8, i64 16, i64 24>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 408; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 409; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 0 410; CHECK-NEXT: [[TMP51:%.*]] = lshr exact i64 [[TMP50]], 1 411; CHECK-NEXT: [[TMP52:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[TMP51]] 412; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4 413; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 414; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 415; 
CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]] 416; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] 417; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]] 418; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 419; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 420; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 421; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <8 x i32> [[TMP58]], <8 x i32> [[TMP59]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 422; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <8 x i32> [[TMP60]], <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 423; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 424; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, 
i32 27, i32 31> 425; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4 426; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 427; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) 428; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 429; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 430; CHECK: [[MIDDLE_BLOCK]]: 431; CHECK-NEXT: br label %[[SCALAR_PH]] 432; CHECK: [[SCALAR_PH]]: 433; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] 434; CHECK-NEXT: br label %[[LOOP:.*]] 435; CHECK: [[LOOP]]: 436; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] 437; CHECK-NEXT: [[SHR_1:%.*]] = lshr exact i64 [[IV]], 1 438; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[SHR_1]] 439; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_B]], align 4 440; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] 441; CHECK-NEXT: store i32 [[L]], ptr [[GEP_A]], align 4 442; CHECK-NEXT: [[IV_NEXT:%.*]] = or disjoint i64 [[IV]], 1 443; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]] 444; CHECK-NEXT: store i32 0, ptr [[GEP_A_1]], align 4 445; CHECK-NEXT: [[IV_NEXT_1:%.*]] = or disjoint i64 [[IV]], 2 446; CHECK-NEXT: [[SHR_2:%.*]] = lshr exact i64 [[IV_NEXT_1]], 1 447; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr i32, ptr [[B]], i64 [[SHR_2]] 448; CHECK-NEXT: [[TMP65:%.*]] = load i32, ptr [[GEP_B_2]], align 4 449; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_1]] 450; CHECK-NEXT: store i32 [[TMP65]], ptr [[GEP_A_2]], align 4 451; CHECK-NEXT: [[IV_NEXT_2:%.*]] = or disjoint i64 [[IV]], 3 452; CHECK-NEXT: [[GEP_A_3:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_2]] 453; CHECK-NEXT: store i32 0, ptr [[GEP_A_3]], align 4 
454; CHECK-NEXT: [[IV_NEXT_3:%.*]] = or disjoint i64 [[IV]], 4 455; CHECK-NEXT: [[GEP_B_4:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] 456; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[GEP_B_4]], align 4 457; CHECK-NEXT: [[GEP_A_4:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_3]] 458; CHECK-NEXT: store i32 [[TMP66]], ptr [[GEP_A_4]], align 4 459; CHECK-NEXT: [[IV_NEXT_4:%.*]] = or disjoint i64 [[IV]], 5 460; CHECK-NEXT: [[GEP_A_5:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_4]] 461; CHECK-NEXT: store i32 0, ptr [[GEP_A_5]], align 4 462; CHECK-NEXT: [[IV_NEXT_5:%.*]] = or disjoint i64 [[IV]], 6 463; CHECK-NEXT: [[GEP_A_6:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_5]] 464; CHECK-NEXT: store i32 0, ptr [[GEP_A_6]], align 4 465; CHECK-NEXT: [[IV_NEXT_6:%.*]] = or disjoint i64 [[IV]], 7 466; CHECK-NEXT: [[GEP_A_7:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_6]] 467; CHECK-NEXT: store i32 0, ptr [[GEP_A_7]], align 4 468; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 469; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] 470; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] 471; CHECK: [[EXIT]]: 472; CHECK-NEXT: ret void 473; 474entry: 475 br label %loop 476 477loop: 478 %iv = phi i64 [ 0, %entry ], [ %iv.next.7, %loop ] 479 %shr.1 = lshr exact i64 %iv, 1 480 %gep.B = getelementptr nusw i32, ptr %B, i64 %shr.1 481 %l = load i32, ptr %gep.B, align 4 482 %gep.A = getelementptr i32, ptr %A, i64 %iv 483 store i32 %l, ptr %gep.A, align 4 484 %iv.next = or disjoint i64 %iv, 1 485 %gep.A.1 = getelementptr i32, ptr %A, i64 %iv.next 486 store i32 0, ptr %gep.A.1, align 4 487 %iv.next.1 = or disjoint i64 %iv, 2 488 %shr.2 = lshr exact i64 %iv.next.1, 1 489 %gep.B.2 = getelementptr i32, ptr %B, i64 %shr.2 490 %1 = load i32, ptr %gep.B.2, align 4 491 %gep.A.2 = getelementptr i32, ptr %A, i64 %iv.next.1 492 store i32 %1, ptr %gep.A.2, align 4 493 %iv.next.2 = or disjoint i64 %iv, 3 494 %gep.A.3 = 
getelementptr i32, ptr %A, i64 %iv.next.2
  store i32 0, ptr %gep.A.3, align 4
  %iv.next.3 = or disjoint i64 %iv, 4
  %gep.B.4 = getelementptr i32, ptr %B, i64 %iv
  %2 = load i32, ptr %gep.B.4, align 4
  %gep.A.4 = getelementptr i32, ptr %A, i64 %iv.next.3
  store i32 %2, ptr %gep.A.4, align 4
  %iv.next.4 = or disjoint i64 %iv, 5
  %gep.A.5 = getelementptr i32, ptr %A, i64 %iv.next.4
  store i32 0, ptr %gep.A.5, align 4
  %iv.next.5 = or disjoint i64 %iv, 6
  %gep.A.6 = getelementptr i32, ptr %A, i64 %iv.next.5
  store i32 0, ptr %gep.A.6, align 4
  %iv.next.6 = or disjoint i64 %iv, 7
  %gep.A.7 = getelementptr i32, ptr %A, i64 %iv.next.6
  store i32 0, ptr %gep.A.7, align 4
  %iv.next.7 = add nuw nsw i64 %iv, 8
  %ec = icmp eq i64 %iv, %N
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Test case for https://github.com/llvm/llvm-project/issues/112922.
;
; Each iteration writes one { double, i64 } element of %dst: the i64 member
; (field 1) is stored first, then the double member (field 0). The exit test
; compares the pre-increment %iv against 1, so the body runs for %iv = 0 and 1.
; The autogenerated assertions below expect a vector loop that bitcasts the
; <2 x i64> induction vector to <2 x double> and emits a single <4 x double>
; interleaved store per vector iteration.
define void @interleave_store_double_i64(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_double_i64(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; i64 member (field 1) of element %iv receives the induction value.
  %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
  store i64 %iv, ptr %gep.1, align 8
  ; A GEP with only the element index addresses the start of the element,
  ; i.e. the double member (field 0).
  %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
  store double 0.000000e+00, ptr %gep.0, align 8
  %iv.next = add i64 %iv, 1
  ; The exit condition tests %iv, not %iv.next.
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same { double, i64 } layout and trip count as @interleave_store_double_i64,
; but with the two stores in the opposite order: the double member (field 0)
; is written before the i64 member (field 1). The autogenerated assertions
; below contain no vector blocks and no loop metadata, i.e. the loop is
; expected to come out of the pass still scalar.
define void @interleave_store_i64_double(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_i64_double(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; double member (field 0) first ...
  %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
  store double 0.000000e+00, ptr %gep.0, align 8
  ; ... then the i64 member (field 1).
  %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
  store i64 %iv, ptr %gep.1, align 8
  %iv.next = add i64 %iv, 1
  ; The exit condition tests %iv, not %iv.next.
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; TODO: The interleave group should likely have the same cost as @interleave_store_double_i64.
; Variant using a { i64, double } element type (members swapped relative to
; @interleave_store_double_i64). The double member (field 1) is stored first,
; then the i64 member (field 0). The exit test compares the pre-increment %iv
; against 1, so the body runs for %iv = 0 and 1. The autogenerated assertions
; below contain no vector blocks and no loop metadata, i.e. the loop is
; currently expected to stay scalar.
define void @interleave_store_double_i64_2(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_double_i64_2(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; double member (field 1) first ...
  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
  store double 0.000000e+00, ptr %gep.1, align 8
  ; ... then the i64 member: a GEP with only the element index addresses the
  ; start of the element, i.e. field 0.
  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
  store i64 %iv, ptr %gep.0, align 8
  %iv.next = add i64 %iv, 1
  ; The exit condition tests %iv, not %iv.next.
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same { i64, double } layout and trip count as @interleave_store_double_i64_2,
; but with the stores in the opposite order: the i64 member (field 0) is
; written before the double member (field 1). The autogenerated assertions
; below expect a vector loop that bitcasts the <2 x i64> induction vector to
; <2 x double> and emits a single <4 x double> interleaved store per vector
; iteration.
define void @interleave_store_i64_double_2(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_i64_double_2(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; i64 member (field 0, the start of element %iv) first ...
  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
  store i64 %iv, ptr %gep.0, align 8
  ; ... then the double member (field 1).
  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
  store double 0.000000e+00, ptr %gep.1, align 8
  %iv.next = add i64 %iv, 1
  ; The exit condition tests %iv, not %iv.next.
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}



; Attribute groups. None of the four interleave_store_* functions above
; references #0 or #1; presumably they are attached to functions earlier in
; the file (not verified here).
attributes #0 = { "target-features"="+sse4.2" }
attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }

; Autogenerated patterns for the loop metadata nodes whose names are captured
; by the !llvm.loop operands in the function assertions (for example LOOP13 and
; LOOP14 in @interleave_store_i64_double_2).
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
;.