; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp < %s -codegenprepare -S | FileCheck -check-prefix=CHECK %s

; Each function below builds a splat of the scalar %x in vector.ph (an
; insertelement into lane 0 followed by a zero-mask shufflevector) and uses it
; inside the vector.body loop. The FileCheck patterns pin down whether that
; splat is still in vector.ph or has been re-created inside vector.body.
; NOTE(review): presumably the operand-position dependence reflects which MVE
; instructions accept a scalar register operand - confirm against the ARM
; backend's operand-sinking hook.

; Splat is the second operand of a single mul: expected to be absent from
; vector.ph and present in vector.body.
define void @sink_add_mul(ptr %s1, i32 %x, ptr %d, i32 %n) {
; CHECK-LABEL: @sink_add_mul(
; CHECK: vector.ph:
; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
;
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.vec = and i32 %n, -4
  %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0
  %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %s1, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = mul nsw <4 x i32> %wide.load, %broadcast.splat9
  %2 = getelementptr inbounds i32, ptr %d, i32 %index
  %wide.load10 = load <4 x i32>, ptr %2, align 4
  %3 = add nsw <4 x i32> %wide.load10, %1
  store <4 x i32> %3, ptr %2, align 4
  %index.next = add i32 %index, 4
  %4 = icmp eq i32 %index.next, %n.vec
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Splat is the second operand of two separate muls: expected to be absent from
; vector.ph, with one insertelement/shufflevector pair feeding each mul in
; vector.body.
define void @sink_add_mul_multiple(ptr %s1, ptr %s2, i32 %x, ptr %d, ptr %d2, i32 %n) {
; CHECK-LABEL: @sink_add_mul_multiple(
; CHECK: vector.ph:
; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 %x, i32 0
; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: mul nsw <4 x i32> %wide.load, [[TMP3]]
; CHECK: [[TMP2b:%.*]] = insertelement <4 x i32> undef, i32 %x, i32 0
; CHECK: [[TMP3b:%.*]] = shufflevector <4 x i32> [[TMP2b]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: mul nsw <4 x i32> %wide.load18, [[TMP3b]]
;
entry:
  %cmp13 = icmp sgt i32 %n, 0
  br i1 %cmp13, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.vec = and i32 %n, -4
  %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %x, i32 0
  %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %s1, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = mul nsw <4 x i32> %wide.load, %broadcast.splat16
  %2 = getelementptr inbounds i32, ptr %d, i32 %index
  %wide.load17 = load <4 x i32>, ptr %2, align 4
  %3 = add nsw <4 x i32> %wide.load17, %1
  store <4 x i32> %3, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %s2, i32 %index
  %wide.load18 = load <4 x i32>, ptr %4, align 4
  %5 = mul nsw <4 x i32> %wide.load18, %broadcast.splat16
  %6 = getelementptr inbounds i32, ptr %d2, i32 %index
  %wide.load19 = load <4 x i32>, ptr %6, align 4
  %7 = add nsw <4 x i32> %wide.load19, %5
  store <4 x i32> %7, ptr %6, align 4
  %index.next = add i32 %index, 4
  %8 = icmp eq i32 %index.next, %n.vec
  br i1 %8, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Splat is the second operand of a mul but also the first operand of a sub:
; entry and vector.ph are expected to come out unchanged, i.e. the splat stays
; in vector.ph.
define void @sink_add_sub_unsinkable(ptr %s1, ptr %s2, i32 %x, ptr %d, ptr %d2, i32 %n) {
; CHECK-LABEL: @sink_add_sub_unsinkable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP13]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
;
entry:
  %cmp13 = icmp sgt i32 %n, 0
  br i1 %cmp13, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.vec = and i32 %n, -4
  %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %x, i32 0
  %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %s1, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = mul nsw <4 x i32> %wide.load, %broadcast.splat16
  %2 = getelementptr inbounds i32, ptr %d, i32 %index
  %wide.load17 = load <4 x i32>, ptr %2, align 4
  %3 = add nsw <4 x i32> %wide.load17, %1
  store <4 x i32> %3, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %s2, i32 %index
  %wide.load18 = load <4 x i32>, ptr %4, align 4
  %5 = sub nsw <4 x i32> %broadcast.splat16, %wide.load18
  %6 = getelementptr inbounds i32, ptr %d2, i32 %index
  %wide.load19 = load <4 x i32>, ptr %6, align 4
  %7 = add nsw <4 x i32> %wide.load19, %5
  store <4 x i32> %7, ptr %6, align 4
  %index.next = add i32 %index, 4
  %8 = icmp eq i32 %index.next, %n.vec
  br i1 %8, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Splat is the second operand of a sub: expected to be absent from vector.ph
; and present in vector.body.
define void @sink_sub(ptr %s1, i32 %x, ptr %d, i32 %n) {
; CHECK-LABEL: @sink_sub(
; CHECK: vector.ph:
; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK: vector.body:
; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
;
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.vec = and i32 %n, -4
  %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0
  %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %s1, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = sub nsw <4 x i32> %wide.load, %broadcast.splat9
  %2 = getelementptr inbounds i32, ptr %d, i32 %index
  store <4 x i32> %1, ptr %2, align 4
  %index.next = add i32 %index, 4
  %3 = icmp eq i32 %index.next, %n.vec
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Splat is the first operand of a sub: expected to stay in vector.ph, with no
; insertelement/shufflevector appearing in vector.body.
; The scalar %n is captured locally ([[N:...]]) so this function's patterns do
; not depend on a FileCheck variable bound while matching an earlier function.
define void @sink_sub_unsinkable(ptr %s1, i32 %x, ptr %d, i32 %n) {
; CHECK-LABEL: @sink_sub_unsinkable(
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NOT: %{{.*}} = insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
; CHECK-NOT: %{{.*}} = shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
;
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.vec = and i32 %n, -4
  %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %x, i32 0
  %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %s1, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = sub nsw <4 x i32> %broadcast.splat9, %wide.load
  %2 = getelementptr inbounds i32, ptr %d, i32 %index
  store <4 x i32> %1, ptr %2, align 4
  %index.next = add i32 %index, 4
  %3 = icmp eq i32 %index.next, %n.vec
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}