; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -O3 < %s | FileCheck %s

; Check unrolling / SLP vectorization where the order of lanes is important for
; producing efficient shuffles. The shuffles should be regular and cheap for
; AArch64. [0 2 4 6] and [1 3 5 7] will produce uzp1/uzp2 instructions. The
; v16i32 shuffles will be legalized to individual v4i32.
;
; The input IR below is intentionally unoptimized (every local lives in an
; alloca and is reloaded before each use); the -O3 pipeline from the run line
; is what turns the two 4-iteration loops into the vector code matched by the
; autogenerated assertions.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64"

; Function Attrs: nounwind uwtable
define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 noundef %ip2) #0 {
; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering
; CHECK-SAME: (ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[IP1]] to i64
; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[IP2]] to i64
; CHECK-NEXT:    [[RRRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4
; CHECK-NEXT:    [[RRRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 4
; CHECK-NEXT:    [[RDD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[RDD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[RRRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR]], i64 4
; CHECK-NEXT:    [[RRRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64]], i64 4
; CHECK-NEXT:    [[RDD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[RDD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[RRRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_1]], i64 4
; CHECK-NEXT:    [[RRRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_1]], i64 4
; CHECK-NEXT:    [[RDD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR_1]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[RDD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_1]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[RRRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_2]], i64 4
; CHECK-NEXT:    [[RRRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_2]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[RDD_PTR]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[RDD_PTR64]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_1]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_1]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i8>, ptr [[RDD_PTR_1]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_1]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32>
; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
; CHECK-NEXT:    [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]]
; CHECK-NEXT:    [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32>
; CHECK-NEXT:    [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
; CHECK-NEXT:    [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]]
; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16)
; CHECK-NEXT:    [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]]
; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP46:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP48:%.*]] = add nsw <16 x i32> [[TMP45]], [[TMP47]]
; CHECK-NEXT:    [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP44]], [[TMP46]]
; CHECK-NEXT:    [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP53]]
; CHECK-NEXT:    [[TMP55:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP52]]
; CHECK-NEXT:    [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP60:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP59]]
; CHECK-NEXT:    [[TMP61:%.*]] = add nsw <16 x i32> [[TMP56]], [[TMP58]]
; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
; CHECK-NEXT:    [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15)
; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537)
; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535)
; CHECK-NEXT:    [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]]
; CHECK-NEXT:    [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]]
; CHECK-NEXT:    [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]])
; CHECK-NEXT:    [[CONV118:%.*]] = and i32 [[TMP74]], 65535
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP74]], 16
; CHECK-NEXT:    [[RDD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]]
; CHECK-NEXT:    [[SHR120:%.*]] = lshr i32 [[RDD119]], 1
; CHECK-NEXT:    ret i32 [[SHR120]]
;
entry:
  ; Stack slots for the four arguments, the 4x4 i32 scratch matrix %emp, the
  ; per-row/per-column temporaries, the accumulator %sum and both loop counters.
  %p1.addr = alloca ptr, align 8
  %ip1.addr = alloca i32, align 4
  %p2.addr = alloca ptr, align 8
  %ip2.addr = alloca i32, align 4
  %emp = alloca [4 x [4 x i32]], align 4
  %r0 = alloca i32, align 4
  %r1 = alloca i32, align 4
  %r2 = alloca i32, align 4
  %r3 = alloca i32, align 4
  %sum = alloca i32, align 4
  %i = alloca i32, align 4
  %e0 = alloca i32, align 4
  %e1 = alloca i32, align 4
  %e2 = alloca i32, align 4
  %e3 = alloca i32, align 4
  %i65 = alloca i32, align 4
  %e071 = alloca i32, align 4
  %e179 = alloca i32, align 4
  %e287 = alloca i32, align 4
  %e395 = alloca i32, align 4
  store ptr %p1, ptr %p1.addr, align 8, !tbaa !4
  store i32 %ip1, ptr %ip1.addr, align 4, !tbaa !8
  store ptr %p2, ptr %p2.addr, align 8, !tbaa !4
  store i32 %ip2, ptr %ip2.addr, align 4, !tbaa !8
  call void @llvm.lifetime.start.p0(i64 64, ptr %emp) #2
  call void @llvm.lifetime.start.p0(i64 4, ptr %r0) #2
  call void @llvm.lifetime.start.p0(i64 4, ptr %r1) #2
  call void @llvm.lifetime.start.p0(i64 4, ptr %r2) #2
  call void @llvm.lifetime.start.p0(i64 4, ptr %r3) #2
  call void @llvm.lifetime.start.p0(i64 4, ptr %sum) #2
  store i32 0, ptr %sum, align 4, !tbaa !8
  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
  store i32 0, ptr %i, align 4, !tbaa !8
  br label %for.cond

; First loop: runs while %i < 4, producing one row of %emp per iteration.
for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, ptr %i, align 4, !tbaa !8
  %cmp = icmp slt i32 %0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2
  br label %for.end

for.body:                                         ; preds = %for.cond
  ; r0 = (p1[0] - p2[0]) + ((p1[4] - p2[4]) << 16), bytes zero-extended to i32.
  %1 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx = getelementptr inbounds i8, ptr %1, i64 0
  %2 = load i8, ptr %rrrayidx, align 1, !tbaa !10
  %conv = zext i8 %2 to i32
  %3 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx1 = getelementptr inbounds i8, ptr %3, i64 0
  %4 = load i8, ptr %rrrayidx1, align 1, !tbaa !10
  %conv2 = zext i8 %4 to i32
  %sub = sub nsw i32 %conv, %conv2
  %5 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx3 = getelementptr inbounds i8, ptr %5, i64 4
  %6 = load i8, ptr %rrrayidx3, align 1, !tbaa !10
  %conv4 = zext i8 %6 to i32
  %7 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx5 = getelementptr inbounds i8, ptr %7, i64 4
  %8 = load i8, ptr %rrrayidx5, align 1, !tbaa !10
  %conv6 = zext i8 %8 to i32
  %sub7 = sub nsw i32 %conv4, %conv6
  %shl = shl i32 %sub7, 16
  %rdd = add nsw i32 %sub, %shl
  store i32 %rdd, ptr %r0, align 4, !tbaa !8
  ; r1: same computation on byte offsets 1 and 5.
  %9 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx8 = getelementptr inbounds i8, ptr %9, i64 1
  %10 = load i8, ptr %rrrayidx8, align 1, !tbaa !10
  %conv9 = zext i8 %10 to i32
  %11 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx10 = getelementptr inbounds i8, ptr %11, i64 1
  %12 = load i8, ptr %rrrayidx10, align 1, !tbaa !10
  %conv11 = zext i8 %12 to i32
  %sub12 = sub nsw i32 %conv9, %conv11
  %13 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx13 = getelementptr inbounds i8, ptr %13, i64 5
  %14 = load i8, ptr %rrrayidx13, align 1, !tbaa !10
  %conv14 = zext i8 %14 to i32
  %15 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx15 = getelementptr inbounds i8, ptr %15, i64 5
  %16 = load i8, ptr %rrrayidx15, align 1, !tbaa !10
  %conv16 = zext i8 %16 to i32
  %sub17 = sub nsw i32 %conv14, %conv16
  %shl18 = shl i32 %sub17, 16
  %rdd19 = add nsw i32 %sub12, %shl18
  store i32 %rdd19, ptr %r1, align 4, !tbaa !8
  ; r2: same computation on byte offsets 2 and 6.
  %17 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx20 = getelementptr inbounds i8, ptr %17, i64 2
  %18 = load i8, ptr %rrrayidx20, align 1, !tbaa !10
  %conv21 = zext i8 %18 to i32
  %19 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx22 = getelementptr inbounds i8, ptr %19, i64 2
  %20 = load i8, ptr %rrrayidx22, align 1, !tbaa !10
  %conv23 = zext i8 %20 to i32
  %sub24 = sub nsw i32 %conv21, %conv23
  %21 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx25 = getelementptr inbounds i8, ptr %21, i64 6
  %22 = load i8, ptr %rrrayidx25, align 1, !tbaa !10
  %conv26 = zext i8 %22 to i32
  %23 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx27 = getelementptr inbounds i8, ptr %23, i64 6
  %24 = load i8, ptr %rrrayidx27, align 1, !tbaa !10
  %conv28 = zext i8 %24 to i32
  %sub29 = sub nsw i32 %conv26, %conv28
  %shl30 = shl i32 %sub29, 16
  %rdd31 = add nsw i32 %sub24, %shl30
  store i32 %rdd31, ptr %r2, align 4, !tbaa !8
  ; r3: same computation on byte offsets 3 and 7.
  %25 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx32 = getelementptr inbounds i8, ptr %25, i64 3
  %26 = load i8, ptr %rrrayidx32, align 1, !tbaa !10
  %conv33 = zext i8 %26 to i32
  %27 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx34 = getelementptr inbounds i8, ptr %27, i64 3
  %28 = load i8, ptr %rrrayidx34, align 1, !tbaa !10
  %conv35 = zext i8 %28 to i32
  %sub36 = sub nsw i32 %conv33, %conv35
  %29 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %rrrayidx37 = getelementptr inbounds i8, ptr %29, i64 7
  %30 = load i8, ptr %rrrayidx37, align 1, !tbaa !10
  %conv38 = zext i8 %30 to i32
  %31 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %rrrayidx39 = getelementptr inbounds i8, ptr %31, i64 7
  %32 = load i8, ptr %rrrayidx39, align 1, !tbaa !10
  %conv40 = zext i8 %32 to i32
  %sub41 = sub nsw i32 %conv38, %conv40
  %shl42 = shl i32 %sub41, 16
  %rdd43 = add nsw i32 %sub36, %shl42
  store i32 %rdd43, ptr %r3, align 4, !tbaa !8
  ; e0 = r0 + r1, e1 = r0 - r1, e2 = r2 + r3, e3 = r2 - r3.
  call void @llvm.lifetime.start.p0(i64 4, ptr %e0) #2
  %33 = load i32, ptr %r0, align 4, !tbaa !8
  %34 = load i32, ptr %r1, align 4, !tbaa !8
  %rdd44 = add i32 %33, %34
  store i32 %rdd44, ptr %e0, align 4, !tbaa !8
  call void @llvm.lifetime.start.p0(i64 4, ptr %e1) #2
  %35 = load i32, ptr %r0, align 4, !tbaa !8
  %36 = load i32, ptr %r1, align 4, !tbaa !8
  %sub45 = sub i32 %35, %36
  store i32 %sub45, ptr %e1, align 4, !tbaa !8
  call void @llvm.lifetime.start.p0(i64 4, ptr %e2) #2
  %37 = load i32, ptr %r2, align 4, !tbaa !8
  %38 = load i32, ptr %r3, align 4, !tbaa !8
  %rdd46 = add i32 %37, %38
  store i32 %rdd46, ptr %e2, align 4, !tbaa !8
  call void @llvm.lifetime.start.p0(i64 4, ptr %e3) #2
  %39 = load i32, ptr %r2, align 4, !tbaa !8
  %40 = load i32, ptr %r3, align 4, !tbaa !8
  %sub47 = sub i32 %39, %40
  store i32 %sub47, ptr %e3, align 4, !tbaa !8
  ; Row i of %emp, written in the element order 0, 2, 1, 3:
  ;   emp[i][0] = e0 + e2, emp[i][2] = e0 - e2,
  ;   emp[i][1] = e1 + e3, emp[i][3] = e1 - e3.
  %41 = load i32, ptr %e0, align 4, !tbaa !8
  %42 = load i32, ptr %e2, align 4, !tbaa !8
  %rdd48 = add nsw i32 %41, %42
  %43 = load i32, ptr %i, align 4, !tbaa !8
  %idxprom = sext i32 %43 to i64
  %rrrayidx49 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom
  %rrrayidx50 = getelementptr inbounds [4 x i32], ptr %rrrayidx49, i64 0, i64 0
  store i32 %rdd48, ptr %rrrayidx50, align 4, !tbaa !8
  %44 = load i32, ptr %e0, align 4, !tbaa !8
  %45 = load i32, ptr %e2, align 4, !tbaa !8
  %sub51 = sub nsw i32 %44, %45
  %46 = load i32, ptr %i, align 4, !tbaa !8
  %idxprom52 = sext i32 %46 to i64
  %rrrayidx53 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom52
  %rrrayidx54 = getelementptr inbounds [4 x i32], ptr %rrrayidx53, i64 0, i64 2
  store i32 %sub51, ptr %rrrayidx54, align 4, !tbaa !8
  %47 = load i32, ptr %e1, align 4, !tbaa !8
  %48 = load i32, ptr %e3, align 4, !tbaa !8
  %rdd55 = add nsw i32 %47, %48
  %49 = load i32, ptr %i, align 4, !tbaa !8
  %idxprom56 = sext i32 %49 to i64
  %rrrayidx57 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom56
  %rrrayidx58 = getelementptr inbounds [4 x i32], ptr %rrrayidx57, i64 0, i64 1
  store i32 %rdd55, ptr %rrrayidx58, align 4, !tbaa !8
  %50 = load i32, ptr %e1, align 4, !tbaa !8
  %51 = load i32, ptr %e3, align 4, !tbaa !8
  %sub59 = sub nsw i32 %50, %51
  %52 = load i32, ptr %i, align 4, !tbaa !8
  %idxprom60 = sext i32 %52 to i64
  %rrrayidx61 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom60
  %rrrayidx62 = getelementptr inbounds [4 x i32], ptr %rrrayidx61, i64 0, i64 3
  store i32 %sub59, ptr %rrrayidx62, align 4, !tbaa !8
  call void @llvm.lifetime.end.p0(i64 4, ptr %e3) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %e2) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %e1) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %e0) #2
  br label %for.inc

for.inc:                                          ; preds = %for.body
  ; ++i, then advance p1 by ip1 bytes and p2 by ip2 bytes (both sign-extended).
  %53 = load i32, ptr %i, align 4, !tbaa !8
  %inc = add nsw i32 %53, 1
  store i32 %inc, ptr %i, align 4, !tbaa !8
  %54 = load i32, ptr %ip1.addr, align 4, !tbaa !8
  %55 = load ptr, ptr %p1.addr, align 8, !tbaa !4
  %idx.ext = sext i32 %54 to i64
  %rdd.ptr = getelementptr inbounds i8, ptr %55, i64 %idx.ext
  store ptr %rdd.ptr, ptr %p1.addr, align 8, !tbaa !4
  %56 = load i32, ptr %ip2.addr, align 4, !tbaa !8
  %57 = load ptr, ptr %p2.addr, align 8, !tbaa !4
  %idx.ext63 = sext i32 %56 to i64
  %rdd.ptr64 = getelementptr inbounds i8, ptr %57, i64 %idx.ext63
  store ptr %rdd.ptr64, ptr %p2.addr, align 8, !tbaa !4
  br label %for.cond, !llvm.loop !11

for.end:                                          ; preds = %for.cond.cleanup
  call void @llvm.lifetime.start.p0(i64 4, ptr %i65) #2
  store i32 0, ptr %i65, align 4, !tbaa !8
  br label %for.cond66

; Second loop: runs while %i65 < 4, consuming column %i65 of %emp.
for.cond66:                                       ; preds = %for.inc114, %for.end
  %58 = load i32, ptr %i65, align 4, !tbaa !8
  %cmp67 = icmp slt i32 %58, 4
  br i1 %cmp67, label %for.body70, label %for.cond.cleanup69

for.cond.cleanup69:                               ; preds = %for.cond66
  call void @llvm.lifetime.end.p0(i64 4, ptr %i65) #2
  br label %for.end116

for.body70:                                       ; preds = %for.cond66
  ; e071 = emp[0][i] + emp[1][i]
  call void @llvm.lifetime.start.p0(i64 4, ptr %e071) #2
  %rrrayidx72 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 0
  %59 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom73 = sext i32 %59 to i64
  %rrrayidx74 = getelementptr inbounds [4 x i32], ptr %rrrayidx72, i64 0, i64 %idxprom73
  %60 = load i32, ptr %rrrayidx74, align 4, !tbaa !8
  %rrrayidx75 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 1
  %61 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom76 = sext i32 %61 to i64
  %rrrayidx77 = getelementptr inbounds [4 x i32], ptr %rrrayidx75, i64 0, i64 %idxprom76
  %62 = load i32, ptr %rrrayidx77, align 4, !tbaa !8
  %rdd78 = add i32 %60, %62
  store i32 %rdd78, ptr %e071, align 4, !tbaa !8
  ; e179 = emp[0][i] - emp[1][i]
  call void @llvm.lifetime.start.p0(i64 4, ptr %e179) #2
  %rrrayidx80 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 0
  %63 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom81 = sext i32 %63 to i64
  %rrrayidx82 = getelementptr inbounds [4 x i32], ptr %rrrayidx80, i64 0, i64 %idxprom81
  %64 = load i32, ptr %rrrayidx82, align 4, !tbaa !8
  %rrrayidx83 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 1
  %65 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom84 = sext i32 %65 to i64
  %rrrayidx85 = getelementptr inbounds [4 x i32], ptr %rrrayidx83, i64 0, i64 %idxprom84
  %66 = load i32, ptr %rrrayidx85, align 4, !tbaa !8
  %sub86 = sub i32 %64, %66
  store i32 %sub86, ptr %e179, align 4, !tbaa !8
  ; e287 = emp[2][i] + emp[3][i]
  call void @llvm.lifetime.start.p0(i64 4, ptr %e287) #2
  %rrrayidx88 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 2
  %67 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom89 = sext i32 %67 to i64
  %rrrayidx90 = getelementptr inbounds [4 x i32], ptr %rrrayidx88, i64 0, i64 %idxprom89
  %68 = load i32, ptr %rrrayidx90, align 4, !tbaa !8
  %rrrayidx91 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 3
  %69 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom92 = sext i32 %69 to i64
  %rrrayidx93 = getelementptr inbounds [4 x i32], ptr %rrrayidx91, i64 0, i64 %idxprom92
  %70 = load i32, ptr %rrrayidx93, align 4, !tbaa !8
  %rdd94 = add i32 %68, %70
  store i32 %rdd94, ptr %e287, align 4, !tbaa !8
  ; e395 = emp[2][i] - emp[3][i]
  call void @llvm.lifetime.start.p0(i64 4, ptr %e395) #2
  %rrrayidx96 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 2
  %71 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom97 = sext i32 %71 to i64
  %rrrayidx98 = getelementptr inbounds [4 x i32], ptr %rrrayidx96, i64 0, i64 %idxprom97
  %72 = load i32, ptr %rrrayidx98, align 4, !tbaa !8
  %rrrayidx99 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 3
  %73 = load i32, ptr %i65, align 4, !tbaa !8
  %idxprom100 = sext i32 %73 to i64
  %rrrayidx101 = getelementptr inbounds [4 x i32], ptr %rrrayidx99, i64 0, i64 %idxprom100
  %74 = load i32, ptr %rrrayidx101, align 4, !tbaa !8
  %sub102 = sub i32 %72, %74
  store i32 %sub102, ptr %e395, align 4, !tbaa !8
  ; r0 = e071 + e287, r2 = e071 - e287, r1 = e179 + e395, r3 = e179 - e395.
  %75 = load i32, ptr %e071, align 4, !tbaa !8
  %76 = load i32, ptr %e287, align 4, !tbaa !8
  %rdd103 = add nsw i32 %75, %76
  store i32 %rdd103, ptr %r0, align 4, !tbaa !8
  %77 = load i32, ptr %e071, align 4, !tbaa !8
  %78 = load i32, ptr %e287, align 4, !tbaa !8
  %sub104 = sub nsw i32 %77, %78
  store i32 %sub104, ptr %r2, align 4, !tbaa !8
  %79 = load i32, ptr %e179, align 4, !tbaa !8
  %80 = load i32, ptr %e395, align 4, !tbaa !8
  %rdd105 = add nsw i32 %79, %80
  store i32 %rdd105, ptr %r1, align 4, !tbaa !8
  %81 = load i32, ptr %e179, align 4, !tbaa !8
  %82 = load i32, ptr %e395, align 4, !tbaa !8
  %sub106 = sub nsw i32 %81, %82
  store i32 %sub106, ptr %r3, align 4, !tbaa !8
  call void @llvm.lifetime.end.p0(i64 4, ptr %e395) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %e287) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %e179) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %e071) #2
  ; sum += twoabs(r0) + twoabs(r1) + twoabs(r2) + twoabs(r3)
  %83 = load i32, ptr %r0, align 4, !tbaa !8
  %call = call i32 @twoabs(i32 noundef %83)
  %84 = load i32, ptr %r1, align 4, !tbaa !8
  %call107 = call i32 @twoabs(i32 noundef %84)
  %rdd108 = add i32 %call, %call107
  %85 = load i32, ptr %r2, align 4, !tbaa !8
  %call109 = call i32 @twoabs(i32 noundef %85)
  %rdd110 = add i32 %rdd108, %call109
  %86 = load i32, ptr %r3, align 4, !tbaa !8
  %call111 = call i32 @twoabs(i32 noundef %86)
  %rdd112 = add i32 %rdd110, %call111
  %87 = load i32, ptr %sum, align 4, !tbaa !8
  %rdd113 = add i32 %87, %rdd112
  store i32 %rdd113, ptr %sum, align 4, !tbaa !8
  br label %for.inc114

for.inc114:                                       ; preds = %for.body70
  %88 = load i32, ptr %i65, align 4, !tbaa !8
  %inc115 = add nsw i32 %88, 1
  store i32 %inc115, ptr %i65, align 4, !tbaa !8
  br label %for.cond66, !llvm.loop !13

for.end116:                                       ; preds = %for.cond.cleanup69
  ; Result: ((sum truncated to 16 bits, zero-extended) + (sum >> 16)) >> 1.
  %89 = load i32, ptr %sum, align 4, !tbaa !8
  %conv117 = trunc i32 %89 to i16
  %conv118 = zext i16 %conv117 to i32
  %90 = load i32, ptr %sum, align 4, !tbaa !8
  %shr = lshr i32 %90, 16
  %rdd119 = add i32 %conv118, %shr
  %shr120 = lshr i32 %rdd119, 1
  call void @llvm.lifetime.end.p0(i64 4, ptr %sum) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %r3) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %r2) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %r1) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %r0) #2
  call void @llvm.lifetime.end.p0(i64 64, ptr %emp) #2
  ret i32 %shr120
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

; Internal helper called four times per iteration of the second loop above.
; Computes s = ((r >> 15) & 65537) * 65535 and returns (r + s) ^ s.
; Function Attrs: nounwind uwtable
define internal i32 @twoabs(i32 noundef %r) #0 {
entry:
  %r.addr = alloca i32, align 4
  %s = alloca i32, align 4
  store i32 %r, ptr %r.addr, align 4, !tbaa !8
  call void @llvm.lifetime.start.p0(i64 4, ptr %s) #2
  %0 = load i32, ptr %r.addr, align 4, !tbaa !8
  %shr = lshr i32 %0, 15
  %rnd = and i32 %shr, 65537
  %mul = mul i32 %rnd, 65535
  store i32 %mul, ptr %s, align 4, !tbaa !8
  %1 = load i32, ptr %r.addr, align 4, !tbaa !8
  %2 = load i32, ptr %s, align 4, !tbaa !8
  %rdd = add i32 %1, %2
  %3 = load i32, ptr %s, align 4, !tbaa !8
  %xor = xor i32 %rdd, %3
  call void @llvm.lifetime.end.p0(i64 4, ptr %s) #2
  ret i32 %xor
}

attributes #0 = { nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { nounwind }

!4 = !{!5, !5, i64 0}
!5 = !{!"any pointer", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}
!8 = !{!9, !9, i64 0}
!9 = !{!"int", !6, i64 0}
!10 = !{!6, !6, i64 0}
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
!13 = distinct !{!13, !12}