xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt -S -O3 < %s | FileCheck %s
3
4; Check unrolling / SLP vectorization where the order of lanes is important for
5; producing efficient shuffles. The shuffles should be regular and cheap for
6; AArch64. [0 2 4 6] and [1 3 5 7] will produce uzp1/uzp2 instructions. The
7; v16i32 shuffles will be legalized to individual v4i32 shuffles.
8
9target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
10target triple = "aarch64"
11
12; Function Attrs: nounwind uwtable
; Test input for the -O3 pipeline: two 4-iteration loops over byte rows read
; from %p1 and %p2 (advanced by %ip1 / %ip2 bytes per iteration). The first
; loop fills the 4x4 i32 scratch array %emp with sums/differences of packed
; byte differences; the second loop combines %emp column-wise, passes each
; result through @twoabs and accumulates into %sum. The function returns
; ((sum & 0xffff) + (sum >> 16)) >> 1.
; NOTE(review): the arithmetic resembles a 4x4 Hadamard/SATD-style kernel --
; confirm against the original C source.
; The CHECK lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_test_checks.py instead of editing.
13define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 noundef %ip2) #0 {
14; CHECK-LABEL: define range(i32 0, 65536) i32 @slpordering
15; CHECK-SAME: (ptr noundef readonly captures(none) [[P1:%.*]], i32 noundef [[IP1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], i32 noundef [[IP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
16; CHECK-NEXT:  entry:
17; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[IP1]] to i64
18; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[IP2]] to i64
19; CHECK-NEXT:    [[RRRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4
20; CHECK-NEXT:    [[RRRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 4
21; CHECK-NEXT:    [[RDD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
22; CHECK-NEXT:    [[RDD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
23; CHECK-NEXT:    [[RRRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR]], i64 4
24; CHECK-NEXT:    [[RRRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64]], i64 4
25; CHECK-NEXT:    [[RDD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR]], i64 [[IDX_EXT]]
26; CHECK-NEXT:    [[RDD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64]], i64 [[IDX_EXT63]]
27; CHECK-NEXT:    [[RRRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_1]], i64 4
28; CHECK-NEXT:    [[RRRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_1]], i64 4
29; CHECK-NEXT:    [[RDD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR_1]], i64 [[IDX_EXT]]
30; CHECK-NEXT:    [[RDD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_1]], i64 [[IDX_EXT63]]
31; CHECK-NEXT:    [[RRRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_2]], i64 4
32; CHECK-NEXT:    [[RRRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_2]], i64 4
33; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]]
34; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]]
35; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[TBAA0]]
36; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5]], align 1, !tbaa [[TBAA0]]
37; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[RDD_PTR]], align 1, !tbaa [[TBAA0]]
38; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[RDD_PTR64]], align 1, !tbaa [[TBAA0]]
39; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_1]], align 1, !tbaa [[TBAA0]]
40; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_1]], align 1, !tbaa [[TBAA0]]
41; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i8>, ptr [[RDD_PTR_1]], align 1, !tbaa [[TBAA0]]
42; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_1]], align 1, !tbaa [[TBAA0]]
43; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_2]], align 1, !tbaa [[TBAA0]]
44; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_2]], align 1, !tbaa [[TBAA0]]
45; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, ptr [[RDD_PTR_2]], align 1, !tbaa [[TBAA0]]
46; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
47; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
48; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
49; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
50; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
51; CHECK-NEXT:    [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32>
52; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[RDD_PTR64_2]], align 1, !tbaa [[TBAA0]]
53; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
54; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
55; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
56; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
57; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
58; CHECK-NEXT:    [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
59; CHECK-NEXT:    [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]]
60; CHECK-NEXT:    [[TMP27:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3_3]], align 1, !tbaa [[TBAA0]]
61; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
62; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
63; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
64; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
65; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
66; CHECK-NEXT:    [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32>
67; CHECK-NEXT:    [[TMP34:%.*]] = load <4 x i8>, ptr [[RRRAYIDX5_3]], align 1, !tbaa [[TBAA0]]
68; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
69; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
71; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
72; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
73; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
74; CHECK-NEXT:    [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]]
75; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16)
76; CHECK-NEXT:    [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]]
77; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
78; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
79; CHECK-NEXT:    [[TMP46:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
80; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
81; CHECK-NEXT:    [[TMP48:%.*]] = add nsw <16 x i32> [[TMP45]], [[TMP47]]
82; CHECK-NEXT:    [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP44]], [[TMP46]]
83; CHECK-NEXT:    [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
84; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
85; CHECK-NEXT:    [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
86; CHECK-NEXT:    [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
87; CHECK-NEXT:    [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP53]]
88; CHECK-NEXT:    [[TMP55:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP52]]
89; CHECK-NEXT:    [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
90; CHECK-NEXT:    [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
91; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
92; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
93; CHECK-NEXT:    [[TMP60:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP59]]
94; CHECK-NEXT:    [[TMP61:%.*]] = add nsw <16 x i32> [[TMP56]], [[TMP58]]
95; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
96; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
97; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
98; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
99; CHECK-NEXT:    [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
100; CHECK-NEXT:    [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
101; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
102; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15)
103; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537)
104; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535)
105; CHECK-NEXT:    [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]]
106; CHECK-NEXT:    [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]]
107; CHECK-NEXT:    [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]])
108; CHECK-NEXT:    [[CONV118:%.*]] = and i32 [[TMP74]], 65535
109; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP74]], 16
110; CHECK-NEXT:    [[RDD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]]
111; CHECK-NEXT:    [[SHR120:%.*]] = lshr i32 [[RDD119]], 1
112; CHECK-NEXT:    ret i32 [[SHR120]]
113;
114entry:
  ; Every argument and local lives in its own stack slot and is reloaded at
  ; each use; the -O3 RUN line is what is expected to clean this up.
115  %p1.addr = alloca ptr, align 8
116  %ip1.addr = alloca i32, align 4
117  %p2.addr = alloca ptr, align 8
118  %ip2.addr = alloca i32, align 4
119  %emp = alloca [4 x [4 x i32]], align 4
120  %r0 = alloca i32, align 4
121  %r1 = alloca i32, align 4
122  %r2 = alloca i32, align 4
123  %r3 = alloca i32, align 4
124  %sum = alloca i32, align 4
125  %i = alloca i32, align 4
126  %e0 = alloca i32, align 4
127  %e1 = alloca i32, align 4
128  %e2 = alloca i32, align 4
129  %e3 = alloca i32, align 4
130  %i65 = alloca i32, align 4
131  %e071 = alloca i32, align 4
132  %e179 = alloca i32, align 4
133  %e287 = alloca i32, align 4
134  %e395 = alloca i32, align 4
135  store ptr %p1, ptr %p1.addr, align 8, !tbaa !4
136  store i32 %ip1, ptr %ip1.addr, align 4, !tbaa !8
137  store ptr %p2, ptr %p2.addr, align 8, !tbaa !4
138  store i32 %ip2, ptr %ip2.addr, align 4, !tbaa !8
139  call void @llvm.lifetime.start.p0(i64 64, ptr %emp) #2
140  call void @llvm.lifetime.start.p0(i64 4, ptr %r0) #2
141  call void @llvm.lifetime.start.p0(i64 4, ptr %r1) #2
142  call void @llvm.lifetime.start.p0(i64 4, ptr %r2) #2
143  call void @llvm.lifetime.start.p0(i64 4, ptr %r3) #2
144  call void @llvm.lifetime.start.p0(i64 4, ptr %sum) #2
  ; sum = 0; i = 0
145  store i32 0, ptr %sum, align 4, !tbaa !8
146  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
147  store i32 0, ptr %i, align 4, !tbaa !8
148  br label %for.cond
149
; First loop header: continue while i < 4 (signed compare).
150for.cond:                                         ; preds = %for.inc, %entry
151  %0 = load i32, ptr %i, align 4, !tbaa !8
152  %cmp = icmp slt i32 %0, 4
153  br i1 %cmp, label %for.body, label %for.cond.cleanup
154
155for.cond.cleanup:                                 ; preds = %for.cond
156  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2
157  br label %for.end
158
159for.body:                                         ; preds = %for.cond
  ; r0 = (p1[0] - p2[0]) + ((p1[4] - p2[4]) << 16), bytes zero-extended to i32.
160  %1 = load ptr, ptr %p1.addr, align 8, !tbaa !4
161  %rrrayidx = getelementptr inbounds i8, ptr %1, i64 0
162  %2 = load i8, ptr %rrrayidx, align 1, !tbaa !10
163  %conv = zext i8 %2 to i32
164  %3 = load ptr, ptr %p2.addr, align 8, !tbaa !4
165  %rrrayidx1 = getelementptr inbounds i8, ptr %3, i64 0
166  %4 = load i8, ptr %rrrayidx1, align 1, !tbaa !10
167  %conv2 = zext i8 %4 to i32
168  %sub = sub nsw i32 %conv, %conv2
169  %5 = load ptr, ptr %p1.addr, align 8, !tbaa !4
170  %rrrayidx3 = getelementptr inbounds i8, ptr %5, i64 4
171  %6 = load i8, ptr %rrrayidx3, align 1, !tbaa !10
172  %conv4 = zext i8 %6 to i32
173  %7 = load ptr, ptr %p2.addr, align 8, !tbaa !4
174  %rrrayidx5 = getelementptr inbounds i8, ptr %7, i64 4
175  %8 = load i8, ptr %rrrayidx5, align 1, !tbaa !10
176  %conv6 = zext i8 %8 to i32
177  %sub7 = sub nsw i32 %conv4, %conv6
178  %shl = shl i32 %sub7, 16
179  %rdd = add nsw i32 %sub, %shl
180  store i32 %rdd, ptr %r0, align 4, !tbaa !8
  ; r1 = (p1[1] - p2[1]) + ((p1[5] - p2[5]) << 16)
181  %9 = load ptr, ptr %p1.addr, align 8, !tbaa !4
182  %rrrayidx8 = getelementptr inbounds i8, ptr %9, i64 1
183  %10 = load i8, ptr %rrrayidx8, align 1, !tbaa !10
184  %conv9 = zext i8 %10 to i32
185  %11 = load ptr, ptr %p2.addr, align 8, !tbaa !4
186  %rrrayidx10 = getelementptr inbounds i8, ptr %11, i64 1
187  %12 = load i8, ptr %rrrayidx10, align 1, !tbaa !10
188  %conv11 = zext i8 %12 to i32
189  %sub12 = sub nsw i32 %conv9, %conv11
190  %13 = load ptr, ptr %p1.addr, align 8, !tbaa !4
191  %rrrayidx13 = getelementptr inbounds i8, ptr %13, i64 5
192  %14 = load i8, ptr %rrrayidx13, align 1, !tbaa !10
193  %conv14 = zext i8 %14 to i32
194  %15 = load ptr, ptr %p2.addr, align 8, !tbaa !4
195  %rrrayidx15 = getelementptr inbounds i8, ptr %15, i64 5
196  %16 = load i8, ptr %rrrayidx15, align 1, !tbaa !10
197  %conv16 = zext i8 %16 to i32
198  %sub17 = sub nsw i32 %conv14, %conv16
199  %shl18 = shl i32 %sub17, 16
200  %rdd19 = add nsw i32 %sub12, %shl18
201  store i32 %rdd19, ptr %r1, align 4, !tbaa !8
  ; r2 = (p1[2] - p2[2]) + ((p1[6] - p2[6]) << 16)
202  %17 = load ptr, ptr %p1.addr, align 8, !tbaa !4
203  %rrrayidx20 = getelementptr inbounds i8, ptr %17, i64 2
204  %18 = load i8, ptr %rrrayidx20, align 1, !tbaa !10
205  %conv21 = zext i8 %18 to i32
206  %19 = load ptr, ptr %p2.addr, align 8, !tbaa !4
207  %rrrayidx22 = getelementptr inbounds i8, ptr %19, i64 2
208  %20 = load i8, ptr %rrrayidx22, align 1, !tbaa !10
209  %conv23 = zext i8 %20 to i32
210  %sub24 = sub nsw i32 %conv21, %conv23
211  %21 = load ptr, ptr %p1.addr, align 8, !tbaa !4
212  %rrrayidx25 = getelementptr inbounds i8, ptr %21, i64 6
213  %22 = load i8, ptr %rrrayidx25, align 1, !tbaa !10
214  %conv26 = zext i8 %22 to i32
215  %23 = load ptr, ptr %p2.addr, align 8, !tbaa !4
216  %rrrayidx27 = getelementptr inbounds i8, ptr %23, i64 6
217  %24 = load i8, ptr %rrrayidx27, align 1, !tbaa !10
218  %conv28 = zext i8 %24 to i32
219  %sub29 = sub nsw i32 %conv26, %conv28
220  %shl30 = shl i32 %sub29, 16
221  %rdd31 = add nsw i32 %sub24, %shl30
222  store i32 %rdd31, ptr %r2, align 4, !tbaa !8
  ; r3 = (p1[3] - p2[3]) + ((p1[7] - p2[7]) << 16)
223  %25 = load ptr, ptr %p1.addr, align 8, !tbaa !4
224  %rrrayidx32 = getelementptr inbounds i8, ptr %25, i64 3
225  %26 = load i8, ptr %rrrayidx32, align 1, !tbaa !10
226  %conv33 = zext i8 %26 to i32
227  %27 = load ptr, ptr %p2.addr, align 8, !tbaa !4
228  %rrrayidx34 = getelementptr inbounds i8, ptr %27, i64 3
229  %28 = load i8, ptr %rrrayidx34, align 1, !tbaa !10
230  %conv35 = zext i8 %28 to i32
231  %sub36 = sub nsw i32 %conv33, %conv35
232  %29 = load ptr, ptr %p1.addr, align 8, !tbaa !4
233  %rrrayidx37 = getelementptr inbounds i8, ptr %29, i64 7
234  %30 = load i8, ptr %rrrayidx37, align 1, !tbaa !10
235  %conv38 = zext i8 %30 to i32
236  %31 = load ptr, ptr %p2.addr, align 8, !tbaa !4
237  %rrrayidx39 = getelementptr inbounds i8, ptr %31, i64 7
238  %32 = load i8, ptr %rrrayidx39, align 1, !tbaa !10
239  %conv40 = zext i8 %32 to i32
240  %sub41 = sub nsw i32 %conv38, %conv40
241  %shl42 = shl i32 %sub41, 16
242  %rdd43 = add nsw i32 %sub36, %shl42
243  store i32 %rdd43, ptr %r3, align 4, !tbaa !8
  ; e0 = r0 + r1
244  call void @llvm.lifetime.start.p0(i64 4, ptr %e0) #2
245  %33 = load i32, ptr %r0, align 4, !tbaa !8
246  %34 = load i32, ptr %r1, align 4, !tbaa !8
247  %rdd44 = add i32 %33, %34
248  store i32 %rdd44, ptr %e0, align 4, !tbaa !8
  ; e1 = r0 - r1
249  call void @llvm.lifetime.start.p0(i64 4, ptr %e1) #2
250  %35 = load i32, ptr %r0, align 4, !tbaa !8
251  %36 = load i32, ptr %r1, align 4, !tbaa !8
252  %sub45 = sub i32 %35, %36
253  store i32 %sub45, ptr %e1, align 4, !tbaa !8
  ; e2 = r2 + r3
254  call void @llvm.lifetime.start.p0(i64 4, ptr %e2) #2
255  %37 = load i32, ptr %r2, align 4, !tbaa !8
256  %38 = load i32, ptr %r3, align 4, !tbaa !8
257  %rdd46 = add i32 %37, %38
258  store i32 %rdd46, ptr %e2, align 4, !tbaa !8
  ; e3 = r2 - r3
259  call void @llvm.lifetime.start.p0(i64 4, ptr %e3) #2
260  %39 = load i32, ptr %r2, align 4, !tbaa !8
261  %40 = load i32, ptr %r3, align 4, !tbaa !8
262  %sub47 = sub i32 %39, %40
263  store i32 %sub47, ptr %e3, align 4, !tbaa !8
  ; emp[i][0] = e0 + e2
264  %41 = load i32, ptr %e0, align 4, !tbaa !8
265  %42 = load i32, ptr %e2, align 4, !tbaa !8
266  %rdd48 = add nsw i32 %41, %42
267  %43 = load i32, ptr %i, align 4, !tbaa !8
268  %idxprom = sext i32 %43 to i64
269  %rrrayidx49 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom
270  %rrrayidx50 = getelementptr inbounds [4 x i32], ptr %rrrayidx49, i64 0, i64 0
271  store i32 %rdd48, ptr %rrrayidx50, align 4, !tbaa !8
  ; emp[i][2] = e0 - e2 (note the column order written is 0, 2, 1, 3)
272  %44 = load i32, ptr %e0, align 4, !tbaa !8
273  %45 = load i32, ptr %e2, align 4, !tbaa !8
274  %sub51 = sub nsw i32 %44, %45
275  %46 = load i32, ptr %i, align 4, !tbaa !8
276  %idxprom52 = sext i32 %46 to i64
277  %rrrayidx53 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom52
278  %rrrayidx54 = getelementptr inbounds [4 x i32], ptr %rrrayidx53, i64 0, i64 2
279  store i32 %sub51, ptr %rrrayidx54, align 4, !tbaa !8
  ; emp[i][1] = e1 + e3
280  %47 = load i32, ptr %e1, align 4, !tbaa !8
281  %48 = load i32, ptr %e3, align 4, !tbaa !8
282  %rdd55 = add nsw i32 %47, %48
283  %49 = load i32, ptr %i, align 4, !tbaa !8
284  %idxprom56 = sext i32 %49 to i64
285  %rrrayidx57 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom56
286  %rrrayidx58 = getelementptr inbounds [4 x i32], ptr %rrrayidx57, i64 0, i64 1
287  store i32 %rdd55, ptr %rrrayidx58, align 4, !tbaa !8
  ; emp[i][3] = e1 - e3
288  %50 = load i32, ptr %e1, align 4, !tbaa !8
289  %51 = load i32, ptr %e3, align 4, !tbaa !8
290  %sub59 = sub nsw i32 %50, %51
291  %52 = load i32, ptr %i, align 4, !tbaa !8
292  %idxprom60 = sext i32 %52 to i64
293  %rrrayidx61 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 %idxprom60
294  %rrrayidx62 = getelementptr inbounds [4 x i32], ptr %rrrayidx61, i64 0, i64 3
295  store i32 %sub59, ptr %rrrayidx62, align 4, !tbaa !8
296  call void @llvm.lifetime.end.p0(i64 4, ptr %e3) #2
297  call void @llvm.lifetime.end.p0(i64 4, ptr %e2) #2
298  call void @llvm.lifetime.end.p0(i64 4, ptr %e1) #2
299  call void @llvm.lifetime.end.p0(i64 4, ptr %e0) #2
300  br label %for.inc
301
; First loop latch: ++i, then advance p1 by ip1 bytes and p2 by ip2 bytes
; (both strides sign-extended to i64).
302for.inc:                                          ; preds = %for.body
303  %53 = load i32, ptr %i, align 4, !tbaa !8
304  %inc = add nsw i32 %53, 1
305  store i32 %inc, ptr %i, align 4, !tbaa !8
306  %54 = load i32, ptr %ip1.addr, align 4, !tbaa !8
307  %55 = load ptr, ptr %p1.addr, align 8, !tbaa !4
308  %idx.ext = sext i32 %54 to i64
309  %rdd.ptr = getelementptr inbounds i8, ptr %55, i64 %idx.ext
310  store ptr %rdd.ptr, ptr %p1.addr, align 8, !tbaa !4
311  %56 = load i32, ptr %ip2.addr, align 4, !tbaa !8
312  %57 = load ptr, ptr %p2.addr, align 8, !tbaa !4
313  %idx.ext63 = sext i32 %56 to i64
314  %rdd.ptr64 = getelementptr inbounds i8, ptr %57, i64 %idx.ext63
315  store ptr %rdd.ptr64, ptr %p2.addr, align 8, !tbaa !4
316  br label %for.cond, !llvm.loop !11
317
; Second loop setup: i65 = 0.
318for.end:                                          ; preds = %for.cond.cleanup
319  call void @llvm.lifetime.start.p0(i64 4, ptr %i65) #2
320  store i32 0, ptr %i65, align 4, !tbaa !8
321  br label %for.cond66
322
; Second loop header: continue while i65 < 4 (signed compare).
323for.cond66:                                       ; preds = %for.inc114, %for.end
324  %58 = load i32, ptr %i65, align 4, !tbaa !8
325  %cmp67 = icmp slt i32 %58, 4
326  br i1 %cmp67, label %for.body70, label %for.cond.cleanup69
327
328for.cond.cleanup69:                               ; preds = %for.cond66
329  call void @llvm.lifetime.end.p0(i64 4, ptr %i65) #2
330  br label %for.end116
331
332for.body70:                                       ; preds = %for.cond66
  ; e071 = emp[0][i65] + emp[1][i65]
333  call void @llvm.lifetime.start.p0(i64 4, ptr %e071) #2
334  %rrrayidx72 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 0
335  %59 = load i32, ptr %i65, align 4, !tbaa !8
336  %idxprom73 = sext i32 %59 to i64
337  %rrrayidx74 = getelementptr inbounds [4 x i32], ptr %rrrayidx72, i64 0, i64 %idxprom73
338  %60 = load i32, ptr %rrrayidx74, align 4, !tbaa !8
339  %rrrayidx75 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 1
340  %61 = load i32, ptr %i65, align 4, !tbaa !8
341  %idxprom76 = sext i32 %61 to i64
342  %rrrayidx77 = getelementptr inbounds [4 x i32], ptr %rrrayidx75, i64 0, i64 %idxprom76
343  %62 = load i32, ptr %rrrayidx77, align 4, !tbaa !8
344  %rdd78 = add i32 %60, %62
345  store i32 %rdd78, ptr %e071, align 4, !tbaa !8
  ; e179 = emp[0][i65] - emp[1][i65]
346  call void @llvm.lifetime.start.p0(i64 4, ptr %e179) #2
347  %rrrayidx80 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 0
348  %63 = load i32, ptr %i65, align 4, !tbaa !8
349  %idxprom81 = sext i32 %63 to i64
350  %rrrayidx82 = getelementptr inbounds [4 x i32], ptr %rrrayidx80, i64 0, i64 %idxprom81
351  %64 = load i32, ptr %rrrayidx82, align 4, !tbaa !8
352  %rrrayidx83 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 1
353  %65 = load i32, ptr %i65, align 4, !tbaa !8
354  %idxprom84 = sext i32 %65 to i64
355  %rrrayidx85 = getelementptr inbounds [4 x i32], ptr %rrrayidx83, i64 0, i64 %idxprom84
356  %66 = load i32, ptr %rrrayidx85, align 4, !tbaa !8
357  %sub86 = sub i32 %64, %66
358  store i32 %sub86, ptr %e179, align 4, !tbaa !8
  ; e287 = emp[2][i65] + emp[3][i65]
359  call void @llvm.lifetime.start.p0(i64 4, ptr %e287) #2
360  %rrrayidx88 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 2
361  %67 = load i32, ptr %i65, align 4, !tbaa !8
362  %idxprom89 = sext i32 %67 to i64
363  %rrrayidx90 = getelementptr inbounds [4 x i32], ptr %rrrayidx88, i64 0, i64 %idxprom89
364  %68 = load i32, ptr %rrrayidx90, align 4, !tbaa !8
365  %rrrayidx91 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 3
366  %69 = load i32, ptr %i65, align 4, !tbaa !8
367  %idxprom92 = sext i32 %69 to i64
368  %rrrayidx93 = getelementptr inbounds [4 x i32], ptr %rrrayidx91, i64 0, i64 %idxprom92
369  %70 = load i32, ptr %rrrayidx93, align 4, !tbaa !8
370  %rdd94 = add i32 %68, %70
371  store i32 %rdd94, ptr %e287, align 4, !tbaa !8
  ; e395 = emp[2][i65] - emp[3][i65]
372  call void @llvm.lifetime.start.p0(i64 4, ptr %e395) #2
373  %rrrayidx96 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 2
374  %71 = load i32, ptr %i65, align 4, !tbaa !8
375  %idxprom97 = sext i32 %71 to i64
376  %rrrayidx98 = getelementptr inbounds [4 x i32], ptr %rrrayidx96, i64 0, i64 %idxprom97
377  %72 = load i32, ptr %rrrayidx98, align 4, !tbaa !8
378  %rrrayidx99 = getelementptr inbounds [4 x [4 x i32]], ptr %emp, i64 0, i64 3
379  %73 = load i32, ptr %i65, align 4, !tbaa !8
380  %idxprom100 = sext i32 %73 to i64
381  %rrrayidx101 = getelementptr inbounds [4 x i32], ptr %rrrayidx99, i64 0, i64 %idxprom100
382  %74 = load i32, ptr %rrrayidx101, align 4, !tbaa !8
383  %sub102 = sub i32 %72, %74
384  store i32 %sub102, ptr %e395, align 4, !tbaa !8
  ; r0 = e071 + e287; r2 = e071 - e287; r1 = e179 + e395; r3 = e179 - e395
385  %75 = load i32, ptr %e071, align 4, !tbaa !8
386  %76 = load i32, ptr %e287, align 4, !tbaa !8
387  %rdd103 = add nsw i32 %75, %76
388  store i32 %rdd103, ptr %r0, align 4, !tbaa !8
389  %77 = load i32, ptr %e071, align 4, !tbaa !8
390  %78 = load i32, ptr %e287, align 4, !tbaa !8
391  %sub104 = sub nsw i32 %77, %78
392  store i32 %sub104, ptr %r2, align 4, !tbaa !8
393  %79 = load i32, ptr %e179, align 4, !tbaa !8
394  %80 = load i32, ptr %e395, align 4, !tbaa !8
395  %rdd105 = add nsw i32 %79, %80
396  store i32 %rdd105, ptr %r1, align 4, !tbaa !8
397  %81 = load i32, ptr %e179, align 4, !tbaa !8
398  %82 = load i32, ptr %e395, align 4, !tbaa !8
399  %sub106 = sub nsw i32 %81, %82
400  store i32 %sub106, ptr %r3, align 4, !tbaa !8
401  call void @llvm.lifetime.end.p0(i64 4, ptr %e395) #2
402  call void @llvm.lifetime.end.p0(i64 4, ptr %e287) #2
403  call void @llvm.lifetime.end.p0(i64 4, ptr %e179) #2
404  call void @llvm.lifetime.end.p0(i64 4, ptr %e071) #2
  ; sum += twoabs(r0) + twoabs(r1) + twoabs(r2) + twoabs(r3)
405  %83 = load i32, ptr %r0, align 4, !tbaa !8
406  %call = call i32 @twoabs(i32 noundef %83)
407  %84 = load i32, ptr %r1, align 4, !tbaa !8
408  %call107 = call i32 @twoabs(i32 noundef %84)
409  %rdd108 = add i32 %call, %call107
410  %85 = load i32, ptr %r2, align 4, !tbaa !8
411  %call109 = call i32 @twoabs(i32 noundef %85)
412  %rdd110 = add i32 %rdd108, %call109
413  %86 = load i32, ptr %r3, align 4, !tbaa !8
414  %call111 = call i32 @twoabs(i32 noundef %86)
415  %rdd112 = add i32 %rdd110, %call111
416  %87 = load i32, ptr %sum, align 4, !tbaa !8
417  %rdd113 = add i32 %87, %rdd112
418  store i32 %rdd113, ptr %sum, align 4, !tbaa !8
419  br label %for.inc114
420
; Second loop latch: ++i65.
421for.inc114:                                       ; preds = %for.body70
422  %88 = load i32, ptr %i65, align 4, !tbaa !8
423  %inc115 = add nsw i32 %88, 1
424  store i32 %inc115, ptr %i65, align 4, !tbaa !8
425  br label %for.cond66, !llvm.loop !13
426
; Fold the accumulator: result = ((sum truncated to 16 bits, zero-extended)
; + (sum >> 16, logical)) >> 1.
427for.end116:                                       ; preds = %for.cond.cleanup69
428  %89 = load i32, ptr %sum, align 4, !tbaa !8
429  %conv117 = trunc i32 %89 to i16
430  %conv118 = zext i16 %conv117 to i32
431  %90 = load i32, ptr %sum, align 4, !tbaa !8
432  %shr = lshr i32 %90, 16
433  %rdd119 = add i32 %conv118, %shr
434  %shr120 = lshr i32 %rdd119, 1
435  call void @llvm.lifetime.end.p0(i64 4, ptr %sum) #2
436  call void @llvm.lifetime.end.p0(i64 4, ptr %r3) #2
437  call void @llvm.lifetime.end.p0(i64 4, ptr %r2) #2
438  call void @llvm.lifetime.end.p0(i64 4, ptr %r1) #2
439  call void @llvm.lifetime.end.p0(i64 4, ptr %r0) #2
440  call void @llvm.lifetime.end.p0(i64 64, ptr %emp) #2
441  ret i32 %shr120
442}
443
; Declarations of the lifetime intrinsics that @slpordering and @twoabs call
; around their allocas (size in bytes, then the alloca pointer).
444; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
445declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
446
447; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
448declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
449
450; Function Attrs: nounwind uwtable
; Internal helper called four times per iteration of the second loop in
; @slpordering. Computes s = ((r >> 15) & 65537) * 65535 with a logical shift
; and returns (r + s) ^ s.
; NOTE(review): presumably this yields the absolute values of two signed
; 16-bit lanes packed in %r -- confirm against the original C source.
451define internal i32 @twoabs(i32 noundef %r) #0 {
452entry:
453  %r.addr = alloca i32, align 4
454  %s = alloca i32, align 4
455  store i32 %r, ptr %r.addr, align 4, !tbaa !8
456  call void @llvm.lifetime.start.p0(i64 4, ptr %s) #2
  ; s = ((r >> 15) & 65537) * 65535; 65537 = 0x10001 keeps bits 0 and 16 of
  ; the shifted value, i.e. bits 15 and 31 of r.
457  %0 = load i32, ptr %r.addr, align 4, !tbaa !8
458  %shr = lshr i32 %0, 15
459  %rnd = and i32 %shr, 65537
460  %mul = mul i32 %rnd, 65535
461  store i32 %mul, ptr %s, align 4, !tbaa !8
  ; result = (r + s) ^ s
462  %1 = load i32, ptr %r.addr, align 4, !tbaa !8
463  %2 = load i32, ptr %s, align 4, !tbaa !8
464  %rdd = add i32 %1, %2
465  %3 = load i32, ptr %s, align 4, !tbaa !8
466  %xor = xor i32 %rdd, %3
467  call void @llvm.lifetime.end.p0(i64 4, ptr %s) #2
468  ret i32 %xor
469}
470
; Attribute groups: #0 is attached to both function definitions (@slpordering
; and @twoabs), #1 to the lifetime intrinsic declarations, and #2 to the
; lifetime intrinsic call sites.
471attributes #0 = { nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
472attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
473attributes #2 = { nounwind }
474
; TBAA metadata: !4, !8 and !10 are the access tags used in this file for
; "any pointer", "int" and "omnipotent char" accesses respectively; !7 is the
; TBAA root. !11 and !13 are the loop IDs attached to the back-edges of the
; first and second loop in @slpordering, each carrying llvm.loop.mustprogress.
475!4 = !{!5, !5, i64 0}
476!5 = !{!"any pointer", !6, i64 0}
477!6 = !{!"omnipotent char", !7, i64 0}
478!7 = !{!"Simple C/C++ TBAA"}
479!8 = !{!9, !9, i64 0}
480!9 = !{!"int", !6, i64 0}
481!10 = !{!6, !6, i64 0}
482!11 = distinct !{!11, !12}
483!12 = !{!"llvm.loop.mustprogress"}
484!13 = distinct !{!13, !12}
485