xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll (revision 1157187496afbbb203b8ec7aa320769ec6eed8c4)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mcpu=corei7 -passes="default<O1>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
3; RUN: opt < %s -mcpu=corei7 -passes="default<O2>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
4; RUN: opt < %s -mcpu=corei7 -passes="default<O3>" -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
5; RUN: opt < %s -mcpu=corei7 -passes="default<O3>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
6; RUN: opt < %s -mcpu=corei7 -passes="default<Os>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
7; RUN: opt < %s -mcpu=corei7 -passes="default<Oz>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
8; RUN: opt < %s -mcpu=corei7 -passes="default<O1>,loop-vectorize" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
9; RUN: opt < %s -mcpu=corei7 -passes="default<Oz>,loop-vectorize" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
10; RUN: opt < %s -mcpu=corei7 -passes="default<O3>" -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
11
12; This file tests the llvm.loop.vectorize.enable metadata forcing
13; vectorization even when optimization levels are too low, or when
14; vectorization is disabled.
15
16target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
17target triple = "x86_64-unknown-linux-gnu"
18
19define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %N) {
20; O1-LABEL: @enabled(
21; O1-NEXT:  entry:
22; O1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
23; O1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
24; O1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
25; O1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
26; O1-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
27; O1-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
28; O1-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
29; O1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
30; O1-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
31; O1-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
32; O1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
33; O1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
34; O1-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
35; O1-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
36; O1-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
37; O1-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
38; O1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
39; O1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
40; O1-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
41; O1-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
42; O1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
43; O1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
44; O1-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
45; O1-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
46; O1-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
47; O1-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
48; O1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
49; O1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
50; O1-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
51; O1-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
52; O1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
53; O1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
54; O1-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
55; O1-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
56; O1-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
57; O1-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
58; O1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
59; O1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
60; O1-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
61; O1-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
62; O1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
63; O1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
64; O1-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
65; O1-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
66; O1-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
67; O1-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
68; O1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
69; O1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
70; O1-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
71; O1-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
72; O1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
73; O1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
74; O1-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
75; O1-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
76; O1-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
77; O1-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
78; O1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
79; O1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
80; O1-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
81; O1-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
82; O1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
83; O1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
84; O1-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
85; O1-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
86; O1-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
87; O1-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
88; O1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
89; O1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
90; O1-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
91; O1-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
92; O1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
93; O1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
94; O1-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
95; O1-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
96; O1-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
97; O1-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
98; O1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
99; O1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
100; O1-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
101; O1-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
102; O1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
103; O1-NEXT:    ret i32 [[TMP46]]
104;
105; O2-LABEL: @enabled(
106; O2-NEXT:  entry:
107; O2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
108; O2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
109; O2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
110; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
111; O2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
112; O2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
113; O2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
114; O2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
115; O2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
116; O2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
117; O2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
118; O2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
119; O2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
120; O2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
121; O2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
122; O2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
123; O2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
124; O2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
125; O2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
126; O2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
127; O2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
128; O2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
129; O2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
130; O2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
131; O2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
132; O2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
133; O2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
134; O2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
135; O2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
136; O2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
137; O2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
138; O2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
139; O2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
140; O2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
141; O2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
142; O2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
143; O2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
144; O2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
145; O2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
146; O2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
147; O2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
148; O2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
149; O2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
150; O2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
151; O2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
152; O2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
153; O2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
154; O2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
155; O2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
156; O2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
157; O2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
158; O2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
159; O2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
160; O2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
161; O2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
162; O2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
163; O2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
164; O2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
165; O2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
166; O2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
167; O2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
168; O2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
169; O2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
170; O2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
171; O2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
172; O2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
173; O2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
174; O2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
175; O2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
176; O2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
177; O2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
178; O2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
179; O2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
180; O2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
181; O2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
182; O2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
183; O2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
184; O2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
185; O2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
186; O2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
187; O2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
188; O2-NEXT:    ret i32 [[TMP46]]
189;
190; O3-LABEL: @enabled(
191; O3-NEXT:  entry:
192; O3-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
193; O3-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
194; O3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
195; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
196; O3-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
197; O3-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
198; O3-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
199; O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
200; O3-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
201; O3-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
202; O3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
203; O3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
204; O3-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
205; O3-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
206; O3-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
207; O3-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
208; O3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
209; O3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
210; O3-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
211; O3-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
212; O3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
213; O3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
214; O3-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
215; O3-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
216; O3-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
217; O3-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
218; O3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
219; O3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
220; O3-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
221; O3-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
222; O3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
223; O3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
224; O3-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
225; O3-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
226; O3-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
227; O3-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
228; O3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
229; O3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
230; O3-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
231; O3-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
232; O3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
233; O3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
234; O3-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
235; O3-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
236; O3-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
237; O3-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
238; O3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
239; O3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
240; O3-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
241; O3-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
242; O3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
243; O3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
244; O3-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
245; O3-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
246; O3-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
247; O3-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
248; O3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
249; O3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
250; O3-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
251; O3-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
252; O3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
253; O3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
254; O3-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
255; O3-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
256; O3-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
257; O3-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
258; O3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
259; O3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
260; O3-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
261; O3-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
262; O3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
263; O3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
264; O3-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
265; O3-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
266; O3-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
267; O3-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
268; O3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
269; O3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
270; O3-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
271; O3-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
272; O3-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
273; O3-NEXT:    ret i32 [[TMP46]]
274;
275; O3DEFAULT-LABEL: @enabled(
276; O3DEFAULT-NEXT:  entry:
277; O3DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
278; O3DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
279; O3DEFAULT-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
280; O3DEFAULT-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
281; O3DEFAULT-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
282; O3DEFAULT-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
283; O3DEFAULT-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
284; O3DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
285; O3DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
286; O3DEFAULT-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
287; O3DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
288; O3DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
289; O3DEFAULT-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
290; O3DEFAULT-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
291; O3DEFAULT-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
292; O3DEFAULT-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
293; O3DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
294; O3DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
295; O3DEFAULT-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
296; O3DEFAULT-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
297; O3DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
298; O3DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
299; O3DEFAULT-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
300; O3DEFAULT-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
301; O3DEFAULT-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
302; O3DEFAULT-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
303; O3DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
304; O3DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
305; O3DEFAULT-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
306; O3DEFAULT-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
307; O3DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
308; O3DEFAULT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
309; O3DEFAULT-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
310; O3DEFAULT-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
311; O3DEFAULT-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
312; O3DEFAULT-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
313; O3DEFAULT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
314; O3DEFAULT-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
315; O3DEFAULT-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
316; O3DEFAULT-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
317; O3DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
318; O3DEFAULT-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
319; O3DEFAULT-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
320; O3DEFAULT-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
321; O3DEFAULT-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
322; O3DEFAULT-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
323; O3DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
324; O3DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
325; O3DEFAULT-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
326; O3DEFAULT-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
327; O3DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
328; O3DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
329; O3DEFAULT-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
330; O3DEFAULT-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
331; O3DEFAULT-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
332; O3DEFAULT-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
333; O3DEFAULT-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
334; O3DEFAULT-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
335; O3DEFAULT-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
336; O3DEFAULT-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
337; O3DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
338; O3DEFAULT-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
339; O3DEFAULT-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
340; O3DEFAULT-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
341; O3DEFAULT-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
342; O3DEFAULT-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
343; O3DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
344; O3DEFAULT-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
345; O3DEFAULT-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
346; O3DEFAULT-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
347; O3DEFAULT-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
348; O3DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
349; O3DEFAULT-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
350; O3DEFAULT-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
351; O3DEFAULT-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
352; O3DEFAULT-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
353; O3DEFAULT-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
354; O3DEFAULT-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
355; O3DEFAULT-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
356; O3DEFAULT-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
357; O3DEFAULT-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
358; O3DEFAULT-NEXT:    ret i32 [[TMP46]]
359;
360; Os-LABEL: @enabled(
361; Os-NEXT:  entry:
362; Os-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
363; Os-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
364; Os-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
365; Os-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
366; Os-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
367; Os-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
368; Os-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
369; Os-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
370; Os-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
371; Os-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
372; Os-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
373; Os-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
374; Os-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
375; Os-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
376; Os-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
377; Os-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
378; Os-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
379; Os-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
380; Os-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
381; Os-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
382; Os-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
383; Os-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
384; Os-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
385; Os-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
386; Os-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
387; Os-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
388; Os-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
389; Os-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
390; Os-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
391; Os-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
392; Os-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
393; Os-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
394; Os-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
395; Os-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
396; Os-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
397; Os-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
398; Os-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
399; Os-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
400; Os-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
401; Os-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
402; Os-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
403; Os-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
404; Os-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
405; Os-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
406; Os-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
407; Os-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
408; Os-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
409; Os-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
410; Os-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
411; Os-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
412; Os-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
413; Os-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
414; Os-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
415; Os-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
416; Os-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
417; Os-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
418; Os-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
419; Os-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
420; Os-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
421; Os-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
422; Os-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
423; Os-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
424; Os-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
425; Os-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
426; Os-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
427; Os-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
428; Os-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
429; Os-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
430; Os-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
431; Os-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
432; Os-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
433; Os-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
434; Os-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
435; Os-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
436; Os-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
437; Os-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
438; Os-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
439; Os-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
440; Os-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
441; Os-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
442; Os-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
443; Os-NEXT:    ret i32 [[TMP46]]
444;
445; Oz-LABEL: @enabled(
446; Oz-NEXT:  entry:
447; Oz-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
448; Oz-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
449; Oz-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
450; Oz-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
451; Oz-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
452; Oz-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
453; Oz-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
454; Oz-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
455; Oz-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
456; Oz-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
457; Oz-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
458; Oz-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
459; Oz-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
460; Oz-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
461; Oz-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
462; Oz-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
463; Oz-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
464; Oz-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
465; Oz-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
466; Oz-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
467; Oz-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
468; Oz-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
469; Oz-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
470; Oz-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
471; Oz-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
472; Oz-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
473; Oz-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
474; Oz-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
475; Oz-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
476; Oz-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
477; Oz-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
478; Oz-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
479; Oz-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
480; Oz-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
481; Oz-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
482; Oz-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
483; Oz-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
484; Oz-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
485; Oz-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
486; Oz-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
487; Oz-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
488; Oz-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
489; Oz-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
490; Oz-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
491; Oz-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
492; Oz-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
493; Oz-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
494; Oz-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
495; Oz-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
496; Oz-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
497; Oz-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
498; Oz-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
499; Oz-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
500; Oz-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
501; Oz-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
502; Oz-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
503; Oz-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
504; Oz-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
505; Oz-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
506; Oz-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
507; Oz-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
508; Oz-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
509; Oz-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
510; Oz-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
511; Oz-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
512; Oz-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
513; Oz-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
514; Oz-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
515; Oz-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
516; Oz-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
517; Oz-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
518; Oz-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
519; Oz-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
520; Oz-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
521; Oz-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
522; Oz-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
523; Oz-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
524; Oz-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
525; Oz-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
526; Oz-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
527; Oz-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
528; Oz-NEXT:    ret i32 [[TMP46]]
529;
530; O1VEC2-LABEL: @enabled(
531; O1VEC2-NEXT:  entry:
532; O1VEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
533; O1VEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
534; O1VEC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
535; O1VEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
536; O1VEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
537; O1VEC2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
538; O1VEC2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
539; O1VEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
540; O1VEC2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
541; O1VEC2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
542; O1VEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
543; O1VEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
544; O1VEC2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
545; O1VEC2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
546; O1VEC2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
547; O1VEC2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
548; O1VEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
549; O1VEC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
550; O1VEC2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
551; O1VEC2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
552; O1VEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
553; O1VEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
554; O1VEC2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
555; O1VEC2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
556; O1VEC2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
557; O1VEC2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
558; O1VEC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
559; O1VEC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
560; O1VEC2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
561; O1VEC2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
562; O1VEC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
563; O1VEC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
564; O1VEC2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
565; O1VEC2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
566; O1VEC2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
567; O1VEC2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
568; O1VEC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
569; O1VEC2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
570; O1VEC2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
571; O1VEC2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
572; O1VEC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
573; O1VEC2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
574; O1VEC2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
575; O1VEC2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
576; O1VEC2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
577; O1VEC2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
578; O1VEC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
579; O1VEC2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
580; O1VEC2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
581; O1VEC2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
582; O1VEC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
583; O1VEC2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
584; O1VEC2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
585; O1VEC2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
586; O1VEC2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
587; O1VEC2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
588; O1VEC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
589; O1VEC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
590; O1VEC2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
591; O1VEC2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
592; O1VEC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
593; O1VEC2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
594; O1VEC2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
595; O1VEC2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
596; O1VEC2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
597; O1VEC2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
598; O1VEC2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
599; O1VEC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
600; O1VEC2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
601; O1VEC2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
602; O1VEC2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
603; O1VEC2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
604; O1VEC2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
605; O1VEC2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
606; O1VEC2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
607; O1VEC2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
608; O1VEC2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
609; O1VEC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
610; O1VEC2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
611; O1VEC2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
612; O1VEC2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
613; O1VEC2-NEXT:    ret i32 [[TMP46]]
614;
615; OzVEC2-LABEL: @enabled(
616; OzVEC2-NEXT:  entry:
617; OzVEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
618; OzVEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
619; OzVEC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
620; OzVEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
621; OzVEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
622; OzVEC2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
623; OzVEC2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
624; OzVEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
625; OzVEC2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
626; OzVEC2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
627; OzVEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
628; OzVEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
629; OzVEC2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
630; OzVEC2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
631; OzVEC2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
632; OzVEC2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
633; OzVEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
634; OzVEC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
635; OzVEC2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
636; OzVEC2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
637; OzVEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
638; OzVEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
639; OzVEC2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
640; OzVEC2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
641; OzVEC2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
642; OzVEC2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
643; OzVEC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
644; OzVEC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
645; OzVEC2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
646; OzVEC2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
647; OzVEC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
648; OzVEC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
649; OzVEC2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
650; OzVEC2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
651; OzVEC2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
652; OzVEC2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
653; OzVEC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
654; OzVEC2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
655; OzVEC2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
656; OzVEC2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
657; OzVEC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
658; OzVEC2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
659; OzVEC2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
660; OzVEC2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
661; OzVEC2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
662; OzVEC2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
663; OzVEC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
664; OzVEC2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
665; OzVEC2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
666; OzVEC2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
667; OzVEC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
668; OzVEC2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
669; OzVEC2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
670; OzVEC2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
671; OzVEC2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
672; OzVEC2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
673; OzVEC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
674; OzVEC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
675; OzVEC2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
676; OzVEC2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
677; OzVEC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
678; OzVEC2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
679; OzVEC2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
680; OzVEC2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
681; OzVEC2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
682; OzVEC2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
683; OzVEC2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
684; OzVEC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
685; OzVEC2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
686; OzVEC2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
687; OzVEC2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
688; OzVEC2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
689; OzVEC2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
690; OzVEC2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
691; OzVEC2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
692; OzVEC2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
693; OzVEC2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
694; OzVEC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
695; OzVEC2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
696; OzVEC2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
697; OzVEC2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
698; OzVEC2-NEXT:    ret i32 [[TMP46]]
699;
700; O3DIS-LABEL: @enabled(
701; O3DIS-NEXT:  entry:
702; O3DIS-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
703; O3DIS-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
704; O3DIS-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
705; O3DIS-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
706; O3DIS-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
707; O3DIS-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
708; O3DIS-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
709; O3DIS-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
710; O3DIS-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
711; O3DIS-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
712; O3DIS-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
713; O3DIS-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
714; O3DIS-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
715; O3DIS-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
716; O3DIS-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
717; O3DIS-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
718; O3DIS-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
719; O3DIS-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
720; O3DIS-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
721; O3DIS-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
722; O3DIS-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
723; O3DIS-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
724; O3DIS-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
725; O3DIS-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
726; O3DIS-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
727; O3DIS-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
728; O3DIS-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
729; O3DIS-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
730; O3DIS-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
731; O3DIS-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
732; O3DIS-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
733; O3DIS-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
734; O3DIS-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
735; O3DIS-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
736; O3DIS-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
737; O3DIS-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
738; O3DIS-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
739; O3DIS-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
740; O3DIS-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
741; O3DIS-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
742; O3DIS-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
743; O3DIS-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
744; O3DIS-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
745; O3DIS-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
746; O3DIS-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
747; O3DIS-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
748; O3DIS-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
749; O3DIS-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
750; O3DIS-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
751; O3DIS-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
752; O3DIS-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
753; O3DIS-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
754; O3DIS-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
755; O3DIS-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
756; O3DIS-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
757; O3DIS-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
758; O3DIS-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
759; O3DIS-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
760; O3DIS-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
761; O3DIS-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
762; O3DIS-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
763; O3DIS-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
764; O3DIS-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
765; O3DIS-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
766; O3DIS-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
767; O3DIS-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
768; O3DIS-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
769; O3DIS-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
770; O3DIS-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
771; O3DIS-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
772; O3DIS-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
773; O3DIS-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
774; O3DIS-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
775; O3DIS-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
776; O3DIS-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
777; O3DIS-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
778; O3DIS-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
779; O3DIS-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
780; O3DIS-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
781; O3DIS-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
782; O3DIS-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
783; O3DIS-NEXT:    ret i32 [[TMP46]]
784;
785entry:
786  br label %for.body
787
788for.body:                                         ; preds = %for.body, %entry
789  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
790  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
791  %0 = load i32, ptr %arrayidx, align 4
792  %add = add nsw i32 %0, %N
793  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
794  store i32 %add, ptr %arrayidx2, align 4
795  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
796  %exitcond = icmp eq i64 %indvars.iv.next, 64
797  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
798
799for.end:                                          ; preds = %for.body
800  %1 = load i32, ptr %a, align 4
801  ret i32 %1
802}
803
804define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %N) {
805; O1-LABEL: @nopragma(
806; O1-NEXT:  entry:
807; O1-NEXT:    br label [[FOR_BODY:%.*]]
808; O1:       for.body:
809; O1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
810; O1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
811; O1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
812; O1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
813; O1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
814; O1-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
815; O1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
816; O1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
817; O1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
818; O1:       for.end:
819; O1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
820; O1-NEXT:    ret i32 [[TMP1]]
821;
822; O2-LABEL: @nopragma(
823; O2-NEXT:  entry:
824; O2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
825; O2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
826; O2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
827; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
828; O2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
829; O2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
830; O2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
831; O2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
832; O2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
833; O2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
834; O2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
835; O2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
836; O2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
837; O2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
838; O2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
839; O2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
840; O2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
841; O2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
842; O2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
843; O2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
844; O2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
845; O2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
846; O2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
847; O2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
848; O2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
849; O2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
850; O2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
851; O2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
852; O2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
853; O2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
854; O2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
855; O2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
856; O2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
857; O2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
858; O2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
859; O2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
860; O2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
861; O2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
862; O2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
863; O2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
864; O2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
865; O2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
866; O2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
867; O2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
868; O2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
869; O2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
870; O2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
871; O2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
872; O2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
873; O2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
874; O2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
875; O2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
876; O2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
877; O2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
878; O2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
879; O2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
880; O2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
881; O2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
882; O2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
883; O2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
884; O2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
885; O2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
886; O2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
887; O2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
888; O2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
889; O2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
890; O2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
891; O2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
892; O2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
893; O2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
894; O2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
895; O2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
896; O2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
897; O2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
898; O2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
899; O2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
900; O2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
901; O2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
902; O2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
903; O2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
904; O2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
905; O2-NEXT:    ret i32 [[TMP46]]
906;
907; O3-LABEL: @nopragma(
908; O3-NEXT:  entry:
909; O3-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
910; O3-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
911; O3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
912; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
913; O3-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
914; O3-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
915; O3-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
916; O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
917; O3-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
918; O3-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
919; O3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
920; O3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
921; O3-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
922; O3-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
923; O3-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
924; O3-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
925; O3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
926; O3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
927; O3-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
928; O3-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
929; O3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
930; O3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
931; O3-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
932; O3-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
933; O3-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
934; O3-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
935; O3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
936; O3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
937; O3-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
938; O3-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
939; O3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
940; O3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
941; O3-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
942; O3-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
943; O3-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
944; O3-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
945; O3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
946; O3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
947; O3-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
948; O3-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
949; O3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
950; O3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
951; O3-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
952; O3-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
953; O3-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
954; O3-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
955; O3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
956; O3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
957; O3-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
958; O3-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
959; O3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
960; O3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
961; O3-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
962; O3-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
963; O3-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
964; O3-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
965; O3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
966; O3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
967; O3-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
968; O3-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
969; O3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
970; O3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
971; O3-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
972; O3-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
973; O3-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
974; O3-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
975; O3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
976; O3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
977; O3-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
978; O3-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
979; O3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
980; O3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
981; O3-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
982; O3-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
983; O3-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
984; O3-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
985; O3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
986; O3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
987; O3-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
988; O3-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
989; O3-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
990; O3-NEXT:    ret i32 [[TMP46]]
991;
992; O3DEFAULT-LABEL: @nopragma(
993; O3DEFAULT-NEXT:  entry:
994; O3DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
995; O3DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
996; O3DEFAULT-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
997; O3DEFAULT-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
998; O3DEFAULT-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
999; O3DEFAULT-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1000; O3DEFAULT-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1001; O3DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
1002; O3DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
1003; O3DEFAULT-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
1004; O3DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
1005; O3DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
1006; O3DEFAULT-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
1007; O3DEFAULT-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
1008; O3DEFAULT-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1009; O3DEFAULT-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
1010; O3DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
1011; O3DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
1012; O3DEFAULT-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
1013; O3DEFAULT-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
1014; O3DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
1015; O3DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
1016; O3DEFAULT-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
1017; O3DEFAULT-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
1018; O3DEFAULT-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1019; O3DEFAULT-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
1020; O3DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
1021; O3DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
1022; O3DEFAULT-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
1023; O3DEFAULT-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
1024; O3DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
1025; O3DEFAULT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
1026; O3DEFAULT-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
1027; O3DEFAULT-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
1028; O3DEFAULT-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1029; O3DEFAULT-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
1030; O3DEFAULT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
1031; O3DEFAULT-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
1032; O3DEFAULT-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
1033; O3DEFAULT-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
1034; O3DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
1035; O3DEFAULT-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
1036; O3DEFAULT-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
1037; O3DEFAULT-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
1038; O3DEFAULT-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1039; O3DEFAULT-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
1040; O3DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
1041; O3DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
1042; O3DEFAULT-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
1043; O3DEFAULT-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
1044; O3DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
1045; O3DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
1046; O3DEFAULT-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
1047; O3DEFAULT-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
1048; O3DEFAULT-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1049; O3DEFAULT-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
1050; O3DEFAULT-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
1051; O3DEFAULT-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
1052; O3DEFAULT-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
1053; O3DEFAULT-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
1054; O3DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
1055; O3DEFAULT-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
1056; O3DEFAULT-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
1057; O3DEFAULT-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
1058; O3DEFAULT-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1059; O3DEFAULT-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
1060; O3DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
1061; O3DEFAULT-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
1062; O3DEFAULT-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
1063; O3DEFAULT-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
1064; O3DEFAULT-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
1065; O3DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
1066; O3DEFAULT-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
1067; O3DEFAULT-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
1068; O3DEFAULT-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1069; O3DEFAULT-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
1070; O3DEFAULT-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
1071; O3DEFAULT-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
1072; O3DEFAULT-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
1073; O3DEFAULT-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
1074; O3DEFAULT-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
1075; O3DEFAULT-NEXT:    ret i32 [[TMP46]]
1076;
1077; Os-LABEL: @nopragma(
1078; Os-NEXT:  entry:
1079; Os-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1080; Os-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1081; Os-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
1082; Os-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
1083; Os-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
1084; Os-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1085; Os-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1086; Os-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
1087; Os-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
1088; Os-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
1089; Os-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
1090; Os-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
1091; Os-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
1092; Os-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
1093; Os-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1094; Os-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
1095; Os-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
1096; Os-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
1097; Os-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
1098; Os-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
1099; Os-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
1100; Os-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
1101; Os-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
1102; Os-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
1103; Os-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1104; Os-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
1105; Os-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
1106; Os-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
1107; Os-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
1108; Os-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
1109; Os-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
1110; Os-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
1111; Os-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
1112; Os-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
1113; Os-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1114; Os-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
1115; Os-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
1116; Os-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
1117; Os-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
1118; Os-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
1119; Os-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
1120; Os-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
1121; Os-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
1122; Os-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
1123; Os-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1124; Os-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
1125; Os-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
1126; Os-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
1127; Os-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
1128; Os-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
1129; Os-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
1130; Os-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
1131; Os-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
1132; Os-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
1133; Os-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1134; Os-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
1135; Os-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
1136; Os-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
1137; Os-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
1138; Os-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
1139; Os-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
1140; Os-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
1141; Os-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
1142; Os-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
1143; Os-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1144; Os-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
1145; Os-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
1146; Os-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
1147; Os-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
1148; Os-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
1149; Os-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
1150; Os-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
1151; Os-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
1152; Os-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
1153; Os-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1154; Os-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
1155; Os-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
1156; Os-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
1157; Os-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
1158; Os-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
1159; Os-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
1160; Os-NEXT:    ret i32 [[TMP46]]
1161;
1162; Oz-LABEL: @nopragma(
1163; Oz-NEXT:  entry:
1164; Oz-NEXT:    br label [[FOR_BODY:%.*]]
1165; Oz:       for.body:
1166; Oz-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1167; Oz-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1168; Oz-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1169; Oz-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1170; Oz-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1171; Oz-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1172; Oz-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1173; Oz-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1174; Oz-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1175; Oz:       for.end:
1176; Oz-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1177; Oz-NEXT:    ret i32 [[TMP1]]
1178;
1179; O1VEC2-LABEL: @nopragma(
1180; O1VEC2-NEXT:  entry:
1181; O1VEC2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1182; O1VEC2:       vector.ph:
1183; O1VEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1184; O1VEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1185; O1VEC2-NEXT:    br label [[VECTOR_BODY:%.*]]
1186; O1VEC2:       vector.body:
1187; O1VEC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1188; O1VEC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1189; O1VEC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP0]]
1190; O1VEC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
1191; O1VEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4
1192; O1VEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
1193; O1VEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
1194; O1VEC2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1195; O1VEC2-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1196; O1VEC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[TMP0]]
1197; O1VEC2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 0
1198; O1VEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4
1199; O1VEC2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
1200; O1VEC2-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
1201; O1VEC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1202; O1VEC2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
1203; O1VEC2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1204; O1VEC2:       middle.block:
1205; O1VEC2-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1206; O1VEC2:       scalar.ph:
1207; O1VEC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1208; O1VEC2-NEXT:    br label [[FOR_BODY:%.*]]
1209; O1VEC2:       for.body:
1210; O1VEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1211; O1VEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
1212; O1VEC2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1213; O1VEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]]
1214; O1VEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
1215; O1VEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1216; O1VEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1217; O1VEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1218; O1VEC2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
1219; O1VEC2:       for.end:
1220; O1VEC2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
1221; O1VEC2-NEXT:    ret i32 [[TMP11]]
1222;
1223; OzVEC2-LABEL: @nopragma(
1224; OzVEC2-NEXT:  entry:
1225; OzVEC2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1226; OzVEC2:       vector.ph:
1227; OzVEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1228; OzVEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1229; OzVEC2-NEXT:    br label [[VECTOR_BODY:%.*]]
1230; OzVEC2:       vector.body:
1231; OzVEC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1232; OzVEC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1233; OzVEC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP0]]
1234; OzVEC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
1235; OzVEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4
1236; OzVEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
1237; OzVEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
1238; OzVEC2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1239; OzVEC2-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1240; OzVEC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[TMP0]]
1241; OzVEC2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 0
1242; OzVEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4
1243; OzVEC2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
1244; OzVEC2-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
1245; OzVEC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1246; OzVEC2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
1247; OzVEC2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1248; OzVEC2:       middle.block:
1249; OzVEC2-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1250; OzVEC2:       scalar.ph:
1251; OzVEC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1252; OzVEC2-NEXT:    br label [[FOR_BODY:%.*]]
1253; OzVEC2:       for.body:
1254; OzVEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1255; OzVEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
1256; OzVEC2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1257; OzVEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]]
1258; OzVEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
1259; OzVEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1260; OzVEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1261; OzVEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1262; OzVEC2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
1263; OzVEC2:       for.end:
1264; OzVEC2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
1265; OzVEC2-NEXT:    ret i32 [[TMP11]]
1266;
1267; O3DIS-LABEL: @nopragma(
1268; O3DIS-NEXT:  entry:
1269; O3DIS-NEXT:    br label [[FOR_BODY:%.*]]
1270; O3DIS:       for.body:
1271; O3DIS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1272; O3DIS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1273; O3DIS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1274; O3DIS-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1275; O3DIS-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1276; O3DIS-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1277; O3DIS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1278; O3DIS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1279; O3DIS-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1280; O3DIS:       for.end:
1281; O3DIS-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1282; O3DIS-NEXT:    ret i32 [[TMP1]]
1283;
1284entry:
1285  br label %for.body
1286
1287for.body:                                         ; preds = %for.body, %entry
1288  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1289  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
1290  %0 = load i32, ptr %arrayidx, align 4
1291  %add = add nsw i32 %0, %N
1292  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1293  store i32 %add, ptr %arrayidx2, align 4
1294  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1295  %exitcond = icmp eq i64 %indvars.iv.next, 64
1296  br i1 %exitcond, label %for.end, label %for.body
1297
1298for.end:                                          ; preds = %for.body
1299  %1 = load i32, ptr %a, align 4
1300  ret i32 %1
1301}
1302
1303define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %N) {
1304; O1-LABEL: @disabled(
1305; O1-NEXT:  entry:
1306; O1-NEXT:    br label [[FOR_BODY:%.*]]
1307; O1:       for.body:
1308; O1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1309; O1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1310; O1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1311; O1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1312; O1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1313; O1-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1314; O1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1315; O1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1316; O1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1317; O1:       for.end:
1318; O1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1319; O1-NEXT:    ret i32 [[TMP1]]
1320;
1321; O2-LABEL: @disabled(
1322; O2-NEXT:  entry:
1323; O2-NEXT:    br label [[FOR_BODY:%.*]]
1324; O2:       for.body:
1325; O2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1326; O2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1327; O2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1328; O2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1329; O2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1330; O2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1331; O2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1332; O2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1333; O2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1334; O2:       for.end:
1335; O2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1336; O2-NEXT:    ret i32 [[TMP1]]
1337;
1338; O3-LABEL: @disabled(
1339; O3-NEXT:  entry:
1340; O3-NEXT:    br label [[FOR_BODY:%.*]]
1341; O3:       for.body:
1342; O3-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1343; O3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1344; O3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1345; O3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1346; O3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1347; O3-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1348; O3-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1349; O3-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1350; O3-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1351; O3:       for.end:
1352; O3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1353; O3-NEXT:    ret i32 [[TMP1]]
1354;
1355; O3DEFAULT-LABEL: @disabled(
1356; O3DEFAULT-NEXT:  entry:
1357; O3DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
1358; O3DEFAULT-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1359; O3DEFAULT-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
1360; O3DEFAULT-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP2]]
1361; O3DEFAULT-NEXT:    store <4 x i32> [[TMP3]], ptr [[A:%.*]], align 4
1362; O3DEFAULT-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 16
1363; O3DEFAULT-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 16
1364; O3DEFAULT-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_4]], align 4
1365; O3DEFAULT-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP2]]
1366; O3DEFAULT-NEXT:    store <4 x i32> [[TMP5]], ptr [[ARRAYIDX2_4]], align 4
1367; O3DEFAULT-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
1368; O3DEFAULT-NEXT:    [[ARRAYIDX2_8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
1369; O3DEFAULT-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_8]], align 4
1370; O3DEFAULT-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP2]]
1371; O3DEFAULT-NEXT:    store <4 x i32> [[TMP7]], ptr [[ARRAYIDX2_8]], align 4
1372; O3DEFAULT-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
1373; O3DEFAULT-NEXT:    [[ARRAYIDX2_12:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
1374; O3DEFAULT-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_12]], align 4
1375; O3DEFAULT-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP2]]
1376; O3DEFAULT-NEXT:    store <4 x i32> [[TMP9]], ptr [[ARRAYIDX2_12]], align 4
1377; O3DEFAULT-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
1378; O3DEFAULT-NEXT:    [[ARRAYIDX2_16:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
1379; O3DEFAULT-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_16]], align 4
1380; O3DEFAULT-NEXT:    [[TMP11:%.*]] = add nsw <4 x i32> [[TMP10]], [[TMP2]]
1381; O3DEFAULT-NEXT:    store <4 x i32> [[TMP11]], ptr [[ARRAYIDX2_16]], align 4
1382; O3DEFAULT-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
1383; O3DEFAULT-NEXT:    [[ARRAYIDX2_20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
1384; O3DEFAULT-NEXT:    [[TMP12:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_20]], align 4
1385; O3DEFAULT-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP2]]
1386; O3DEFAULT-NEXT:    store <4 x i32> [[TMP13]], ptr [[ARRAYIDX2_20]], align 4
1387; O3DEFAULT-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
1388; O3DEFAULT-NEXT:    [[ARRAYIDX2_24:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
1389; O3DEFAULT-NEXT:    [[TMP14:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_24]], align 4
1390; O3DEFAULT-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], [[TMP2]]
1391; O3DEFAULT-NEXT:    store <4 x i32> [[TMP15]], ptr [[ARRAYIDX2_24]], align 4
1392; O3DEFAULT-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
1393; O3DEFAULT-NEXT:    [[ARRAYIDX2_28:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
1394; O3DEFAULT-NEXT:    [[TMP16:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_28]], align 4
1395; O3DEFAULT-NEXT:    [[TMP17:%.*]] = add nsw <4 x i32> [[TMP16]], [[TMP2]]
1396; O3DEFAULT-NEXT:    store <4 x i32> [[TMP17]], ptr [[ARRAYIDX2_28]], align 4
1397; O3DEFAULT-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
1398; O3DEFAULT-NEXT:    [[ARRAYIDX2_32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
1399; O3DEFAULT-NEXT:    [[TMP18:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_32]], align 4
1400; O3DEFAULT-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[TMP18]], [[TMP2]]
1401; O3DEFAULT-NEXT:    store <4 x i32> [[TMP19]], ptr [[ARRAYIDX2_32]], align 4
1402; O3DEFAULT-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
1403; O3DEFAULT-NEXT:    [[ARRAYIDX2_36:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
1404; O3DEFAULT-NEXT:    [[TMP20:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_36]], align 4
1405; O3DEFAULT-NEXT:    [[TMP21:%.*]] = add nsw <4 x i32> [[TMP20]], [[TMP2]]
1406; O3DEFAULT-NEXT:    store <4 x i32> [[TMP21]], ptr [[ARRAYIDX2_36]], align 4
1407; O3DEFAULT-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
1408; O3DEFAULT-NEXT:    [[ARRAYIDX2_40:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
1409; O3DEFAULT-NEXT:    [[TMP22:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_40]], align 4
1410; O3DEFAULT-NEXT:    [[TMP23:%.*]] = add nsw <4 x i32> [[TMP22]], [[TMP2]]
1411; O3DEFAULT-NEXT:    store <4 x i32> [[TMP23]], ptr [[ARRAYIDX2_40]], align 4
1412; O3DEFAULT-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
1413; O3DEFAULT-NEXT:    [[ARRAYIDX2_44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
1414; O3DEFAULT-NEXT:    [[TMP24:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_44]], align 4
1415; O3DEFAULT-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[TMP24]], [[TMP2]]
1416; O3DEFAULT-NEXT:    store <4 x i32> [[TMP25]], ptr [[ARRAYIDX2_44]], align 4
1417; O3DEFAULT-NEXT:    [[TMP26:%.*]] = load i32, ptr [[A]], align 4
1418; O3DEFAULT-NEXT:    ret i32 [[TMP26]]
1419;
1420; Os-LABEL: @disabled(
1421; Os-NEXT:  entry:
1422; Os-NEXT:    br label [[FOR_BODY:%.*]]
1423; Os:       for.body:
1424; Os-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1425; Os-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1426; Os-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1427; Os-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1428; Os-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1429; Os-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1430; Os-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1431; Os-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1432; Os-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1433; Os:       for.end:
1434; Os-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1435; Os-NEXT:    ret i32 [[TMP1]]
1436;
1437; Oz-LABEL: @disabled(
1438; Oz-NEXT:  entry:
1439; Oz-NEXT:    br label [[FOR_BODY:%.*]]
1440; Oz:       for.body:
1441; Oz-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1442; Oz-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1443; Oz-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1444; Oz-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1445; Oz-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1446; Oz-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1447; Oz-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1448; Oz-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1449; Oz-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1450; Oz:       for.end:
1451; Oz-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1452; Oz-NEXT:    ret i32 [[TMP1]]
1453;
1454; O1VEC2-LABEL: @disabled(
1455; O1VEC2-NEXT:  entry:
1456; O1VEC2-NEXT:    br label [[FOR_BODY:%.*]]
1457; O1VEC2:       for.body:
1458; O1VEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1459; O1VEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1460; O1VEC2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1461; O1VEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1462; O1VEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1463; O1VEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1464; O1VEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1465; O1VEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1466; O1VEC2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
1467; O1VEC2:       for.end:
1468; O1VEC2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1469; O1VEC2-NEXT:    ret i32 [[TMP1]]
1470;
1471; OzVEC2-LABEL: @disabled(
1472; OzVEC2-NEXT:  entry:
1473; OzVEC2-NEXT:    br label [[FOR_BODY:%.*]]
1474; OzVEC2:       for.body:
1475; OzVEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1476; OzVEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1477; OzVEC2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1478; OzVEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1479; OzVEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1480; OzVEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1481; OzVEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1482; OzVEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1483; OzVEC2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
1484; OzVEC2:       for.end:
1485; OzVEC2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1486; OzVEC2-NEXT:    ret i32 [[TMP1]]
1487;
1488; O3DIS-LABEL: @disabled(
1489; O3DIS-NEXT:  entry:
1490; O3DIS-NEXT:    br label [[FOR_BODY:%.*]]
1491; O3DIS:       for.body:
1492; O3DIS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1493; O3DIS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1494; O3DIS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1495; O3DIS-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1496; O3DIS-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1497; O3DIS-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1498; O3DIS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1499; O3DIS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1500; O3DIS-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1501; O3DIS:       for.end:
1502; O3DIS-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1503; O3DIS-NEXT:    ret i32 [[TMP1]]
1504;
1505entry:
1506  br label %for.body
1507
1508for.body:                                         ; preds = %for.body, %entry
1509  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1510  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
1511  %0 = load i32, ptr %arrayidx, align 4
1512  %add = add nsw i32 %0, %N
1513  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1514  store i32 %add, ptr %arrayidx2, align 4
1515  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1516  %exitcond = icmp eq i64 %indvars.iv.next, 48
1517  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
1518
1519for.end:                                          ; preds = %for.body
1520  %1 = load i32, ptr %a, align 4
1521  ret i32 %1
1522}
1523
1524!0 = !{!0, !1}
1525!1 = !{!"llvm.loop.vectorize.enable", i1 1}
1526!2 = !{!2, !3}
1527!3 = !{!"llvm.loop.vectorize.enable", i1 0}
1528