xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll (revision 462cb3cd6cecd0511ecaf0e3ebcaba455ece587d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+sse2     | FileCheck %s --check-prefixes=SSE
3; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx      | FileCheck %s --check-prefixes=AVX
4; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx2     | FileCheck %s --check-prefixes=AVX2
5; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f  | FileCheck %s --check-prefixes=AVX512F
6; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
7
8define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
9; SSE-LABEL: @gather_load(
10; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
11; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
12; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
13; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
14; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
15; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
16; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
17; SSE-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
18; SSE-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
19; SSE-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
20; SSE-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
21; SSE-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
22; SSE-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
23; SSE-NEXT:    ret void
24;
25; AVX-LABEL: @gather_load(
26; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
27; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
28; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
29; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
30; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
31; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
32; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
33; AVX-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
34; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
35; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
36; AVX-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
37; AVX-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
38; AVX-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
39; AVX-NEXT:    ret void
40;
41; AVX2-LABEL: @gather_load(
42; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
43; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
44; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
45; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
46; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
47; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
48; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
49; AVX2-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
50; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
51; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
52; AVX2-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
53; AVX2-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
54; AVX2-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
55; AVX2-NEXT:    ret void
56;
57; AVX512F-LABEL: @gather_load(
58; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
59; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
60; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
61; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
62; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
63; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
64; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
65; AVX512F-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
66; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
67; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
68; AVX512F-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
69; AVX512F-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
70; AVX512F-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
71; AVX512F-NEXT:    ret void
72;
73; AVX512VL-LABEL: @gather_load(
74; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
75; AVX512VL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
76; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
77; AVX512VL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
78; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
79; AVX512VL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
80; AVX512VL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
81; AVX512VL-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
82; AVX512VL-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
83; AVX512VL-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
84; AVX512VL-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
85; AVX512VL-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
86; AVX512VL-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
87; AVX512VL-NEXT:    ret void
88;
89  %3 = getelementptr inbounds i32, ptr %1, i64 1
90  %4 = load i32, ptr %1, align 4, !tbaa !2
91  %5 = getelementptr inbounds i32, ptr %0, i64 1
92  %6 = getelementptr inbounds i32, ptr %1, i64 11
93  %7 = load i32, ptr %6, align 4, !tbaa !2
94  %8 = getelementptr inbounds i32, ptr %0, i64 2
95  %9 = getelementptr inbounds i32, ptr %1, i64 4
96  %10 = load i32, ptr %9, align 4, !tbaa !2
97  %11 = getelementptr inbounds i32, ptr %0, i64 3
98  %12 = load i32, ptr %3, align 4, !tbaa !2
99  %13 = insertelement <4 x i32> poison, i32 %4, i32 0
100  %14 = insertelement <4 x i32> %13, i32 %7, i32 1
101  %15 = insertelement <4 x i32> %14, i32 %10, i32 2
102  %16 = insertelement <4 x i32> %15, i32 %12, i32 3
103  %17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4>
104  store <4 x i32> %17, ptr %0, align 4, !tbaa !2
105  ret void
106}
107
108define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
109; SSE-LABEL: @gather_load_2(
110; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
111; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
112; SSE-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
113; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4
114; SSE-NEXT:    store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
115; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
116; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
117; SSE-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
118; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
119; SSE-NEXT:    store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
120; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
121; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
122; SSE-NEXT:    [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
123; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
124; SSE-NEXT:    store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
125; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
126; SSE-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
127; SSE-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
128; SSE-NEXT:    store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
129; SSE-NEXT:    ret void
130;
131; AVX-LABEL: @gather_load_2(
132; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
133; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
134; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
135; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
136; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
137; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
138; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
139; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
140; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
141; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
142; AVX-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
143; AVX-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
144; AVX-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
145; AVX-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
146; AVX-NEXT:    ret void
147;
148; AVX2-LABEL: @gather_load_2(
149; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
150; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
151; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
152; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
153; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
154; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
155; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
156; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
157; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
158; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
159; AVX2-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
160; AVX2-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
161; AVX2-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
162; AVX2-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
163; AVX2-NEXT:    ret void
164;
165; AVX512F-LABEL: @gather_load_2(
166; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
167; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
168; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
169; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
170; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
171; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
172; AVX512F-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
173; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
174; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
175; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
176; AVX512F-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
177; AVX512F-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
178; AVX512F-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
179; AVX512F-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
180; AVX512F-NEXT:    ret void
181;
182; AVX512VL-LABEL: @gather_load_2(
183; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
184; AVX512VL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
185; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
186; AVX512VL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
187; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
188; AVX512VL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
189; AVX512VL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
190; AVX512VL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
191; AVX512VL-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
192; AVX512VL-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
193; AVX512VL-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
194; AVX512VL-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
195; AVX512VL-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
196; AVX512VL-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
197; AVX512VL-NEXT:    ret void
198;
199  %3 = getelementptr inbounds i32, ptr %1, i64 1
200  %4 = load i32, ptr %3, align 4, !tbaa !2
201  %5 = add nsw i32 %4, 1
202  %6 = getelementptr inbounds i32, ptr %0, i64 1
203  store i32 %5, ptr %0, align 4, !tbaa !2
204  %7 = getelementptr inbounds i32, ptr %1, i64 10
205  %8 = load i32, ptr %7, align 4, !tbaa !2
206  %9 = add nsw i32 %8, 2
207  %10 = getelementptr inbounds i32, ptr %0, i64 2
208  store i32 %9, ptr %6, align 4, !tbaa !2
209  %11 = getelementptr inbounds i32, ptr %1, i64 3
210  %12 = load i32, ptr %11, align 4, !tbaa !2
211  %13 = add nsw i32 %12, 3
212  %14 = getelementptr inbounds i32, ptr %0, i64 3
213  store i32 %13, ptr %10, align 4, !tbaa !2
214  %15 = getelementptr inbounds i32, ptr %1, i64 5
215  %16 = load i32, ptr %15, align 4, !tbaa !2
216  %17 = add nsw i32 %16, 4
217  store i32 %17, ptr %14, align 4, !tbaa !2
218  ret void
219}
220
221
222define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
223; SSE-LABEL: @gather_load_3(
224; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
225; SSE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 1
226; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4
227; SSE-NEXT:    store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
228; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
229; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
230; SSE-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 2
231; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
232; SSE-NEXT:    store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
233; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
234; SSE-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
235; SSE-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], 3
236; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
237; SSE-NEXT:    store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
238; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
239; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
240; SSE-NEXT:    [[TMP16:%.*]] = add i32 [[TMP15]], 4
241; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
242; SSE-NEXT:    store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
243; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
244; SSE-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
245; SSE-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], 1
246; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 20
247; SSE-NEXT:    store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
248; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
249; SSE-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
250; SSE-NEXT:    [[TMP24:%.*]] = add i32 [[TMP23]], 2
251; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 24
252; SSE-NEXT:    store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
253; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
254; SSE-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
255; SSE-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 3
256; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 28
257; SSE-NEXT:    store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
258; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
259; SSE-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
260; SSE-NEXT:    [[TMP32:%.*]] = add i32 [[TMP31]], 4
261; SSE-NEXT:    store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
262; SSE-NEXT:    ret void
263;
264; AVX-LABEL: @gather_load_3(
265; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
266; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
267; AVX-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
268; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
269; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
270; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
271; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
272; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
273; AVX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
274; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
275; AVX-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
276; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
277; AVX-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
278; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
279; AVX-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
280; AVX-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
281; AVX-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
282; AVX-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2
283; AVX-NEXT:    [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3
284; AVX-NEXT:    [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4
285; AVX-NEXT:    [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5
286; AVX-NEXT:    [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6
287; AVX-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
288; AVX-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
289; AVX-NEXT:    store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
290; AVX-NEXT:    ret void
291;
292; AVX2-LABEL: @gather_load_3(
293; AVX2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
294; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
295; AVX2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
296; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
297; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
298; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
299; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
300; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
301; AVX2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
302; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
303; AVX2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
304; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
305; AVX2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
306; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
307; AVX2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
308; AVX2-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
309; AVX2-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
310; AVX2-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2
311; AVX2-NEXT:    [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3
312; AVX2-NEXT:    [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4
313; AVX2-NEXT:    [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5
314; AVX2-NEXT:    [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6
315; AVX2-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
316; AVX2-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
317; AVX2-NEXT:    store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
318; AVX2-NEXT:    ret void
319;
320; AVX512F-LABEL: @gather_load_3(
321; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
322; AVX512F-NEXT:    [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
323; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
324; AVX512F-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
325; AVX512F-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
326; AVX512F-NEXT:    store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
327; AVX512F-NEXT:    ret void
328;
329; AVX512VL-LABEL: @gather_load_3(
330; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
331; AVX512VL-NEXT:    [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
332; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
333; AVX512VL-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
334; AVX512VL-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
335; AVX512VL-NEXT:    store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
336; AVX512VL-NEXT:    ret void
337;
338  %3 = load i32, ptr %1, align 4, !tbaa !2
339  %4 = add i32 %3, 1
340  %5 = getelementptr inbounds i32, ptr %0, i64 1
341  store i32 %4, ptr %0, align 4, !tbaa !2
342  %6 = getelementptr inbounds i32, ptr %1, i64 11
343  %7 = load i32, ptr %6, align 4, !tbaa !2
344  %8 = add i32 %7, 2
345  %9 = getelementptr inbounds i32, ptr %0, i64 2
346  store i32 %8, ptr %5, align 4, !tbaa !2
347  %10 = getelementptr inbounds i32, ptr %1, i64 4
348  %11 = load i32, ptr %10, align 4, !tbaa !2
349  %12 = add i32 %11, 3
350  %13 = getelementptr inbounds i32, ptr %0, i64 3
351  store i32 %12, ptr %9, align 4, !tbaa !2
352  %14 = getelementptr inbounds i32, ptr %1, i64 15
353  %15 = load i32, ptr %14, align 4, !tbaa !2
354  %16 = add i32 %15, 4
355  %17 = getelementptr inbounds i32, ptr %0, i64 4
356  store i32 %16, ptr %13, align 4, !tbaa !2
357  %18 = getelementptr inbounds i32, ptr %1, i64 18
358  %19 = load i32, ptr %18, align 4, !tbaa !2
359  %20 = add i32 %19, 1
360  %21 = getelementptr inbounds i32, ptr %0, i64 5
361  store i32 %20, ptr %17, align 4, !tbaa !2
362  %22 = getelementptr inbounds i32, ptr %1, i64 9
363  %23 = load i32, ptr %22, align 4, !tbaa !2
364  %24 = add i32 %23, 2
365  %25 = getelementptr inbounds i32, ptr %0, i64 6
366  store i32 %24, ptr %21, align 4, !tbaa !2
367  %26 = getelementptr inbounds i32, ptr %1, i64 6
368  %27 = load i32, ptr %26, align 4, !tbaa !2
369  %28 = add i32 %27, 3
370  %29 = getelementptr inbounds i32, ptr %0, i64 7
371  store i32 %28, ptr %25, align 4, !tbaa !2
372  %30 = getelementptr inbounds i32, ptr %1, i64 21
373  %31 = load i32, ptr %30, align 4, !tbaa !2
374  %32 = add i32 %31, 4
375  store i32 %32, ptr %29, align 4, !tbaa !2
376  ret void
377}
378
379define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
380; SSE-LABEL: @gather_load_4(
381; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds nuw i8, ptr [[T0:%.*]], i64 4
382; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
383; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 8
384; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
385; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 12
386; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
387; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 16
388; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
389; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 20
390; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
391; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 24
392; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
393; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 28
394; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
395; SSE-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
396; SSE-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
397; SSE-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
398; SSE-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
399; SSE-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
400; SSE-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
401; SSE-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
402; SSE-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
403; SSE-NEXT:    [[T4:%.*]] = add i32 [[T3]], 1
404; SSE-NEXT:    [[T8:%.*]] = add i32 [[T7]], 2
405; SSE-NEXT:    [[T12:%.*]] = add i32 [[T11]], 3
406; SSE-NEXT:    [[T16:%.*]] = add i32 [[T15]], 4
407; SSE-NEXT:    [[T20:%.*]] = add i32 [[T19]], 1
408; SSE-NEXT:    [[T24:%.*]] = add i32 [[T23]], 2
409; SSE-NEXT:    [[T28:%.*]] = add i32 [[T27]], 3
410; SSE-NEXT:    [[T32:%.*]] = add i32 [[T31]], 4
411; SSE-NEXT:    store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
412; SSE-NEXT:    store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
413; SSE-NEXT:    store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
414; SSE-NEXT:    store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
415; SSE-NEXT:    store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
416; SSE-NEXT:    store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
417; SSE-NEXT:    store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
418; SSE-NEXT:    store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
419; SSE-NEXT:    ret void
420;
421; AVX-LABEL: @gather_load_4(
422; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
423; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
424; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
425; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
426; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
427; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
428; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
429; AVX-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
430; AVX-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
431; AVX-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
432; AVX-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
433; AVX-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
434; AVX-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
435; AVX-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
436; AVX-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
437; AVX-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
438; AVX-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
439; AVX-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
440; AVX-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3
441; AVX-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4
442; AVX-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5
443; AVX-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
444; AVX-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
445; AVX-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
446; AVX-NEXT:    store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
447; AVX-NEXT:    ret void
448;
449; AVX2-LABEL: @gather_load_4(
450; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
451; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
452; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
453; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
454; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
455; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
456; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
457; AVX2-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
458; AVX2-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
459; AVX2-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
460; AVX2-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
461; AVX2-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
462; AVX2-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
463; AVX2-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
464; AVX2-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
465; AVX2-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
466; AVX2-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
467; AVX2-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
468; AVX2-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3
469; AVX2-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4
470; AVX2-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5
471; AVX2-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
472; AVX2-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
473; AVX2-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
474; AVX2-NEXT:    store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
475; AVX2-NEXT:    ret void
476;
477; AVX512F-LABEL: @gather_load_4(
478; AVX512F-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
479; AVX512F-NEXT:    [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
480; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
481; AVX512F-NEXT:    [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
482; AVX512F-NEXT:    [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
483; AVX512F-NEXT:    store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
484; AVX512F-NEXT:    ret void
485;
486; AVX512VL-LABEL: @gather_load_4(
487; AVX512VL-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
488; AVX512VL-NEXT:    [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
489; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
490; AVX512VL-NEXT:    [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
491; AVX512VL-NEXT:    [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
492; AVX512VL-NEXT:    store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
493; AVX512VL-NEXT:    ret void
494;
495  %t5 = getelementptr inbounds i32, ptr %t0, i64 1
496  %t6 = getelementptr inbounds i32, ptr %t1, i64 11
497  %t9 = getelementptr inbounds i32, ptr %t0, i64 2
498  %t10 = getelementptr inbounds i32, ptr %t1, i64 4
499  %t13 = getelementptr inbounds i32, ptr %t0, i64 3
500  %t14 = getelementptr inbounds i32, ptr %t1, i64 15
501  %t17 = getelementptr inbounds i32, ptr %t0, i64 4
502  %t18 = getelementptr inbounds i32, ptr %t1, i64 18
503  %t21 = getelementptr inbounds i32, ptr %t0, i64 5
504  %t22 = getelementptr inbounds i32, ptr %t1, i64 9
505  %t25 = getelementptr inbounds i32, ptr %t0, i64 6
506  %t26 = getelementptr inbounds i32, ptr %t1, i64 6
507  %t29 = getelementptr inbounds i32, ptr %t0, i64 7
508  %t30 = getelementptr inbounds i32, ptr %t1, i64 21
509
510  %t3 = load i32, ptr %t1, align 4, !tbaa !2
511  %t7 = load i32, ptr %t6, align 4, !tbaa !2
512  %t11 = load i32, ptr %t10, align 4, !tbaa !2
513  %t15 = load i32, ptr %t14, align 4, !tbaa !2
514  %t19 = load i32, ptr %t18, align 4, !tbaa !2
515  %t23 = load i32, ptr %t22, align 4, !tbaa !2
516  %t27 = load i32, ptr %t26, align 4, !tbaa !2
517  %t31 = load i32, ptr %t30, align 4, !tbaa !2
518
519  %t4 = add i32 %t3, 1
520  %t8 = add i32 %t7, 2
521  %t12 = add i32 %t11, 3
522  %t16 = add i32 %t15, 4
523  %t20 = add i32 %t19, 1
524  %t24 = add i32 %t23, 2
525  %t28 = add i32 %t27, 3
526  %t32 = add i32 %t31, 4
527
528  store i32 %t4, ptr %t0, align 4, !tbaa !2
529  store i32 %t8, ptr %t5, align 4, !tbaa !2
530  store i32 %t12, ptr %t9, align 4, !tbaa !2
531  store i32 %t16, ptr %t13, align 4, !tbaa !2
532  store i32 %t20, ptr %t17, align 4, !tbaa !2
533  store i32 %t24, ptr %t21, align 4, !tbaa !2
534  store i32 %t28, ptr %t25, align 4, !tbaa !2
535  store i32 %t32, ptr %t29, align 4, !tbaa !2
536
537  ret void
538}
539
540
541define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
542; SSE-LABEL: @gather_load_div(
543; SSE-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
544; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
545; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
546; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
547; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
548; SSE-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
549; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 16
550; SSE-NEXT:    [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
551; SSE-NEXT:    [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
552; SSE-NEXT:    [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
553; SSE-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
554; SSE-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
555; SSE-NEXT:    [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
556; SSE-NEXT:    [[TMP16:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
557; SSE-NEXT:    [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 4, i32 poison>
558; SSE-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
559; SSE-NEXT:    [[TMP19:%.*]] = shufflevector <4 x float> [[TMP17]], <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
560; SSE-NEXT:    [[TMP20:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> [[TMP12]], <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
561; SSE-NEXT:    [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
562; SSE-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i64 3
563; SSE-NEXT:    [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]]
564; SSE-NEXT:    store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
565; SSE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
566; SSE-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
567; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
568; SSE-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
569; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
570; SSE-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
571; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
572; SSE-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
573; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
574; SSE-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
575; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
576; SSE-NEXT:    [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
577; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
578; SSE-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
579; SSE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
580; SSE-NEXT:    [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
581; SSE-NEXT:    [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i64 0
582; SSE-NEXT:    [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i64 1
583; SSE-NEXT:    [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP33]], i64 2
584; SSE-NEXT:    [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP37]], i64 3
585; SSE-NEXT:    [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP27]], i64 0
586; SSE-NEXT:    [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP31]], i64 1
587; SSE-NEXT:    [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i64 2
588; SSE-NEXT:    [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i64 3
589; SSE-NEXT:    [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]]
590; SSE-NEXT:    store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
591; SSE-NEXT:    ret void
592;
593; AVX-LABEL: @gather_load_div(
594; AVX-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
595; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
596; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
597; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
598; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
599; AVX-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
600; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
601; AVX-NEXT:    [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
602; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
603; AVX-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
604; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
605; AVX-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
606; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
607; AVX-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
608; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
609; AVX-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
610; AVX-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
611; AVX-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
612; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
613; AVX-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
614; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
615; AVX-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
616; AVX-NEXT:    [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
617; AVX-NEXT:    [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
618; AVX-NEXT:    [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
619; AVX-NEXT:    [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0
620; AVX-NEXT:    [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
621; AVX-NEXT:    [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
622; AVX-NEXT:    [[TMP31:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
623; AVX-NEXT:    [[TMP32:%.*]] = shufflevector <8 x float> [[TMP30]], <8 x float> [[TMP31]], <8 x i32> <i32 0, i32 1, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
624; AVX-NEXT:    [[TMP33:%.*]] = shufflevector <2 x float> [[TMP27]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
625; AVX-NEXT:    [[TMP34:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> [[TMP33]], <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
626; AVX-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP10]], i64 4
627; AVX-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP14]], i64 5
628; AVX-NEXT:    [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP18]], i64 6
629; AVX-NEXT:    [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP22]], i64 7
630; AVX-NEXT:    [[TMP39:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> [[TMP27]], <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
631; AVX-NEXT:    [[TMP40:%.*]] = shufflevector <8 x float> [[TMP39]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
632; AVX-NEXT:    [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP8]], i64 3
633; AVX-NEXT:    [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[TMP12]], i64 4
634; AVX-NEXT:    [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP16]], i64 5
635; AVX-NEXT:    [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i64 6
636; AVX-NEXT:    [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i64 7
637; AVX-NEXT:    [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
638; AVX-NEXT:    store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
639; AVX-NEXT:    ret void
640;
641; AVX2-LABEL: @gather_load_div(
642; AVX2-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
643; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
644; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
645; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
646; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
647; AVX2-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
648; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
649; AVX2-NEXT:    [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
650; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
651; AVX2-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
652; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
653; AVX2-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
654; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
655; AVX2-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
656; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
657; AVX2-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
658; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
659; AVX2-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
660; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
661; AVX2-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
662; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
663; AVX2-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
664; AVX2-NEXT:    [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
665; AVX2-NEXT:    [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
666; AVX2-NEXT:    [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
667; AVX2-NEXT:    [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0
668; AVX2-NEXT:    [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
669; AVX2-NEXT:    [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
670; AVX2-NEXT:    [[TMP31:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
671; AVX2-NEXT:    [[TMP32:%.*]] = shufflevector <8 x float> [[TMP30]], <8 x float> [[TMP31]], <8 x i32> <i32 0, i32 1, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
672; AVX2-NEXT:    [[TMP33:%.*]] = shufflevector <2 x float> [[TMP27]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
673; AVX2-NEXT:    [[TMP34:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> [[TMP33]], <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
674; AVX2-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP10]], i64 4
675; AVX2-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP14]], i64 5
676; AVX2-NEXT:    [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP18]], i64 6
677; AVX2-NEXT:    [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP22]], i64 7
678; AVX2-NEXT:    [[TMP39:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> [[TMP27]], <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
679; AVX2-NEXT:    [[TMP40:%.*]] = shufflevector <8 x float> [[TMP39]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
680; AVX2-NEXT:    [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP8]], i64 3
681; AVX2-NEXT:    [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[TMP12]], i64 4
682; AVX2-NEXT:    [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP16]], i64 5
683; AVX2-NEXT:    [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i64 6
684; AVX2-NEXT:    [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i64 7
685; AVX2-NEXT:    [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
686; AVX2-NEXT:    store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
687; AVX2-NEXT:    ret void
688;
689; AVX512F-LABEL: @gather_load_div(
690; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
691; AVX512F-NEXT:    [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
692; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
693; AVX512F-NEXT:    [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
694; AVX512F-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
695; AVX512F-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
696; AVX512F-NEXT:    [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
697; AVX512F-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
698; AVX512F-NEXT:    ret void
699;
700; AVX512VL-LABEL: @gather_load_div(
701; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
702; AVX512VL-NEXT:    [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
703; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
704; AVX512VL-NEXT:    [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
705; AVX512VL-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
706; AVX512VL-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
707; AVX512VL-NEXT:    [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
708; AVX512VL-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
709; AVX512VL-NEXT:    ret void
710;
711  %3 = load float, ptr %1, align 4, !tbaa !2
712  %4 = getelementptr inbounds float, ptr %1, i64 4
713  %5 = load float, ptr %4, align 4, !tbaa !2
714  %6 = fdiv float %3, %5
715  %7 = getelementptr inbounds float, ptr %0, i64 1
716  store float %6, ptr %0, align 4, !tbaa !2
717  %8 = getelementptr inbounds float, ptr %1, i64 10
718  %9 = load float, ptr %8, align 4, !tbaa !2
719  %10 = getelementptr inbounds float, ptr %1, i64 13
720  %11 = load float, ptr %10, align 4, !tbaa !2
721  %12 = fdiv float %9, %11
722  %13 = getelementptr inbounds float, ptr %0, i64 2
723  store float %12, ptr %7, align 4, !tbaa !2
724  %14 = getelementptr inbounds float, ptr %1, i64 3
725  %15 = load float, ptr %14, align 4, !tbaa !2
726  %16 = getelementptr inbounds float, ptr %1, i64 11
727  %17 = load float, ptr %16, align 4, !tbaa !2
728  %18 = fdiv float %15, %17
729  %19 = getelementptr inbounds float, ptr %0, i64 3
730  store float %18, ptr %13, align 4, !tbaa !2
731  %20 = getelementptr inbounds float, ptr %1, i64 14
732  %21 = load float, ptr %20, align 4, !tbaa !2
733  %22 = getelementptr inbounds float, ptr %1, i64 44
734  %23 = load float, ptr %22, align 4, !tbaa !2
735  %24 = fdiv float %21, %23
736  %25 = getelementptr inbounds float, ptr %0, i64 4
737  store float %24, ptr %19, align 4, !tbaa !2
738  %26 = getelementptr inbounds float, ptr %1, i64 17
739  %27 = load float, ptr %26, align 4, !tbaa !2
740  %28 = getelementptr inbounds float, ptr %1, i64 33
741  %29 = load float, ptr %28, align 4, !tbaa !2
742  %30 = fdiv float %27, %29
743  %31 = getelementptr inbounds float, ptr %0, i64 5
744  store float %30, ptr %25, align 4, !tbaa !2
745  %32 = getelementptr inbounds float, ptr %1, i64 8
746  %33 = load float, ptr %32, align 4, !tbaa !2
747  %34 = getelementptr inbounds float, ptr %1, i64 30
748  %35 = load float, ptr %34, align 4, !tbaa !2
749  %36 = fdiv float %33, %35
750  %37 = getelementptr inbounds float, ptr %0, i64 6
751  store float %36, ptr %31, align 4, !tbaa !2
752  %38 = getelementptr inbounds float, ptr %1, i64 5
753  %39 = load float, ptr %38, align 4, !tbaa !2
754  %40 = getelementptr inbounds float, ptr %1, i64 27
755  %41 = load float, ptr %40, align 4, !tbaa !2
756  %42 = fdiv float %39, %41
757  %43 = getelementptr inbounds float, ptr %0, i64 7
758  store float %42, ptr %37, align 4, !tbaa !2
759  %44 = getelementptr inbounds float, ptr %1, i64 20
760  %45 = load float, ptr %44, align 4, !tbaa !2
761  %46 = getelementptr inbounds float, ptr %1, i64 23
762  %47 = load float, ptr %46, align 4, !tbaa !2
763  %48 = fdiv float %45, %47
764  store float %48, ptr %43, align 4, !tbaa !2
765  ret void
766}
767
768!2 = !{!3, !3, i64 0}
769!3 = !{!"short", !4, i64 0}
770!4 = !{!"omnipotent char", !5, i64 0}
771!5 = !{!"Simple C++ TBAA"}
772