xref: /llvm-project/llvm/test/CodeGen/X86/elementwise-store-of-scalar-splat.ll (revision 902d0e86bdbf668d7e6429c11aa6c5b218d5ad08)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-sse2 | FileCheck %s --check-prefixes=ALL,SCALAR
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2,SSE2-ONLY
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefixes=ALL,SSE,SSE2,SSE3
5; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s --check-prefixes=ALL,SSE,SSSE3,SSSE3-ONLY
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSSE3,SSE41
7; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s --check-prefixes=ALL,SSE,SSSE3,SSE42
8; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
9; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2
10; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512,AVX512F
11; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512,AVX512BW
12
; vec16_i8: load one i8, invert it (xor with -1) and store the result to
; 2 consecutive i8 slots of the output pointer (2 x 8 = 16 bits written).
; The checks use the prefix shared by every llc invocation above and expect
; one byte load, one notb and two separate byte stores.
13define void @vec16_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
14; ALL-LABEL: vec16_i8:
15; ALL:       # %bb.0:
16; ALL-NEXT:    movzbl (%rdi), %eax
17; ALL-NEXT:    notb %al
18; ALL-NEXT:    movb %al, (%rsi)
19; ALL-NEXT:    movb %al, 1(%rsi)
20; ALL-NEXT:    retq
21  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
22  %in.elt = xor i8 %in.elt.not, -1
23  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
24  store i8 %in.elt, ptr %out.elt0.ptr, align 64
25  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
26  store i8 %in.elt, ptr %out.elt1.ptr, align 1
27  ret void
28}
29
; vec32_i8: load one i8, invert it (xor with -1) and store the result to
; 4 consecutive i8 slots of the output pointer (4 x 8 = 32 bits written).
; Slot 0 is stored with align 64; the other stores carry the alignment that
; a 64-byte-aligned base gives at their byte offset (1, 2, 1).
; The checks expect four separate byte stores in every configuration.
30define void @vec32_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
31; ALL-LABEL: vec32_i8:
32; ALL:       # %bb.0:
33; ALL-NEXT:    movzbl (%rdi), %eax
34; ALL-NEXT:    notb %al
35; ALL-NEXT:    movb %al, (%rsi)
36; ALL-NEXT:    movb %al, 1(%rsi)
37; ALL-NEXT:    movb %al, 2(%rsi)
38; ALL-NEXT:    movb %al, 3(%rsi)
39; ALL-NEXT:    retq
40  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
41  %in.elt = xor i8 %in.elt.not, -1
42  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
43  store i8 %in.elt, ptr %out.elt0.ptr, align 64
44  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
45  store i8 %in.elt, ptr %out.elt1.ptr, align 1
46  %out.elt2.ptr = getelementptr i8, ptr %out.vec.ptr, i64 2
47  store i8 %in.elt, ptr %out.elt2.ptr, align 2
48  %out.elt3.ptr = getelementptr i8, ptr %out.vec.ptr, i64 3
49  store i8 %in.elt, ptr %out.elt3.ptr, align 1
50  ret void
51}
52
; vec32_i16: load one i16, invert it (xor with -1) and store the result to
; 2 consecutive i16 slots of the output pointer (2 x 16 = 32 bits written).
; The expected code loads and inverts a full 32-bit register (movl/notl) and
; then writes only the low 16 bits with two separate movw stores.
53define void @vec32_i16(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
54; ALL-LABEL: vec32_i16:
55; ALL:       # %bb.0:
56; ALL-NEXT:    movl (%rdi), %eax
57; ALL-NEXT:    notl %eax
58; ALL-NEXT:    movw %ax, (%rsi)
59; ALL-NEXT:    movw %ax, 2(%rsi)
60; ALL-NEXT:    retq
61  %in.elt.not = load i16, ptr %in.elt.ptr, align 64
62  %in.elt = xor i16 %in.elt.not, -1
63  %out.elt0.ptr = getelementptr i16, ptr %out.vec.ptr, i64 0
64  store i16 %in.elt, ptr %out.elt0.ptr, align 64
65  %out.elt1.ptr = getelementptr i16, ptr %out.vec.ptr, i64 1
66  store i16 %in.elt, ptr %out.elt1.ptr, align 2
67  ret void
68}
69
; vec64_i8: load one i8, invert it (xor with -1) and store the result to
; 8 consecutive i8 slots of the output pointer (8 x 8 = 64 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (1, 2, 1, 4, 1, 2, 1).
; The checks expect eight separate byte stores in every configuration.
70define void @vec64_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
71; ALL-LABEL: vec64_i8:
72; ALL:       # %bb.0:
73; ALL-NEXT:    movzbl (%rdi), %eax
74; ALL-NEXT:    notb %al
75; ALL-NEXT:    movb %al, (%rsi)
76; ALL-NEXT:    movb %al, 1(%rsi)
77; ALL-NEXT:    movb %al, 2(%rsi)
78; ALL-NEXT:    movb %al, 3(%rsi)
79; ALL-NEXT:    movb %al, 4(%rsi)
80; ALL-NEXT:    movb %al, 5(%rsi)
81; ALL-NEXT:    movb %al, 6(%rsi)
82; ALL-NEXT:    movb %al, 7(%rsi)
83; ALL-NEXT:    retq
84  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
85  %in.elt = xor i8 %in.elt.not, -1
86  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
87  store i8 %in.elt, ptr %out.elt0.ptr, align 64
88  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
89  store i8 %in.elt, ptr %out.elt1.ptr, align 1
90  %out.elt2.ptr = getelementptr i8, ptr %out.vec.ptr, i64 2
91  store i8 %in.elt, ptr %out.elt2.ptr, align 2
92  %out.elt3.ptr = getelementptr i8, ptr %out.vec.ptr, i64 3
93  store i8 %in.elt, ptr %out.elt3.ptr, align 1
94  %out.elt4.ptr = getelementptr i8, ptr %out.vec.ptr, i64 4
95  store i8 %in.elt, ptr %out.elt4.ptr, align 4
96  %out.elt5.ptr = getelementptr i8, ptr %out.vec.ptr, i64 5
97  store i8 %in.elt, ptr %out.elt5.ptr, align 1
98  %out.elt6.ptr = getelementptr i8, ptr %out.vec.ptr, i64 6
99  store i8 %in.elt, ptr %out.elt6.ptr, align 2
100  %out.elt7.ptr = getelementptr i8, ptr %out.vec.ptr, i64 7
101  store i8 %in.elt, ptr %out.elt7.ptr, align 1
102  ret void
103}
104
; vec64_i16: load one i16, invert it (xor with -1) and store the result to
; 4 consecutive i16 slots of the output pointer (4 x 16 = 64 bits written).
; Slot 0 is stored with align 64; the later stores use align 2, 4, 2, matching
; their byte offsets (2, 4, 6) from a 64-byte-aligned base.
; The expected code inverts a 32-bit register and issues four movw stores.
105define void @vec64_i16(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
106; ALL-LABEL: vec64_i16:
107; ALL:       # %bb.0:
108; ALL-NEXT:    movl (%rdi), %eax
109; ALL-NEXT:    notl %eax
110; ALL-NEXT:    movw %ax, (%rsi)
111; ALL-NEXT:    movw %ax, 2(%rsi)
112; ALL-NEXT:    movw %ax, 4(%rsi)
113; ALL-NEXT:    movw %ax, 6(%rsi)
114; ALL-NEXT:    retq
115  %in.elt.not = load i16, ptr %in.elt.ptr, align 64
116  %in.elt = xor i16 %in.elt.not, -1
117  %out.elt0.ptr = getelementptr i16, ptr %out.vec.ptr, i64 0
118  store i16 %in.elt, ptr %out.elt0.ptr, align 64
119  %out.elt1.ptr = getelementptr i16, ptr %out.vec.ptr, i64 1
120  store i16 %in.elt, ptr %out.elt1.ptr, align 2
121  %out.elt2.ptr = getelementptr i16, ptr %out.vec.ptr, i64 2
122  store i16 %in.elt, ptr %out.elt2.ptr, align 4
123  %out.elt3.ptr = getelementptr i16, ptr %out.vec.ptr, i64 3
124  store i16 %in.elt, ptr %out.elt3.ptr, align 2
125  ret void
126}
127
; vec64_i32: load one i32, invert it (xor with -1) and store the result to
; 2 consecutive i32 slots of the output pointer (2 x 32 = 64 bits written).
; The checks expect one movl load, one notl and two separate movl stores in
; every configuration.
128define void @vec64_i32(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
129; ALL-LABEL: vec64_i32:
130; ALL:       # %bb.0:
131; ALL-NEXT:    movl (%rdi), %eax
132; ALL-NEXT:    notl %eax
133; ALL-NEXT:    movl %eax, (%rsi)
134; ALL-NEXT:    movl %eax, 4(%rsi)
135; ALL-NEXT:    retq
136  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
137  %in.elt = xor i32 %in.elt.not, -1
138  %out.elt0.ptr = getelementptr i32, ptr %out.vec.ptr, i64 0
139  store i32 %in.elt, ptr %out.elt0.ptr, align 64
140  %out.elt1.ptr = getelementptr i32, ptr %out.vec.ptr, i64 1
141  store i32 %in.elt, ptr %out.elt1.ptr, align 4
142  ret void
143}
144
; vec64_float: load one i32, invert it (xor with -1), bitcast the integer to
; float and store that float to 2 consecutive float slots of the output
; pointer (2 x 32 = 64 bits written).
; The expected code stays entirely in a general-purpose register: movl load,
; notl, and two movl stores.
145define void @vec64_float(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
146; ALL-LABEL: vec64_float:
147; ALL:       # %bb.0:
148; ALL-NEXT:    movl (%rdi), %eax
149; ALL-NEXT:    notl %eax
150; ALL-NEXT:    movl %eax, (%rsi)
151; ALL-NEXT:    movl %eax, 4(%rsi)
152; ALL-NEXT:    retq
153  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
154  %in.elt.int = xor i32 %in.elt.not, -1
155  %in.elt = bitcast i32 %in.elt.int to float
156  %out.elt0.ptr = getelementptr float, ptr %out.vec.ptr, i64 0
157  store float %in.elt, ptr %out.elt0.ptr, align 64
158  %out.elt1.ptr = getelementptr float, ptr %out.vec.ptr, i64 1
159  store float %in.elt, ptr %out.elt1.ptr, align 4
160  ret void
161}
162
; vec128_i8: load one i8, invert it (xor with -1) and store the result to
; 16 consecutive i8 slots of the output pointer (16 x 8 = 128 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (e.g. offset 8 -> align 8,
; offset 12 -> align 4).
; The checks expect sixteen separate byte stores in every configuration.
163define void @vec128_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
164; ALL-LABEL: vec128_i8:
165; ALL:       # %bb.0:
166; ALL-NEXT:    movzbl (%rdi), %eax
167; ALL-NEXT:    notb %al
168; ALL-NEXT:    movb %al, (%rsi)
169; ALL-NEXT:    movb %al, 1(%rsi)
170; ALL-NEXT:    movb %al, 2(%rsi)
171; ALL-NEXT:    movb %al, 3(%rsi)
172; ALL-NEXT:    movb %al, 4(%rsi)
173; ALL-NEXT:    movb %al, 5(%rsi)
174; ALL-NEXT:    movb %al, 6(%rsi)
175; ALL-NEXT:    movb %al, 7(%rsi)
176; ALL-NEXT:    movb %al, 8(%rsi)
177; ALL-NEXT:    movb %al, 9(%rsi)
178; ALL-NEXT:    movb %al, 10(%rsi)
179; ALL-NEXT:    movb %al, 11(%rsi)
180; ALL-NEXT:    movb %al, 12(%rsi)
181; ALL-NEXT:    movb %al, 13(%rsi)
182; ALL-NEXT:    movb %al, 14(%rsi)
183; ALL-NEXT:    movb %al, 15(%rsi)
184; ALL-NEXT:    retq
185  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
186  %in.elt = xor i8 %in.elt.not, -1
187  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
188  store i8 %in.elt, ptr %out.elt0.ptr, align 64
189  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
190  store i8 %in.elt, ptr %out.elt1.ptr, align 1
191  %out.elt2.ptr = getelementptr i8, ptr %out.vec.ptr, i64 2
192  store i8 %in.elt, ptr %out.elt2.ptr, align 2
193  %out.elt3.ptr = getelementptr i8, ptr %out.vec.ptr, i64 3
194  store i8 %in.elt, ptr %out.elt3.ptr, align 1
195  %out.elt4.ptr = getelementptr i8, ptr %out.vec.ptr, i64 4
196  store i8 %in.elt, ptr %out.elt4.ptr, align 4
197  %out.elt5.ptr = getelementptr i8, ptr %out.vec.ptr, i64 5
198  store i8 %in.elt, ptr %out.elt5.ptr, align 1
199  %out.elt6.ptr = getelementptr i8, ptr %out.vec.ptr, i64 6
200  store i8 %in.elt, ptr %out.elt6.ptr, align 2
201  %out.elt7.ptr = getelementptr i8, ptr %out.vec.ptr, i64 7
202  store i8 %in.elt, ptr %out.elt7.ptr, align 1
203  %out.elt8.ptr = getelementptr i8, ptr %out.vec.ptr, i64 8
204  store i8 %in.elt, ptr %out.elt8.ptr, align 8
205  %out.elt9.ptr = getelementptr i8, ptr %out.vec.ptr, i64 9
206  store i8 %in.elt, ptr %out.elt9.ptr, align 1
207  %out.elt10.ptr = getelementptr i8, ptr %out.vec.ptr, i64 10
208  store i8 %in.elt, ptr %out.elt10.ptr, align 2
209  %out.elt11.ptr = getelementptr i8, ptr %out.vec.ptr, i64 11
210  store i8 %in.elt, ptr %out.elt11.ptr, align 1
211  %out.elt12.ptr = getelementptr i8, ptr %out.vec.ptr, i64 12
212  store i8 %in.elt, ptr %out.elt12.ptr, align 4
213  %out.elt13.ptr = getelementptr i8, ptr %out.vec.ptr, i64 13
214  store i8 %in.elt, ptr %out.elt13.ptr, align 1
215  %out.elt14.ptr = getelementptr i8, ptr %out.vec.ptr, i64 14
216  store i8 %in.elt, ptr %out.elt14.ptr, align 2
217  %out.elt15.ptr = getelementptr i8, ptr %out.vec.ptr, i64 15
218  store i8 %in.elt, ptr %out.elt15.ptr, align 1
219  ret void
220}
221
; vec128_i16: load one i16, invert it (xor with -1) and store the result to
; 8 consecutive i16 slots of the output pointer (8 x 16 = 128 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (2, 4, 2, 8, 2, 4, 2).
; The expected code inverts a 32-bit register and issues eight movw stores.
222define void @vec128_i16(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
223; ALL-LABEL: vec128_i16:
224; ALL:       # %bb.0:
225; ALL-NEXT:    movl (%rdi), %eax
226; ALL-NEXT:    notl %eax
227; ALL-NEXT:    movw %ax, (%rsi)
228; ALL-NEXT:    movw %ax, 2(%rsi)
229; ALL-NEXT:    movw %ax, 4(%rsi)
230; ALL-NEXT:    movw %ax, 6(%rsi)
231; ALL-NEXT:    movw %ax, 8(%rsi)
232; ALL-NEXT:    movw %ax, 10(%rsi)
233; ALL-NEXT:    movw %ax, 12(%rsi)
234; ALL-NEXT:    movw %ax, 14(%rsi)
235; ALL-NEXT:    retq
236  %in.elt.not = load i16, ptr %in.elt.ptr, align 64
237  %in.elt = xor i16 %in.elt.not, -1
238  %out.elt0.ptr = getelementptr i16, ptr %out.vec.ptr, i64 0
239  store i16 %in.elt, ptr %out.elt0.ptr, align 64
240  %out.elt1.ptr = getelementptr i16, ptr %out.vec.ptr, i64 1
241  store i16 %in.elt, ptr %out.elt1.ptr, align 2
242  %out.elt2.ptr = getelementptr i16, ptr %out.vec.ptr, i64 2
243  store i16 %in.elt, ptr %out.elt2.ptr, align 4
244  %out.elt3.ptr = getelementptr i16, ptr %out.vec.ptr, i64 3
245  store i16 %in.elt, ptr %out.elt3.ptr, align 2
246  %out.elt4.ptr = getelementptr i16, ptr %out.vec.ptr, i64 4
247  store i16 %in.elt, ptr %out.elt4.ptr, align 8
248  %out.elt5.ptr = getelementptr i16, ptr %out.vec.ptr, i64 5
249  store i16 %in.elt, ptr %out.elt5.ptr, align 2
250  %out.elt6.ptr = getelementptr i16, ptr %out.vec.ptr, i64 6
251  store i16 %in.elt, ptr %out.elt6.ptr, align 4
252  %out.elt7.ptr = getelementptr i16, ptr %out.vec.ptr, i64 7
253  store i16 %in.elt, ptr %out.elt7.ptr, align 2
254  ret void
255}
256
; vec128_i32: load one i32, invert it (xor with -1) and store the result to
; 4 consecutive i32 slots of the output pointer (4 x 32 = 128 bits written).
; Slot 0 is stored with align 64; the later stores use align 4, 8, 4, matching
; their byte offsets (4, 8, 12) from a 64-byte-aligned base.
; The checks expect four separate movl stores in every configuration.
257define void @vec128_i32(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
258; ALL-LABEL: vec128_i32:
259; ALL:       # %bb.0:
260; ALL-NEXT:    movl (%rdi), %eax
261; ALL-NEXT:    notl %eax
262; ALL-NEXT:    movl %eax, (%rsi)
263; ALL-NEXT:    movl %eax, 4(%rsi)
264; ALL-NEXT:    movl %eax, 8(%rsi)
265; ALL-NEXT:    movl %eax, 12(%rsi)
266; ALL-NEXT:    retq
267  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
268  %in.elt = xor i32 %in.elt.not, -1
269  %out.elt0.ptr = getelementptr i32, ptr %out.vec.ptr, i64 0
270  store i32 %in.elt, ptr %out.elt0.ptr, align 64
271  %out.elt1.ptr = getelementptr i32, ptr %out.vec.ptr, i64 1
272  store i32 %in.elt, ptr %out.elt1.ptr, align 4
273  %out.elt2.ptr = getelementptr i32, ptr %out.vec.ptr, i64 2
274  store i32 %in.elt, ptr %out.elt2.ptr, align 8
275  %out.elt3.ptr = getelementptr i32, ptr %out.vec.ptr, i64 3
276  store i32 %in.elt, ptr %out.elt3.ptr, align 4
277  ret void
278}
279
; vec128_float: load one i32, invert it (xor with -1), bitcast the integer to
; float and store that float to 4 consecutive float slots of the output
; pointer (4 x 32 = 128 bits written).
; The expected code stays in a general-purpose register: movl load, notl,
; and four separate movl stores.
280define void @vec128_float(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
281; ALL-LABEL: vec128_float:
282; ALL:       # %bb.0:
283; ALL-NEXT:    movl (%rdi), %eax
284; ALL-NEXT:    notl %eax
285; ALL-NEXT:    movl %eax, (%rsi)
286; ALL-NEXT:    movl %eax, 4(%rsi)
287; ALL-NEXT:    movl %eax, 8(%rsi)
288; ALL-NEXT:    movl %eax, 12(%rsi)
289; ALL-NEXT:    retq
290  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
291  %in.elt.int = xor i32 %in.elt.not, -1
292  %in.elt = bitcast i32 %in.elt.int to float
293  %out.elt0.ptr = getelementptr float, ptr %out.vec.ptr, i64 0
294  store float %in.elt, ptr %out.elt0.ptr, align 64
295  %out.elt1.ptr = getelementptr float, ptr %out.vec.ptr, i64 1
296  store float %in.elt, ptr %out.elt1.ptr, align 4
297  %out.elt2.ptr = getelementptr float, ptr %out.vec.ptr, i64 2
298  store float %in.elt, ptr %out.elt2.ptr, align 8
299  %out.elt3.ptr = getelementptr float, ptr %out.vec.ptr, i64 3
300  store float %in.elt, ptr %out.elt3.ptr, align 4
301  ret void
302}
303
; vec128_i64: load one i64, invert it (xor with -1) and store the result to
; 2 consecutive i64 slots of the output pointer (2 x 64 = 128 bits written).
; The checks expect one movq load, one notq and two separate movq stores in
; every configuration.
304define void @vec128_i64(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
305; ALL-LABEL: vec128_i64:
306; ALL:       # %bb.0:
307; ALL-NEXT:    movq (%rdi), %rax
308; ALL-NEXT:    notq %rax
309; ALL-NEXT:    movq %rax, (%rsi)
310; ALL-NEXT:    movq %rax, 8(%rsi)
311; ALL-NEXT:    retq
312  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
313  %in.elt = xor i64 %in.elt.not, -1
314  %out.elt0.ptr = getelementptr i64, ptr %out.vec.ptr, i64 0
315  store i64 %in.elt, ptr %out.elt0.ptr, align 64
316  %out.elt1.ptr = getelementptr i64, ptr %out.vec.ptr, i64 1
317  store i64 %in.elt, ptr %out.elt1.ptr, align 8
318  ret void
319}
320
; vec128_double: load one i64, invert it (xor with -1), bitcast the integer to
; double and store that double to 2 consecutive double slots of the output
; pointer (2 x 64 = 128 bits written).
; The expected code stays in a general-purpose register: movq load, notq,
; and two separate movq stores.
321define void @vec128_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
322; ALL-LABEL: vec128_double:
323; ALL:       # %bb.0:
324; ALL-NEXT:    movq (%rdi), %rax
325; ALL-NEXT:    notq %rax
326; ALL-NEXT:    movq %rax, (%rsi)
327; ALL-NEXT:    movq %rax, 8(%rsi)
328; ALL-NEXT:    retq
329  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
330  %in.elt.int = xor i64 %in.elt.not, -1
331  %in.elt = bitcast i64 %in.elt.int to double
332  %out.elt0.ptr = getelementptr double, ptr %out.vec.ptr, i64 0
333  store double %in.elt, ptr %out.elt0.ptr, align 64
334  %out.elt1.ptr = getelementptr double, ptr %out.vec.ptr, i64 1
335  store double %in.elt, ptr %out.elt1.ptr, align 8
336  ret void
337}
338
; vec256_i8: load one i8, invert it (xor with -1) and store the result to
; 32 consecutive i8 slots of the output pointer (32 x 8 = 256 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (e.g. offset 16 -> align 16,
; offset 24 -> align 8).
; The checks expect thirty-two separate byte stores in every configuration.
339define void @vec256_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
340; ALL-LABEL: vec256_i8:
341; ALL:       # %bb.0:
342; ALL-NEXT:    movzbl (%rdi), %eax
343; ALL-NEXT:    notb %al
344; ALL-NEXT:    movb %al, (%rsi)
345; ALL-NEXT:    movb %al, 1(%rsi)
346; ALL-NEXT:    movb %al, 2(%rsi)
347; ALL-NEXT:    movb %al, 3(%rsi)
348; ALL-NEXT:    movb %al, 4(%rsi)
349; ALL-NEXT:    movb %al, 5(%rsi)
350; ALL-NEXT:    movb %al, 6(%rsi)
351; ALL-NEXT:    movb %al, 7(%rsi)
352; ALL-NEXT:    movb %al, 8(%rsi)
353; ALL-NEXT:    movb %al, 9(%rsi)
354; ALL-NEXT:    movb %al, 10(%rsi)
355; ALL-NEXT:    movb %al, 11(%rsi)
356; ALL-NEXT:    movb %al, 12(%rsi)
357; ALL-NEXT:    movb %al, 13(%rsi)
358; ALL-NEXT:    movb %al, 14(%rsi)
359; ALL-NEXT:    movb %al, 15(%rsi)
360; ALL-NEXT:    movb %al, 16(%rsi)
361; ALL-NEXT:    movb %al, 17(%rsi)
362; ALL-NEXT:    movb %al, 18(%rsi)
363; ALL-NEXT:    movb %al, 19(%rsi)
364; ALL-NEXT:    movb %al, 20(%rsi)
365; ALL-NEXT:    movb %al, 21(%rsi)
366; ALL-NEXT:    movb %al, 22(%rsi)
367; ALL-NEXT:    movb %al, 23(%rsi)
368; ALL-NEXT:    movb %al, 24(%rsi)
369; ALL-NEXT:    movb %al, 25(%rsi)
370; ALL-NEXT:    movb %al, 26(%rsi)
371; ALL-NEXT:    movb %al, 27(%rsi)
372; ALL-NEXT:    movb %al, 28(%rsi)
373; ALL-NEXT:    movb %al, 29(%rsi)
374; ALL-NEXT:    movb %al, 30(%rsi)
375; ALL-NEXT:    movb %al, 31(%rsi)
376; ALL-NEXT:    retq
377  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
378  %in.elt = xor i8 %in.elt.not, -1
379  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
380  store i8 %in.elt, ptr %out.elt0.ptr, align 64
381  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
382  store i8 %in.elt, ptr %out.elt1.ptr, align 1
383  %out.elt2.ptr = getelementptr i8, ptr %out.vec.ptr, i64 2
384  store i8 %in.elt, ptr %out.elt2.ptr, align 2
385  %out.elt3.ptr = getelementptr i8, ptr %out.vec.ptr, i64 3
386  store i8 %in.elt, ptr %out.elt3.ptr, align 1
387  %out.elt4.ptr = getelementptr i8, ptr %out.vec.ptr, i64 4
388  store i8 %in.elt, ptr %out.elt4.ptr, align 4
389  %out.elt5.ptr = getelementptr i8, ptr %out.vec.ptr, i64 5
390  store i8 %in.elt, ptr %out.elt5.ptr, align 1
391  %out.elt6.ptr = getelementptr i8, ptr %out.vec.ptr, i64 6
392  store i8 %in.elt, ptr %out.elt6.ptr, align 2
393  %out.elt7.ptr = getelementptr i8, ptr %out.vec.ptr, i64 7
394  store i8 %in.elt, ptr %out.elt7.ptr, align 1
395  %out.elt8.ptr = getelementptr i8, ptr %out.vec.ptr, i64 8
396  store i8 %in.elt, ptr %out.elt8.ptr, align 8
397  %out.elt9.ptr = getelementptr i8, ptr %out.vec.ptr, i64 9
398  store i8 %in.elt, ptr %out.elt9.ptr, align 1
399  %out.elt10.ptr = getelementptr i8, ptr %out.vec.ptr, i64 10
400  store i8 %in.elt, ptr %out.elt10.ptr, align 2
401  %out.elt11.ptr = getelementptr i8, ptr %out.vec.ptr, i64 11
402  store i8 %in.elt, ptr %out.elt11.ptr, align 1
403  %out.elt12.ptr = getelementptr i8, ptr %out.vec.ptr, i64 12
404  store i8 %in.elt, ptr %out.elt12.ptr, align 4
405  %out.elt13.ptr = getelementptr i8, ptr %out.vec.ptr, i64 13
406  store i8 %in.elt, ptr %out.elt13.ptr, align 1
407  %out.elt14.ptr = getelementptr i8, ptr %out.vec.ptr, i64 14
408  store i8 %in.elt, ptr %out.elt14.ptr, align 2
409  %out.elt15.ptr = getelementptr i8, ptr %out.vec.ptr, i64 15
410  store i8 %in.elt, ptr %out.elt15.ptr, align 1
411  %out.elt16.ptr = getelementptr i8, ptr %out.vec.ptr, i64 16
412  store i8 %in.elt, ptr %out.elt16.ptr, align 16
413  %out.elt17.ptr = getelementptr i8, ptr %out.vec.ptr, i64 17
414  store i8 %in.elt, ptr %out.elt17.ptr, align 1
415  %out.elt18.ptr = getelementptr i8, ptr %out.vec.ptr, i64 18
416  store i8 %in.elt, ptr %out.elt18.ptr, align 2
417  %out.elt19.ptr = getelementptr i8, ptr %out.vec.ptr, i64 19
418  store i8 %in.elt, ptr %out.elt19.ptr, align 1
419  %out.elt20.ptr = getelementptr i8, ptr %out.vec.ptr, i64 20
420  store i8 %in.elt, ptr %out.elt20.ptr, align 4
421  %out.elt21.ptr = getelementptr i8, ptr %out.vec.ptr, i64 21
422  store i8 %in.elt, ptr %out.elt21.ptr, align 1
423  %out.elt22.ptr = getelementptr i8, ptr %out.vec.ptr, i64 22
424  store i8 %in.elt, ptr %out.elt22.ptr, align 2
425  %out.elt23.ptr = getelementptr i8, ptr %out.vec.ptr, i64 23
426  store i8 %in.elt, ptr %out.elt23.ptr, align 1
427  %out.elt24.ptr = getelementptr i8, ptr %out.vec.ptr, i64 24
428  store i8 %in.elt, ptr %out.elt24.ptr, align 8
429  %out.elt25.ptr = getelementptr i8, ptr %out.vec.ptr, i64 25
430  store i8 %in.elt, ptr %out.elt25.ptr, align 1
431  %out.elt26.ptr = getelementptr i8, ptr %out.vec.ptr, i64 26
432  store i8 %in.elt, ptr %out.elt26.ptr, align 2
433  %out.elt27.ptr = getelementptr i8, ptr %out.vec.ptr, i64 27
434  store i8 %in.elt, ptr %out.elt27.ptr, align 1
435  %out.elt28.ptr = getelementptr i8, ptr %out.vec.ptr, i64 28
436  store i8 %in.elt, ptr %out.elt28.ptr, align 4
437  %out.elt29.ptr = getelementptr i8, ptr %out.vec.ptr, i64 29
438  store i8 %in.elt, ptr %out.elt29.ptr, align 1
439  %out.elt30.ptr = getelementptr i8, ptr %out.vec.ptr, i64 30
440  store i8 %in.elt, ptr %out.elt30.ptr, align 2
441  %out.elt31.ptr = getelementptr i8, ptr %out.vec.ptr, i64 31
442  store i8 %in.elt, ptr %out.elt31.ptr, align 1
443  ret void
444}
445
; vec256_i16: load one i16, invert it (xor with -1) and store the result to
; 16 consecutive i16 slots of the output pointer (16 x 16 = 256 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (e.g. slot 8 at offset 16 ->
; align 16, slot 12 at offset 24 -> align 8).
; The expected code inverts a 32-bit register and issues sixteen movw stores.
446define void @vec256_i16(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
447; ALL-LABEL: vec256_i16:
448; ALL:       # %bb.0:
449; ALL-NEXT:    movl (%rdi), %eax
450; ALL-NEXT:    notl %eax
451; ALL-NEXT:    movw %ax, (%rsi)
452; ALL-NEXT:    movw %ax, 2(%rsi)
453; ALL-NEXT:    movw %ax, 4(%rsi)
454; ALL-NEXT:    movw %ax, 6(%rsi)
455; ALL-NEXT:    movw %ax, 8(%rsi)
456; ALL-NEXT:    movw %ax, 10(%rsi)
457; ALL-NEXT:    movw %ax, 12(%rsi)
458; ALL-NEXT:    movw %ax, 14(%rsi)
459; ALL-NEXT:    movw %ax, 16(%rsi)
460; ALL-NEXT:    movw %ax, 18(%rsi)
461; ALL-NEXT:    movw %ax, 20(%rsi)
462; ALL-NEXT:    movw %ax, 22(%rsi)
463; ALL-NEXT:    movw %ax, 24(%rsi)
464; ALL-NEXT:    movw %ax, 26(%rsi)
465; ALL-NEXT:    movw %ax, 28(%rsi)
466; ALL-NEXT:    movw %ax, 30(%rsi)
467; ALL-NEXT:    retq
468  %in.elt.not = load i16, ptr %in.elt.ptr, align 64
469  %in.elt = xor i16 %in.elt.not, -1
470  %out.elt0.ptr = getelementptr i16, ptr %out.vec.ptr, i64 0
471  store i16 %in.elt, ptr %out.elt0.ptr, align 64
472  %out.elt1.ptr = getelementptr i16, ptr %out.vec.ptr, i64 1
473  store i16 %in.elt, ptr %out.elt1.ptr, align 2
474  %out.elt2.ptr = getelementptr i16, ptr %out.vec.ptr, i64 2
475  store i16 %in.elt, ptr %out.elt2.ptr, align 4
476  %out.elt3.ptr = getelementptr i16, ptr %out.vec.ptr, i64 3
477  store i16 %in.elt, ptr %out.elt3.ptr, align 2
478  %out.elt4.ptr = getelementptr i16, ptr %out.vec.ptr, i64 4
479  store i16 %in.elt, ptr %out.elt4.ptr, align 8
480  %out.elt5.ptr = getelementptr i16, ptr %out.vec.ptr, i64 5
481  store i16 %in.elt, ptr %out.elt5.ptr, align 2
482  %out.elt6.ptr = getelementptr i16, ptr %out.vec.ptr, i64 6
483  store i16 %in.elt, ptr %out.elt6.ptr, align 4
484  %out.elt7.ptr = getelementptr i16, ptr %out.vec.ptr, i64 7
485  store i16 %in.elt, ptr %out.elt7.ptr, align 2
486  %out.elt8.ptr = getelementptr i16, ptr %out.vec.ptr, i64 8
487  store i16 %in.elt, ptr %out.elt8.ptr, align 16
488  %out.elt9.ptr = getelementptr i16, ptr %out.vec.ptr, i64 9
489  store i16 %in.elt, ptr %out.elt9.ptr, align 2
490  %out.elt10.ptr = getelementptr i16, ptr %out.vec.ptr, i64 10
491  store i16 %in.elt, ptr %out.elt10.ptr, align 4
492  %out.elt11.ptr = getelementptr i16, ptr %out.vec.ptr, i64 11
493  store i16 %in.elt, ptr %out.elt11.ptr, align 2
494  %out.elt12.ptr = getelementptr i16, ptr %out.vec.ptr, i64 12
495  store i16 %in.elt, ptr %out.elt12.ptr, align 8
496  %out.elt13.ptr = getelementptr i16, ptr %out.vec.ptr, i64 13
497  store i16 %in.elt, ptr %out.elt13.ptr, align 2
498  %out.elt14.ptr = getelementptr i16, ptr %out.vec.ptr, i64 14
499  store i16 %in.elt, ptr %out.elt14.ptr, align 4
500  %out.elt15.ptr = getelementptr i16, ptr %out.vec.ptr, i64 15
501  store i16 %in.elt, ptr %out.elt15.ptr, align 2
502  ret void
503}
504
; vec256_i32: load one i32, invert it (xor with -1) and store the result to
; 8 consecutive i32 slots of the output pointer (8 x 32 = 256 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (4, 8, 4, 16, 4, 8, 4).
; The checks expect eight separate movl stores in every configuration.
505define void @vec256_i32(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
506; ALL-LABEL: vec256_i32:
507; ALL:       # %bb.0:
508; ALL-NEXT:    movl (%rdi), %eax
509; ALL-NEXT:    notl %eax
510; ALL-NEXT:    movl %eax, (%rsi)
511; ALL-NEXT:    movl %eax, 4(%rsi)
512; ALL-NEXT:    movl %eax, 8(%rsi)
513; ALL-NEXT:    movl %eax, 12(%rsi)
514; ALL-NEXT:    movl %eax, 16(%rsi)
515; ALL-NEXT:    movl %eax, 20(%rsi)
516; ALL-NEXT:    movl %eax, 24(%rsi)
517; ALL-NEXT:    movl %eax, 28(%rsi)
518; ALL-NEXT:    retq
519  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
520  %in.elt = xor i32 %in.elt.not, -1
521  %out.elt0.ptr = getelementptr i32, ptr %out.vec.ptr, i64 0
522  store i32 %in.elt, ptr %out.elt0.ptr, align 64
523  %out.elt1.ptr = getelementptr i32, ptr %out.vec.ptr, i64 1
524  store i32 %in.elt, ptr %out.elt1.ptr, align 4
525  %out.elt2.ptr = getelementptr i32, ptr %out.vec.ptr, i64 2
526  store i32 %in.elt, ptr %out.elt2.ptr, align 8
527  %out.elt3.ptr = getelementptr i32, ptr %out.vec.ptr, i64 3
528  store i32 %in.elt, ptr %out.elt3.ptr, align 4
529  %out.elt4.ptr = getelementptr i32, ptr %out.vec.ptr, i64 4
530  store i32 %in.elt, ptr %out.elt4.ptr, align 16
531  %out.elt5.ptr = getelementptr i32, ptr %out.vec.ptr, i64 5
532  store i32 %in.elt, ptr %out.elt5.ptr, align 4
533  %out.elt6.ptr = getelementptr i32, ptr %out.vec.ptr, i64 6
534  store i32 %in.elt, ptr %out.elt6.ptr, align 8
535  %out.elt7.ptr = getelementptr i32, ptr %out.vec.ptr, i64 7
536  store i32 %in.elt, ptr %out.elt7.ptr, align 4
537  ret void
538}
539
; vec256_float: load one i32, invert it (xor with -1), bitcast the integer to
; float and store that float to 8 consecutive float slots of the output
; pointer (8 x 32 = 256 bits written).
; Slot 0 is stored with align 64; each later store carries the alignment a
; 64-byte-aligned base gives at its byte offset (4, 8, 4, 16, 4, 8, 4).
; The expected code stays in a general-purpose register: movl load, notl,
; and eight separate movl stores.
540define void @vec256_float(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
541; ALL-LABEL: vec256_float:
542; ALL:       # %bb.0:
543; ALL-NEXT:    movl (%rdi), %eax
544; ALL-NEXT:    notl %eax
545; ALL-NEXT:    movl %eax, (%rsi)
546; ALL-NEXT:    movl %eax, 4(%rsi)
547; ALL-NEXT:    movl %eax, 8(%rsi)
548; ALL-NEXT:    movl %eax, 12(%rsi)
549; ALL-NEXT:    movl %eax, 16(%rsi)
550; ALL-NEXT:    movl %eax, 20(%rsi)
551; ALL-NEXT:    movl %eax, 24(%rsi)
552; ALL-NEXT:    movl %eax, 28(%rsi)
553; ALL-NEXT:    retq
554  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
555  %in.elt.int = xor i32 %in.elt.not, -1
556  %in.elt = bitcast i32 %in.elt.int to float
557  %out.elt0.ptr = getelementptr float, ptr %out.vec.ptr, i64 0
558  store float %in.elt, ptr %out.elt0.ptr, align 64
559  %out.elt1.ptr = getelementptr float, ptr %out.vec.ptr, i64 1
560  store float %in.elt, ptr %out.elt1.ptr, align 4
561  %out.elt2.ptr = getelementptr float, ptr %out.vec.ptr, i64 2
562  store float %in.elt, ptr %out.elt2.ptr, align 8
563  %out.elt3.ptr = getelementptr float, ptr %out.vec.ptr, i64 3
564  store float %in.elt, ptr %out.elt3.ptr, align 4
565  %out.elt4.ptr = getelementptr float, ptr %out.vec.ptr, i64 4
566  store float %in.elt, ptr %out.elt4.ptr, align 16
567  %out.elt5.ptr = getelementptr float, ptr %out.vec.ptr, i64 5
568  store float %in.elt, ptr %out.elt5.ptr, align 4
569  %out.elt6.ptr = getelementptr float, ptr %out.vec.ptr, i64 6
570  store float %in.elt, ptr %out.elt6.ptr, align 8
571  %out.elt7.ptr = getelementptr float, ptr %out.vec.ptr, i64 7
572  store float %in.elt, ptr %out.elt7.ptr, align 4
573  ret void
574}
575
; vec256_i64: load one i64, invert it (xor with -1) and store the result to
; 4 consecutive i64 slots of the output pointer (4 x 64 = 256 bits written).
; Slot 0 is stored with align 64; the later stores use align 8, 16, 8,
; matching their byte offsets (8, 16, 24) from a 64-byte-aligned base.
; The checks expect four separate movq stores in every configuration.
576define void @vec256_i64(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
577; ALL-LABEL: vec256_i64:
578; ALL:       # %bb.0:
579; ALL-NEXT:    movq (%rdi), %rax
580; ALL-NEXT:    notq %rax
581; ALL-NEXT:    movq %rax, (%rsi)
582; ALL-NEXT:    movq %rax, 8(%rsi)
583; ALL-NEXT:    movq %rax, 16(%rsi)
584; ALL-NEXT:    movq %rax, 24(%rsi)
585; ALL-NEXT:    retq
586  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
587  %in.elt = xor i64 %in.elt.not, -1
588  %out.elt0.ptr = getelementptr i64, ptr %out.vec.ptr, i64 0
589  store i64 %in.elt, ptr %out.elt0.ptr, align 64
590  %out.elt1.ptr = getelementptr i64, ptr %out.vec.ptr, i64 1
591  store i64 %in.elt, ptr %out.elt1.ptr, align 8
592  %out.elt2.ptr = getelementptr i64, ptr %out.vec.ptr, i64 2
593  store i64 %in.elt, ptr %out.elt2.ptr, align 16
594  %out.elt3.ptr = getelementptr i64, ptr %out.vec.ptr, i64 3
595  store i64 %in.elt, ptr %out.elt3.ptr, align 8
596  ret void
597}
598
; vec256_double: load one i64, invert it (xor with -1), bitcast the integer to
; double and store that double to 4 consecutive double slots of the output
; pointer (4 x 64 = 256 bits written).
; Slot 0 is stored with align 64; the later stores use align 8, 16, 8,
; matching their byte offsets (8, 16, 24) from a 64-byte-aligned base.
; The expected code stays in a general-purpose register: movq load, notq,
; and four separate movq stores.
599define void @vec256_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
600; ALL-LABEL: vec256_double:
601; ALL:       # %bb.0:
602; ALL-NEXT:    movq (%rdi), %rax
603; ALL-NEXT:    notq %rax
604; ALL-NEXT:    movq %rax, (%rsi)
605; ALL-NEXT:    movq %rax, 8(%rsi)
606; ALL-NEXT:    movq %rax, 16(%rsi)
607; ALL-NEXT:    movq %rax, 24(%rsi)
608; ALL-NEXT:    retq
609  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
610  %in.elt.int = xor i64 %in.elt.not, -1
611  %in.elt = bitcast i64 %in.elt.int to double
612  %out.elt0.ptr = getelementptr double, ptr %out.vec.ptr, i64 0
613  store double %in.elt, ptr %out.elt0.ptr, align 64
614  %out.elt1.ptr = getelementptr double, ptr %out.vec.ptr, i64 1
615  store double %in.elt, ptr %out.elt1.ptr, align 8
616  %out.elt2.ptr = getelementptr double, ptr %out.vec.ptr, i64 2
617  store double %in.elt, ptr %out.elt2.ptr, align 16
618  %out.elt3.ptr = getelementptr double, ptr %out.vec.ptr, i64 3
619  store double %in.elt, ptr %out.elt3.ptr, align 8
620  ret void
621}
622
; vec256_i128: load one i128, invert it (xor with -1) and store the result to
; 2 consecutive i128 slots of the output pointer (2 x 128 = 256 bits written).
; The expected code handles the value as two 64-bit halves: both halves are
; loaded (offsets 0 and 8), each is inverted with notq, and the pair is then
; written to offsets 0/8 and 16/24 with four separate movq stores.
623define void @vec256_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
624; ALL-LABEL: vec256_i128:
625; ALL:       # %bb.0:
626; ALL-NEXT:    movq (%rdi), %rax
627; ALL-NEXT:    movq 8(%rdi), %rcx
628; ALL-NEXT:    notq %rcx
629; ALL-NEXT:    notq %rax
630; ALL-NEXT:    movq %rax, (%rsi)
631; ALL-NEXT:    movq %rcx, 8(%rsi)
632; ALL-NEXT:    movq %rcx, 24(%rsi)
633; ALL-NEXT:    movq %rax, 16(%rsi)
634; ALL-NEXT:    retq
635  %in.elt.not = load i128, ptr %in.elt.ptr, align 64
636  %in.elt = xor i128 %in.elt.not, -1
637  %out.elt0.ptr = getelementptr i128, ptr %out.vec.ptr, i64 0
638  store i128 %in.elt, ptr %out.elt0.ptr, align 64
639  %out.elt1.ptr = getelementptr i128, ptr %out.vec.ptr, i64 1
640  store i128 %in.elt, ptr %out.elt1.ptr, align 16
641  ret void
642}
643
644define void @vec384_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
645; ALL-LABEL: vec384_i8:
646; ALL:       # %bb.0:
647; ALL-NEXT:    movzbl (%rdi), %eax
648; ALL-NEXT:    notb %al
649; ALL-NEXT:    movb %al, (%rsi)
650; ALL-NEXT:    movb %al, 1(%rsi)
651; ALL-NEXT:    movb %al, 2(%rsi)
652; ALL-NEXT:    movb %al, 3(%rsi)
653; ALL-NEXT:    movb %al, 4(%rsi)
654; ALL-NEXT:    movb %al, 5(%rsi)
655; ALL-NEXT:    movb %al, 6(%rsi)
656; ALL-NEXT:    movb %al, 7(%rsi)
657; ALL-NEXT:    movb %al, 8(%rsi)
658; ALL-NEXT:    movb %al, 9(%rsi)
659; ALL-NEXT:    movb %al, 10(%rsi)
660; ALL-NEXT:    movb %al, 11(%rsi)
661; ALL-NEXT:    movb %al, 12(%rsi)
662; ALL-NEXT:    movb %al, 13(%rsi)
663; ALL-NEXT:    movb %al, 14(%rsi)
664; ALL-NEXT:    movb %al, 15(%rsi)
665; ALL-NEXT:    movb %al, 16(%rsi)
666; ALL-NEXT:    movb %al, 17(%rsi)
667; ALL-NEXT:    movb %al, 18(%rsi)
668; ALL-NEXT:    movb %al, 19(%rsi)
669; ALL-NEXT:    movb %al, 20(%rsi)
670; ALL-NEXT:    movb %al, 21(%rsi)
671; ALL-NEXT:    movb %al, 22(%rsi)
672; ALL-NEXT:    movb %al, 23(%rsi)
673; ALL-NEXT:    movb %al, 24(%rsi)
674; ALL-NEXT:    movb %al, 25(%rsi)
675; ALL-NEXT:    movb %al, 26(%rsi)
676; ALL-NEXT:    movb %al, 27(%rsi)
677; ALL-NEXT:    movb %al, 28(%rsi)
678; ALL-NEXT:    movb %al, 29(%rsi)
679; ALL-NEXT:    movb %al, 30(%rsi)
680; ALL-NEXT:    movb %al, 31(%rsi)
681; ALL-NEXT:    movb %al, 32(%rsi)
682; ALL-NEXT:    movb %al, 33(%rsi)
683; ALL-NEXT:    movb %al, 34(%rsi)
684; ALL-NEXT:    movb %al, 35(%rsi)
685; ALL-NEXT:    movb %al, 36(%rsi)
686; ALL-NEXT:    movb %al, 37(%rsi)
687; ALL-NEXT:    movb %al, 38(%rsi)
688; ALL-NEXT:    movb %al, 39(%rsi)
689; ALL-NEXT:    movb %al, 40(%rsi)
690; ALL-NEXT:    movb %al, 41(%rsi)
691; ALL-NEXT:    movb %al, 42(%rsi)
692; ALL-NEXT:    movb %al, 43(%rsi)
693; ALL-NEXT:    movb %al, 44(%rsi)
694; ALL-NEXT:    movb %al, 45(%rsi)
695; ALL-NEXT:    movb %al, 46(%rsi)
696; ALL-NEXT:    movb %al, 47(%rsi)
697; ALL-NEXT:    retq
698  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
699  %in.elt = xor i8 %in.elt.not, -1
700  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
701  store i8 %in.elt, ptr %out.elt0.ptr, align 64
702  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
703  store i8 %in.elt, ptr %out.elt1.ptr, align 1
704  %out.elt2.ptr = getelementptr i8, ptr %out.vec.ptr, i64 2
705  store i8 %in.elt, ptr %out.elt2.ptr, align 2
706  %out.elt3.ptr = getelementptr i8, ptr %out.vec.ptr, i64 3
707  store i8 %in.elt, ptr %out.elt3.ptr, align 1
708  %out.elt4.ptr = getelementptr i8, ptr %out.vec.ptr, i64 4
709  store i8 %in.elt, ptr %out.elt4.ptr, align 4
710  %out.elt5.ptr = getelementptr i8, ptr %out.vec.ptr, i64 5
711  store i8 %in.elt, ptr %out.elt5.ptr, align 1
712  %out.elt6.ptr = getelementptr i8, ptr %out.vec.ptr, i64 6
713  store i8 %in.elt, ptr %out.elt6.ptr, align 2
714  %out.elt7.ptr = getelementptr i8, ptr %out.vec.ptr, i64 7
715  store i8 %in.elt, ptr %out.elt7.ptr, align 1
716  %out.elt8.ptr = getelementptr i8, ptr %out.vec.ptr, i64 8
717  store i8 %in.elt, ptr %out.elt8.ptr, align 8
718  %out.elt9.ptr = getelementptr i8, ptr %out.vec.ptr, i64 9
719  store i8 %in.elt, ptr %out.elt9.ptr, align 1
720  %out.elt10.ptr = getelementptr i8, ptr %out.vec.ptr, i64 10
721  store i8 %in.elt, ptr %out.elt10.ptr, align 2
722  %out.elt11.ptr = getelementptr i8, ptr %out.vec.ptr, i64 11
723  store i8 %in.elt, ptr %out.elt11.ptr, align 1
724  %out.elt12.ptr = getelementptr i8, ptr %out.vec.ptr, i64 12
725  store i8 %in.elt, ptr %out.elt12.ptr, align 4
726  %out.elt13.ptr = getelementptr i8, ptr %out.vec.ptr, i64 13
727  store i8 %in.elt, ptr %out.elt13.ptr, align 1
728  %out.elt14.ptr = getelementptr i8, ptr %out.vec.ptr, i64 14
729  store i8 %in.elt, ptr %out.elt14.ptr, align 2
730  %out.elt15.ptr = getelementptr i8, ptr %out.vec.ptr, i64 15
731  store i8 %in.elt, ptr %out.elt15.ptr, align 1
732  %out.elt16.ptr = getelementptr i8, ptr %out.vec.ptr, i64 16
733  store i8 %in.elt, ptr %out.elt16.ptr, align 16
734  %out.elt17.ptr = getelementptr i8, ptr %out.vec.ptr, i64 17
735  store i8 %in.elt, ptr %out.elt17.ptr, align 1
736  %out.elt18.ptr = getelementptr i8, ptr %out.vec.ptr, i64 18
737  store i8 %in.elt, ptr %out.elt18.ptr, align 2
738  %out.elt19.ptr = getelementptr i8, ptr %out.vec.ptr, i64 19
739  store i8 %in.elt, ptr %out.elt19.ptr, align 1
740  %out.elt20.ptr = getelementptr i8, ptr %out.vec.ptr, i64 20
741  store i8 %in.elt, ptr %out.elt20.ptr, align 4
742  %out.elt21.ptr = getelementptr i8, ptr %out.vec.ptr, i64 21
743  store i8 %in.elt, ptr %out.elt21.ptr, align 1
744  %out.elt22.ptr = getelementptr i8, ptr %out.vec.ptr, i64 22
745  store i8 %in.elt, ptr %out.elt22.ptr, align 2
746  %out.elt23.ptr = getelementptr i8, ptr %out.vec.ptr, i64 23
747  store i8 %in.elt, ptr %out.elt23.ptr, align 1
748  %out.elt24.ptr = getelementptr i8, ptr %out.vec.ptr, i64 24
749  store i8 %in.elt, ptr %out.elt24.ptr, align 8
750  %out.elt25.ptr = getelementptr i8, ptr %out.vec.ptr, i64 25
751  store i8 %in.elt, ptr %out.elt25.ptr, align 1
752  %out.elt26.ptr = getelementptr i8, ptr %out.vec.ptr, i64 26
753  store i8 %in.elt, ptr %out.elt26.ptr, align 2
754  %out.elt27.ptr = getelementptr i8, ptr %out.vec.ptr, i64 27
755  store i8 %in.elt, ptr %out.elt27.ptr, align 1
756  %out.elt28.ptr = getelementptr i8, ptr %out.vec.ptr, i64 28
757  store i8 %in.elt, ptr %out.elt28.ptr, align 4
758  %out.elt29.ptr = getelementptr i8, ptr %out.vec.ptr, i64 29
759  store i8 %in.elt, ptr %out.elt29.ptr, align 1
760  %out.elt30.ptr = getelementptr i8, ptr %out.vec.ptr, i64 30
761  store i8 %in.elt, ptr %out.elt30.ptr, align 2
762  %out.elt31.ptr = getelementptr i8, ptr %out.vec.ptr, i64 31
763  store i8 %in.elt, ptr %out.elt31.ptr, align 1
764  %out.elt32.ptr = getelementptr i8, ptr %out.vec.ptr, i64 32
765  store i8 %in.elt, ptr %out.elt32.ptr, align 32
766  %out.elt33.ptr = getelementptr i8, ptr %out.vec.ptr, i64 33
767  store i8 %in.elt, ptr %out.elt33.ptr, align 1
768  %out.elt34.ptr = getelementptr i8, ptr %out.vec.ptr, i64 34
769  store i8 %in.elt, ptr %out.elt34.ptr, align 2
770  %out.elt35.ptr = getelementptr i8, ptr %out.vec.ptr, i64 35
771  store i8 %in.elt, ptr %out.elt35.ptr, align 1
772  %out.elt36.ptr = getelementptr i8, ptr %out.vec.ptr, i64 36
773  store i8 %in.elt, ptr %out.elt36.ptr, align 4
774  %out.elt37.ptr = getelementptr i8, ptr %out.vec.ptr, i64 37
775  store i8 %in.elt, ptr %out.elt37.ptr, align 1
776  %out.elt38.ptr = getelementptr i8, ptr %out.vec.ptr, i64 38
777  store i8 %in.elt, ptr %out.elt38.ptr, align 2
778  %out.elt39.ptr = getelementptr i8, ptr %out.vec.ptr, i64 39
779  store i8 %in.elt, ptr %out.elt39.ptr, align 1
780  %out.elt40.ptr = getelementptr i8, ptr %out.vec.ptr, i64 40
781  store i8 %in.elt, ptr %out.elt40.ptr, align 8
782  %out.elt41.ptr = getelementptr i8, ptr %out.vec.ptr, i64 41
783  store i8 %in.elt, ptr %out.elt41.ptr, align 1
784  %out.elt42.ptr = getelementptr i8, ptr %out.vec.ptr, i64 42
785  store i8 %in.elt, ptr %out.elt42.ptr, align 2
786  %out.elt43.ptr = getelementptr i8, ptr %out.vec.ptr, i64 43
787  store i8 %in.elt, ptr %out.elt43.ptr, align 1
788  %out.elt44.ptr = getelementptr i8, ptr %out.vec.ptr, i64 44
789  store i8 %in.elt, ptr %out.elt44.ptr, align 4
790  %out.elt45.ptr = getelementptr i8, ptr %out.vec.ptr, i64 45
791  store i8 %in.elt, ptr %out.elt45.ptr, align 1
792  %out.elt46.ptr = getelementptr i8, ptr %out.vec.ptr, i64 46
793  store i8 %in.elt, ptr %out.elt46.ptr, align 2
794  %out.elt47.ptr = getelementptr i8, ptr %out.vec.ptr, i64 47
795  store i8 %in.elt, ptr %out.elt47.ptr, align 1
796  ret void
797}
798
; vec384_i16: loads one i16 from %in.elt.ptr, inverts it (xor with -1), and
; stores the result into each of the 24 consecutive i16 slots of %out.vec.ptr
; (24 x 16 bits = 384 bits). The first store is marked align 64; every later
; store carries the largest power-of-two alignment that divides its byte offset.
; The expected assembly is shared by every llc configuration in this file:
; a 32-bit load (movl), a 32-bit not, then 24 separate 16-bit movw stores.
799define void @vec384_i16(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
800; ALL-LABEL: vec384_i16:
801; ALL:       # %bb.0:
802; ALL-NEXT:    movl (%rdi), %eax
803; ALL-NEXT:    notl %eax
804; ALL-NEXT:    movw %ax, (%rsi)
805; ALL-NEXT:    movw %ax, 2(%rsi)
806; ALL-NEXT:    movw %ax, 4(%rsi)
807; ALL-NEXT:    movw %ax, 6(%rsi)
808; ALL-NEXT:    movw %ax, 8(%rsi)
809; ALL-NEXT:    movw %ax, 10(%rsi)
810; ALL-NEXT:    movw %ax, 12(%rsi)
811; ALL-NEXT:    movw %ax, 14(%rsi)
812; ALL-NEXT:    movw %ax, 16(%rsi)
813; ALL-NEXT:    movw %ax, 18(%rsi)
814; ALL-NEXT:    movw %ax, 20(%rsi)
815; ALL-NEXT:    movw %ax, 22(%rsi)
816; ALL-NEXT:    movw %ax, 24(%rsi)
817; ALL-NEXT:    movw %ax, 26(%rsi)
818; ALL-NEXT:    movw %ax, 28(%rsi)
819; ALL-NEXT:    movw %ax, 30(%rsi)
820; ALL-NEXT:    movw %ax, 32(%rsi)
821; ALL-NEXT:    movw %ax, 34(%rsi)
822; ALL-NEXT:    movw %ax, 36(%rsi)
823; ALL-NEXT:    movw %ax, 38(%rsi)
824; ALL-NEXT:    movw %ax, 40(%rsi)
825; ALL-NEXT:    movw %ax, 42(%rsi)
826; ALL-NEXT:    movw %ax, 44(%rsi)
827; ALL-NEXT:    movw %ax, 46(%rsi)
828; ALL-NEXT:    retq
829  %in.elt.not = load i16, ptr %in.elt.ptr, align 64
830  %in.elt = xor i16 %in.elt.not, -1
831  %out.elt0.ptr = getelementptr i16, ptr %out.vec.ptr, i64 0
832  store i16 %in.elt, ptr %out.elt0.ptr, align 64
833  %out.elt1.ptr = getelementptr i16, ptr %out.vec.ptr, i64 1
834  store i16 %in.elt, ptr %out.elt1.ptr, align 2
835  %out.elt2.ptr = getelementptr i16, ptr %out.vec.ptr, i64 2
836  store i16 %in.elt, ptr %out.elt2.ptr, align 4
837  %out.elt3.ptr = getelementptr i16, ptr %out.vec.ptr, i64 3
838  store i16 %in.elt, ptr %out.elt3.ptr, align 2
839  %out.elt4.ptr = getelementptr i16, ptr %out.vec.ptr, i64 4
840  store i16 %in.elt, ptr %out.elt4.ptr, align 8
841  %out.elt5.ptr = getelementptr i16, ptr %out.vec.ptr, i64 5
842  store i16 %in.elt, ptr %out.elt5.ptr, align 2
843  %out.elt6.ptr = getelementptr i16, ptr %out.vec.ptr, i64 6
844  store i16 %in.elt, ptr %out.elt6.ptr, align 4
845  %out.elt7.ptr = getelementptr i16, ptr %out.vec.ptr, i64 7
846  store i16 %in.elt, ptr %out.elt7.ptr, align 2
847  %out.elt8.ptr = getelementptr i16, ptr %out.vec.ptr, i64 8
848  store i16 %in.elt, ptr %out.elt8.ptr, align 16
849  %out.elt9.ptr = getelementptr i16, ptr %out.vec.ptr, i64 9
850  store i16 %in.elt, ptr %out.elt9.ptr, align 2
851  %out.elt10.ptr = getelementptr i16, ptr %out.vec.ptr, i64 10
852  store i16 %in.elt, ptr %out.elt10.ptr, align 4
853  %out.elt11.ptr = getelementptr i16, ptr %out.vec.ptr, i64 11
854  store i16 %in.elt, ptr %out.elt11.ptr, align 2
855  %out.elt12.ptr = getelementptr i16, ptr %out.vec.ptr, i64 12
856  store i16 %in.elt, ptr %out.elt12.ptr, align 8
857  %out.elt13.ptr = getelementptr i16, ptr %out.vec.ptr, i64 13
858  store i16 %in.elt, ptr %out.elt13.ptr, align 2
859  %out.elt14.ptr = getelementptr i16, ptr %out.vec.ptr, i64 14
860  store i16 %in.elt, ptr %out.elt14.ptr, align 4
861  %out.elt15.ptr = getelementptr i16, ptr %out.vec.ptr, i64 15
862  store i16 %in.elt, ptr %out.elt15.ptr, align 2
863  %out.elt16.ptr = getelementptr i16, ptr %out.vec.ptr, i64 16
864  store i16 %in.elt, ptr %out.elt16.ptr, align 32
865  %out.elt17.ptr = getelementptr i16, ptr %out.vec.ptr, i64 17
866  store i16 %in.elt, ptr %out.elt17.ptr, align 2
867  %out.elt18.ptr = getelementptr i16, ptr %out.vec.ptr, i64 18
868  store i16 %in.elt, ptr %out.elt18.ptr, align 4
869  %out.elt19.ptr = getelementptr i16, ptr %out.vec.ptr, i64 19
870  store i16 %in.elt, ptr %out.elt19.ptr, align 2
871  %out.elt20.ptr = getelementptr i16, ptr %out.vec.ptr, i64 20
872  store i16 %in.elt, ptr %out.elt20.ptr, align 8
873  %out.elt21.ptr = getelementptr i16, ptr %out.vec.ptr, i64 21
874  store i16 %in.elt, ptr %out.elt21.ptr, align 2
875  %out.elt22.ptr = getelementptr i16, ptr %out.vec.ptr, i64 22
876  store i16 %in.elt, ptr %out.elt22.ptr, align 4
877  %out.elt23.ptr = getelementptr i16, ptr %out.vec.ptr, i64 23
878  store i16 %in.elt, ptr %out.elt23.ptr, align 2
879  ret void
880}
881
; vec384_i32: loads one i32 from %in.elt.ptr, inverts it (xor with -1), and
; stores the result into each of the 12 consecutive i32 slots of %out.vec.ptr
; (12 x 32 bits = 384 bits). The first store is marked align 64; every later
; store carries the largest power-of-two alignment that divides its byte offset.
; The expected assembly is shared by every llc configuration in this file:
; one movl load, one notl, then 12 separate 32-bit movl stores.
882define void @vec384_i32(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
883; ALL-LABEL: vec384_i32:
884; ALL:       # %bb.0:
885; ALL-NEXT:    movl (%rdi), %eax
886; ALL-NEXT:    notl %eax
887; ALL-NEXT:    movl %eax, (%rsi)
888; ALL-NEXT:    movl %eax, 4(%rsi)
889; ALL-NEXT:    movl %eax, 8(%rsi)
890; ALL-NEXT:    movl %eax, 12(%rsi)
891; ALL-NEXT:    movl %eax, 16(%rsi)
892; ALL-NEXT:    movl %eax, 20(%rsi)
893; ALL-NEXT:    movl %eax, 24(%rsi)
894; ALL-NEXT:    movl %eax, 28(%rsi)
895; ALL-NEXT:    movl %eax, 32(%rsi)
896; ALL-NEXT:    movl %eax, 36(%rsi)
897; ALL-NEXT:    movl %eax, 40(%rsi)
898; ALL-NEXT:    movl %eax, 44(%rsi)
899; ALL-NEXT:    retq
900  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
901  %in.elt = xor i32 %in.elt.not, -1
902  %out.elt0.ptr = getelementptr i32, ptr %out.vec.ptr, i64 0
903  store i32 %in.elt, ptr %out.elt0.ptr, align 64
904  %out.elt1.ptr = getelementptr i32, ptr %out.vec.ptr, i64 1
905  store i32 %in.elt, ptr %out.elt1.ptr, align 4
906  %out.elt2.ptr = getelementptr i32, ptr %out.vec.ptr, i64 2
907  store i32 %in.elt, ptr %out.elt2.ptr, align 8
908  %out.elt3.ptr = getelementptr i32, ptr %out.vec.ptr, i64 3
909  store i32 %in.elt, ptr %out.elt3.ptr, align 4
910  %out.elt4.ptr = getelementptr i32, ptr %out.vec.ptr, i64 4
911  store i32 %in.elt, ptr %out.elt4.ptr, align 16
912  %out.elt5.ptr = getelementptr i32, ptr %out.vec.ptr, i64 5
913  store i32 %in.elt, ptr %out.elt5.ptr, align 4
914  %out.elt6.ptr = getelementptr i32, ptr %out.vec.ptr, i64 6
915  store i32 %in.elt, ptr %out.elt6.ptr, align 8
916  %out.elt7.ptr = getelementptr i32, ptr %out.vec.ptr, i64 7
917  store i32 %in.elt, ptr %out.elt7.ptr, align 4
918  %out.elt8.ptr = getelementptr i32, ptr %out.vec.ptr, i64 8
919  store i32 %in.elt, ptr %out.elt8.ptr, align 32
920  %out.elt9.ptr = getelementptr i32, ptr %out.vec.ptr, i64 9
921  store i32 %in.elt, ptr %out.elt9.ptr, align 4
922  %out.elt10.ptr = getelementptr i32, ptr %out.vec.ptr, i64 10
923  store i32 %in.elt, ptr %out.elt10.ptr, align 8
924  %out.elt11.ptr = getelementptr i32, ptr %out.vec.ptr, i64 11
925  store i32 %in.elt, ptr %out.elt11.ptr, align 4
926  ret void
927}
928
; vec384_float: floating-point counterpart of the 12-element i32 case. The
; value is loaded as an i32, inverted with xor -1 in the integer domain, then
; bitcast to float and stored into each of the 12 consecutive float slots of
; %out.vec.ptr (12 x 32 bits = 384 bits). The first store is marked align 64;
; later stores carry the largest power-of-two alignment dividing their offset.
; The expected assembly is shared by every llc configuration in this file and
; stays entirely in general-purpose registers: movl load, notl, 12 movl stores.
929define void @vec384_float(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
930; ALL-LABEL: vec384_float:
931; ALL:       # %bb.0:
932; ALL-NEXT:    movl (%rdi), %eax
933; ALL-NEXT:    notl %eax
934; ALL-NEXT:    movl %eax, (%rsi)
935; ALL-NEXT:    movl %eax, 4(%rsi)
936; ALL-NEXT:    movl %eax, 8(%rsi)
937; ALL-NEXT:    movl %eax, 12(%rsi)
938; ALL-NEXT:    movl %eax, 16(%rsi)
939; ALL-NEXT:    movl %eax, 20(%rsi)
940; ALL-NEXT:    movl %eax, 24(%rsi)
941; ALL-NEXT:    movl %eax, 28(%rsi)
942; ALL-NEXT:    movl %eax, 32(%rsi)
943; ALL-NEXT:    movl %eax, 36(%rsi)
944; ALL-NEXT:    movl %eax, 40(%rsi)
945; ALL-NEXT:    movl %eax, 44(%rsi)
946; ALL-NEXT:    retq
947  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
948  %in.elt.int = xor i32 %in.elt.not, -1
949  %in.elt = bitcast i32 %in.elt.int to float
950  %out.elt0.ptr = getelementptr float, ptr %out.vec.ptr, i64 0
951  store float %in.elt, ptr %out.elt0.ptr, align 64
952  %out.elt1.ptr = getelementptr float, ptr %out.vec.ptr, i64 1
953  store float %in.elt, ptr %out.elt1.ptr, align 4
954  %out.elt2.ptr = getelementptr float, ptr %out.vec.ptr, i64 2
955  store float %in.elt, ptr %out.elt2.ptr, align 8
956  %out.elt3.ptr = getelementptr float, ptr %out.vec.ptr, i64 3
957  store float %in.elt, ptr %out.elt3.ptr, align 4
958  %out.elt4.ptr = getelementptr float, ptr %out.vec.ptr, i64 4
959  store float %in.elt, ptr %out.elt4.ptr, align 16
960  %out.elt5.ptr = getelementptr float, ptr %out.vec.ptr, i64 5
961  store float %in.elt, ptr %out.elt5.ptr, align 4
962  %out.elt6.ptr = getelementptr float, ptr %out.vec.ptr, i64 6
963  store float %in.elt, ptr %out.elt6.ptr, align 8
964  %out.elt7.ptr = getelementptr float, ptr %out.vec.ptr, i64 7
965  store float %in.elt, ptr %out.elt7.ptr, align 4
966  %out.elt8.ptr = getelementptr float, ptr %out.vec.ptr, i64 8
967  store float %in.elt, ptr %out.elt8.ptr, align 32
968  %out.elt9.ptr = getelementptr float, ptr %out.vec.ptr, i64 9
969  store float %in.elt, ptr %out.elt9.ptr, align 4
970  %out.elt10.ptr = getelementptr float, ptr %out.vec.ptr, i64 10
971  store float %in.elt, ptr %out.elt10.ptr, align 8
972  %out.elt11.ptr = getelementptr float, ptr %out.vec.ptr, i64 11
973  store float %in.elt, ptr %out.elt11.ptr, align 4
974  ret void
975}
976
; vec384_i64: loads one i64 from %in.elt.ptr, inverts it (xor with -1), and
; stores the result into each of the 6 consecutive i64 slots of %out.vec.ptr
; (6 x 64 bits = 384 bits). The first store is marked align 64; every later
; store carries the largest power-of-two alignment that divides its byte offset.
; The expected assembly is shared by every llc configuration in this file:
; one movq load, one notq, then 6 separate 64-bit movq stores.
977define void @vec384_i64(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
978; ALL-LABEL: vec384_i64:
979; ALL:       # %bb.0:
980; ALL-NEXT:    movq (%rdi), %rax
981; ALL-NEXT:    notq %rax
982; ALL-NEXT:    movq %rax, (%rsi)
983; ALL-NEXT:    movq %rax, 8(%rsi)
984; ALL-NEXT:    movq %rax, 16(%rsi)
985; ALL-NEXT:    movq %rax, 24(%rsi)
986; ALL-NEXT:    movq %rax, 32(%rsi)
987; ALL-NEXT:    movq %rax, 40(%rsi)
988; ALL-NEXT:    retq
989  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
990  %in.elt = xor i64 %in.elt.not, -1
991  %out.elt0.ptr = getelementptr i64, ptr %out.vec.ptr, i64 0
992  store i64 %in.elt, ptr %out.elt0.ptr, align 64
993  %out.elt1.ptr = getelementptr i64, ptr %out.vec.ptr, i64 1
994  store i64 %in.elt, ptr %out.elt1.ptr, align 8
995  %out.elt2.ptr = getelementptr i64, ptr %out.vec.ptr, i64 2
996  store i64 %in.elt, ptr %out.elt2.ptr, align 16
997  %out.elt3.ptr = getelementptr i64, ptr %out.vec.ptr, i64 3
998  store i64 %in.elt, ptr %out.elt3.ptr, align 8
999  %out.elt4.ptr = getelementptr i64, ptr %out.vec.ptr, i64 4
1000  store i64 %in.elt, ptr %out.elt4.ptr, align 32
1001  %out.elt5.ptr = getelementptr i64, ptr %out.vec.ptr, i64 5
1002  store i64 %in.elt, ptr %out.elt5.ptr, align 8
1003  ret void
1004}
1005
; vec384_double: floating-point counterpart of the 6-element i64 case. The
; value is loaded as an i64, inverted with xor -1 in the integer domain, then
; bitcast to double and stored into each of the 6 consecutive double slots of
; %out.vec.ptr (6 x 64 bits = 384 bits). The first store is marked align 64;
; later stores carry the largest power-of-two alignment dividing their offset.
; The expected assembly is shared by every llc configuration in this file and
; stays entirely in general-purpose registers: movq load, notq, 6 movq stores.
1006define void @vec384_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1007; ALL-LABEL: vec384_double:
1008; ALL:       # %bb.0:
1009; ALL-NEXT:    movq (%rdi), %rax
1010; ALL-NEXT:    notq %rax
1011; ALL-NEXT:    movq %rax, (%rsi)
1012; ALL-NEXT:    movq %rax, 8(%rsi)
1013; ALL-NEXT:    movq %rax, 16(%rsi)
1014; ALL-NEXT:    movq %rax, 24(%rsi)
1015; ALL-NEXT:    movq %rax, 32(%rsi)
1016; ALL-NEXT:    movq %rax, 40(%rsi)
1017; ALL-NEXT:    retq
1018  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
1019  %in.elt.int = xor i64 %in.elt.not, -1
1020  %in.elt = bitcast i64 %in.elt.int to double
1021  %out.elt0.ptr = getelementptr double, ptr %out.vec.ptr, i64 0
1022  store double %in.elt, ptr %out.elt0.ptr, align 64
1023  %out.elt1.ptr = getelementptr double, ptr %out.vec.ptr, i64 1
1024  store double %in.elt, ptr %out.elt1.ptr, align 8
1025  %out.elt2.ptr = getelementptr double, ptr %out.vec.ptr, i64 2
1026  store double %in.elt, ptr %out.elt2.ptr, align 16
1027  %out.elt3.ptr = getelementptr double, ptr %out.vec.ptr, i64 3
1028  store double %in.elt, ptr %out.elt3.ptr, align 8
1029  %out.elt4.ptr = getelementptr double, ptr %out.vec.ptr, i64 4
1030  store double %in.elt, ptr %out.elt4.ptr, align 32
1031  %out.elt5.ptr = getelementptr double, ptr %out.vec.ptr, i64 5
1032  store double %in.elt, ptr %out.elt5.ptr, align 8
1033  ret void
1034}
1035
; vec384_i128: loads one i128 from %in.elt.ptr, inverts it (xor with -1), and
; stores the result into each of the 3 consecutive i128 slots of %out.vec.ptr
; (3 x 128 bits = 384 bits). The stores are marked align 64, 16 and 32, matching
; byte offsets 0, 16 and 32.
; The expected assembly is shared by every llc configuration in this file: the
; i128 is handled as two 64-bit halves (bytes 0-7 in %rax, bytes 8-15 in %rcx),
; each inverted with notq and written with six movq stores. The order of those
; stores is part of the checked output.
1036define void @vec384_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1037; ALL-LABEL: vec384_i128:
1038; ALL:       # %bb.0:
1039; ALL-NEXT:    movq (%rdi), %rax
1040; ALL-NEXT:    movq 8(%rdi), %rcx
1041; ALL-NEXT:    notq %rcx
1042; ALL-NEXT:    notq %rax
1043; ALL-NEXT:    movq %rax, (%rsi)
1044; ALL-NEXT:    movq %rcx, 8(%rsi)
1045; ALL-NEXT:    movq %rcx, 24(%rsi)
1046; ALL-NEXT:    movq %rax, 16(%rsi)
1047; ALL-NEXT:    movq %rcx, 40(%rsi)
1048; ALL-NEXT:    movq %rax, 32(%rsi)
1049; ALL-NEXT:    retq
1050  %in.elt.not = load i128, ptr %in.elt.ptr, align 64
1051  %in.elt = xor i128 %in.elt.not, -1
1052  %out.elt0.ptr = getelementptr i128, ptr %out.vec.ptr, i64 0
1053  store i128 %in.elt, ptr %out.elt0.ptr, align 64
1054  %out.elt1.ptr = getelementptr i128, ptr %out.vec.ptr, i64 1
1055  store i128 %in.elt, ptr %out.elt1.ptr, align 16
1056  %out.elt2.ptr = getelementptr i128, ptr %out.vec.ptr, i64 2
1057  store i128 %in.elt, ptr %out.elt2.ptr, align 32
1058  ret void
1059}
1060
; vec512_i8: loads one i8 from %in.elt.ptr, inverts it (xor with -1), and
; stores the result into each of the 64 consecutive bytes of %out.vec.ptr
; (64 x 8 bits = 512 bits). The first store is marked align 64; every later
; store carries the largest power-of-two alignment that divides its byte offset.
; The expected assembly is shared by every llc configuration in this file:
; a zero-extending byte load (movzbl), notb, then 64 separate movb stores.
1061define void @vec512_i8(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1062; ALL-LABEL: vec512_i8:
1063; ALL:       # %bb.0:
1064; ALL-NEXT:    movzbl (%rdi), %eax
1065; ALL-NEXT:    notb %al
1066; ALL-NEXT:    movb %al, (%rsi)
1067; ALL-NEXT:    movb %al, 1(%rsi)
1068; ALL-NEXT:    movb %al, 2(%rsi)
1069; ALL-NEXT:    movb %al, 3(%rsi)
1070; ALL-NEXT:    movb %al, 4(%rsi)
1071; ALL-NEXT:    movb %al, 5(%rsi)
1072; ALL-NEXT:    movb %al, 6(%rsi)
1073; ALL-NEXT:    movb %al, 7(%rsi)
1074; ALL-NEXT:    movb %al, 8(%rsi)
1075; ALL-NEXT:    movb %al, 9(%rsi)
1076; ALL-NEXT:    movb %al, 10(%rsi)
1077; ALL-NEXT:    movb %al, 11(%rsi)
1078; ALL-NEXT:    movb %al, 12(%rsi)
1079; ALL-NEXT:    movb %al, 13(%rsi)
1080; ALL-NEXT:    movb %al, 14(%rsi)
1081; ALL-NEXT:    movb %al, 15(%rsi)
1082; ALL-NEXT:    movb %al, 16(%rsi)
1083; ALL-NEXT:    movb %al, 17(%rsi)
1084; ALL-NEXT:    movb %al, 18(%rsi)
1085; ALL-NEXT:    movb %al, 19(%rsi)
1086; ALL-NEXT:    movb %al, 20(%rsi)
1087; ALL-NEXT:    movb %al, 21(%rsi)
1088; ALL-NEXT:    movb %al, 22(%rsi)
1089; ALL-NEXT:    movb %al, 23(%rsi)
1090; ALL-NEXT:    movb %al, 24(%rsi)
1091; ALL-NEXT:    movb %al, 25(%rsi)
1092; ALL-NEXT:    movb %al, 26(%rsi)
1093; ALL-NEXT:    movb %al, 27(%rsi)
1094; ALL-NEXT:    movb %al, 28(%rsi)
1095; ALL-NEXT:    movb %al, 29(%rsi)
1096; ALL-NEXT:    movb %al, 30(%rsi)
1097; ALL-NEXT:    movb %al, 31(%rsi)
1098; ALL-NEXT:    movb %al, 32(%rsi)
1099; ALL-NEXT:    movb %al, 33(%rsi)
1100; ALL-NEXT:    movb %al, 34(%rsi)
1101; ALL-NEXT:    movb %al, 35(%rsi)
1102; ALL-NEXT:    movb %al, 36(%rsi)
1103; ALL-NEXT:    movb %al, 37(%rsi)
1104; ALL-NEXT:    movb %al, 38(%rsi)
1105; ALL-NEXT:    movb %al, 39(%rsi)
1106; ALL-NEXT:    movb %al, 40(%rsi)
1107; ALL-NEXT:    movb %al, 41(%rsi)
1108; ALL-NEXT:    movb %al, 42(%rsi)
1109; ALL-NEXT:    movb %al, 43(%rsi)
1110; ALL-NEXT:    movb %al, 44(%rsi)
1111; ALL-NEXT:    movb %al, 45(%rsi)
1112; ALL-NEXT:    movb %al, 46(%rsi)
1113; ALL-NEXT:    movb %al, 47(%rsi)
1114; ALL-NEXT:    movb %al, 48(%rsi)
1115; ALL-NEXT:    movb %al, 49(%rsi)
1116; ALL-NEXT:    movb %al, 50(%rsi)
1117; ALL-NEXT:    movb %al, 51(%rsi)
1118; ALL-NEXT:    movb %al, 52(%rsi)
1119; ALL-NEXT:    movb %al, 53(%rsi)
1120; ALL-NEXT:    movb %al, 54(%rsi)
1121; ALL-NEXT:    movb %al, 55(%rsi)
1122; ALL-NEXT:    movb %al, 56(%rsi)
1123; ALL-NEXT:    movb %al, 57(%rsi)
1124; ALL-NEXT:    movb %al, 58(%rsi)
1125; ALL-NEXT:    movb %al, 59(%rsi)
1126; ALL-NEXT:    movb %al, 60(%rsi)
1127; ALL-NEXT:    movb %al, 61(%rsi)
1128; ALL-NEXT:    movb %al, 62(%rsi)
1129; ALL-NEXT:    movb %al, 63(%rsi)
1130; ALL-NEXT:    retq
1131  %in.elt.not = load i8, ptr %in.elt.ptr, align 64
1132  %in.elt = xor i8 %in.elt.not, -1
1133  %out.elt0.ptr = getelementptr i8, ptr %out.vec.ptr, i64 0
1134  store i8 %in.elt, ptr %out.elt0.ptr, align 64
1135  %out.elt1.ptr = getelementptr i8, ptr %out.vec.ptr, i64 1
1136  store i8 %in.elt, ptr %out.elt1.ptr, align 1
1137  %out.elt2.ptr = getelementptr i8, ptr %out.vec.ptr, i64 2
1138  store i8 %in.elt, ptr %out.elt2.ptr, align 2
1139  %out.elt3.ptr = getelementptr i8, ptr %out.vec.ptr, i64 3
1140  store i8 %in.elt, ptr %out.elt3.ptr, align 1
1141  %out.elt4.ptr = getelementptr i8, ptr %out.vec.ptr, i64 4
1142  store i8 %in.elt, ptr %out.elt4.ptr, align 4
1143  %out.elt5.ptr = getelementptr i8, ptr %out.vec.ptr, i64 5
1144  store i8 %in.elt, ptr %out.elt5.ptr, align 1
1145  %out.elt6.ptr = getelementptr i8, ptr %out.vec.ptr, i64 6
1146  store i8 %in.elt, ptr %out.elt6.ptr, align 2
1147  %out.elt7.ptr = getelementptr i8, ptr %out.vec.ptr, i64 7
1148  store i8 %in.elt, ptr %out.elt7.ptr, align 1
1149  %out.elt8.ptr = getelementptr i8, ptr %out.vec.ptr, i64 8
1150  store i8 %in.elt, ptr %out.elt8.ptr, align 8
1151  %out.elt9.ptr = getelementptr i8, ptr %out.vec.ptr, i64 9
1152  store i8 %in.elt, ptr %out.elt9.ptr, align 1
1153  %out.elt10.ptr = getelementptr i8, ptr %out.vec.ptr, i64 10
1154  store i8 %in.elt, ptr %out.elt10.ptr, align 2
1155  %out.elt11.ptr = getelementptr i8, ptr %out.vec.ptr, i64 11
1156  store i8 %in.elt, ptr %out.elt11.ptr, align 1
1157  %out.elt12.ptr = getelementptr i8, ptr %out.vec.ptr, i64 12
1158  store i8 %in.elt, ptr %out.elt12.ptr, align 4
1159  %out.elt13.ptr = getelementptr i8, ptr %out.vec.ptr, i64 13
1160  store i8 %in.elt, ptr %out.elt13.ptr, align 1
1161  %out.elt14.ptr = getelementptr i8, ptr %out.vec.ptr, i64 14
1162  store i8 %in.elt, ptr %out.elt14.ptr, align 2
1163  %out.elt15.ptr = getelementptr i8, ptr %out.vec.ptr, i64 15
1164  store i8 %in.elt, ptr %out.elt15.ptr, align 1
1165  %out.elt16.ptr = getelementptr i8, ptr %out.vec.ptr, i64 16
1166  store i8 %in.elt, ptr %out.elt16.ptr, align 16
1167  %out.elt17.ptr = getelementptr i8, ptr %out.vec.ptr, i64 17
1168  store i8 %in.elt, ptr %out.elt17.ptr, align 1
1169  %out.elt18.ptr = getelementptr i8, ptr %out.vec.ptr, i64 18
1170  store i8 %in.elt, ptr %out.elt18.ptr, align 2
1171  %out.elt19.ptr = getelementptr i8, ptr %out.vec.ptr, i64 19
1172  store i8 %in.elt, ptr %out.elt19.ptr, align 1
1173  %out.elt20.ptr = getelementptr i8, ptr %out.vec.ptr, i64 20
1174  store i8 %in.elt, ptr %out.elt20.ptr, align 4
1175  %out.elt21.ptr = getelementptr i8, ptr %out.vec.ptr, i64 21
1176  store i8 %in.elt, ptr %out.elt21.ptr, align 1
1177  %out.elt22.ptr = getelementptr i8, ptr %out.vec.ptr, i64 22
1178  store i8 %in.elt, ptr %out.elt22.ptr, align 2
1179  %out.elt23.ptr = getelementptr i8, ptr %out.vec.ptr, i64 23
1180  store i8 %in.elt, ptr %out.elt23.ptr, align 1
1181  %out.elt24.ptr = getelementptr i8, ptr %out.vec.ptr, i64 24
1182  store i8 %in.elt, ptr %out.elt24.ptr, align 8
1183  %out.elt25.ptr = getelementptr i8, ptr %out.vec.ptr, i64 25
1184  store i8 %in.elt, ptr %out.elt25.ptr, align 1
1185  %out.elt26.ptr = getelementptr i8, ptr %out.vec.ptr, i64 26
1186  store i8 %in.elt, ptr %out.elt26.ptr, align 2
1187  %out.elt27.ptr = getelementptr i8, ptr %out.vec.ptr, i64 27
1188  store i8 %in.elt, ptr %out.elt27.ptr, align 1
1189  %out.elt28.ptr = getelementptr i8, ptr %out.vec.ptr, i64 28
1190  store i8 %in.elt, ptr %out.elt28.ptr, align 4
1191  %out.elt29.ptr = getelementptr i8, ptr %out.vec.ptr, i64 29
1192  store i8 %in.elt, ptr %out.elt29.ptr, align 1
1193  %out.elt30.ptr = getelementptr i8, ptr %out.vec.ptr, i64 30
1194  store i8 %in.elt, ptr %out.elt30.ptr, align 2
1195  %out.elt31.ptr = getelementptr i8, ptr %out.vec.ptr, i64 31
1196  store i8 %in.elt, ptr %out.elt31.ptr, align 1
1197  %out.elt32.ptr = getelementptr i8, ptr %out.vec.ptr, i64 32
1198  store i8 %in.elt, ptr %out.elt32.ptr, align 32
1199  %out.elt33.ptr = getelementptr i8, ptr %out.vec.ptr, i64 33
1200  store i8 %in.elt, ptr %out.elt33.ptr, align 1
1201  %out.elt34.ptr = getelementptr i8, ptr %out.vec.ptr, i64 34
1202  store i8 %in.elt, ptr %out.elt34.ptr, align 2
1203  %out.elt35.ptr = getelementptr i8, ptr %out.vec.ptr, i64 35
1204  store i8 %in.elt, ptr %out.elt35.ptr, align 1
1205  %out.elt36.ptr = getelementptr i8, ptr %out.vec.ptr, i64 36
1206  store i8 %in.elt, ptr %out.elt36.ptr, align 4
1207  %out.elt37.ptr = getelementptr i8, ptr %out.vec.ptr, i64 37
1208  store i8 %in.elt, ptr %out.elt37.ptr, align 1
1209  %out.elt38.ptr = getelementptr i8, ptr %out.vec.ptr, i64 38
1210  store i8 %in.elt, ptr %out.elt38.ptr, align 2
1211  %out.elt39.ptr = getelementptr i8, ptr %out.vec.ptr, i64 39
1212  store i8 %in.elt, ptr %out.elt39.ptr, align 1
1213  %out.elt40.ptr = getelementptr i8, ptr %out.vec.ptr, i64 40
1214  store i8 %in.elt, ptr %out.elt40.ptr, align 8
1215  %out.elt41.ptr = getelementptr i8, ptr %out.vec.ptr, i64 41
1216  store i8 %in.elt, ptr %out.elt41.ptr, align 1
1217  %out.elt42.ptr = getelementptr i8, ptr %out.vec.ptr, i64 42
1218  store i8 %in.elt, ptr %out.elt42.ptr, align 2
1219  %out.elt43.ptr = getelementptr i8, ptr %out.vec.ptr, i64 43
1220  store i8 %in.elt, ptr %out.elt43.ptr, align 1
1221  %out.elt44.ptr = getelementptr i8, ptr %out.vec.ptr, i64 44
1222  store i8 %in.elt, ptr %out.elt44.ptr, align 4
1223  %out.elt45.ptr = getelementptr i8, ptr %out.vec.ptr, i64 45
1224  store i8 %in.elt, ptr %out.elt45.ptr, align 1
1225  %out.elt46.ptr = getelementptr i8, ptr %out.vec.ptr, i64 46
1226  store i8 %in.elt, ptr %out.elt46.ptr, align 2
1227  %out.elt47.ptr = getelementptr i8, ptr %out.vec.ptr, i64 47
1228  store i8 %in.elt, ptr %out.elt47.ptr, align 1
1229  %out.elt48.ptr = getelementptr i8, ptr %out.vec.ptr, i64 48
1230  store i8 %in.elt, ptr %out.elt48.ptr, align 16
1231  %out.elt49.ptr = getelementptr i8, ptr %out.vec.ptr, i64 49
1232  store i8 %in.elt, ptr %out.elt49.ptr, align 1
1233  %out.elt50.ptr = getelementptr i8, ptr %out.vec.ptr, i64 50
1234  store i8 %in.elt, ptr %out.elt50.ptr, align 2
1235  %out.elt51.ptr = getelementptr i8, ptr %out.vec.ptr, i64 51
1236  store i8 %in.elt, ptr %out.elt51.ptr, align 1
1237  %out.elt52.ptr = getelementptr i8, ptr %out.vec.ptr, i64 52
1238  store i8 %in.elt, ptr %out.elt52.ptr, align 4
1239  %out.elt53.ptr = getelementptr i8, ptr %out.vec.ptr, i64 53
1240  store i8 %in.elt, ptr %out.elt53.ptr, align 1
1241  %out.elt54.ptr = getelementptr i8, ptr %out.vec.ptr, i64 54
1242  store i8 %in.elt, ptr %out.elt54.ptr, align 2
1243  %out.elt55.ptr = getelementptr i8, ptr %out.vec.ptr, i64 55
1244  store i8 %in.elt, ptr %out.elt55.ptr, align 1
1245  %out.elt56.ptr = getelementptr i8, ptr %out.vec.ptr, i64 56
1246  store i8 %in.elt, ptr %out.elt56.ptr, align 8
1247  %out.elt57.ptr = getelementptr i8, ptr %out.vec.ptr, i64 57
1248  store i8 %in.elt, ptr %out.elt57.ptr, align 1
1249  %out.elt58.ptr = getelementptr i8, ptr %out.vec.ptr, i64 58
1250  store i8 %in.elt, ptr %out.elt58.ptr, align 2
1251  %out.elt59.ptr = getelementptr i8, ptr %out.vec.ptr, i64 59
1252  store i8 %in.elt, ptr %out.elt59.ptr, align 1
1253  %out.elt60.ptr = getelementptr i8, ptr %out.vec.ptr, i64 60
1254  store i8 %in.elt, ptr %out.elt60.ptr, align 4
1255  %out.elt61.ptr = getelementptr i8, ptr %out.vec.ptr, i64 61
1256  store i8 %in.elt, ptr %out.elt61.ptr, align 1
1257  %out.elt62.ptr = getelementptr i8, ptr %out.vec.ptr, i64 62
1258  store i8 %in.elt, ptr %out.elt62.ptr, align 2
1259  %out.elt63.ptr = getelementptr i8, ptr %out.vec.ptr, i64 63
1260  store i8 %in.elt, ptr %out.elt63.ptr, align 1
1261  ret void
1262}
1263
; vec512_i16: loads one i16 from %in.elt.ptr, inverts it (xor with -1), and
; stores the result into each of the 32 consecutive i16 slots of %out.vec.ptr
; (32 x 16 bits = 512 bits). The first store is marked align 64; every later
; store carries the largest power-of-two alignment that divides its byte offset.
; The expected assembly is shared by every llc configuration in this file:
; a 32-bit load (movl), a 32-bit not, then 32 separate 16-bit movw stores.
1264define void @vec512_i16(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1265; ALL-LABEL: vec512_i16:
1266; ALL:       # %bb.0:
1267; ALL-NEXT:    movl (%rdi), %eax
1268; ALL-NEXT:    notl %eax
1269; ALL-NEXT:    movw %ax, (%rsi)
1270; ALL-NEXT:    movw %ax, 2(%rsi)
1271; ALL-NEXT:    movw %ax, 4(%rsi)
1272; ALL-NEXT:    movw %ax, 6(%rsi)
1273; ALL-NEXT:    movw %ax, 8(%rsi)
1274; ALL-NEXT:    movw %ax, 10(%rsi)
1275; ALL-NEXT:    movw %ax, 12(%rsi)
1276; ALL-NEXT:    movw %ax, 14(%rsi)
1277; ALL-NEXT:    movw %ax, 16(%rsi)
1278; ALL-NEXT:    movw %ax, 18(%rsi)
1279; ALL-NEXT:    movw %ax, 20(%rsi)
1280; ALL-NEXT:    movw %ax, 22(%rsi)
1281; ALL-NEXT:    movw %ax, 24(%rsi)
1282; ALL-NEXT:    movw %ax, 26(%rsi)
1283; ALL-NEXT:    movw %ax, 28(%rsi)
1284; ALL-NEXT:    movw %ax, 30(%rsi)
1285; ALL-NEXT:    movw %ax, 32(%rsi)
1286; ALL-NEXT:    movw %ax, 34(%rsi)
1287; ALL-NEXT:    movw %ax, 36(%rsi)
1288; ALL-NEXT:    movw %ax, 38(%rsi)
1289; ALL-NEXT:    movw %ax, 40(%rsi)
1290; ALL-NEXT:    movw %ax, 42(%rsi)
1291; ALL-NEXT:    movw %ax, 44(%rsi)
1292; ALL-NEXT:    movw %ax, 46(%rsi)
1293; ALL-NEXT:    movw %ax, 48(%rsi)
1294; ALL-NEXT:    movw %ax, 50(%rsi)
1295; ALL-NEXT:    movw %ax, 52(%rsi)
1296; ALL-NEXT:    movw %ax, 54(%rsi)
1297; ALL-NEXT:    movw %ax, 56(%rsi)
1298; ALL-NEXT:    movw %ax, 58(%rsi)
1299; ALL-NEXT:    movw %ax, 60(%rsi)
1300; ALL-NEXT:    movw %ax, 62(%rsi)
1301; ALL-NEXT:    retq
1302  %in.elt.not = load i16, ptr %in.elt.ptr, align 64
1303  %in.elt = xor i16 %in.elt.not, -1
1304  %out.elt0.ptr = getelementptr i16, ptr %out.vec.ptr, i64 0
1305  store i16 %in.elt, ptr %out.elt0.ptr, align 64
1306  %out.elt1.ptr = getelementptr i16, ptr %out.vec.ptr, i64 1
1307  store i16 %in.elt, ptr %out.elt1.ptr, align 2
1308  %out.elt2.ptr = getelementptr i16, ptr %out.vec.ptr, i64 2
1309  store i16 %in.elt, ptr %out.elt2.ptr, align 4
1310  %out.elt3.ptr = getelementptr i16, ptr %out.vec.ptr, i64 3
1311  store i16 %in.elt, ptr %out.elt3.ptr, align 2
1312  %out.elt4.ptr = getelementptr i16, ptr %out.vec.ptr, i64 4
1313  store i16 %in.elt, ptr %out.elt4.ptr, align 8
1314  %out.elt5.ptr = getelementptr i16, ptr %out.vec.ptr, i64 5
1315  store i16 %in.elt, ptr %out.elt5.ptr, align 2
1316  %out.elt6.ptr = getelementptr i16, ptr %out.vec.ptr, i64 6
1317  store i16 %in.elt, ptr %out.elt6.ptr, align 4
1318  %out.elt7.ptr = getelementptr i16, ptr %out.vec.ptr, i64 7
1319  store i16 %in.elt, ptr %out.elt7.ptr, align 2
1320  %out.elt8.ptr = getelementptr i16, ptr %out.vec.ptr, i64 8
1321  store i16 %in.elt, ptr %out.elt8.ptr, align 16
1322  %out.elt9.ptr = getelementptr i16, ptr %out.vec.ptr, i64 9
1323  store i16 %in.elt, ptr %out.elt9.ptr, align 2
1324  %out.elt10.ptr = getelementptr i16, ptr %out.vec.ptr, i64 10
1325  store i16 %in.elt, ptr %out.elt10.ptr, align 4
1326  %out.elt11.ptr = getelementptr i16, ptr %out.vec.ptr, i64 11
1327  store i16 %in.elt, ptr %out.elt11.ptr, align 2
1328  %out.elt12.ptr = getelementptr i16, ptr %out.vec.ptr, i64 12
1329  store i16 %in.elt, ptr %out.elt12.ptr, align 8
1330  %out.elt13.ptr = getelementptr i16, ptr %out.vec.ptr, i64 13
1331  store i16 %in.elt, ptr %out.elt13.ptr, align 2
1332  %out.elt14.ptr = getelementptr i16, ptr %out.vec.ptr, i64 14
1333  store i16 %in.elt, ptr %out.elt14.ptr, align 4
1334  %out.elt15.ptr = getelementptr i16, ptr %out.vec.ptr, i64 15
1335  store i16 %in.elt, ptr %out.elt15.ptr, align 2
1336  %out.elt16.ptr = getelementptr i16, ptr %out.vec.ptr, i64 16
1337  store i16 %in.elt, ptr %out.elt16.ptr, align 32
1338  %out.elt17.ptr = getelementptr i16, ptr %out.vec.ptr, i64 17
1339  store i16 %in.elt, ptr %out.elt17.ptr, align 2
1340  %out.elt18.ptr = getelementptr i16, ptr %out.vec.ptr, i64 18
1341  store i16 %in.elt, ptr %out.elt18.ptr, align 4
1342  %out.elt19.ptr = getelementptr i16, ptr %out.vec.ptr, i64 19
1343  store i16 %in.elt, ptr %out.elt19.ptr, align 2
1344  %out.elt20.ptr = getelementptr i16, ptr %out.vec.ptr, i64 20
1345  store i16 %in.elt, ptr %out.elt20.ptr, align 8
1346  %out.elt21.ptr = getelementptr i16, ptr %out.vec.ptr, i64 21
1347  store i16 %in.elt, ptr %out.elt21.ptr, align 2
1348  %out.elt22.ptr = getelementptr i16, ptr %out.vec.ptr, i64 22
1349  store i16 %in.elt, ptr %out.elt22.ptr, align 4
1350  %out.elt23.ptr = getelementptr i16, ptr %out.vec.ptr, i64 23
1351  store i16 %in.elt, ptr %out.elt23.ptr, align 2
1352  %out.elt24.ptr = getelementptr i16, ptr %out.vec.ptr, i64 24
1353  store i16 %in.elt, ptr %out.elt24.ptr, align 16
1354  %out.elt25.ptr = getelementptr i16, ptr %out.vec.ptr, i64 25
1355  store i16 %in.elt, ptr %out.elt25.ptr, align 2
1356  %out.elt26.ptr = getelementptr i16, ptr %out.vec.ptr, i64 26
1357  store i16 %in.elt, ptr %out.elt26.ptr, align 4
1358  %out.elt27.ptr = getelementptr i16, ptr %out.vec.ptr, i64 27
1359  store i16 %in.elt, ptr %out.elt27.ptr, align 2
1360  %out.elt28.ptr = getelementptr i16, ptr %out.vec.ptr, i64 28
1361  store i16 %in.elt, ptr %out.elt28.ptr, align 8
1362  %out.elt29.ptr = getelementptr i16, ptr %out.vec.ptr, i64 29
1363  store i16 %in.elt, ptr %out.elt29.ptr, align 2
1364  %out.elt30.ptr = getelementptr i16, ptr %out.vec.ptr, i64 30
1365  store i16 %in.elt, ptr %out.elt30.ptr, align 4
1366  %out.elt31.ptr = getelementptr i16, ptr %out.vec.ptr, i64 31
1367  store i16 %in.elt, ptr %out.elt31.ptr, align 2
1368  ret void
1369}
1370
; vec512_i32: load one i32 from %in.elt.ptr, invert it (xor with -1), and store
; the result into 16 consecutive i32 slots of %out.vec.ptr (16 x 4 = 64 bytes).
; Element 0 is stored with align 64; every later store is annotated with the
; largest power of two that divides its byte offset (4, 8, 4, 16, ...).
; The assertions use the check prefix shared by every RUN line, so all
; configurations are expected to emit the same scalar sequence: one movl load,
; one notl, and sixteen 4-byte movl stores.
1371define void @vec512_i32(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1372; ALL-LABEL: vec512_i32:
1373; ALL:       # %bb.0:
1374; ALL-NEXT:    movl (%rdi), %eax
1375; ALL-NEXT:    notl %eax
1376; ALL-NEXT:    movl %eax, (%rsi)
1377; ALL-NEXT:    movl %eax, 4(%rsi)
1378; ALL-NEXT:    movl %eax, 8(%rsi)
1379; ALL-NEXT:    movl %eax, 12(%rsi)
1380; ALL-NEXT:    movl %eax, 16(%rsi)
1381; ALL-NEXT:    movl %eax, 20(%rsi)
1382; ALL-NEXT:    movl %eax, 24(%rsi)
1383; ALL-NEXT:    movl %eax, 28(%rsi)
1384; ALL-NEXT:    movl %eax, 32(%rsi)
1385; ALL-NEXT:    movl %eax, 36(%rsi)
1386; ALL-NEXT:    movl %eax, 40(%rsi)
1387; ALL-NEXT:    movl %eax, 44(%rsi)
1388; ALL-NEXT:    movl %eax, 48(%rsi)
1389; ALL-NEXT:    movl %eax, 52(%rsi)
1390; ALL-NEXT:    movl %eax, 56(%rsi)
1391; ALL-NEXT:    movl %eax, 60(%rsi)
1392; ALL-NEXT:    retq
; The stored scalar is the bitwise NOT of the loaded value.
; NOTE(review): presumably the xor exists so the splatted value is computed
; rather than a plain reload -- confirm against the commit that added the test.
1393  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
1394  %in.elt = xor i32 %in.elt.not, -1
1395  %out.elt0.ptr = getelementptr i32, ptr %out.vec.ptr, i64 0
1396  store i32 %in.elt, ptr %out.elt0.ptr, align 64
1397  %out.elt1.ptr = getelementptr i32, ptr %out.vec.ptr, i64 1
1398  store i32 %in.elt, ptr %out.elt1.ptr, align 4
1399  %out.elt2.ptr = getelementptr i32, ptr %out.vec.ptr, i64 2
1400  store i32 %in.elt, ptr %out.elt2.ptr, align 8
1401  %out.elt3.ptr = getelementptr i32, ptr %out.vec.ptr, i64 3
1402  store i32 %in.elt, ptr %out.elt3.ptr, align 4
1403  %out.elt4.ptr = getelementptr i32, ptr %out.vec.ptr, i64 4
1404  store i32 %in.elt, ptr %out.elt4.ptr, align 16
1405  %out.elt5.ptr = getelementptr i32, ptr %out.vec.ptr, i64 5
1406  store i32 %in.elt, ptr %out.elt5.ptr, align 4
1407  %out.elt6.ptr = getelementptr i32, ptr %out.vec.ptr, i64 6
1408  store i32 %in.elt, ptr %out.elt6.ptr, align 8
1409  %out.elt7.ptr = getelementptr i32, ptr %out.vec.ptr, i64 7
1410  store i32 %in.elt, ptr %out.elt7.ptr, align 4
1411  %out.elt8.ptr = getelementptr i32, ptr %out.vec.ptr, i64 8
1412  store i32 %in.elt, ptr %out.elt8.ptr, align 32
1413  %out.elt9.ptr = getelementptr i32, ptr %out.vec.ptr, i64 9
1414  store i32 %in.elt, ptr %out.elt9.ptr, align 4
1415  %out.elt10.ptr = getelementptr i32, ptr %out.vec.ptr, i64 10
1416  store i32 %in.elt, ptr %out.elt10.ptr, align 8
1417  %out.elt11.ptr = getelementptr i32, ptr %out.vec.ptr, i64 11
1418  store i32 %in.elt, ptr %out.elt11.ptr, align 4
1419  %out.elt12.ptr = getelementptr i32, ptr %out.vec.ptr, i64 12
1420  store i32 %in.elt, ptr %out.elt12.ptr, align 16
1421  %out.elt13.ptr = getelementptr i32, ptr %out.vec.ptr, i64 13
1422  store i32 %in.elt, ptr %out.elt13.ptr, align 4
1423  %out.elt14.ptr = getelementptr i32, ptr %out.vec.ptr, i64 14
1424  store i32 %in.elt, ptr %out.elt14.ptr, align 8
1425  %out.elt15.ptr = getelementptr i32, ptr %out.vec.ptr, i64 15
1426  store i32 %in.elt, ptr %out.elt15.ptr, align 4
1427  ret void
1428}
1429
; vec512_float: load one i32 from %in.elt.ptr, invert it (xor with -1), bitcast
; the result to float, and store that float into 16 consecutive float slots of
; %out.vec.ptr (16 x 4 = 64 bytes).
; Element 0 is stored with align 64; every later store is annotated with the
; largest power of two that divides its byte offset (4, 8, 4, 16, ...).
; The assertions use the check prefix shared by every RUN line. The expected
; code stays entirely in integer registers (movl / notl / sixteen movl stores)
; even though the IR stores are typed float.
1430define void @vec512_float(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1431; ALL-LABEL: vec512_float:
1432; ALL:       # %bb.0:
1433; ALL-NEXT:    movl (%rdi), %eax
1434; ALL-NEXT:    notl %eax
1435; ALL-NEXT:    movl %eax, (%rsi)
1436; ALL-NEXT:    movl %eax, 4(%rsi)
1437; ALL-NEXT:    movl %eax, 8(%rsi)
1438; ALL-NEXT:    movl %eax, 12(%rsi)
1439; ALL-NEXT:    movl %eax, 16(%rsi)
1440; ALL-NEXT:    movl %eax, 20(%rsi)
1441; ALL-NEXT:    movl %eax, 24(%rsi)
1442; ALL-NEXT:    movl %eax, 28(%rsi)
1443; ALL-NEXT:    movl %eax, 32(%rsi)
1444; ALL-NEXT:    movl %eax, 36(%rsi)
1445; ALL-NEXT:    movl %eax, 40(%rsi)
1446; ALL-NEXT:    movl %eax, 44(%rsi)
1447; ALL-NEXT:    movl %eax, 48(%rsi)
1448; ALL-NEXT:    movl %eax, 52(%rsi)
1449; ALL-NEXT:    movl %eax, 56(%rsi)
1450; ALL-NEXT:    movl %eax, 60(%rsi)
1451; ALL-NEXT:    retq
; The inversion is done on the integer bit pattern; the bitcast only retypes
; those 32 bits as float for the stores below.
1452  %in.elt.not = load i32, ptr %in.elt.ptr, align 64
1453  %in.elt.int = xor i32 %in.elt.not, -1
1454  %in.elt = bitcast i32 %in.elt.int to float
1455  %out.elt0.ptr = getelementptr float, ptr %out.vec.ptr, i64 0
1456  store float %in.elt, ptr %out.elt0.ptr, align 64
1457  %out.elt1.ptr = getelementptr float, ptr %out.vec.ptr, i64 1
1458  store float %in.elt, ptr %out.elt1.ptr, align 4
1459  %out.elt2.ptr = getelementptr float, ptr %out.vec.ptr, i64 2
1460  store float %in.elt, ptr %out.elt2.ptr, align 8
1461  %out.elt3.ptr = getelementptr float, ptr %out.vec.ptr, i64 3
1462  store float %in.elt, ptr %out.elt3.ptr, align 4
1463  %out.elt4.ptr = getelementptr float, ptr %out.vec.ptr, i64 4
1464  store float %in.elt, ptr %out.elt4.ptr, align 16
1465  %out.elt5.ptr = getelementptr float, ptr %out.vec.ptr, i64 5
1466  store float %in.elt, ptr %out.elt5.ptr, align 4
1467  %out.elt6.ptr = getelementptr float, ptr %out.vec.ptr, i64 6
1468  store float %in.elt, ptr %out.elt6.ptr, align 8
1469  %out.elt7.ptr = getelementptr float, ptr %out.vec.ptr, i64 7
1470  store float %in.elt, ptr %out.elt7.ptr, align 4
1471  %out.elt8.ptr = getelementptr float, ptr %out.vec.ptr, i64 8
1472  store float %in.elt, ptr %out.elt8.ptr, align 32
1473  %out.elt9.ptr = getelementptr float, ptr %out.vec.ptr, i64 9
1474  store float %in.elt, ptr %out.elt9.ptr, align 4
1475  %out.elt10.ptr = getelementptr float, ptr %out.vec.ptr, i64 10
1476  store float %in.elt, ptr %out.elt10.ptr, align 8
1477  %out.elt11.ptr = getelementptr float, ptr %out.vec.ptr, i64 11
1478  store float %in.elt, ptr %out.elt11.ptr, align 4
1479  %out.elt12.ptr = getelementptr float, ptr %out.vec.ptr, i64 12
1480  store float %in.elt, ptr %out.elt12.ptr, align 16
1481  %out.elt13.ptr = getelementptr float, ptr %out.vec.ptr, i64 13
1482  store float %in.elt, ptr %out.elt13.ptr, align 4
1483  %out.elt14.ptr = getelementptr float, ptr %out.vec.ptr, i64 14
1484  store float %in.elt, ptr %out.elt14.ptr, align 8
1485  %out.elt15.ptr = getelementptr float, ptr %out.vec.ptr, i64 15
1486  store float %in.elt, ptr %out.elt15.ptr, align 4
1487  ret void
1488}
1489
; vec512_i64: load one i64 from %in.elt.ptr, invert it (xor with -1), and store
; the result into 8 consecutive i64 slots of %out.vec.ptr (8 x 8 = 64 bytes).
; Element 0 is stored with align 64; every later store is annotated with the
; largest power of two that divides its byte offset (8, 16, 8, 32, ...).
; The assertions use the check prefix shared by every RUN line, so all
; configurations are expected to emit the same scalar sequence: one movq load,
; one notq, and eight 8-byte movq stores.
1490define void @vec512_i64(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1491; ALL-LABEL: vec512_i64:
1492; ALL:       # %bb.0:
1493; ALL-NEXT:    movq (%rdi), %rax
1494; ALL-NEXT:    notq %rax
1495; ALL-NEXT:    movq %rax, (%rsi)
1496; ALL-NEXT:    movq %rax, 8(%rsi)
1497; ALL-NEXT:    movq %rax, 16(%rsi)
1498; ALL-NEXT:    movq %rax, 24(%rsi)
1499; ALL-NEXT:    movq %rax, 32(%rsi)
1500; ALL-NEXT:    movq %rax, 40(%rsi)
1501; ALL-NEXT:    movq %rax, 48(%rsi)
1502; ALL-NEXT:    movq %rax, 56(%rsi)
1503; ALL-NEXT:    retq
; The stored scalar is the bitwise NOT of the loaded value.
1504  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
1505  %in.elt = xor i64 %in.elt.not, -1
1506  %out.elt0.ptr = getelementptr i64, ptr %out.vec.ptr, i64 0
1507  store i64 %in.elt, ptr %out.elt0.ptr, align 64
1508  %out.elt1.ptr = getelementptr i64, ptr %out.vec.ptr, i64 1
1509  store i64 %in.elt, ptr %out.elt1.ptr, align 8
1510  %out.elt2.ptr = getelementptr i64, ptr %out.vec.ptr, i64 2
1511  store i64 %in.elt, ptr %out.elt2.ptr, align 16
1512  %out.elt3.ptr = getelementptr i64, ptr %out.vec.ptr, i64 3
1513  store i64 %in.elt, ptr %out.elt3.ptr, align 8
1514  %out.elt4.ptr = getelementptr i64, ptr %out.vec.ptr, i64 4
1515  store i64 %in.elt, ptr %out.elt4.ptr, align 32
1516  %out.elt5.ptr = getelementptr i64, ptr %out.vec.ptr, i64 5
1517  store i64 %in.elt, ptr %out.elt5.ptr, align 8
1518  %out.elt6.ptr = getelementptr i64, ptr %out.vec.ptr, i64 6
1519  store i64 %in.elt, ptr %out.elt6.ptr, align 16
1520  %out.elt7.ptr = getelementptr i64, ptr %out.vec.ptr, i64 7
1521  store i64 %in.elt, ptr %out.elt7.ptr, align 8
1522  ret void
1523}
1524
; vec512_double: load one i64 from %in.elt.ptr, invert it (xor with -1), bitcast
; the result to double, and store that double into 8 consecutive double slots
; of %out.vec.ptr (8 x 8 = 64 bytes).
; Element 0 is stored with align 64; every later store is annotated with the
; largest power of two that divides its byte offset (8, 16, 8, 32, ...).
; The assertions use the check prefix shared by every RUN line. The expected
; code stays entirely in integer registers (movq / notq / eight movq stores)
; even though the IR stores are typed double.
1525define void @vec512_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1526; ALL-LABEL: vec512_double:
1527; ALL:       # %bb.0:
1528; ALL-NEXT:    movq (%rdi), %rax
1529; ALL-NEXT:    notq %rax
1530; ALL-NEXT:    movq %rax, (%rsi)
1531; ALL-NEXT:    movq %rax, 8(%rsi)
1532; ALL-NEXT:    movq %rax, 16(%rsi)
1533; ALL-NEXT:    movq %rax, 24(%rsi)
1534; ALL-NEXT:    movq %rax, 32(%rsi)
1535; ALL-NEXT:    movq %rax, 40(%rsi)
1536; ALL-NEXT:    movq %rax, 48(%rsi)
1537; ALL-NEXT:    movq %rax, 56(%rsi)
1538; ALL-NEXT:    retq
; The inversion is done on the integer bit pattern; the bitcast only retypes
; those 64 bits as double for the stores below.
1539  %in.elt.not = load i64, ptr %in.elt.ptr, align 64
1540  %in.elt.int = xor i64 %in.elt.not, -1
1541  %in.elt = bitcast i64 %in.elt.int to double
1542  %out.elt0.ptr = getelementptr double, ptr %out.vec.ptr, i64 0
1543  store double %in.elt, ptr %out.elt0.ptr, align 64
1544  %out.elt1.ptr = getelementptr double, ptr %out.vec.ptr, i64 1
1545  store double %in.elt, ptr %out.elt1.ptr, align 8
1546  %out.elt2.ptr = getelementptr double, ptr %out.vec.ptr, i64 2
1547  store double %in.elt, ptr %out.elt2.ptr, align 16
1548  %out.elt3.ptr = getelementptr double, ptr %out.vec.ptr, i64 3
1549  store double %in.elt, ptr %out.elt3.ptr, align 8
1550  %out.elt4.ptr = getelementptr double, ptr %out.vec.ptr, i64 4
1551  store double %in.elt, ptr %out.elt4.ptr, align 32
1552  %out.elt5.ptr = getelementptr double, ptr %out.vec.ptr, i64 5
1553  store double %in.elt, ptr %out.elt5.ptr, align 8
1554  %out.elt6.ptr = getelementptr double, ptr %out.vec.ptr, i64 6
1555  store double %in.elt, ptr %out.elt6.ptr, align 16
1556  %out.elt7.ptr = getelementptr double, ptr %out.vec.ptr, i64 7
1557  store double %in.elt, ptr %out.elt7.ptr, align 8
1558  ret void
1559}
1560
; vec512_i128: load one i128 from %in.elt.ptr, invert it (xor with -1), and
; store the result into 4 consecutive i128 slots of %out.vec.ptr
; (4 x 16 = 64 bytes). Store alignments are 64, 16, 32, 16.
; The assertions use the check prefix shared by every RUN line. The expected
; code splits the i128 into two 64-bit halves: bytes 0..7 in %rax and bytes
; 8..15 in %rcx, each inverted with its own notq. Every element is then
; written as two movq stores (%rax at offsets 0/16/32/48, %rcx at offsets
; 8/24/40/56), for eight stores in total.
1561define void @vec512_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1562; ALL-LABEL: vec512_i128:
1563; ALL:       # %bb.0:
1564; ALL-NEXT:    movq (%rdi), %rax
1565; ALL-NEXT:    movq 8(%rdi), %rcx
1566; ALL-NEXT:    notq %rcx
1567; ALL-NEXT:    notq %rax
1568; ALL-NEXT:    movq %rax, (%rsi)
1569; ALL-NEXT:    movq %rcx, 8(%rsi)
1570; ALL-NEXT:    movq %rcx, 24(%rsi)
1571; ALL-NEXT:    movq %rax, 16(%rsi)
1572; ALL-NEXT:    movq %rcx, 40(%rsi)
1573; ALL-NEXT:    movq %rax, 32(%rsi)
1574; ALL-NEXT:    movq %rcx, 56(%rsi)
1575; ALL-NEXT:    movq %rax, 48(%rsi)
1576; ALL-NEXT:    retq
; The stored scalar is the bitwise NOT of the loaded 128-bit value.
1577  %in.elt.not = load i128, ptr %in.elt.ptr, align 64
1578  %in.elt = xor i128 %in.elt.not, -1
1579  %out.elt0.ptr = getelementptr i128, ptr %out.vec.ptr, i64 0
1580  store i128 %in.elt, ptr %out.elt0.ptr, align 64
1581  %out.elt1.ptr = getelementptr i128, ptr %out.vec.ptr, i64 1
1582  store i128 %in.elt, ptr %out.elt1.ptr, align 16
1583  %out.elt2.ptr = getelementptr i128, ptr %out.vec.ptr, i64 2
1584  store i128 %in.elt, ptr %out.elt2.ptr, align 32
1585  %out.elt3.ptr = getelementptr i128, ptr %out.vec.ptr, i64 3
1586  store i128 %in.elt, ptr %out.elt3.ptr, align 16
1587  ret void
1588}
1589
; vec512_i256: load one i256 from %in.elt.ptr, invert it (xor with -1), and
; store the result into 2 consecutive i256 slots of %out.vec.ptr
; (2 x 32 = 64 bytes). Store alignments are 64 and 32.
; The assertions use the check prefix shared by every RUN line. The expected
; code handles the i256 as four 64-bit pieces held in %rax, %rcx, %rdx and
; %rdi, each inverted with its own notq and written once per element, for
; eight movq stores in total. The final load overwrites %rdi, the input
; pointer register, with data, so it is expected to be emitted last.
1590define void @vec512_i256(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
1591; ALL-LABEL: vec512_i256:
1592; ALL:       # %bb.0:
1593; ALL-NEXT:    movq 16(%rdi), %rax
1594; ALL-NEXT:    movq 24(%rdi), %rcx
1595; ALL-NEXT:    movq (%rdi), %rdx
1596; ALL-NEXT:    movq 8(%rdi), %rdi
1597; ALL-NEXT:    notq %rdi
1598; ALL-NEXT:    notq %rdx
1599; ALL-NEXT:    notq %rcx
1600; ALL-NEXT:    notq %rax
1601; ALL-NEXT:    movq %rax, 16(%rsi)
1602; ALL-NEXT:    movq %rcx, 24(%rsi)
1603; ALL-NEXT:    movq %rdx, (%rsi)
1604; ALL-NEXT:    movq %rdi, 8(%rsi)
1605; ALL-NEXT:    movq %rax, 48(%rsi)
1606; ALL-NEXT:    movq %rcx, 56(%rsi)
1607; ALL-NEXT:    movq %rdx, 32(%rsi)
1608; ALL-NEXT:    movq %rdi, 40(%rsi)
1609; ALL-NEXT:    retq
; The stored scalar is the bitwise NOT of the loaded 256-bit value.
1610  %in.elt.not = load i256, ptr %in.elt.ptr, align 64
1611  %in.elt = xor i256 %in.elt.not, -1
1612  %out.elt0.ptr = getelementptr i256, ptr %out.vec.ptr, i64 0
1613  store i256 %in.elt, ptr %out.elt0.ptr, align 64
1614  %out.elt1.ptr = getelementptr i256, ptr %out.vec.ptr, i64 1
1615  store i256 %in.elt, ptr %out.elt1.ptr, align 32
1616  ret void
1617}
1618;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1619; AVX: {{.*}}
1620; AVX1: {{.*}}
1621; AVX2: {{.*}}
1622; AVX512: {{.*}}
1623; AVX512BW: {{.*}}
1624; AVX512F: {{.*}}
1625; SCALAR: {{.*}}
1626; SSE: {{.*}}
1627; SSE2: {{.*}}
1628; SSE2-ONLY: {{.*}}
1629; SSE3: {{.*}}
1630; SSE41: {{.*}}
1631; SSE42: {{.*}}
1632; SSSE3: {{.*}}
1633; SSSE3-ONLY: {{.*}}
1634