; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2       | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2     | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx        | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2       | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512dq,+avx512bw | FileCheck %s --check-prefix=AVX

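; These tests cover a scalar load feeding a binop with a constant operand, whose
; result is inserted into element 0 of an undef vector. The backend chooses
; between doing the math in a GPR and then moving the result into the vector
; (movd/movq), or loading straight into a vector register and using the
; equivalent vector op.
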
define <4 x i32> @add_op1_constant(ptr %p) nounwind {
; SSE-LABEL: add_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Code and data size may increase by using more vector ops, so the transform is disabled here.

define <4 x i32> @add_op1_constant_optsize(ptr %p) nounwind optsize {
; SSE-LABEL: add_op1_constant_optsize:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_optsize:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @add_op0_constant(ptr %p) nounwind {
; SSE-LABEL: add_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = add i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <2 x i64> @sub_op0_constant(ptr %p) nounwind {
; SSE-LABEL: sub_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = sub i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <16 x i8> @sub_op1_constant(ptr %p) nounwind {
; SSE-LABEL: sub_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    addb $-42, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    addb $-42, %al
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, ptr %p
  %b = sub i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <4 x i32> @mul_op1_constant(ptr %p) nounwind {
; SSE-LABEL: mul_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = mul i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @mul_op0_constant(ptr %p) nounwind {
; SSE-LABEL: mul_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    imull $42, %eax, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    imull $42, %eax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = mul i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @and_op1_constant(ptr %p) nounwind {
; SSE-LABEL: and_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: and_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    andl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = and i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <2 x i64> @or_op1_constant(ptr %p) nounwind {
; SSE-LABEL: or_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = or i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <8 x i16> @xor_op1_constant(ptr %p) nounwind {
; SSE-LABEL: xor_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    xorl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    xorl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = xor i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @shl_op0_constant(ptr %p) nounwind {
; SSE-LABEL: shl_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %ecx
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    shll %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shl_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %ecx
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    shll %cl, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = shl i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
; SSE-LABEL: shl_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    shlb $5, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shl_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    shlb $5, %al
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, ptr %p
  %b = shl i8 %x, 5
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
; SSE-LABEL: lshr_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %ecx
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    shrq %cl, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: lshr_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %ecx
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    shrq %cl, %rax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = lshr i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

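; When the shift amount is a constant immediate, an XMM shift (psrld/psrad) is
; available, so the op1_constant shift cases below can stay in the vector unit;
; shifts by a variable (loaded) amount are done in GPRs, as above.
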
define <4 x i32> @lshr_op1_constant(ptr %p) nounwind {
; SSE-LABEL: lshr_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    psrld $17, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: lshr_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = lshr i32 %x, 17
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @ashr_op0_constant(ptr %p) nounwind {
; SSE-LABEL: ashr_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %ecx
; SSE-NEXT:    movl $-42, %eax
; SSE-NEXT:    sarl %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %ecx
; AVX-NEXT:    movl $-42, %eax
; AVX-NEXT:    sarl %cl, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = ashr i16 -42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <8 x i16> @ashr_op1_constant(ptr %p) nounwind {
; SSE-LABEL: ashr_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    psrad $7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vpsrad $7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = ashr i16 %x, 7
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

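; Division/remainder by a variable must use idiv/div, but division by a
; constant is expected to be strength-reduced to a multiply by a fixed-point
; reciprocal ("magic number") plus shifts, as in the op1_constant cases below.
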
define <4 x i32> @sdiv_op0_constant(ptr %p) nounwind {
; SSE-LABEL: sdiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    idivl (%rdi)
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    idivl (%rdi)
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = sdiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @sdiv_op1_constant(ptr %p) nounwind {
; SSE-LABEL: sdiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    imull $-15603, %eax, %ecx # imm = 0xC30D
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    shrl $15, %eax
; SSE-NEXT:    sarl $5, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movd %ecx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl (%rdi), %eax
; AVX-NEXT:    imull $-15603, %eax, %ecx # imm = 0xC30D
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    shrl $15, %eax
; AVX-NEXT:    sarl $5, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = sdiv i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <8 x i16> @srem_op0_constant(ptr %p) nounwind {
; SSE-LABEL: srem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movw $42, %ax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    idivw (%rdi)
; SSE-NEXT:    # kill: def $dx killed $dx def $edx
; SSE-NEXT:    movd %edx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: srem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movw $42, %ax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    idivw (%rdi)
; AVX-NEXT:    # kill: def $dx killed $dx def $edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = srem i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @srem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: srem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movslq (%rdi), %rax
; SSE-NEXT:    imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $63, %rdx
; SSE-NEXT:    sarq $35, %rcx
; SSE-NEXT:    addl %edx, %ecx
; SSE-NEXT:    imull $42, %ecx, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: srem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movslq (%rdi), %rax
; AVX-NEXT:    imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; AVX-NEXT:    movq %rcx, %rdx
; AVX-NEXT:    shrq $63, %rdx
; AVX-NEXT:    sarq $35, %rcx
; AVX-NEXT:    addl %edx, %ecx
; AVX-NEXT:    imull $42, %ecx, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = srem i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <4 x i32> @udiv_op0_constant(ptr %p) nounwind {
; SSE-LABEL: udiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divl (%rdi)
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: udiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divl (%rdi)
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = udiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <2 x i64> @udiv_op1_constant(ptr %p) nounwind {
; SSE-LABEL: udiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; SSE-NEXT:    mulq %rcx
; SSE-NEXT:    movq %rdx, %xmm0
; SSE-NEXT:    psrlq $4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: udiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; AVX-NEXT:    mulq %rcx
; AVX-NEXT:    vmovq %rdx, %xmm0
; AVX-NEXT:    vpsrlq $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = udiv i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <2 x i64> @urem_op0_constant(ptr %p) nounwind {
; SSE-LABEL: urem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divq (%rdi)
; SSE-NEXT:    movq %rdx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: urem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divq (%rdi)
; AVX-NEXT:    vmovq %rdx, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = urem i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <16 x i8> @urem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: urem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb %cl
; SSE-NEXT:    movzbl %cl, %ecx
; SSE-NEXT:    imull $49, %ecx, %ecx
; SSE-NEXT:    shrl $10, %ecx
; SSE-NEXT:    imull $42, %ecx, %ecx
; SSE-NEXT:    subb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: urem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb %cl
; AVX-NEXT:    movzbl %cl, %ecx
; AVX-NEXT:    imull $49, %ecx, %ecx
; AVX-NEXT:    shrl $10, %ecx
; AVX-NEXT:    imull $42, %ecx, %ecx
; AVX-NEXT:    subb %cl, %al
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, ptr %p
  %b = urem i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <4 x float> @fadd_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fadd_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fadd float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @fsub_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fsub_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, ptr %p
  %b = fsub double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

define <4 x float> @fsub_op0_constant(ptr %p) nounwind {
; SSE-LABEL: fsub_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fsub float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <4 x float> @fmul_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fmul_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fmul float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @fdiv_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fdiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, ptr %p
  %b = fdiv double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

define <4 x float> @fdiv_op0_constant(ptr %p) nounwind {
; SSE-LABEL: fdiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fdiv float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

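; x86 has no frem instruction, so frem is lowered to a libcall (fmodf/fmod).
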
define <4 x float> @frem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: frem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    callq fmodf@PLT
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: frem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq fmodf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = frem float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @frem_op0_constant(ptr %p) nounwind {
; SSE-LABEL: frem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; SSE-NEXT:    callq fmod@PLT
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: frem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq fmod@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
  %x = load double, ptr %p
  %b = frem double 42.0, %x
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

; Try again with 256-bit types.

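; Only element 0 of the result is defined, so even for 256-bit and 512-bit
; result types an XMM-sized insert is all that is needed.
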
define <8 x i32> @add_op1_constant_v8i32(ptr %p) nounwind {
; SSE-LABEL: add_op1_constant_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

define <4 x i64> @sub_op0_constant_v4i64(ptr %p) nounwind {
; SSE-LABEL: sub_op0_constant_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = sub i64 42, %x
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

define <8 x i32> @mul_op1_constant_v8i32(ptr %p) nounwind {
; SSE-LABEL: mul_op1_constant_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = mul i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

define <4 x i64> @or_op1_constant_v4i64(ptr %p) nounwind {
; SSE-LABEL: or_op1_constant_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = or i64 %x, 42
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

; Try again with 512-bit types.

define <16 x i32> @add_op1_constant_v16i32(ptr %p) nounwind {
; SSE-LABEL: add_op1_constant_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_v16i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

define <8 x i64> @sub_op0_constant_v8i64(ptr %p) nounwind {
; SSE-LABEL: sub_op0_constant_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant_v8i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = sub i64 42, %x
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}

define <16 x i32> @mul_op1_constant_v16i32(ptr %p) nounwind {
; SSE-LABEL: mul_op1_constant_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant_v16i32:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = mul i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

define <8 x i64> @or_op1_constant_v8i64(ptr %p) nounwind {
; SSE-LABEL: or_op1_constant_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant_v8i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = or i64 %x, 42
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}