xref: /llvm-project/llvm/test/CodeGen/X86/pr47000.ll (revision a74c5707be279bdddb51fe49aa9383c1ddc99fbe)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s
3
; Module-level target description. The triple pins code generation to
; i386-unknown-linux-unknown, so the expected assembly in this file does not
; depend on the host that runs the test.
4target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
5target triple = "i386-unknown-linux-unknown"
6
; doTheTestMod: takes two <4 x half> vectors and returns their frem.
;
; The body stores both arguments to stack slots, reloads them, and applies a
; single vector frem. The autogenerated assertions that follow pin the llc
; output for this function. They contain four groups of library calls, each
; made of two calls to __extendhfsf2, one call to fmodf and one call to
; __truncsfhf2 (presumably one group per vector element), followed by
; punpcklwd/unpcklps shuffles that assemble the value returned in xmm0.
;
; The assertion lines are tool-generated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
7define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
8; CHECK-LABEL: doTheTestMod:
9; CHECK:       # %bb.0: # %Entry
10; CHECK-NEXT:    subl $140, %esp
11; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
12; CHECK-NEXT:    movaps %xmm0, %xmm6
13; CHECK-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
14; CHECK-NEXT:    movaps %xmm0, %xmm1
15; CHECK-NEXT:    movaps %xmm0, %xmm3
16; CHECK-NEXT:    psrlq $48, %xmm3
17; CHECK-NEXT:    movaps %xmm0, %xmm2
18; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
19; CHECK-NEXT:    psrld $16, %xmm0
20; CHECK-NEXT:    movaps %xmm6, %xmm7
21; CHECK-NEXT:    movaps %xmm6, %xmm4
22; CHECK-NEXT:    psrlq $48, %xmm4
23; CHECK-NEXT:    movaps %xmm6, %xmm5
24; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
25; CHECK-NEXT:    psrld $16, %xmm6
26; CHECK-NEXT:    pextrw $0, %xmm7, %eax
27; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
28; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
29; CHECK-NEXT:    pextrw $0, %xmm6, %eax
30; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
31; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
32; CHECK-NEXT:    pextrw $0, %xmm5, %eax
33; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
34; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
35; CHECK-NEXT:    pextrw $0, %xmm4, %eax
36; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
37; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
38; CHECK-NEXT:    pextrw $0, %xmm3, %eax
39; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
40; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
41; CHECK-NEXT:    pextrw $0, %xmm2, %eax
42; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
43; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
44; CHECK-NEXT:    pextrw $0, %xmm0, %eax
45; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
46; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
47; CHECK-NEXT:    pextrw $0, %xmm1, %eax
48; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
49; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
50; CHECK-NEXT:    # implicit-def: $xmm0
51; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
52; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
53; CHECK-NEXT:    # implicit-def: $xmm0
54; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
55; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
56; CHECK-NEXT:    # implicit-def: $xmm0
57; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
58; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
59; CHECK-NEXT:    # implicit-def: $xmm0
60; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
61; CHECK-NEXT:    # implicit-def: $xmm1
62; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
63; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
64; CHECK-NEXT:    # implicit-def: $xmm1
65; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
66; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
67; CHECK-NEXT:    # implicit-def: $xmm1
68; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
69; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
70; CHECK-NEXT:    # implicit-def: $xmm1
71; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
72; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
73; CHECK-NEXT:    pextrw $0, %xmm0, %eax
74; CHECK-NEXT:    movw %ax, %cx
75; CHECK-NEXT:    movl %esp, %eax
76; CHECK-NEXT:    movw %cx, (%eax)
77; CHECK-NEXT:    calll __extendhfsf2
78; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
79; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
80; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
81; CHECK-NEXT:    pextrw $0, %xmm0, %eax
82; CHECK-NEXT:    movw %ax, %cx
83; CHECK-NEXT:    movl %esp, %eax
84; CHECK-NEXT:    movw %cx, (%eax)
85; CHECK-NEXT:    calll __extendhfsf2
86; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
87; CHECK-NEXT:    movl %esp, %eax
88; CHECK-NEXT:    fxch %st(1)
89; CHECK-NEXT:    fstps 4(%eax)
90; CHECK-NEXT:    fstps (%eax)
91; CHECK-NEXT:    calll fmodf
92; CHECK-NEXT:    movl %esp, %eax
93; CHECK-NEXT:    fstps (%eax)
94; CHECK-NEXT:    calll __truncsfhf2
95; CHECK-NEXT:    movaps %xmm0, %xmm1
96; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
97; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
98; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
99; CHECK-NEXT:    pextrw $0, %xmm0, %eax
100; CHECK-NEXT:    movw %ax, %cx
101; CHECK-NEXT:    movl %esp, %eax
102; CHECK-NEXT:    movw %cx, (%eax)
103; CHECK-NEXT:    calll __extendhfsf2
104; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
105; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
106; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
107; CHECK-NEXT:    pextrw $0, %xmm0, %eax
108; CHECK-NEXT:    movw %ax, %cx
109; CHECK-NEXT:    movl %esp, %eax
110; CHECK-NEXT:    movw %cx, (%eax)
111; CHECK-NEXT:    calll __extendhfsf2
112; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
113; CHECK-NEXT:    movl %esp, %eax
114; CHECK-NEXT:    fxch %st(1)
115; CHECK-NEXT:    fstps 4(%eax)
116; CHECK-NEXT:    fstps (%eax)
117; CHECK-NEXT:    calll fmodf
118; CHECK-NEXT:    movl %esp, %eax
119; CHECK-NEXT:    fstps (%eax)
120; CHECK-NEXT:    calll __truncsfhf2
121; CHECK-NEXT:    movaps %xmm0, %xmm1
122; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
123; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
124; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
125; CHECK-NEXT:    pextrw $0, %xmm0, %eax
126; CHECK-NEXT:    movw %ax, %cx
127; CHECK-NEXT:    movl %esp, %eax
128; CHECK-NEXT:    movw %cx, (%eax)
129; CHECK-NEXT:    calll __extendhfsf2
130; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
131; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
132; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
133; CHECK-NEXT:    pextrw $0, %xmm0, %eax
134; CHECK-NEXT:    movw %ax, %cx
135; CHECK-NEXT:    movl %esp, %eax
136; CHECK-NEXT:    movw %cx, (%eax)
137; CHECK-NEXT:    calll __extendhfsf2
138; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
139; CHECK-NEXT:    movl %esp, %eax
140; CHECK-NEXT:    fxch %st(1)
141; CHECK-NEXT:    fstps 4(%eax)
142; CHECK-NEXT:    fstps (%eax)
143; CHECK-NEXT:    calll fmodf
144; CHECK-NEXT:    movl %esp, %eax
145; CHECK-NEXT:    fstps (%eax)
146; CHECK-NEXT:    calll __truncsfhf2
147; CHECK-NEXT:    movaps %xmm0, %xmm1
148; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
149; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
150; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
151; CHECK-NEXT:    pextrw $0, %xmm0, %eax
152; CHECK-NEXT:    movw %ax, %cx
153; CHECK-NEXT:    movl %esp, %eax
154; CHECK-NEXT:    movw %cx, (%eax)
155; CHECK-NEXT:    calll __extendhfsf2
156; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
157; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
158; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
159; CHECK-NEXT:    pextrw $0, %xmm0, %eax
160; CHECK-NEXT:    movw %ax, %cx
161; CHECK-NEXT:    movl %esp, %eax
162; CHECK-NEXT:    movw %cx, (%eax)
163; CHECK-NEXT:    calll __extendhfsf2
164; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
165; CHECK-NEXT:    movl %esp, %eax
166; CHECK-NEXT:    fxch %st(1)
167; CHECK-NEXT:    fstps 4(%eax)
168; CHECK-NEXT:    fstps (%eax)
169; CHECK-NEXT:    calll fmodf
170; CHECK-NEXT:    movl %esp, %eax
171; CHECK-NEXT:    fstps (%eax)
172; CHECK-NEXT:    calll __truncsfhf2
173; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
174; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
175; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload
176; CHECK-NEXT:    # xmm2 = mem[0],zero,zero,zero
177; CHECK-NEXT:    movaps %xmm0, %xmm3
178; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
179; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
180; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
181; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
182; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
183; CHECK-NEXT:    addl $140, %esp
184; CHECK-NEXT:    retl
; Function body: %x and %y are 8-byte-aligned stack slots. Both arguments are
; stored to them and loaded back before the frem, so the frem operands are
; the reloaded values %2 and %3, not the incoming arguments directly.
; NOTE(review): the store/load round trip presumably mirrors the unoptimized
; reproducer this test was reduced from - confirm against the bug report.
185Entry:
186  %x = alloca <4 x half>, align 8
187  %y = alloca <4 x half>, align 8
188  store <4 x half> %0, ptr %x, align 8
189  store <4 x half> %1, ptr %y, align 8
190  %2 = load <4 x half>, ptr %x, align 8
191  %3 = load <4 x half>, ptr %y, align 8
192  %4 = frem <4 x half> %2, %3
193  ret <4 x half> %4
194}
195
196