; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s

; Exercises -O0 code generation for a `frem` on <4 x half> when targeting
; 32-bit x86 (pentium4). The expected assembly below shows the operation being
; handled one lane at a time: both operands of a lane are widened through
; __extendhfsf2, the pair is passed to fmodf, and the result is narrowed again
; with __truncsfhf2. That sequence appears four times, after which the four
; half results are repacked into xmm0 with punpcklwd/unpcklps.
;
; The assertions are machine generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-unknown"

define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
; CHECK-LABEL: doTheTestMod:
; CHECK:       # %bb.0: # %Entry
; CHECK-NEXT:    subl $140, %esp
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps %xmm0, %xmm6
; CHECK-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movaps %xmm0, %xmm3
; CHECK-NEXT:    psrlq $48, %xmm3
; CHECK-NEXT:    movaps %xmm0, %xmm2
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; CHECK-NEXT:    psrld $16, %xmm0
; CHECK-NEXT:    movaps %xmm6, %xmm7
; CHECK-NEXT:    movaps %xmm6, %xmm4
; CHECK-NEXT:    psrlq $48, %xmm4
; CHECK-NEXT:    movaps %xmm6, %xmm5
; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
; CHECK-NEXT:    psrld $16, %xmm6
; CHECK-NEXT:    pextrw $0, %xmm7, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm6, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm5, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm4, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm3, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm2, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    pextrw $0, %xmm1, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm0
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    # implicit-def: $xmm1
; CHECK-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, %cx
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    movw %cx, (%eax)
; CHECK-NEXT:    calll __extendhfsf2
; CHECK-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fstps 4(%eax)
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll fmodf
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    fstps (%eax)
; CHECK-NEXT:    calll __truncsfhf2
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload
; CHECK-NEXT:    # xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    movaps %xmm0, %xmm3
; CHECK-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    addl $140, %esp
; CHECK-NEXT:    retl
Entry:
  ; Both vector arguments are stored to 8-byte-aligned stack slots and loaded
  ; back before use, so the frem operates on the reloaded values.
  %x = alloca <4 x half>, align 8
  %y = alloca <4 x half>, align 8
  store <4 x half> %0, ptr %x, align 8
  store <4 x half> %1, ptr %y, align 8
  %2 = load <4 x half>, ptr %x, align 8
  %3 = load <4 x half>, ptr %y, align 8
  ; Vector half-precision remainder: the operation under test.
  %4 = frem <4 x half> %2, %3
  ret <4 x half> %4
}