; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86

define void @test_udiv7_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_udiv7_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT:    psubd %xmm2, %xmm0
; X64-NEXT:    psrld $1, %xmm0
; X64-NEXT:    paddd %xmm2, %xmm0
; X64-NEXT:    psrld $2, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_udiv7_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT:    movdqa %xmm0, %xmm3
; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
; X86-NEXT:    pmuludq %xmm1, %xmm3
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT:    psubd %xmm2, %xmm0
; X86-NEXT:    psrld $1, %xmm0
; X86-NEXT:    paddd %xmm2, %xmm0
; X86-NEXT:    psrld $2, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = udiv <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_urem7_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_urem7_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psubd %xmm2, %xmm1
; X64-NEXT:    psrld $1, %xmm1
; X64-NEXT:    paddd %xmm2, %xmm1
; X64-NEXT:    psrld $2, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm2
; X64-NEXT:    pslld $3, %xmm2
; X64-NEXT:    psubd %xmm2, %xmm1
; X64-NEXT:    paddd %xmm0, %xmm1
; X64-NEXT:    movq %xmm1, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_urem7_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT:    movdqa %xmm0, %xmm3
; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
; X86-NEXT:    pmuludq %xmm1, %xmm3
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psubd %xmm2, %xmm1
; X86-NEXT:    psrld $1, %xmm1
; X86-NEXT:    paddd %xmm2, %xmm1
; X86-NEXT:    psrld $2, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm2
; X86-NEXT:    pslld $3, %xmm2
; X86-NEXT:    psubd %xmm2, %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    movq %xmm1, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = urem <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_sdiv7_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_sdiv7_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT:    pxor %xmm3, %xmm3
; X64-NEXT:    pcmpgtd %xmm0, %xmm3
; X64-NEXT:    pand %xmm1, %xmm3
; X64-NEXT:    paddd %xmm0, %xmm3
; X64-NEXT:    psubd %xmm3, %xmm2
; X64-NEXT:    paddd %xmm0, %xmm2
; X64-NEXT:    movdqa %xmm2, %xmm0
; X64-NEXT:    psrld $31, %xmm0
; X64-NEXT:    psrad $2, %xmm2
; X64-NEXT:    paddd %xmm0, %xmm2
; X64-NEXT:    movq %xmm2, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_sdiv7_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    pmuludq %xmm2, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-NEXT:    movdqa %xmm1, %xmm3
; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
; X86-NEXT:    pmuludq %xmm2, %xmm3
; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-NEXT:    pxor %xmm3, %xmm3
; X86-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-NEXT:    pand %xmm2, %xmm3
; X86-NEXT:    paddd %xmm1, %xmm3
; X86-NEXT:    psubd %xmm3, %xmm0
; X86-NEXT:    paddd %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrld $31, %xmm1
; X86-NEXT:    psrad $2, %xmm0
; X86-NEXT:    paddd %xmm1, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = sdiv <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_srem7_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_srem7_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT:    pxor %xmm3, %xmm3
; X64-NEXT:    pcmpgtd %xmm0, %xmm3
; X64-NEXT:    pand %xmm1, %xmm3
; X64-NEXT:    paddd %xmm0, %xmm3
; X64-NEXT:    psubd %xmm3, %xmm2
; X64-NEXT:    paddd %xmm0, %xmm2
; X64-NEXT:    movdqa %xmm2, %xmm1
; X64-NEXT:    psrld $31, %xmm1
; X64-NEXT:    psrad $2, %xmm2
; X64-NEXT:    paddd %xmm1, %xmm2
; X64-NEXT:    movdqa %xmm2, %xmm1
; X64-NEXT:    pslld $3, %xmm1
; X64-NEXT:    psubd %xmm1, %xmm2
; X64-NEXT:    paddd %xmm0, %xmm2
; X64-NEXT:    movq %xmm2, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_srem7_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    pmuludq %xmm2, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-NEXT:    movdqa %xmm0, %xmm3
; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
; X86-NEXT:    pmuludq %xmm2, %xmm3
; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X86-NEXT:    pxor %xmm3, %xmm3
; X86-NEXT:    pcmpgtd %xmm0, %xmm3
; X86-NEXT:    pand %xmm2, %xmm3
; X86-NEXT:    paddd %xmm0, %xmm3
; X86-NEXT:    psubd %xmm3, %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm2
; X86-NEXT:    psrld $31, %xmm2
; X86-NEXT:    psrad $2, %xmm1
; X86-NEXT:    paddd %xmm2, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm2
; X86-NEXT:    pslld $3, %xmm2
; X86-NEXT:    psubd %xmm2, %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    movq %xmm1, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = srem <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_udiv_pow2_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_udiv_pow2_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    psrld $3, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_udiv_pow2_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    psrld $3, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = udiv <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_urem_pow2_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_urem_pow2_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $30064771079, %rax # imm = 0x700000007
; X64-NEXT:    andq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_urem_pow2_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    movlps %xmm0, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = urem <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_sdiv_pow2_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_sdiv_pow2_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrad $31, %xmm1
; X64-NEXT:    psrld $29, %xmm1
; X64-NEXT:    paddd %xmm0, %xmm1
; X64-NEXT:    psrad $3, %xmm1
; X64-NEXT:    movq %xmm1, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_sdiv_pow2_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrad $31, %xmm1
; X86-NEXT:    psrld $29, %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    psrad $3, %xmm1
; X86-NEXT:    movq %xmm1, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = sdiv <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_srem_pow2_v2i32(ptr %x, ptr %y) nounwind {
; X64-LABEL: test_srem_pow2_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    psrld $3, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
;
; X86-LABEL: test_srem_pow2_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    psrld $3, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = udiv <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, ptr %y
  ret void
}

define void @test_udiv_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; X64-LABEL: test_udiv_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, %xmm0
; X64-NEXT:    movq (%rsi), %rsi
; X64-NEXT:    movq %rsi, %xmm1
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    movd %eax, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movd %xmm0, %eax
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-NEXT:    movd %xmm0, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    movd %eax, %xmm0
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT:    movq %xmm2, (%rcx)
; X64-NEXT:    retq
;
; X86-LABEL: test_udiv_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    divl %esi
; X86-NEXT:    movd %eax, %xmm2
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    divl %esi
; X86-NEXT:    movd %eax, %xmm0
; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT:    movq %xmm2, (%ecx)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = udiv <2 x i32> %a, %b
  store <2 x i32> %c, ptr %z
  ret void
}

define void @test_urem_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; X64-LABEL: test_urem_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, %xmm0
; X64-NEXT:    movq (%rsi), %rsi
; X64-NEXT:    movq %rsi, %xmm1
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    movd %edx, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movd %xmm0, %eax
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-NEXT:    movd %xmm0, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    movd %edx, %xmm0
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT:    movq %xmm2, (%rcx)
; X64-NEXT:    retq
;
; X86-LABEL: test_urem_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    divl %esi
; X86-NEXT:    movd %edx, %xmm2
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    divl %esi
; X86-NEXT:    movd %edx, %xmm0
; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT:    movq %xmm2, (%ecx)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = urem <2 x i32> %a, %b
  store <2 x i32> %c, ptr %z
  ret void
}

define void @test_sdiv_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; X64-LABEL: test_sdiv_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, %xmm0
; X64-NEXT:    movq (%rsi), %rsi
; X64-NEXT:    movq %rsi, %xmm1
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %esi
; X64-NEXT:    movd %eax, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movd %xmm0, %eax
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-NEXT:    movd %xmm0, %esi
; X64-NEXT:    cltd
; X64-NEXT:    idivl %esi
; X64-NEXT:    movd %eax, %xmm0
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT:    movq %xmm2, (%rcx)
; X64-NEXT:    retq
;
; X86-LABEL: test_sdiv_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    cltd
; X86-NEXT:    idivl %esi
; X86-NEXT:    movd %eax, %xmm2
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    cltd
; X86-NEXT:    idivl %esi
; X86-NEXT:    movd %eax, %xmm0
; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT:    movq %xmm2, (%ecx)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sdiv <2 x i32> %a, %b
  store <2 x i32> %c, ptr %z
  ret void
}

define void @test_srem_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; X64-LABEL: test_srem_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, %xmm0
; X64-NEXT:    movq (%rsi), %rsi
; X64-NEXT:    movq %rsi, %xmm1
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %esi
; X64-NEXT:    movd %eax, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movd %xmm0, %eax
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-NEXT:    movd %xmm0, %esi
; X64-NEXT:    cltd
; X64-NEXT:    idivl %esi
; X64-NEXT:    movd %eax, %xmm0
; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT:    movq %xmm2, (%rcx)
; X64-NEXT:    retq
;
; X86-LABEL: test_srem_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    cltd
; X86-NEXT:    idivl %esi
; X86-NEXT:    movd %eax, %xmm2
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-NEXT:    movd %xmm1, %esi
; X86-NEXT:    cltd
; X86-NEXT:    idivl %esi
; X86-NEXT:    movd %eax, %xmm0
; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT:    movq %xmm2, (%ecx)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sdiv <2 x i32> %a, %b
  store <2 x i32> %c, ptr %z
  ret void
}