; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psraw $5, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psraw $5, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    psrad $4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %0
}

define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_negative:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
  ret <4 x i32> %0
}

define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    psrad $6, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm1
; SSE-NEXT:    psrad $6, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <8 x i32> %0
}

define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm2, %xmm0
; SSE-NEXT:    psraw $2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    psraw $2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %a0
}

; Div-by-0 in any lane is UB.

define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %y
}