; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-linux-generic -mattr=avx < %s | FileCheck %s

; Bug 45833:
; The SplitVecRes_MSTORE method should split a extended value type
; according to the halving of the enveloping type to avoid all sorts
; of inconsistencies downstream. For example for a extended value type
; with VL=14 and enveloping type VL=16 that is split 8/8, the extended
; type should be split 8/6 and not 7/7. This also accounts for hi masked
; store that get zero storage size (and are unused).

define void @mstore_split9(<9 x float> %value, ptr %addr, <9 x i1> %mask) {
; CHECK-LABEL: mstore_split9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovd %esi, %xmm2
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm2, %xmm3
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpshufb {{.*#+}} xmm4 = xmm3[8,u,u,u],zero,xmm3[u,u,u],zero,xmm3[u,u,u],zero,xmm3[u,u,u]
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm4, 32(%rdi)
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v9f32.p0(<9 x float> %value, ptr %addr, i32 4, <9 x i1>%mask)
  ret void
}

define void @mstore_split13(<13 x float> %value, ptr %addr, <13 x i1> %mask) {
; CHECK-LABEL: mstore_split13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovd %esi, %xmm2
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm2, %xmm3
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm4
; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm5, %xmm5
; CHECK-NEXT:    vinsertf128 $1, %xmm5, %ymm3, %ymm3
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm3, 32(%rdi)
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm4[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v13f32.p0(<13 x float> %value, ptr %addr, i32 4, <13 x i1>%mask)
  ret void
}

define void @mstore_split14(<14 x float> %value, ptr %addr, <14 x i1> %mask) {
; CHECK-LABEL: mstore_split14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovd %esi, %xmm2
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm2, %xmm3
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm2, (%rdi)
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm3[8,u,9,u,10,u,11,u,12,u,13,u],zero,xmm3[u],zero,xmm3[u]
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, 32(%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v14f32.p0(<14 x float> %value, ptr %addr, i32 4, <14 x i1>%mask)
  ret void
}

define void @mstore_split17(<17 x float> %value, ptr %addr, <17 x i1> %mask) {
; CHECK-LABEL: mstore_split17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd %eax, %xmm3
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vmaskmovps %ymm2, %ymm3, 64(%rdi)
; CHECK-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 32(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v17f32.p0(<17 x float> %value, ptr %addr, i32 4, <17 x i1>%mask)
  ret void
}

define void @mstore_split23(<23 x float> %value, ptr %addr, <23 x i1> %mask) {
; CHECK-LABEL: mstore_split23:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; CHECK-NEXT:    vmaskmovps %ymm2, %ymm3, 32(%rdi)
; CHECK-NEXT:    vmovd %eax, %xmm2
; CHECK-NEXT:    vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 64(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v23f32.p0(<23 x float> %value, ptr %addr, i32 4, <23 x i1>%mask)
  ret void
}

declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>)
declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>)
declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>)
declare void @llvm.masked.store.v17f32.p0(<17 x float>, ptr, i32, <17 x i1>)
declare void @llvm.masked.store.v23f32.p0(<23 x float>, ptr, i32, <23 x i1>)