; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX1OR2,AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX1OR2,AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512VLDQ
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512VLBW

; The invocations above compile this file once per x86 feature level (sse2
; through avx512f+bw+vl), always with -disable-peephole and the
; x86_64-apple-darwin triple. A check prefix that is listed on more than one
; invocation is shared by those configurations.
; The assertion lines are generated (see the NOTE on the first line);
; regenerate them with the script instead of editing them by hand.

;
; vXf64
;
; Expanding loads whose result type is a vector of double.
;

; <2 x double> expanding load. The lane mask is computed in the function as
; (%trigger == 0) on <2 x i64>, and %src0 is passed as the pass-through
; operand of @llvm.masked.expandload.v2f64.
; In the checks below, the SSE and AVX1/AVX2 configurations test the mask bits
; one at a time and branch to a per-element load, advancing %rdi by 8 after
; each taken load that is followed by another test; the AVX-512
; configurations use a single masked vexpandpd.
define <2 x double> @expandload_v2f64_v2i64(ptr %base, <2 x double> %src0, <2 x i64> %trigger) {
; SSE2-LABEL: expandload_v2f64_v2i64:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: jne LBB0_1
; SSE2-NEXT: ## %bb.2: ## %else
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: jne LBB0_3
; SSE2-NEXT: LBB0_4: ## %else2
; SSE2-NEXT: retq
; SSE2-NEXT: LBB0_1: ## %cond.load
; SSE2-NEXT: movlps (%rdi), %xmm0 ## xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT: addq $8, %rdi
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB0_4
; SSE2-NEXT: LBB0_3: ## %cond.load1
; SSE2-NEXT: movhps (%rdi), %xmm0 ## xmm0 = xmm0[0,1],mem[0,1]
; SSE2-NEXT: retq
;
; SSE42-LABEL: expandload_v2f64_v2i64:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pcmpeqq %xmm1, %xmm2
; SSE42-NEXT: movmskpd %xmm2, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: jne LBB0_1
; SSE42-NEXT: ## %bb.2: ## %else
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: jne LBB0_3
; SSE42-NEXT: LBB0_4: ## %else2
; SSE42-NEXT: retq
; SSE42-NEXT: LBB0_1: ## %cond.load
; SSE42-NEXT: movlps (%rdi), %xmm0 ## xmm0 = mem[0,1],xmm0[2,3]
; SSE42-NEXT: addq $8, %rdi
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB0_4
; SSE42-NEXT: LBB0_3: ## %cond.load1
; SSE42-NEXT: movhps (%rdi), %xmm0 ## xmm0 = xmm0[0,1],mem[0,1]
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: expandload_v2f64_v2i64:
; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1OR2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1OR2-NEXT: vmovmskpd %xmm1, %eax
; AVX1OR2-NEXT: testb $1, %al
; AVX1OR2-NEXT: jne LBB0_1
; AVX1OR2-NEXT: ## %bb.2: ## %else
; AVX1OR2-NEXT: testb $2, %al
; AVX1OR2-NEXT: jne LBB0_3
; AVX1OR2-NEXT: LBB0_4: ## %else2
; AVX1OR2-NEXT: retq
; AVX1OR2-NEXT: LBB0_1: ## %cond.load
; AVX1OR2-NEXT: vmovlps (%rdi), %xmm0, %xmm0 ## xmm0 = mem[0,1],xmm0[2,3]
; AVX1OR2-NEXT: addq $8, %rdi
; AVX1OR2-NEXT: testb $2, %al
; AVX1OR2-NEXT: je LBB0_4
; AVX1OR2-NEXT: LBB0_3: ## %cond.load1
; AVX1OR2-NEXT: vmovhps (%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0,1]
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: expandload_v2f64_v2i64:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestnmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
; AVX512F-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: expandload_v2f64_v2i64:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vptestnmq %xmm1, %xmm1, %k1
; AVX512VL-NEXT: vexpandpd (%rdi), %xmm0 {%k1}
; AVX512VL-NEXT: retq
  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
  %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr %base, <2 x i1> %mask, <2 x double> %src0)
  ret <2 x double>%res
}

; <4 x double> expanding load. As in the 2-element test, the mask is
; (%trigger == 0), here on <4 x i64>, and %src0 is the pass-through operand of
; @llvm.masked.expandload.v4f64.
; The checks show four test-and-branch steps (mask bits 1, 2, 4, 8) for the
; SSE and AVX1/AVX2 configurations and a single masked vexpandpd for the
; AVX-512 configurations.
define <4 x double> @expandload_v4f64_v4i64(ptr %base, <4 x double> %src0, <4 x i64> %trigger) {
; SSE2-LABEL: expandload_v4f64_v4i64:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-NEXT: andps %xmm4, %xmm2
; SSE2-NEXT: movmskps %xmm2, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: jne LBB1_1
; SSE2-NEXT: ## %bb.2: ## %else
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: jne LBB1_3
; SSE2-NEXT: LBB1_4: ## %else2
; SSE2-NEXT: testb $4, %al
; SSE2-NEXT: jne LBB1_5
; SSE2-NEXT: LBB1_6: ## %else6
; SSE2-NEXT: testb $8, %al
; SSE2-NEXT: jne LBB1_7
; SSE2-NEXT: LBB1_8: ## %else10
; SSE2-NEXT: retq
; SSE2-NEXT: LBB1_1: ## %cond.load
; SSE2-NEXT: movlps (%rdi), %xmm0 ## xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT: addq $8, %rdi
; SSE2-NEXT: testb $2, %al
; SSE2-NEXT: je LBB1_4
; SSE2-NEXT: LBB1_3: ## %cond.load1
; SSE2-NEXT: movhps (%rdi), %xmm0 ## xmm0 = xmm0[0,1],mem[0,1]
; SSE2-NEXT: addq $8, %rdi
; SSE2-NEXT: testb $4, %al
; SSE2-NEXT: je LBB1_6
; SSE2-NEXT: LBB1_5: ## %cond.load5
; SSE2-NEXT: movlps (%rdi), %xmm1 ## xmm1 = mem[0,1],xmm1[2,3]
; SSE2-NEXT: addq $8, %rdi
; SSE2-NEXT: testb $8, %al
; SSE2-NEXT: je LBB1_8
; SSE2-NEXT: LBB1_7: ## %cond.load9
; SSE2-NEXT: movhps (%rdi), %xmm1 ## xmm1 = xmm1[0,1],mem[0,1]
; SSE2-NEXT: retq
;
; SSE42-LABEL: expandload_v4f64_v4i64:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm4, %xmm4
; SSE42-NEXT: pcmpeqq %xmm4, %xmm3
; SSE42-NEXT: pcmpeqq %xmm4, %xmm2
; SSE42-NEXT: packssdw %xmm3, %xmm2
; SSE42-NEXT: movmskps %xmm2, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: jne LBB1_1
; SSE42-NEXT: ## %bb.2: ## %else
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: jne LBB1_3
; SSE42-NEXT: LBB1_4: ## %else2
; SSE42-NEXT: testb $4, %al
; SSE42-NEXT: jne LBB1_5
; SSE42-NEXT: LBB1_6: ## %else6
; SSE42-NEXT: testb $8, %al
; SSE42-NEXT: jne LBB1_7
; SSE42-NEXT: LBB1_8: ## %else10
; SSE42-NEXT: retq
; SSE42-NEXT: LBB1_1: ## %cond.load
; SSE42-NEXT: movlps (%rdi), %xmm0 ## xmm0 = mem[0,1],xmm0[2,3]
; SSE42-NEXT: addq $8, %rdi
; SSE42-NEXT: testb $2, %al
; SSE42-NEXT: je LBB1_4
; SSE42-NEXT: LBB1_3: ## %cond.load1
; SSE42-NEXT: movhps (%rdi), %xmm0 ## xmm0 = xmm0[0,1],mem[0,1]
; SSE42-NEXT: addq $8, %rdi
; SSE42-NEXT: testb $4, %al
; SSE42-NEXT: je LBB1_6
; SSE42-NEXT: LBB1_5: ## %cond.load5
; SSE42-NEXT: movlps (%rdi), %xmm1 ## xmm1 = mem[0,1],xmm1[2,3]
; SSE42-NEXT: addq $8, %rdi
; SSE42-NEXT: testb $8, %al
; SSE42-NEXT: je LBB1_8
; SSE42-NEXT: LBB1_7: ## %cond.load9
; SSE42-NEXT: movhps (%rdi), %xmm1 ## xmm1 = xmm1[0,1],mem[0,1]
; SSE42-NEXT: retq
;
; AVX1-LABEL: expandload_v4f64_v4i64:
; AVX1: ## %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vmovmskpd %ymm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: jne LBB1_1
; AVX1-NEXT: ## %bb.2: ## %else
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: jne LBB1_3
; AVX1-NEXT: LBB1_4: ## %else2
; AVX1-NEXT: testb $4, %al
; AVX1-NEXT: jne LBB1_5
; AVX1-NEXT: LBB1_6: ## %else6
; AVX1-NEXT: testb $8, %al
; AVX1-NEXT: jne LBB1_7
; AVX1-NEXT: LBB1_8: ## %else10
; AVX1-NEXT: retq
; AVX1-NEXT: LBB1_1: ## %cond.load
; AVX1-NEXT: vmovsd (%rdi), %xmm1 ## xmm1 = mem[0],zero
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: je LBB1_4
; AVX1-NEXT: LBB1_3: ## %cond.load1
; AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm1 ## xmm1 = xmm0[0],mem[0]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $4, %al
; AVX1-NEXT: je LBB1_6
; AVX1-NEXT: LBB1_5: ## %cond.load5
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $8, %al
; AVX1-NEXT: je LBB1_8
; AVX1-NEXT: LBB1_7: ## %cond.load9
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: expandload_v4f64_v4i64:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vmovmskpd %ymm1, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: jne LBB1_1
; AVX2-NEXT: ## %bb.2: ## %else
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: jne LBB1_3
; AVX2-NEXT: LBB1_4: ## %else2
; AVX2-NEXT: testb $4, %al
; AVX2-NEXT: jne LBB1_5
; AVX2-NEXT: LBB1_6: ## %else6
; AVX2-NEXT: testb $8, %al
; AVX2-NEXT: jne LBB1_7
; AVX2-NEXT: LBB1_8: ## %else10
; AVX2-NEXT: retq
; AVX2-NEXT: LBB1_1: ## %cond.load
; AVX2-NEXT: vmovsd (%rdi), %xmm1 ## xmm1 = mem[0],zero
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: je LBB1_4
; AVX2-NEXT: LBB1_3: ## %cond.load1
; AVX2-NEXT: vmovhpd (%rdi), %xmm0, %xmm1 ## xmm1 = xmm0[0],mem[0]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $4, %al
; AVX2-NEXT: je LBB1_6
; AVX2-NEXT: LBB1_5: ## %cond.load5
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $8, %al
; AVX2-NEXT: je LBB1_8
; AVX2-NEXT: LBB1_7: ## %cond.load9
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: expandload_v4f64_v4i64:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestnmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $12, %k0, %k0
; AVX512F-NEXT: kshiftrw $12, %k0, %k1
; AVX512F-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: expandload_v4f64_v4i64:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vptestnmq %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vexpandpd (%rdi), %ymm0 {%k1}
; AVX512VL-NEXT: retq
  %mask = icmp eq <4 x i64> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.expandload.v4f64(ptr %base, <4 x i1> %mask, <4 x double> %src0)
  ret <4 x double>%res
}

; <8 x double> expanding load. Unlike the two tests above, the <8 x i1> mask
; is a function argument and is handed to @llvm.masked.expandload.v8f64
; directly, with no compare in the IR; %src0 is the pass-through operand.
; In the checks below, the SSE and AVX1/AVX2 configurations first move the
; mask into %eax (shift, pack, pmovmskb) and then test its eight bits one at
; a time; the AVX-512 configurations convert the mask to a k-register and
; issue a single masked vexpandpd on %zmm0.
define <8 x double> @expandload_v8f64_v8i1(ptr %base, <8 x double> %src0, <8 x i1> %mask) {
; SSE-LABEL: expandload_v8f64_v8i1:
; SSE: ## %bb.0:
; SSE-NEXT: psllw $15, %xmm4
; SSE-NEXT: packsswb %xmm4, %xmm4
; SSE-NEXT: pmovmskb %xmm4, %eax
; SSE-NEXT: testb $1, %al
; SSE-NEXT: jne LBB2_1
; SSE-NEXT: ## %bb.2: ## %else
; SSE-NEXT: testb $2, %al
; SSE-NEXT: jne LBB2_3
; SSE-NEXT: LBB2_4: ## %else2
; SSE-NEXT: testb $4, %al
; SSE-NEXT: jne LBB2_5
; SSE-NEXT: LBB2_6: ## %else6
; SSE-NEXT: testb $8, %al
; SSE-NEXT: jne LBB2_7
; SSE-NEXT: LBB2_8: ## %else10
; SSE-NEXT: testb $16, %al
; SSE-NEXT: jne LBB2_9
; SSE-NEXT: LBB2_10: ## %else14
; SSE-NEXT: testb $32, %al
; SSE-NEXT: jne LBB2_11
; SSE-NEXT: LBB2_12: ## %else18
; SSE-NEXT: testb $64, %al
; SSE-NEXT: jne LBB2_13
; SSE-NEXT: LBB2_14: ## %else22
; SSE-NEXT: testb $-128, %al
; SSE-NEXT: jne LBB2_15
; SSE-NEXT: LBB2_16: ## %else26
; SSE-NEXT: retq
; SSE-NEXT: LBB2_1: ## %cond.load
; SSE-NEXT: movlps (%rdi), %xmm0 ## xmm0 = mem[0,1],xmm0[2,3]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $2, %al
; SSE-NEXT: je LBB2_4
; SSE-NEXT: LBB2_3: ## %cond.load1
; SSE-NEXT: movhps (%rdi), %xmm0 ## xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $4, %al
; SSE-NEXT: je LBB2_6
; SSE-NEXT: LBB2_5: ## %cond.load5
; SSE-NEXT: movlps (%rdi), %xmm1 ## xmm1 = mem[0,1],xmm1[2,3]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $8, %al
; SSE-NEXT: je LBB2_8
; SSE-NEXT: LBB2_7: ## %cond.load9
; SSE-NEXT: movhps (%rdi), %xmm1 ## xmm1 = xmm1[0,1],mem[0,1]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $16, %al
; SSE-NEXT: je LBB2_10
; SSE-NEXT: LBB2_9: ## %cond.load13
; SSE-NEXT: movlps (%rdi), %xmm2 ## xmm2 = mem[0,1],xmm2[2,3]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $32, %al
; SSE-NEXT: je LBB2_12
; SSE-NEXT: LBB2_11: ## %cond.load17
; SSE-NEXT: movhps (%rdi), %xmm2 ## xmm2 = xmm2[0,1],mem[0,1]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $64, %al
; SSE-NEXT: je LBB2_14
; SSE-NEXT: LBB2_13: ## %cond.load21
; SSE-NEXT: movlps (%rdi), %xmm3 ## xmm3 = mem[0,1],xmm3[2,3]
; SSE-NEXT: addq $8, %rdi
; SSE-NEXT: testb $-128, %al
; SSE-NEXT: je LBB2_16
; SSE-NEXT: LBB2_15: ## %cond.load25
; SSE-NEXT: movhps (%rdi), %xmm3 ## xmm3 = xmm3[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: expandload_v8f64_v8i1:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: jne LBB2_1
; AVX1-NEXT: ## %bb.2: ## %else
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: jne LBB2_3
; AVX1-NEXT: LBB2_4: ## %else2
; AVX1-NEXT: testb $4, %al
; AVX1-NEXT: jne LBB2_5
; AVX1-NEXT: LBB2_6: ## %else6
; AVX1-NEXT: testb $8, %al
; AVX1-NEXT: jne LBB2_7
; AVX1-NEXT: LBB2_8: ## %else10
; AVX1-NEXT: testb $16, %al
; AVX1-NEXT: jne LBB2_9
; AVX1-NEXT: LBB2_10: ## %else14
; AVX1-NEXT: testb $32, %al
; AVX1-NEXT: jne LBB2_11
; AVX1-NEXT: LBB2_12: ## %else18
; AVX1-NEXT: testb $64, %al
; AVX1-NEXT: jne LBB2_13
; AVX1-NEXT: LBB2_14: ## %else22
; AVX1-NEXT: testb $-128, %al
; AVX1-NEXT: jne LBB2_15
; AVX1-NEXT: LBB2_16: ## %else26
; AVX1-NEXT: retq
; AVX1-NEXT: LBB2_1: ## %cond.load
; AVX1-NEXT: vmovsd (%rdi), %xmm2 ## xmm2 = mem[0],zero
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: je LBB2_4
; AVX1-NEXT: LBB2_3: ## %cond.load1
; AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm2 ## xmm2 = xmm0[0,1],mem[0,1]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $4, %al
; AVX1-NEXT: je LBB2_6
; AVX1-NEXT: LBB2_5: ## %cond.load5
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5],ymm0[6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $8, %al
; AVX1-NEXT: je LBB2_8
; AVX1-NEXT: LBB2_7: ## %cond.load9
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $16, %al
; AVX1-NEXT: je LBB2_10
; AVX1-NEXT: LBB2_9: ## %cond.load13
; AVX1-NEXT: vmovsd (%rdi), %xmm2 ## xmm2 = mem[0],zero
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $32, %al
; AVX1-NEXT: je LBB2_12
; AVX1-NEXT: LBB2_11: ## %cond.load17
; AVX1-NEXT: vmovhps (%rdi), %xmm1, %xmm2 ## xmm2 = xmm1[0,1],mem[0,1]
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $64, %al
; AVX1-NEXT: je LBB2_14
; AVX1-NEXT: LBB2_13: ## %cond.load21
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
; AVX1-NEXT: addq $8, %rdi
; AVX1-NEXT: testb $-128, %al
; AVX1-NEXT: je LBB2_16
; AVX1-NEXT: LBB2_15: ## %cond.load25
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: expandload_v8f64_v8i1:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpmovmskb %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: jne LBB2_1
; AVX2-NEXT: ## %bb.2: ## %else
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: jne LBB2_3
; AVX2-NEXT: LBB2_4: ## %else2
; AVX2-NEXT: testb $4, %al
; AVX2-NEXT: jne LBB2_5
; AVX2-NEXT: LBB2_6: ## %else6
; AVX2-NEXT: testb $8, %al
; AVX2-NEXT: jne LBB2_7
; AVX2-NEXT: LBB2_8: ## %else10
; AVX2-NEXT: testb $16, %al
; AVX2-NEXT: jne LBB2_9
; AVX2-NEXT: LBB2_10: ## %else14
; AVX2-NEXT: testb $32, %al
; AVX2-NEXT: jne LBB2_11
; AVX2-NEXT: LBB2_12: ## %else18
; AVX2-NEXT: testb $64, %al
; AVX2-NEXT: jne LBB2_13
; AVX2-NEXT: LBB2_14: ## %else22
; AVX2-NEXT: testb $-128, %al
; AVX2-NEXT: jne LBB2_15
; AVX2-NEXT: LBB2_16: ## %else26
; AVX2-NEXT: retq
; AVX2-NEXT: LBB2_1: ## %cond.load
; AVX2-NEXT: vmovq (%rdi), %xmm2 ## xmm2 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: je LBB2_4
; AVX2-NEXT: LBB2_3: ## %cond.load1
; AVX2-NEXT: vmovhps (%rdi), %xmm0, %xmm2 ## xmm2 = xmm0[0,1],mem[0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $4, %al
; AVX2-NEXT: je LBB2_6
; AVX2-NEXT: LBB2_5: ## %cond.load5
; AVX2-NEXT: vpbroadcastq (%rdi), %ymm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5],ymm0[6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $8, %al
; AVX2-NEXT: je LBB2_8
; AVX2-NEXT: LBB2_7: ## %cond.load9
; AVX2-NEXT: vpbroadcastq (%rdi), %ymm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $16, %al
; AVX2-NEXT: je LBB2_10
; AVX2-NEXT: LBB2_9: ## %cond.load13
; AVX2-NEXT: vmovq (%rdi), %xmm2 ## xmm2 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $32, %al
; AVX2-NEXT: je LBB2_12
; AVX2-NEXT: LBB2_11: ## %cond.load17
; AVX2-NEXT: vmovhps (%rdi), %xmm1, %xmm2 ## xmm2 = xmm1[0,1],mem[0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $64, %al
; AVX2-NEXT: je LBB2_14
; AVX2-NEXT: LBB2_13: ## %cond.load21
; AVX2-NEXT: vpbroadcastq (%rdi), %ymm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
; AVX2-NEXT: addq $8, %rdi
; AVX2-NEXT: testb $-128, %al
; AVX2-NEXT: je LBB2_16
; AVX2-NEXT: LBB2_15: ## %cond.load25
; AVX2-NEXT: vpbroadcastq (%rdi), %ymm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: expandload_v8f64_v8i1:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
; AVX512F-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VLDQ-LABEL: expandload_v8f64_v8i1:
; AVX512VLDQ: ## %bb.0:
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1
; AVX512VLDQ-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: expandload_v8f64_v8i1:
; AVX512VLBW: ## %bb.0:
; AVX512VLBW-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovw2m %xmm1, %k1
; AVX512VLBW-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; AVX512VLBW-NEXT: retq
  %res = call <8 x double> @llvm.masked.expandload.v8f64(ptr %base, <8 x i1> %mask, <8 x double> %src0)
  ret <8 x double>%res
}

define <16 x double> @expandload_v16f64_v16i32(ptr %base, <16 x double> %src0, <16 x i32> %trigger) {
; SSE-LABEL: expandload_v16f64_v16i32:
; SSE: ## %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pxor %xmm8, %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpeqd %xmm8, %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pcmpeqd %xmm8, %xmm10
; SSE-NEXT: packssdw %xmm9, %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpeqd %xmm8, %xmm9
; SSE-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packssdw %xmm9, %xmm8
; SSE-NEXT: packsswb %xmm10, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %ecx
; SSE-NEXT: testb $1, %cl
; SSE-NEXT: jne LBB3_1
; SSE-NEXT: ## %bb.2: ## %else
; SSE-NEXT: testb $2, %cl
; SSE-NEXT: jne LBB3_3
; SSE-NEXT: LBB3_4: ## %else2
; SSE-NEXT: testb $4, %cl
; SSE-NEXT: jne LBB3_5
; SSE-NEXT: LBB3_6: ## %else6
; SSE-NEXT: testb $8, %cl
; SSE-NEXT: jne LBB3_7
; SSE-NEXT: LBB3_8: ## %else10
; SSE-NEXT: testb $16, %cl
; SSE-NEXT: jne LBB3_9
; SSE-NEXT: LBB3_10: ## %else14
; SSE-NEXT: testb $32, %cl
; SSE-NEXT: jne LBB3_11
; SSE-NEXT: LBB3_12: ## %else18
; SSE-NEXT: testb $64, %cl
; SSE-NEXT: jne LBB3_13
; SSE-NEXT: LBB3_14: ## %else22
; SSE-NEXT: testb %cl, %cl
; SSE-NEXT: js LBB3_15
; SSE-NEXT: LBB3_16: ## %else26
; SSE-NEXT: testl $256, %ecx ## imm = 0x100
; SSE-NEXT: jne LBB3_17
; SSE-NEXT: LBB3_18: ## %else30
; SSE-NEXT: testl $512, %ecx ## imm = 0x200
; SSE-NEXT: jne LBB3_19
;
SSE-NEXT: LBB3_20: ## %else34 586; SSE-NEXT: testl $1024, %ecx ## imm = 0x400 587; SSE-NEXT: jne LBB3_21 588; SSE-NEXT: LBB3_22: ## %else38 589; SSE-NEXT: testl $2048, %ecx ## imm = 0x800 590; SSE-NEXT: jne LBB3_23 591; SSE-NEXT: LBB3_24: ## %else42 592; SSE-NEXT: testl $4096, %ecx ## imm = 0x1000 593; SSE-NEXT: jne LBB3_25 594; SSE-NEXT: LBB3_26: ## %else46 595; SSE-NEXT: testl $8192, %ecx ## imm = 0x2000 596; SSE-NEXT: jne LBB3_27 597; SSE-NEXT: LBB3_28: ## %else50 598; SSE-NEXT: testl $16384, %ecx ## imm = 0x4000 599; SSE-NEXT: jne LBB3_29 600; SSE-NEXT: LBB3_30: ## %else54 601; SSE-NEXT: testl $32768, %ecx ## imm = 0x8000 602; SSE-NEXT: je LBB3_32 603; SSE-NEXT: LBB3_31: ## %cond.load57 604; SSE-NEXT: movhps (%rsi), %xmm7 ## xmm7 = xmm7[0,1],mem[0,1] 605; SSE-NEXT: LBB3_32: ## %else58 606; SSE-NEXT: movaps %xmm0, (%rax) 607; SSE-NEXT: movaps %xmm1, 16(%rax) 608; SSE-NEXT: movaps %xmm2, 32(%rax) 609; SSE-NEXT: movaps %xmm3, 48(%rax) 610; SSE-NEXT: movaps %xmm4, 64(%rax) 611; SSE-NEXT: movaps %xmm5, 80(%rax) 612; SSE-NEXT: movaps %xmm6, 96(%rax) 613; SSE-NEXT: movaps %xmm7, 112(%rax) 614; SSE-NEXT: retq 615; SSE-NEXT: LBB3_1: ## %cond.load 616; SSE-NEXT: movlps (%rsi), %xmm0 ## xmm0 = mem[0,1],xmm0[2,3] 617; SSE-NEXT: addq $8, %rsi 618; SSE-NEXT: testb $2, %cl 619; SSE-NEXT: je LBB3_4 620; SSE-NEXT: LBB3_3: ## %cond.load1 621; SSE-NEXT: movhps (%rsi), %xmm0 ## xmm0 = xmm0[0,1],mem[0,1] 622; SSE-NEXT: addq $8, %rsi 623; SSE-NEXT: testb $4, %cl 624; SSE-NEXT: je LBB3_6 625; SSE-NEXT: LBB3_5: ## %cond.load5 626; SSE-NEXT: movlps (%rsi), %xmm1 ## xmm1 = mem[0,1],xmm1[2,3] 627; SSE-NEXT: addq $8, %rsi 628; SSE-NEXT: testb $8, %cl 629; SSE-NEXT: je LBB3_8 630; SSE-NEXT: LBB3_7: ## %cond.load9 631; SSE-NEXT: movhps (%rsi), %xmm1 ## xmm1 = xmm1[0,1],mem[0,1] 632; SSE-NEXT: addq $8, %rsi 633; SSE-NEXT: testb $16, %cl 634; SSE-NEXT: je LBB3_10 635; SSE-NEXT: LBB3_9: ## %cond.load13 636; SSE-NEXT: movlps (%rsi), %xmm2 ## xmm2 = mem[0,1],xmm2[2,3] 637; SSE-NEXT: addq $8, 
%rsi 638; SSE-NEXT: testb $32, %cl 639; SSE-NEXT: je LBB3_12 640; SSE-NEXT: LBB3_11: ## %cond.load17 641; SSE-NEXT: movhps (%rsi), %xmm2 ## xmm2 = xmm2[0,1],mem[0,1] 642; SSE-NEXT: addq $8, %rsi 643; SSE-NEXT: testb $64, %cl 644; SSE-NEXT: je LBB3_14 645; SSE-NEXT: LBB3_13: ## %cond.load21 646; SSE-NEXT: movlps (%rsi), %xmm3 ## xmm3 = mem[0,1],xmm3[2,3] 647; SSE-NEXT: addq $8, %rsi 648; SSE-NEXT: testb %cl, %cl 649; SSE-NEXT: jns LBB3_16 650; SSE-NEXT: LBB3_15: ## %cond.load25 651; SSE-NEXT: movhps (%rsi), %xmm3 ## xmm3 = xmm3[0,1],mem[0,1] 652; SSE-NEXT: addq $8, %rsi 653; SSE-NEXT: testl $256, %ecx ## imm = 0x100 654; SSE-NEXT: je LBB3_18 655; SSE-NEXT: LBB3_17: ## %cond.load29 656; SSE-NEXT: movlps (%rsi), %xmm4 ## xmm4 = mem[0,1],xmm4[2,3] 657; SSE-NEXT: addq $8, %rsi 658; SSE-NEXT: testl $512, %ecx ## imm = 0x200 659; SSE-NEXT: je LBB3_20 660; SSE-NEXT: LBB3_19: ## %cond.load33 661; SSE-NEXT: movhps (%rsi), %xmm4 ## xmm4 = xmm4[0,1],mem[0,1] 662; SSE-NEXT: addq $8, %rsi 663; SSE-NEXT: testl $1024, %ecx ## imm = 0x400 664; SSE-NEXT: je LBB3_22 665; SSE-NEXT: LBB3_21: ## %cond.load37 666; SSE-NEXT: movlps (%rsi), %xmm5 ## xmm5 = mem[0,1],xmm5[2,3] 667; SSE-NEXT: addq $8, %rsi 668; SSE-NEXT: testl $2048, %ecx ## imm = 0x800 669; SSE-NEXT: je LBB3_24 670; SSE-NEXT: LBB3_23: ## %cond.load41 671; SSE-NEXT: movhps (%rsi), %xmm5 ## xmm5 = xmm5[0,1],mem[0,1] 672; SSE-NEXT: addq $8, %rsi 673; SSE-NEXT: testl $4096, %ecx ## imm = 0x1000 674; SSE-NEXT: je LBB3_26 675; SSE-NEXT: LBB3_25: ## %cond.load45 676; SSE-NEXT: movlps (%rsi), %xmm6 ## xmm6 = mem[0,1],xmm6[2,3] 677; SSE-NEXT: addq $8, %rsi 678; SSE-NEXT: testl $8192, %ecx ## imm = 0x2000 679; SSE-NEXT: je LBB3_28 680; SSE-NEXT: LBB3_27: ## %cond.load49 681; SSE-NEXT: movhps (%rsi), %xmm6 ## xmm6 = xmm6[0,1],mem[0,1] 682; SSE-NEXT: addq $8, %rsi 683; SSE-NEXT: testl $16384, %ecx ## imm = 0x4000 684; SSE-NEXT: je LBB3_30 685; SSE-NEXT: LBB3_29: ## %cond.load53 686; SSE-NEXT: movlps (%rsi), %xmm7 ## xmm7 = 
mem[0,1],xmm7[2,3] 687; SSE-NEXT: addq $8, %rsi 688; SSE-NEXT: testl $32768, %ecx ## imm = 0x8000 689; SSE-NEXT: jne LBB3_31 690; SSE-NEXT: jmp LBB3_32 691; 692; AVX1-LABEL: expandload_v16f64_v16i32: 693; AVX1: ## %bb.0: 694; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 695; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7 696; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6 697; AVX1-NEXT: vpcmpeqd %xmm7, %xmm5, %xmm5 698; AVX1-NEXT: vpackssdw %xmm6, %xmm5, %xmm5 699; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6 700; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6 701; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm4 702; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4 703; AVX1-NEXT: vpacksswb %xmm5, %xmm4, %xmm4 704; AVX1-NEXT: vpmovmskb %xmm4, %eax 705; AVX1-NEXT: testb $1, %al 706; AVX1-NEXT: jne LBB3_1 707; AVX1-NEXT: ## %bb.2: ## %else 708; AVX1-NEXT: testb $2, %al 709; AVX1-NEXT: jne LBB3_3 710; AVX1-NEXT: LBB3_4: ## %else2 711; AVX1-NEXT: testb $4, %al 712; AVX1-NEXT: jne LBB3_5 713; AVX1-NEXT: LBB3_6: ## %else6 714; AVX1-NEXT: testb $8, %al 715; AVX1-NEXT: jne LBB3_7 716; AVX1-NEXT: LBB3_8: ## %else10 717; AVX1-NEXT: testb $16, %al 718; AVX1-NEXT: jne LBB3_9 719; AVX1-NEXT: LBB3_10: ## %else14 720; AVX1-NEXT: testb $32, %al 721; AVX1-NEXT: jne LBB3_11 722; AVX1-NEXT: LBB3_12: ## %else18 723; AVX1-NEXT: testb $64, %al 724; AVX1-NEXT: jne LBB3_13 725; AVX1-NEXT: LBB3_14: ## %else22 726; AVX1-NEXT: testb %al, %al 727; AVX1-NEXT: js LBB3_15 728; AVX1-NEXT: LBB3_16: ## %else26 729; AVX1-NEXT: testl $256, %eax ## imm = 0x100 730; AVX1-NEXT: jne LBB3_17 731; AVX1-NEXT: LBB3_18: ## %else30 732; AVX1-NEXT: testl $512, %eax ## imm = 0x200 733; AVX1-NEXT: jne LBB3_19 734; AVX1-NEXT: LBB3_20: ## %else34 735; AVX1-NEXT: testl $1024, %eax ## imm = 0x400 736; AVX1-NEXT: jne LBB3_21 737; AVX1-NEXT: LBB3_22: ## %else38 738; AVX1-NEXT: testl $2048, %eax ## imm = 0x800 739; AVX1-NEXT: jne LBB3_23 740; AVX1-NEXT: LBB3_24: ## %else42 741; AVX1-NEXT: testl $4096, %eax ## imm = 0x1000 742; AVX1-NEXT: jne LBB3_25 743; AVX1-NEXT: 
LBB3_26: ## %else46 744; AVX1-NEXT: testl $8192, %eax ## imm = 0x2000 745; AVX1-NEXT: jne LBB3_27 746; AVX1-NEXT: LBB3_28: ## %else50 747; AVX1-NEXT: testl $16384, %eax ## imm = 0x4000 748; AVX1-NEXT: jne LBB3_29 749; AVX1-NEXT: LBB3_30: ## %else54 750; AVX1-NEXT: testl $32768, %eax ## imm = 0x8000 751; AVX1-NEXT: jne LBB3_31 752; AVX1-NEXT: LBB3_32: ## %else58 753; AVX1-NEXT: retq 754; AVX1-NEXT: LBB3_1: ## %cond.load 755; AVX1-NEXT: vmovsd (%rdi), %xmm4 ## xmm4 = mem[0],zero 756; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1],ymm0[2,3,4,5,6,7] 757; AVX1-NEXT: addq $8, %rdi 758; AVX1-NEXT: testb $2, %al 759; AVX1-NEXT: je LBB3_4 760; AVX1-NEXT: LBB3_3: ## %cond.load1 761; AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0,1],mem[0,1] 762; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 763; AVX1-NEXT: addq $8, %rdi 764; AVX1-NEXT: testb $4, %al 765; AVX1-NEXT: je LBB3_6 766; AVX1-NEXT: LBB3_5: ## %cond.load5 767; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 768; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4,5],ymm0[6,7] 769; AVX1-NEXT: addq $8, %rdi 770; AVX1-NEXT: testb $8, %al 771; AVX1-NEXT: je LBB3_8 772; AVX1-NEXT: LBB3_7: ## %cond.load9 773; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 774; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm4[6,7] 775; AVX1-NEXT: addq $8, %rdi 776; AVX1-NEXT: testb $16, %al 777; AVX1-NEXT: je LBB3_10 778; AVX1-NEXT: LBB3_9: ## %cond.load13 779; AVX1-NEXT: vmovsd (%rdi), %xmm4 ## xmm4 = mem[0],zero 780; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1],ymm1[2,3,4,5,6,7] 781; AVX1-NEXT: addq $8, %rdi 782; AVX1-NEXT: testb $32, %al 783; AVX1-NEXT: je LBB3_12 784; AVX1-NEXT: LBB3_11: ## %cond.load17 785; AVX1-NEXT: vmovhps (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0,1],mem[0,1] 786; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 787; AVX1-NEXT: addq $8, %rdi 788; AVX1-NEXT: testb $64, %al 789; AVX1-NEXT: je LBB3_14 790; AVX1-NEXT: LBB3_13: ## %cond.load21 791; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 
792; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm4[4,5],ymm1[6,7] 793; AVX1-NEXT: addq $8, %rdi 794; AVX1-NEXT: testb %al, %al 795; AVX1-NEXT: jns LBB3_16 796; AVX1-NEXT: LBB3_15: ## %cond.load25 797; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 798; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6,7] 799; AVX1-NEXT: addq $8, %rdi 800; AVX1-NEXT: testl $256, %eax ## imm = 0x100 801; AVX1-NEXT: je LBB3_18 802; AVX1-NEXT: LBB3_17: ## %cond.load29 803; AVX1-NEXT: vmovsd (%rdi), %xmm4 ## xmm4 = mem[0],zero 804; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3,4,5,6,7] 805; AVX1-NEXT: addq $8, %rdi 806; AVX1-NEXT: testl $512, %eax ## imm = 0x200 807; AVX1-NEXT: je LBB3_20 808; AVX1-NEXT: LBB3_19: ## %cond.load33 809; AVX1-NEXT: vmovhps (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0,1],mem[0,1] 810; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 811; AVX1-NEXT: addq $8, %rdi 812; AVX1-NEXT: testl $1024, %eax ## imm = 0x400 813; AVX1-NEXT: je LBB3_22 814; AVX1-NEXT: LBB3_21: ## %cond.load37 815; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 816; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5],ymm2[6,7] 817; AVX1-NEXT: addq $8, %rdi 818; AVX1-NEXT: testl $2048, %eax ## imm = 0x800 819; AVX1-NEXT: je LBB3_24 820; AVX1-NEXT: LBB3_23: ## %cond.load41 821; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 822; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6,7] 823; AVX1-NEXT: addq $8, %rdi 824; AVX1-NEXT: testl $4096, %eax ## imm = 0x1000 825; AVX1-NEXT: je LBB3_26 826; AVX1-NEXT: LBB3_25: ## %cond.load45 827; AVX1-NEXT: vmovsd (%rdi), %xmm4 ## xmm4 = mem[0],zero 828; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1],ymm3[2,3,4,5,6,7] 829; AVX1-NEXT: addq $8, %rdi 830; AVX1-NEXT: testl $8192, %eax ## imm = 0x2000 831; AVX1-NEXT: je LBB3_28 832; AVX1-NEXT: LBB3_27: ## %cond.load49 833; AVX1-NEXT: vmovhps (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0,1],mem[0,1] 834; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 835; AVX1-NEXT: addq $8, 
%rdi 836; AVX1-NEXT: testl $16384, %eax ## imm = 0x4000 837; AVX1-NEXT: je LBB3_30 838; AVX1-NEXT: LBB3_29: ## %cond.load53 839; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 840; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm4[4,5],ymm3[6,7] 841; AVX1-NEXT: addq $8, %rdi 842; AVX1-NEXT: testl $32768, %eax ## imm = 0x8000 843; AVX1-NEXT: je LBB3_32 844; AVX1-NEXT: LBB3_31: ## %cond.load57 845; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4 846; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm4[6,7] 847; AVX1-NEXT: retq 848; 849; AVX2-LABEL: expandload_v16f64_v16i32: 850; AVX2: ## %bb.0: 851; AVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6 852; AVX2-NEXT: vpcmpeqd %ymm6, %ymm5, %ymm5 853; AVX2-NEXT: vpcmpeqd %ymm6, %ymm4, %ymm4 854; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4 855; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm5 856; AVX2-NEXT: vpacksswb %xmm5, %xmm4, %xmm4 857; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,2,1,3] 858; AVX2-NEXT: vpmovmskb %xmm4, %eax 859; AVX2-NEXT: testb $1, %al 860; AVX2-NEXT: jne LBB3_1 861; AVX2-NEXT: ## %bb.2: ## %else 862; AVX2-NEXT: testb $2, %al 863; AVX2-NEXT: jne LBB3_3 864; AVX2-NEXT: LBB3_4: ## %else2 865; AVX2-NEXT: testb $4, %al 866; AVX2-NEXT: jne LBB3_5 867; AVX2-NEXT: LBB3_6: ## %else6 868; AVX2-NEXT: testb $8, %al 869; AVX2-NEXT: jne LBB3_7 870; AVX2-NEXT: LBB3_8: ## %else10 871; AVX2-NEXT: testb $16, %al 872; AVX2-NEXT: jne LBB3_9 873; AVX2-NEXT: LBB3_10: ## %else14 874; AVX2-NEXT: testb $32, %al 875; AVX2-NEXT: jne LBB3_11 876; AVX2-NEXT: LBB3_12: ## %else18 877; AVX2-NEXT: testb $64, %al 878; AVX2-NEXT: jne LBB3_13 879; AVX2-NEXT: LBB3_14: ## %else22 880; AVX2-NEXT: testb %al, %al 881; AVX2-NEXT: js LBB3_15 882; AVX2-NEXT: LBB3_16: ## %else26 883; AVX2-NEXT: testl $256, %eax ## imm = 0x100 884; AVX2-NEXT: jne LBB3_17 885; AVX2-NEXT: LBB3_18: ## %else30 886; AVX2-NEXT: testl $512, %eax ## imm = 0x200 887; AVX2-NEXT: jne LBB3_19 888; AVX2-NEXT: LBB3_20: ## %else34 889; AVX2-NEXT: testl $1024, %eax ## imm = 0x400 890; AVX2-NEXT: jne LBB3_21 
891; AVX2-NEXT: LBB3_22: ## %else38 892; AVX2-NEXT: testl $2048, %eax ## imm = 0x800 893; AVX2-NEXT: jne LBB3_23 894; AVX2-NEXT: LBB3_24: ## %else42 895; AVX2-NEXT: testl $4096, %eax ## imm = 0x1000 896; AVX2-NEXT: jne LBB3_25 897; AVX2-NEXT: LBB3_26: ## %else46 898; AVX2-NEXT: testl $8192, %eax ## imm = 0x2000 899; AVX2-NEXT: jne LBB3_27 900; AVX2-NEXT: LBB3_28: ## %else50 901; AVX2-NEXT: testl $16384, %eax ## imm = 0x4000 902; AVX2-NEXT: jne LBB3_29 903; AVX2-NEXT: LBB3_30: ## %else54 904; AVX2-NEXT: testl $32768, %eax ## imm = 0x8000 905; AVX2-NEXT: jne LBB3_31 906; AVX2-NEXT: LBB3_32: ## %else58 907; AVX2-NEXT: retq 908; AVX2-NEXT: LBB3_1: ## %cond.load 909; AVX2-NEXT: vmovq (%rdi), %xmm4 ## xmm4 = mem[0],zero 910; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1],ymm0[2,3,4,5,6,7] 911; AVX2-NEXT: addq $8, %rdi 912; AVX2-NEXT: testb $2, %al 913; AVX2-NEXT: je LBB3_4 914; AVX2-NEXT: LBB3_3: ## %cond.load1 915; AVX2-NEXT: vmovhps (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0,1],mem[0,1] 916; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 917; AVX2-NEXT: addq $8, %rdi 918; AVX2-NEXT: testb $4, %al 919; AVX2-NEXT: je LBB3_6 920; AVX2-NEXT: LBB3_5: ## %cond.load5 921; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 922; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4,5],ymm0[6,7] 923; AVX2-NEXT: addq $8, %rdi 924; AVX2-NEXT: testb $8, %al 925; AVX2-NEXT: je LBB3_8 926; AVX2-NEXT: LBB3_7: ## %cond.load9 927; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 928; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm4[6,7] 929; AVX2-NEXT: addq $8, %rdi 930; AVX2-NEXT: testb $16, %al 931; AVX2-NEXT: je LBB3_10 932; AVX2-NEXT: LBB3_9: ## %cond.load13 933; AVX2-NEXT: vmovq (%rdi), %xmm4 ## xmm4 = mem[0],zero 934; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1],ymm1[2,3,4,5,6,7] 935; AVX2-NEXT: addq $8, %rdi 936; AVX2-NEXT: testb $32, %al 937; AVX2-NEXT: je LBB3_12 938; AVX2-NEXT: LBB3_11: ## %cond.load17 939; AVX2-NEXT: vmovhps (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0,1],mem[0,1] 
940; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 941; AVX2-NEXT: addq $8, %rdi 942; AVX2-NEXT: testb $64, %al 943; AVX2-NEXT: je LBB3_14 944; AVX2-NEXT: LBB3_13: ## %cond.load21 945; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 946; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm4[4,5],ymm1[6,7] 947; AVX2-NEXT: addq $8, %rdi 948; AVX2-NEXT: testb %al, %al 949; AVX2-NEXT: jns LBB3_16 950; AVX2-NEXT: LBB3_15: ## %cond.load25 951; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 952; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6,7] 953; AVX2-NEXT: addq $8, %rdi 954; AVX2-NEXT: testl $256, %eax ## imm = 0x100 955; AVX2-NEXT: je LBB3_18 956; AVX2-NEXT: LBB3_17: ## %cond.load29 957; AVX2-NEXT: vmovq (%rdi), %xmm4 ## xmm4 = mem[0],zero 958; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3,4,5,6,7] 959; AVX2-NEXT: addq $8, %rdi 960; AVX2-NEXT: testl $512, %eax ## imm = 0x200 961; AVX2-NEXT: je LBB3_20 962; AVX2-NEXT: LBB3_19: ## %cond.load33 963; AVX2-NEXT: vmovhps (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0,1],mem[0,1] 964; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 965; AVX2-NEXT: addq $8, %rdi 966; AVX2-NEXT: testl $1024, %eax ## imm = 0x400 967; AVX2-NEXT: je LBB3_22 968; AVX2-NEXT: LBB3_21: ## %cond.load37 969; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 970; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5],ymm2[6,7] 971; AVX2-NEXT: addq $8, %rdi 972; AVX2-NEXT: testl $2048, %eax ## imm = 0x800 973; AVX2-NEXT: je LBB3_24 974; AVX2-NEXT: LBB3_23: ## %cond.load41 975; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 976; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6,7] 977; AVX2-NEXT: addq $8, %rdi 978; AVX2-NEXT: testl $4096, %eax ## imm = 0x1000 979; AVX2-NEXT: je LBB3_26 980; AVX2-NEXT: LBB3_25: ## %cond.load45 981; AVX2-NEXT: vmovq (%rdi), %xmm4 ## xmm4 = mem[0],zero 982; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1],ymm3[2,3,4,5,6,7] 983; AVX2-NEXT: addq $8, %rdi 984; AVX2-NEXT: testl $8192, %eax ## imm = 0x2000 
985; AVX2-NEXT: je LBB3_28 986; AVX2-NEXT: LBB3_27: ## %cond.load49 987; AVX2-NEXT: vmovhps (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0,1],mem[0,1] 988; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 989; AVX2-NEXT: addq $8, %rdi 990; AVX2-NEXT: testl $16384, %eax ## imm = 0x4000 991; AVX2-NEXT: je LBB3_30 992; AVX2-NEXT: LBB3_29: ## %cond.load53 993; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 994; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm4[4,5],ymm3[6,7] 995; AVX2-NEXT: addq $8, %rdi 996; AVX2-NEXT: testl $32768, %eax ## imm = 0x8000 997; AVX2-NEXT: je LBB3_32 998; AVX2-NEXT: LBB3_31: ## %cond.load57 999; AVX2-NEXT: vpbroadcastq (%rdi), %ymm4 1000; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm4[6,7] 1001; AVX2-NEXT: retq 1002; 1003; AVX512F-LABEL: expandload_v16f64_v16i32: 1004; AVX512F: ## %bb.0: 1005; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3 1006; AVX512F-NEXT: vptestnmd %zmm3, %zmm3, %k1 1007; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k2 1008; AVX512F-NEXT: vexpandpd (%rdi), %zmm0 {%k2} 1009; AVX512F-NEXT: kmovw %k2, %eax 1010; AVX512F-NEXT: movzbl %al, %eax 1011; AVX512F-NEXT: imull $134480385, %eax, %eax ## imm = 0x8040201 1012; AVX512F-NEXT: shrl $3, %eax 1013; AVX512F-NEXT: andl $286331153, %eax ## imm = 0x11111111 1014; AVX512F-NEXT: imull $286331153, %eax, %eax ## imm = 0x11111111 1015; AVX512F-NEXT: shrl $28, %eax 1016; AVX512F-NEXT: vexpandpd (%rdi,%rax,8), %zmm1 {%k1} 1017; AVX512F-NEXT: retq 1018; 1019; AVX512VLDQ-LABEL: expandload_v16f64_v16i32: 1020; AVX512VLDQ: ## %bb.0: 1021; AVX512VLDQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3 1022; AVX512VLDQ-NEXT: vptestnmd %ymm3, %ymm3, %k1 1023; AVX512VLDQ-NEXT: vptestnmd %ymm2, %ymm2, %k2 1024; AVX512VLDQ-NEXT: kmovb %k2, %eax 1025; AVX512VLDQ-NEXT: imull $134480385, %eax, %eax ## imm = 0x8040201 1026; AVX512VLDQ-NEXT: shrl $3, %eax 1027; AVX512VLDQ-NEXT: andl $286331153, %eax ## imm = 0x11111111 1028; AVX512VLDQ-NEXT: imull $286331153, %eax, %eax ## imm = 0x11111111 1029; 
AVX512VLDQ-NEXT: shrl $28, %eax 1030; AVX512VLDQ-NEXT: vexpandpd (%rdi,%rax,8), %zmm1 {%k1} 1031; AVX512VLDQ-NEXT: vexpandpd (%rdi), %zmm0 {%k2} 1032; AVX512VLDQ-NEXT: retq 1033; 1034; AVX512VLBW-LABEL: expandload_v16f64_v16i32: 1035; AVX512VLBW: ## %bb.0: 1036; AVX512VLBW-NEXT: vextracti64x4 $1, %zmm2, %ymm3 1037; AVX512VLBW-NEXT: vptestnmd %ymm3, %ymm3, %k1 1038; AVX512VLBW-NEXT: vptestnmd %ymm2, %ymm2, %k2 1039; AVX512VLBW-NEXT: vexpandpd (%rdi), %zmm0 {%k2} 1040; AVX512VLBW-NEXT: kmovd %k2, %eax 1041; AVX512VLBW-NEXT: movzbl %al, %eax 1042; AVX512VLBW-NEXT: imull $134480385, %eax, %eax ## imm = 0x8040201 1043; AVX512VLBW-NEXT: shrl $3, %eax 1044; AVX512VLBW-NEXT: andl $286331153, %eax ## imm = 0x11111111 1045; AVX512VLBW-NEXT: imull $286331153, %eax, %eax ## imm = 0x11111111 1046; AVX512VLBW-NEXT: shrl $28, %eax 1047; AVX512VLBW-NEXT: vexpandpd (%rdi,%rax,8), %zmm1 {%k1} 1048; AVX512VLBW-NEXT: retq 1049 %mask = icmp eq <16 x i32> %trigger, zeroinitializer 1050 %res = call <16 x double> @llvm.masked.expandload.v16f64(ptr %base, <16 x i1> %mask, <16 x double> %src0) 1051 ret <16 x double> %res 1052} 1053 1054; 1055; vXf32 1056; 1057 1058define <2 x float> @expandload_v2f32_v2i1(ptr %base, <2 x float> %src0, <2 x i32> %trigger) { 1059; SSE2-LABEL: expandload_v2f32_v2i1: 1060; SSE2: ## %bb.0: 1061; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1062; SSE2-NEXT: pxor %xmm2, %xmm2 1063; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 1064; SSE2-NEXT: movmskpd %xmm2, %eax 1065; SSE2-NEXT: testb $1, %al 1066; SSE2-NEXT: jne LBB4_1 1067; SSE2-NEXT: ## %bb.2: ## %else 1068; SSE2-NEXT: testb $2, %al 1069; SSE2-NEXT: jne LBB4_3 1070; SSE2-NEXT: LBB4_4: ## %else2 1071; SSE2-NEXT: retq 1072; SSE2-NEXT: LBB4_1: ## %cond.load 1073; SSE2-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 1074; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 1075; SSE2-NEXT: addq $4, %rdi 1076; SSE2-NEXT: testb $2, %al 1077; SSE2-NEXT: je LBB4_4 1078; SSE2-NEXT: LBB4_3: ## %cond.load1 1079; 
SSE2-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 1080; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1081; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1082; SSE2-NEXT: movaps %xmm1, %xmm0 1083; SSE2-NEXT: retq 1084; 1085; SSE42-LABEL: expandload_v2f32_v2i1: 1086; SSE42: ## %bb.0: 1087; SSE42-NEXT: pxor %xmm2, %xmm2 1088; SSE42-NEXT: pcmpeqd %xmm1, %xmm2 1089; SSE42-NEXT: pmovsxdq %xmm2, %xmm1 1090; SSE42-NEXT: movmskpd %xmm1, %eax 1091; SSE42-NEXT: testb $1, %al 1092; SSE42-NEXT: jne LBB4_1 1093; SSE42-NEXT: ## %bb.2: ## %else 1094; SSE42-NEXT: testb $2, %al 1095; SSE42-NEXT: jne LBB4_3 1096; SSE42-NEXT: LBB4_4: ## %else2 1097; SSE42-NEXT: retq 1098; SSE42-NEXT: LBB4_1: ## %cond.load 1099; SSE42-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 1100; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 1101; SSE42-NEXT: addq $4, %rdi 1102; SSE42-NEXT: testb $2, %al 1103; SSE42-NEXT: je LBB4_4 1104; SSE42-NEXT: LBB4_3: ## %cond.load1 1105; SSE42-NEXT: insertps $16, (%rdi), %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] 1106; SSE42-NEXT: retq 1107; 1108; AVX1OR2-LABEL: expandload_v2f32_v2i1: 1109; AVX1OR2: ## %bb.0: 1110; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1111; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 1112; AVX1OR2-NEXT: vpmovsxdq %xmm1, %xmm1 1113; AVX1OR2-NEXT: vmovmskpd %xmm1, %eax 1114; AVX1OR2-NEXT: testb $1, %al 1115; AVX1OR2-NEXT: jne LBB4_1 1116; AVX1OR2-NEXT: ## %bb.2: ## %else 1117; AVX1OR2-NEXT: testb $2, %al 1118; AVX1OR2-NEXT: jne LBB4_3 1119; AVX1OR2-NEXT: LBB4_4: ## %else2 1120; AVX1OR2-NEXT: retq 1121; AVX1OR2-NEXT: LBB4_1: ## %cond.load 1122; AVX1OR2-NEXT: vmovss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 1123; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 1124; AVX1OR2-NEXT: addq $4, %rdi 1125; AVX1OR2-NEXT: testb $2, %al 1126; AVX1OR2-NEXT: je LBB4_4 1127; AVX1OR2-NEXT: LBB4_3: ## %cond.load1 1128; AVX1OR2-NEXT: vinsertps $16, (%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] 1129; 
AVX1OR2-NEXT: retq 1130; 1131; AVX512F-LABEL: expandload_v2f32_v2i1: 1132; AVX512F: ## %bb.0: 1133; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 1134; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 1135; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k0 1136; AVX512F-NEXT: kshiftlw $14, %k0, %k0 1137; AVX512F-NEXT: kshiftrw $14, %k0, %k1 1138; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} 1139; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 1140; AVX512F-NEXT: vzeroupper 1141; AVX512F-NEXT: retq 1142; 1143; AVX512VLDQ-LABEL: expandload_v2f32_v2i1: 1144; AVX512VLDQ: ## %bb.0: 1145; AVX512VLDQ-NEXT: vptestnmd %xmm1, %xmm1, %k0 1146; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0 1147; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 1148; AVX512VLDQ-NEXT: vexpandps (%rdi), %xmm0 {%k1} 1149; AVX512VLDQ-NEXT: retq 1150; 1151; AVX512VLBW-LABEL: expandload_v2f32_v2i1: 1152; AVX512VLBW: ## %bb.0: 1153; AVX512VLBW-NEXT: vptestnmd %xmm1, %xmm1, %k0 1154; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0 1155; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1 1156; AVX512VLBW-NEXT: vexpandps (%rdi), %xmm0 {%k1} 1157; AVX512VLBW-NEXT: retq 1158 %mask = icmp eq <2 x i32> %trigger, zeroinitializer 1159 %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr %base, <2 x i1> %mask, <2 x float> %src0) 1160 ret <2 x float> %res 1161} 1162 1163define <4 x float> @expandload_v4f32_const(ptr %base, <4 x float> %src0) { 1164; SSE2-LABEL: expandload_v4f32_const: 1165; SSE2: ## %bb.0: 1166; SSE2-NEXT: movsd (%rdi), %xmm1 ## xmm1 = mem[0],zero 1167; SSE2-NEXT: movss 8(%rdi), %xmm2 ## xmm2 = mem[0],zero,zero,zero 1168; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm2[0,3] 1169; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1170; SSE2-NEXT: movaps %xmm1, %xmm0 1171; SSE2-NEXT: retq 1172; 1173; SSE42-LABEL: expandload_v4f32_const: 1174; SSE42: ## %bb.0: 1175; SSE42-NEXT: movsd (%rdi), %xmm1 ## xmm1 = mem[0],zero 1176; SSE42-NEXT: insertps $32, 8(%rdi), %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] 
1177; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1178; SSE42-NEXT: retq 1179; 1180; AVX1OR2-LABEL: expandload_v4f32_const: 1181; AVX1OR2: ## %bb.0: 1182; AVX1OR2-NEXT: vmovsd (%rdi), %xmm1 ## xmm1 = mem[0],zero 1183; AVX1OR2-NEXT: vinsertps $32, 8(%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] 1184; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1185; AVX1OR2-NEXT: retq 1186; 1187; AVX512F-LABEL: expandload_v4f32_const: 1188; AVX512F: ## %bb.0: 1189; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 1190; AVX512F-NEXT: movw $7, %ax 1191; AVX512F-NEXT: kmovw %eax, %k1 1192; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} 1193; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 1194; AVX512F-NEXT: vzeroupper 1195; AVX512F-NEXT: retq 1196; 1197; AVX512VLDQ-LABEL: expandload_v4f32_const: 1198; AVX512VLDQ: ## %bb.0: 1199; AVX512VLDQ-NEXT: movb $7, %al 1200; AVX512VLDQ-NEXT: kmovw %eax, %k1 1201; AVX512VLDQ-NEXT: vexpandps (%rdi), %xmm0 {%k1} 1202; AVX512VLDQ-NEXT: retq 1203; 1204; AVX512VLBW-LABEL: expandload_v4f32_const: 1205; AVX512VLBW: ## %bb.0: 1206; AVX512VLBW-NEXT: movb $7, %al 1207; AVX512VLBW-NEXT: kmovd %eax, %k1 1208; AVX512VLBW-NEXT: vexpandps (%rdi), %xmm0 {%k1} 1209; AVX512VLBW-NEXT: retq 1210 %res = call <4 x float> @llvm.masked.expandload.v4f32(ptr %base, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> %src0) 1211 ret <4 x float>%res 1212} 1213 1214define <16 x float> @expandload_v16f32_const(ptr %base, <16 x float> %src0) { 1215; SSE2-LABEL: expandload_v16f32_const: 1216; SSE2: ## %bb.0: 1217; SSE2-NEXT: movups (%rdi), %xmm0 1218; SSE2-NEXT: movups 16(%rdi), %xmm1 1219; SSE2-NEXT: movsd 44(%rdi), %xmm4 ## xmm4 = mem[0],zero 1220; SSE2-NEXT: movss 52(%rdi), %xmm6 ## xmm6 = mem[0],zero,zero,zero 1221; SSE2-NEXT: movsd 32(%rdi), %xmm5 ## xmm5 = mem[0],zero 1222; SSE2-NEXT: movss 40(%rdi), %xmm7 ## xmm7 = mem[0],zero,zero,zero 1223; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm7[0,3] 1224; SSE2-NEXT: 
shufps {{.*#+}} xmm5 = xmm5[0,1],xmm2[2,0] 1225; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm6[0,3] 1226; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,0] 1227; SSE2-NEXT: movaps %xmm5, %xmm2 1228; SSE2-NEXT: movaps %xmm4, %xmm3 1229; SSE2-NEXT: retq 1230; 1231; SSE42-LABEL: expandload_v16f32_const: 1232; SSE42: ## %bb.0: 1233; SSE42-NEXT: movsd 44(%rdi), %xmm4 ## xmm4 = mem[0],zero 1234; SSE42-NEXT: insertps $32, 52(%rdi), %xmm4 ## xmm4 = xmm4[0,1],mem[0],xmm4[3] 1235; SSE42-NEXT: movsd 32(%rdi), %xmm5 ## xmm5 = mem[0],zero 1236; SSE42-NEXT: insertps $32, 40(%rdi), %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] 1237; SSE42-NEXT: movups (%rdi), %xmm0 1238; SSE42-NEXT: movups 16(%rdi), %xmm1 1239; SSE42-NEXT: blendps {{.*#+}} xmm2 = xmm5[0,1,2],xmm2[3] 1240; SSE42-NEXT: blendps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[3] 1241; SSE42-NEXT: retq 1242; 1243; AVX1OR2-LABEL: expandload_v16f32_const: 1244; AVX1OR2: ## %bb.0: 1245; AVX1OR2-NEXT: vmovsd 44(%rdi), %xmm0 ## xmm0 = mem[0],zero 1246; AVX1OR2-NEXT: vinsertps $32, 52(%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] 1247; AVX1OR2-NEXT: vmovsd 32(%rdi), %xmm2 ## xmm2 = mem[0],zero 1248; AVX1OR2-NEXT: vinsertps $32, 40(%rdi), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] 1249; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 1250; AVX1OR2-NEXT: vmovups (%rdi), %ymm0 1251; AVX1OR2-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7] 1252; AVX1OR2-NEXT: retq 1253; 1254; AVX512F-LABEL: expandload_v16f32_const: 1255; AVX512F: ## %bb.0: 1256; AVX512F-NEXT: movw $30719, %ax ## imm = 0x77FF 1257; AVX512F-NEXT: kmovw %eax, %k1 1258; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} 1259; AVX512F-NEXT: retq 1260; 1261; AVX512VLDQ-LABEL: expandload_v16f32_const: 1262; AVX512VLDQ: ## %bb.0: 1263; AVX512VLDQ-NEXT: movw $30719, %ax ## imm = 0x77FF 1264; AVX512VLDQ-NEXT: kmovw %eax, %k1 1265; AVX512VLDQ-NEXT: vexpandps (%rdi), %zmm0 {%k1} 1266; AVX512VLDQ-NEXT: retq 1267; 1268; AVX512VLBW-LABEL: 
expandload_v16f32_const: 1269; AVX512VLBW: ## %bb.0: 1270; AVX512VLBW-NEXT: movw $30719, %ax ## imm = 0x77FF 1271; AVX512VLBW-NEXT: kmovd %eax, %k1 1272; AVX512VLBW-NEXT: vexpandps (%rdi), %zmm0 {%k1} 1273; AVX512VLBW-NEXT: retq 1274 %res = call <16 x float> @llvm.masked.expandload.v16f32(ptr %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x float> %src0) 1275 ret <16 x float>%res 1276} 1277 1278define <16 x float> @expandload_v16f32_const_undef(ptr %base) { 1279; SSE2-LABEL: expandload_v16f32_const_undef: 1280; SSE2: ## %bb.0: 1281; SSE2-NEXT: movss 40(%rdi), %xmm0 ## xmm0 = mem[0],zero,zero,zero 1282; SSE2-NEXT: movsd 32(%rdi), %xmm2 ## xmm2 = mem[0],zero 1283; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0] 1284; SSE2-NEXT: movups (%rdi), %xmm0 1285; SSE2-NEXT: movups 16(%rdi), %xmm1 1286; SSE2-NEXT: movups 44(%rdi), %xmm3 1287; SSE2-NEXT: retq 1288; 1289; SSE42-LABEL: expandload_v16f32_const_undef: 1290; SSE42: ## %bb.0: 1291; SSE42-NEXT: movsd 32(%rdi), %xmm2 ## xmm2 = mem[0],zero 1292; SSE42-NEXT: insertps $32, 40(%rdi), %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] 1293; SSE42-NEXT: movups (%rdi), %xmm0 1294; SSE42-NEXT: movups 16(%rdi), %xmm1 1295; SSE42-NEXT: movups 44(%rdi), %xmm3 1296; SSE42-NEXT: retq 1297; 1298; AVX1OR2-LABEL: expandload_v16f32_const_undef: 1299; AVX1OR2: ## %bb.0: 1300; AVX1OR2-NEXT: vmovsd 32(%rdi), %xmm0 ## xmm0 = mem[0],zero 1301; AVX1OR2-NEXT: vinsertps $32, 40(%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] 1302; AVX1OR2-NEXT: vinsertf128 $1, 44(%rdi), %ymm0, %ymm1 1303; AVX1OR2-NEXT: vmovups (%rdi), %ymm0 1304; AVX1OR2-NEXT: retq 1305; 1306; AVX512F-LABEL: expandload_v16f32_const_undef: 1307; AVX512F: ## %bb.0: 1308; AVX512F-NEXT: movw $-2049, %ax ## imm = 0xF7FF 1309; AVX512F-NEXT: kmovw %eax, %k1 1310; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} 1311; AVX512F-NEXT: retq 1312; 1313; AVX512VLDQ-LABEL: 
expandload_v16f32_const_undef: 1314; AVX512VLDQ: ## %bb.0: 1315; AVX512VLDQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF 1316; AVX512VLDQ-NEXT: kmovw %eax, %k1 1317; AVX512VLDQ-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} 1318; AVX512VLDQ-NEXT: retq 1319; 1320; AVX512VLBW-LABEL: expandload_v16f32_const_undef: 1321; AVX512VLBW: ## %bb.0: 1322; AVX512VLBW-NEXT: movw $-2049, %ax ## imm = 0xF7FF 1323; AVX512VLBW-NEXT: kmovd %eax, %k1 1324; AVX512VLBW-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} 1325; AVX512VLBW-NEXT: retq 1326 %res = call <16 x float> @llvm.masked.expandload.v16f32(ptr %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 1327 ret <16 x float>%res 1328} 1329 1330 1331define <32 x float> @expandload_v32f32_v32i32(ptr %base, <32 x float> %src0, <32 x i32> %trigger) { 1332; SSE2-LABEL: expandload_v32f32_v32i32: 1333; SSE2: ## %bb.0: 1334; SSE2-NEXT: movq %rdi, %rax 1335; SSE2-NEXT: pxor %xmm8, %xmm8 1336; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1337; SSE2-NEXT: pcmpeqd %xmm8, %xmm9 1338; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 1339; SSE2-NEXT: pcmpeqd %xmm8, %xmm10 1340; SSE2-NEXT: packssdw %xmm9, %xmm10 1341; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1342; SSE2-NEXT: pcmpeqd %xmm8, %xmm9 1343; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 1344; SSE2-NEXT: pcmpeqd %xmm8, %xmm11 1345; SSE2-NEXT: packssdw %xmm9, %xmm11 1346; SSE2-NEXT: packsswb %xmm10, %xmm11 1347; SSE2-NEXT: pmovmskb %xmm11, %edx 1348; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1349; SSE2-NEXT: pcmpeqd %xmm8, %xmm9 1350; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 1351; SSE2-NEXT: pcmpeqd %xmm8, %xmm10 1352; SSE2-NEXT: packssdw %xmm9, %xmm10 1353; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1354; SSE2-NEXT: pcmpeqd %xmm8, %xmm9 1355; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8 1356; SSE2-NEXT: packssdw %xmm9, %xmm8 1357; SSE2-NEXT: packsswb %xmm10, %xmm8 1358; SSE2-NEXT: pmovmskb 
%xmm8, %ecx 1359; SSE2-NEXT: shll $16, %ecx 1360; SSE2-NEXT: orl %edx, %ecx 1361; SSE2-NEXT: testb $1, %cl 1362; SSE2-NEXT: jne LBB8_1 1363; SSE2-NEXT: ## %bb.2: ## %else 1364; SSE2-NEXT: testb $2, %cl 1365; SSE2-NEXT: jne LBB8_3 1366; SSE2-NEXT: LBB8_4: ## %else2 1367; SSE2-NEXT: testb $4, %cl 1368; SSE2-NEXT: jne LBB8_5 1369; SSE2-NEXT: LBB8_6: ## %else6 1370; SSE2-NEXT: testb $8, %cl 1371; SSE2-NEXT: jne LBB8_7 1372; SSE2-NEXT: LBB8_8: ## %else10 1373; SSE2-NEXT: testb $16, %cl 1374; SSE2-NEXT: jne LBB8_9 1375; SSE2-NEXT: LBB8_10: ## %else14 1376; SSE2-NEXT: testb $32, %cl 1377; SSE2-NEXT: jne LBB8_11 1378; SSE2-NEXT: LBB8_12: ## %else18 1379; SSE2-NEXT: testb $64, %cl 1380; SSE2-NEXT: jne LBB8_13 1381; SSE2-NEXT: LBB8_14: ## %else22 1382; SSE2-NEXT: testb %cl, %cl 1383; SSE2-NEXT: js LBB8_15 1384; SSE2-NEXT: LBB8_16: ## %else26 1385; SSE2-NEXT: testl $256, %ecx ## imm = 0x100 1386; SSE2-NEXT: jne LBB8_17 1387; SSE2-NEXT: LBB8_18: ## %else30 1388; SSE2-NEXT: testl $512, %ecx ## imm = 0x200 1389; SSE2-NEXT: jne LBB8_19 1390; SSE2-NEXT: LBB8_20: ## %else34 1391; SSE2-NEXT: testl $1024, %ecx ## imm = 0x400 1392; SSE2-NEXT: jne LBB8_21 1393; SSE2-NEXT: LBB8_22: ## %else38 1394; SSE2-NEXT: testl $2048, %ecx ## imm = 0x800 1395; SSE2-NEXT: jne LBB8_23 1396; SSE2-NEXT: LBB8_24: ## %else42 1397; SSE2-NEXT: testl $4096, %ecx ## imm = 0x1000 1398; SSE2-NEXT: jne LBB8_25 1399; SSE2-NEXT: LBB8_26: ## %else46 1400; SSE2-NEXT: testl $8192, %ecx ## imm = 0x2000 1401; SSE2-NEXT: jne LBB8_27 1402; SSE2-NEXT: LBB8_28: ## %else50 1403; SSE2-NEXT: testl $16384, %ecx ## imm = 0x4000 1404; SSE2-NEXT: jne LBB8_29 1405; SSE2-NEXT: LBB8_30: ## %else54 1406; SSE2-NEXT: testw %cx, %cx 1407; SSE2-NEXT: js LBB8_31 1408; SSE2-NEXT: LBB8_32: ## %else58 1409; SSE2-NEXT: testl $65536, %ecx ## imm = 0x10000 1410; SSE2-NEXT: jne LBB8_33 1411; SSE2-NEXT: LBB8_34: ## %else62 1412; SSE2-NEXT: testl $131072, %ecx ## imm = 0x20000 1413; SSE2-NEXT: jne LBB8_35 1414; SSE2-NEXT: LBB8_36: ## %else66 1415; 
SSE2-NEXT: testl $262144, %ecx ## imm = 0x40000 1416; SSE2-NEXT: jne LBB8_37 1417; SSE2-NEXT: LBB8_38: ## %else70 1418; SSE2-NEXT: testl $524288, %ecx ## imm = 0x80000 1419; SSE2-NEXT: jne LBB8_39 1420; SSE2-NEXT: LBB8_40: ## %else74 1421; SSE2-NEXT: testl $1048576, %ecx ## imm = 0x100000 1422; SSE2-NEXT: jne LBB8_41 1423; SSE2-NEXT: LBB8_42: ## %else78 1424; SSE2-NEXT: testl $2097152, %ecx ## imm = 0x200000 1425; SSE2-NEXT: jne LBB8_43 1426; SSE2-NEXT: LBB8_44: ## %else82 1427; SSE2-NEXT: testl $4194304, %ecx ## imm = 0x400000 1428; SSE2-NEXT: jne LBB8_45 1429; SSE2-NEXT: LBB8_46: ## %else86 1430; SSE2-NEXT: testl $8388608, %ecx ## imm = 0x800000 1431; SSE2-NEXT: jne LBB8_47 1432; SSE2-NEXT: LBB8_48: ## %else90 1433; SSE2-NEXT: testl $16777216, %ecx ## imm = 0x1000000 1434; SSE2-NEXT: jne LBB8_49 1435; SSE2-NEXT: LBB8_50: ## %else94 1436; SSE2-NEXT: testl $33554432, %ecx ## imm = 0x2000000 1437; SSE2-NEXT: jne LBB8_51 1438; SSE2-NEXT: LBB8_52: ## %else98 1439; SSE2-NEXT: testl $67108864, %ecx ## imm = 0x4000000 1440; SSE2-NEXT: jne LBB8_53 1441; SSE2-NEXT: LBB8_54: ## %else102 1442; SSE2-NEXT: testl $134217728, %ecx ## imm = 0x8000000 1443; SSE2-NEXT: jne LBB8_55 1444; SSE2-NEXT: LBB8_56: ## %else106 1445; SSE2-NEXT: testl $268435456, %ecx ## imm = 0x10000000 1446; SSE2-NEXT: jne LBB8_57 1447; SSE2-NEXT: LBB8_58: ## %else110 1448; SSE2-NEXT: testl $536870912, %ecx ## imm = 0x20000000 1449; SSE2-NEXT: jne LBB8_59 1450; SSE2-NEXT: LBB8_60: ## %else114 1451; SSE2-NEXT: testl $1073741824, %ecx ## imm = 0x40000000 1452; SSE2-NEXT: jne LBB8_61 1453; SSE2-NEXT: LBB8_62: ## %else118 1454; SSE2-NEXT: testl $-2147483648, %ecx ## imm = 0x80000000 1455; SSE2-NEXT: je LBB8_64 1456; SSE2-NEXT: LBB8_63: ## %cond.load121 1457; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1458; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm7[2,3] 1459; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,1],xmm8[2,0] 1460; SSE2-NEXT: LBB8_64: ## %else122 1461; SSE2-NEXT: movaps %xmm0, 
(%rax) 1462; SSE2-NEXT: movaps %xmm1, 16(%rax) 1463; SSE2-NEXT: movaps %xmm2, 32(%rax) 1464; SSE2-NEXT: movaps %xmm3, 48(%rax) 1465; SSE2-NEXT: movaps %xmm4, 64(%rax) 1466; SSE2-NEXT: movaps %xmm5, 80(%rax) 1467; SSE2-NEXT: movaps %xmm6, 96(%rax) 1468; SSE2-NEXT: movaps %xmm7, 112(%rax) 1469; SSE2-NEXT: retq 1470; SSE2-NEXT: LBB8_1: ## %cond.load 1471; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1472; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm8[0],xmm0[1,2,3] 1473; SSE2-NEXT: addq $4, %rsi 1474; SSE2-NEXT: testb $2, %cl 1475; SSE2-NEXT: je LBB8_4 1476; SSE2-NEXT: LBB8_3: ## %cond.load1 1477; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1478; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm0[0] 1479; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm0[2,3] 1480; SSE2-NEXT: addq $4, %rsi 1481; SSE2-NEXT: movaps %xmm8, %xmm0 1482; SSE2-NEXT: testb $4, %cl 1483; SSE2-NEXT: je LBB8_6 1484; SSE2-NEXT: LBB8_5: ## %cond.load5 1485; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1486; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm0[3,0] 1487; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm8[0,2] 1488; SSE2-NEXT: addq $4, %rsi 1489; SSE2-NEXT: testb $8, %cl 1490; SSE2-NEXT: je LBB8_8 1491; SSE2-NEXT: LBB8_7: ## %cond.load9 1492; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1493; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm0[2,3] 1494; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm8[2,0] 1495; SSE2-NEXT: addq $4, %rsi 1496; SSE2-NEXT: testb $16, %cl 1497; SSE2-NEXT: je LBB8_10 1498; SSE2-NEXT: LBB8_9: ## %cond.load13 1499; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1500; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm8[0],xmm1[1,2,3] 1501; SSE2-NEXT: addq $4, %rsi 1502; SSE2-NEXT: testb $32, %cl 1503; SSE2-NEXT: je LBB8_12 1504; SSE2-NEXT: LBB8_11: ## %cond.load17 1505; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1506; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm1[0] 1507; 
SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm1[2,3] 1508; SSE2-NEXT: addq $4, %rsi 1509; SSE2-NEXT: movaps %xmm8, %xmm1 1510; SSE2-NEXT: testb $64, %cl 1511; SSE2-NEXT: je LBB8_14 1512; SSE2-NEXT: LBB8_13: ## %cond.load21 1513; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1514; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm1[3,0] 1515; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm8[0,2] 1516; SSE2-NEXT: addq $4, %rsi 1517; SSE2-NEXT: testb %cl, %cl 1518; SSE2-NEXT: jns LBB8_16 1519; SSE2-NEXT: LBB8_15: ## %cond.load25 1520; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1521; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm1[2,3] 1522; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm8[2,0] 1523; SSE2-NEXT: addq $4, %rsi 1524; SSE2-NEXT: testl $256, %ecx ## imm = 0x100 1525; SSE2-NEXT: je LBB8_18 1526; SSE2-NEXT: LBB8_17: ## %cond.load29 1527; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1528; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm8[0],xmm2[1,2,3] 1529; SSE2-NEXT: addq $4, %rsi 1530; SSE2-NEXT: testl $512, %ecx ## imm = 0x200 1531; SSE2-NEXT: je LBB8_20 1532; SSE2-NEXT: LBB8_19: ## %cond.load33 1533; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1534; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm2[0] 1535; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm2[2,3] 1536; SSE2-NEXT: addq $4, %rsi 1537; SSE2-NEXT: movaps %xmm8, %xmm2 1538; SSE2-NEXT: testl $1024, %ecx ## imm = 0x400 1539; SSE2-NEXT: je LBB8_22 1540; SSE2-NEXT: LBB8_21: ## %cond.load37 1541; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1542; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm2[3,0] 1543; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm8[0,2] 1544; SSE2-NEXT: addq $4, %rsi 1545; SSE2-NEXT: testl $2048, %ecx ## imm = 0x800 1546; SSE2-NEXT: je LBB8_24 1547; SSE2-NEXT: LBB8_23: ## %cond.load41 1548; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1549; SSE2-NEXT: shufps {{.*#+}} xmm8 = 
xmm8[0,1],xmm2[2,3] 1550; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm8[2,0] 1551; SSE2-NEXT: addq $4, %rsi 1552; SSE2-NEXT: testl $4096, %ecx ## imm = 0x1000 1553; SSE2-NEXT: je LBB8_26 1554; SSE2-NEXT: LBB8_25: ## %cond.load45 1555; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1556; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm8[0],xmm3[1,2,3] 1557; SSE2-NEXT: addq $4, %rsi 1558; SSE2-NEXT: testl $8192, %ecx ## imm = 0x2000 1559; SSE2-NEXT: je LBB8_28 1560; SSE2-NEXT: LBB8_27: ## %cond.load49 1561; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1562; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm3[0] 1563; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm3[2,3] 1564; SSE2-NEXT: addq $4, %rsi 1565; SSE2-NEXT: movaps %xmm8, %xmm3 1566; SSE2-NEXT: testl $16384, %ecx ## imm = 0x4000 1567; SSE2-NEXT: je LBB8_30 1568; SSE2-NEXT: LBB8_29: ## %cond.load53 1569; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1570; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm3[3,0] 1571; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm8[0,2] 1572; SSE2-NEXT: addq $4, %rsi 1573; SSE2-NEXT: testw %cx, %cx 1574; SSE2-NEXT: jns LBB8_32 1575; SSE2-NEXT: LBB8_31: ## %cond.load57 1576; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1577; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm3[2,3] 1578; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm8[2,0] 1579; SSE2-NEXT: addq $4, %rsi 1580; SSE2-NEXT: testl $65536, %ecx ## imm = 0x10000 1581; SSE2-NEXT: je LBB8_34 1582; SSE2-NEXT: LBB8_33: ## %cond.load61 1583; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1584; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm8[0],xmm4[1,2,3] 1585; SSE2-NEXT: addq $4, %rsi 1586; SSE2-NEXT: testl $131072, %ecx ## imm = 0x20000 1587; SSE2-NEXT: je LBB8_36 1588; SSE2-NEXT: LBB8_35: ## %cond.load65 1589; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1590; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm4[0] 1591; SSE2-NEXT: shufps {{.*#+}} 
xmm8 = xmm8[2,0],xmm4[2,3] 1592; SSE2-NEXT: addq $4, %rsi 1593; SSE2-NEXT: movaps %xmm8, %xmm4 1594; SSE2-NEXT: testl $262144, %ecx ## imm = 0x40000 1595; SSE2-NEXT: je LBB8_38 1596; SSE2-NEXT: LBB8_37: ## %cond.load69 1597; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1598; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm4[3,0] 1599; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm8[0,2] 1600; SSE2-NEXT: addq $4, %rsi 1601; SSE2-NEXT: testl $524288, %ecx ## imm = 0x80000 1602; SSE2-NEXT: je LBB8_40 1603; SSE2-NEXT: LBB8_39: ## %cond.load73 1604; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1605; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm4[2,3] 1606; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm8[2,0] 1607; SSE2-NEXT: addq $4, %rsi 1608; SSE2-NEXT: testl $1048576, %ecx ## imm = 0x100000 1609; SSE2-NEXT: je LBB8_42 1610; SSE2-NEXT: LBB8_41: ## %cond.load77 1611; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1612; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm8[0],xmm5[1,2,3] 1613; SSE2-NEXT: addq $4, %rsi 1614; SSE2-NEXT: testl $2097152, %ecx ## imm = 0x200000 1615; SSE2-NEXT: je LBB8_44 1616; SSE2-NEXT: LBB8_43: ## %cond.load81 1617; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1618; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm5[0] 1619; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm5[2,3] 1620; SSE2-NEXT: addq $4, %rsi 1621; SSE2-NEXT: movaps %xmm8, %xmm5 1622; SSE2-NEXT: testl $4194304, %ecx ## imm = 0x400000 1623; SSE2-NEXT: je LBB8_46 1624; SSE2-NEXT: LBB8_45: ## %cond.load85 1625; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1626; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm5[3,0] 1627; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,1],xmm8[0,2] 1628; SSE2-NEXT: addq $4, %rsi 1629; SSE2-NEXT: testl $8388608, %ecx ## imm = 0x800000 1630; SSE2-NEXT: je LBB8_48 1631; SSE2-NEXT: LBB8_47: ## %cond.load89 1632; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1633; 
SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm5[2,3] 1634; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,1],xmm8[2,0] 1635; SSE2-NEXT: addq $4, %rsi 1636; SSE2-NEXT: testl $16777216, %ecx ## imm = 0x1000000 1637; SSE2-NEXT: je LBB8_50 1638; SSE2-NEXT: LBB8_49: ## %cond.load93 1639; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1640; SSE2-NEXT: movss {{.*#+}} xmm6 = xmm8[0],xmm6[1,2,3] 1641; SSE2-NEXT: addq $4, %rsi 1642; SSE2-NEXT: testl $33554432, %ecx ## imm = 0x2000000 1643; SSE2-NEXT: je LBB8_52 1644; SSE2-NEXT: LBB8_51: ## %cond.load97 1645; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1646; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm6[0] 1647; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm6[2,3] 1648; SSE2-NEXT: addq $4, %rsi 1649; SSE2-NEXT: movaps %xmm8, %xmm6 1650; SSE2-NEXT: testl $67108864, %ecx ## imm = 0x4000000 1651; SSE2-NEXT: je LBB8_54 1652; SSE2-NEXT: LBB8_53: ## %cond.load101 1653; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1654; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm6[3,0] 1655; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm8[0,2] 1656; SSE2-NEXT: addq $4, %rsi 1657; SSE2-NEXT: testl $134217728, %ecx ## imm = 0x8000000 1658; SSE2-NEXT: je LBB8_56 1659; SSE2-NEXT: LBB8_55: ## %cond.load105 1660; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1661; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,1],xmm6[2,3] 1662; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm8[2,0] 1663; SSE2-NEXT: addq $4, %rsi 1664; SSE2-NEXT: testl $268435456, %ecx ## imm = 0x10000000 1665; SSE2-NEXT: je LBB8_58 1666; SSE2-NEXT: LBB8_57: ## %cond.load109 1667; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1668; SSE2-NEXT: movss {{.*#+}} xmm7 = xmm8[0],xmm7[1,2,3] 1669; SSE2-NEXT: addq $4, %rsi 1670; SSE2-NEXT: testl $536870912, %ecx ## imm = 0x20000000 1671; SSE2-NEXT: je LBB8_60 1672; SSE2-NEXT: LBB8_59: ## %cond.load113 1673; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = 
mem[0],zero,zero,zero 1674; SSE2-NEXT: movlhps {{.*#+}} xmm8 = xmm8[0],xmm7[0] 1675; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[2,0],xmm7[2,3] 1676; SSE2-NEXT: addq $4, %rsi 1677; SSE2-NEXT: movaps %xmm8, %xmm7 1678; SSE2-NEXT: testl $1073741824, %ecx ## imm = 0x40000000 1679; SSE2-NEXT: je LBB8_62 1680; SSE2-NEXT: LBB8_61: ## %cond.load117 1681; SSE2-NEXT: movss (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1682; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,0],xmm7[3,0] 1683; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,1],xmm8[0,2] 1684; SSE2-NEXT: addq $4, %rsi 1685; SSE2-NEXT: testl $-2147483648, %ecx ## imm = 0x80000000 1686; SSE2-NEXT: jne LBB8_63 1687; SSE2-NEXT: jmp LBB8_64 1688; 1689; SSE42-LABEL: expandload_v32f32_v32i32: 1690; SSE42: ## %bb.0: 1691; SSE42-NEXT: movq %rdi, %rax 1692; SSE42-NEXT: pxor %xmm8, %xmm8 1693; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1694; SSE42-NEXT: pcmpeqd %xmm8, %xmm9 1695; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 1696; SSE42-NEXT: pcmpeqd %xmm8, %xmm10 1697; SSE42-NEXT: packssdw %xmm9, %xmm10 1698; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1699; SSE42-NEXT: pcmpeqd %xmm8, %xmm9 1700; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 1701; SSE42-NEXT: pcmpeqd %xmm8, %xmm11 1702; SSE42-NEXT: packssdw %xmm9, %xmm11 1703; SSE42-NEXT: packsswb %xmm10, %xmm11 1704; SSE42-NEXT: pmovmskb %xmm11, %edx 1705; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1706; SSE42-NEXT: pcmpeqd %xmm8, %xmm9 1707; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 1708; SSE42-NEXT: pcmpeqd %xmm8, %xmm10 1709; SSE42-NEXT: packssdw %xmm9, %xmm10 1710; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1711; SSE42-NEXT: pcmpeqd %xmm8, %xmm9 1712; SSE42-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8 1713; SSE42-NEXT: packssdw %xmm9, %xmm8 1714; SSE42-NEXT: packsswb %xmm10, %xmm8 1715; SSE42-NEXT: pmovmskb %xmm8, %ecx 1716; SSE42-NEXT: shll $16, %ecx 1717; SSE42-NEXT: orl %edx, %ecx 1718; SSE42-NEXT: testb $1, %cl 1719; SSE42-NEXT: jne LBB8_1 1720; SSE42-NEXT: ## %bb.2: ## %else 1721; 
SSE42-NEXT: testb $2, %cl 1722; SSE42-NEXT: jne LBB8_3 1723; SSE42-NEXT: LBB8_4: ## %else2 1724; SSE42-NEXT: testb $4, %cl 1725; SSE42-NEXT: jne LBB8_5 1726; SSE42-NEXT: LBB8_6: ## %else6 1727; SSE42-NEXT: testb $8, %cl 1728; SSE42-NEXT: jne LBB8_7 1729; SSE42-NEXT: LBB8_8: ## %else10 1730; SSE42-NEXT: testb $16, %cl 1731; SSE42-NEXT: jne LBB8_9 1732; SSE42-NEXT: LBB8_10: ## %else14 1733; SSE42-NEXT: testb $32, %cl 1734; SSE42-NEXT: jne LBB8_11 1735; SSE42-NEXT: LBB8_12: ## %else18 1736; SSE42-NEXT: testb $64, %cl 1737; SSE42-NEXT: jne LBB8_13 1738; SSE42-NEXT: LBB8_14: ## %else22 1739; SSE42-NEXT: testb %cl, %cl 1740; SSE42-NEXT: js LBB8_15 1741; SSE42-NEXT: LBB8_16: ## %else26 1742; SSE42-NEXT: testl $256, %ecx ## imm = 0x100 1743; SSE42-NEXT: jne LBB8_17 1744; SSE42-NEXT: LBB8_18: ## %else30 1745; SSE42-NEXT: testl $512, %ecx ## imm = 0x200 1746; SSE42-NEXT: jne LBB8_19 1747; SSE42-NEXT: LBB8_20: ## %else34 1748; SSE42-NEXT: testl $1024, %ecx ## imm = 0x400 1749; SSE42-NEXT: jne LBB8_21 1750; SSE42-NEXT: LBB8_22: ## %else38 1751; SSE42-NEXT: testl $2048, %ecx ## imm = 0x800 1752; SSE42-NEXT: jne LBB8_23 1753; SSE42-NEXT: LBB8_24: ## %else42 1754; SSE42-NEXT: testl $4096, %ecx ## imm = 0x1000 1755; SSE42-NEXT: jne LBB8_25 1756; SSE42-NEXT: LBB8_26: ## %else46 1757; SSE42-NEXT: testl $8192, %ecx ## imm = 0x2000 1758; SSE42-NEXT: jne LBB8_27 1759; SSE42-NEXT: LBB8_28: ## %else50 1760; SSE42-NEXT: testl $16384, %ecx ## imm = 0x4000 1761; SSE42-NEXT: jne LBB8_29 1762; SSE42-NEXT: LBB8_30: ## %else54 1763; SSE42-NEXT: testw %cx, %cx 1764; SSE42-NEXT: js LBB8_31 1765; SSE42-NEXT: LBB8_32: ## %else58 1766; SSE42-NEXT: testl $65536, %ecx ## imm = 0x10000 1767; SSE42-NEXT: jne LBB8_33 1768; SSE42-NEXT: LBB8_34: ## %else62 1769; SSE42-NEXT: testl $131072, %ecx ## imm = 0x20000 1770; SSE42-NEXT: jne LBB8_35 1771; SSE42-NEXT: LBB8_36: ## %else66 1772; SSE42-NEXT: testl $262144, %ecx ## imm = 0x40000 1773; SSE42-NEXT: jne LBB8_37 1774; SSE42-NEXT: LBB8_38: ## %else70 1775; 
SSE42-NEXT: testl $524288, %ecx ## imm = 0x80000 1776; SSE42-NEXT: jne LBB8_39 1777; SSE42-NEXT: LBB8_40: ## %else74 1778; SSE42-NEXT: testl $1048576, %ecx ## imm = 0x100000 1779; SSE42-NEXT: jne LBB8_41 1780; SSE42-NEXT: LBB8_42: ## %else78 1781; SSE42-NEXT: testl $2097152, %ecx ## imm = 0x200000 1782; SSE42-NEXT: jne LBB8_43 1783; SSE42-NEXT: LBB8_44: ## %else82 1784; SSE42-NEXT: testl $4194304, %ecx ## imm = 0x400000 1785; SSE42-NEXT: jne LBB8_45 1786; SSE42-NEXT: LBB8_46: ## %else86 1787; SSE42-NEXT: testl $8388608, %ecx ## imm = 0x800000 1788; SSE42-NEXT: jne LBB8_47 1789; SSE42-NEXT: LBB8_48: ## %else90 1790; SSE42-NEXT: testl $16777216, %ecx ## imm = 0x1000000 1791; SSE42-NEXT: jne LBB8_49 1792; SSE42-NEXT: LBB8_50: ## %else94 1793; SSE42-NEXT: testl $33554432, %ecx ## imm = 0x2000000 1794; SSE42-NEXT: jne LBB8_51 1795; SSE42-NEXT: LBB8_52: ## %else98 1796; SSE42-NEXT: testl $67108864, %ecx ## imm = 0x4000000 1797; SSE42-NEXT: jne LBB8_53 1798; SSE42-NEXT: LBB8_54: ## %else102 1799; SSE42-NEXT: testl $134217728, %ecx ## imm = 0x8000000 1800; SSE42-NEXT: jne LBB8_55 1801; SSE42-NEXT: LBB8_56: ## %else106 1802; SSE42-NEXT: testl $268435456, %ecx ## imm = 0x10000000 1803; SSE42-NEXT: jne LBB8_57 1804; SSE42-NEXT: LBB8_58: ## %else110 1805; SSE42-NEXT: testl $536870912, %ecx ## imm = 0x20000000 1806; SSE42-NEXT: jne LBB8_59 1807; SSE42-NEXT: LBB8_60: ## %else114 1808; SSE42-NEXT: testl $1073741824, %ecx ## imm = 0x40000000 1809; SSE42-NEXT: jne LBB8_61 1810; SSE42-NEXT: LBB8_62: ## %else118 1811; SSE42-NEXT: testl $-2147483648, %ecx ## imm = 0x80000000 1812; SSE42-NEXT: je LBB8_64 1813; SSE42-NEXT: LBB8_63: ## %cond.load121 1814; SSE42-NEXT: insertps $48, (%rsi), %xmm7 ## xmm7 = xmm7[0,1,2],mem[0] 1815; SSE42-NEXT: LBB8_64: ## %else122 1816; SSE42-NEXT: movaps %xmm0, (%rax) 1817; SSE42-NEXT: movaps %xmm1, 16(%rax) 1818; SSE42-NEXT: movaps %xmm2, 32(%rax) 1819; SSE42-NEXT: movaps %xmm3, 48(%rax) 1820; SSE42-NEXT: movaps %xmm4, 64(%rax) 1821; SSE42-NEXT: movaps 
%xmm5, 80(%rax) 1822; SSE42-NEXT: movaps %xmm6, 96(%rax) 1823; SSE42-NEXT: movaps %xmm7, 112(%rax) 1824; SSE42-NEXT: retq 1825; SSE42-NEXT: LBB8_1: ## %cond.load 1826; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1827; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm8[0,1],xmm0[2,3,4,5,6,7] 1828; SSE42-NEXT: addq $4, %rsi 1829; SSE42-NEXT: testb $2, %cl 1830; SSE42-NEXT: je LBB8_4 1831; SSE42-NEXT: LBB8_3: ## %cond.load1 1832; SSE42-NEXT: insertps $16, (%rsi), %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] 1833; SSE42-NEXT: addq $4, %rsi 1834; SSE42-NEXT: testb $4, %cl 1835; SSE42-NEXT: je LBB8_6 1836; SSE42-NEXT: LBB8_5: ## %cond.load5 1837; SSE42-NEXT: insertps $32, (%rsi), %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] 1838; SSE42-NEXT: addq $4, %rsi 1839; SSE42-NEXT: testb $8, %cl 1840; SSE42-NEXT: je LBB8_8 1841; SSE42-NEXT: LBB8_7: ## %cond.load9 1842; SSE42-NEXT: insertps $48, (%rsi), %xmm0 ## xmm0 = xmm0[0,1,2],mem[0] 1843; SSE42-NEXT: addq $4, %rsi 1844; SSE42-NEXT: testb $16, %cl 1845; SSE42-NEXT: je LBB8_10 1846; SSE42-NEXT: LBB8_9: ## %cond.load13 1847; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1848; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm8[0,1],xmm1[2,3,4,5,6,7] 1849; SSE42-NEXT: addq $4, %rsi 1850; SSE42-NEXT: testb $32, %cl 1851; SSE42-NEXT: je LBB8_12 1852; SSE42-NEXT: LBB8_11: ## %cond.load17 1853; SSE42-NEXT: insertps $16, (%rsi), %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] 1854; SSE42-NEXT: addq $4, %rsi 1855; SSE42-NEXT: testb $64, %cl 1856; SSE42-NEXT: je LBB8_14 1857; SSE42-NEXT: LBB8_13: ## %cond.load21 1858; SSE42-NEXT: insertps $32, (%rsi), %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] 1859; SSE42-NEXT: addq $4, %rsi 1860; SSE42-NEXT: testb %cl, %cl 1861; SSE42-NEXT: jns LBB8_16 1862; SSE42-NEXT: LBB8_15: ## %cond.load25 1863; SSE42-NEXT: insertps $48, (%rsi), %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] 1864; SSE42-NEXT: addq $4, %rsi 1865; SSE42-NEXT: testl $256, %ecx ## imm = 0x100 1866; SSE42-NEXT: je LBB8_18 1867; SSE42-NEXT: LBB8_17: 
## %cond.load29 1868; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1869; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm8[0,1],xmm2[2,3,4,5,6,7] 1870; SSE42-NEXT: addq $4, %rsi 1871; SSE42-NEXT: testl $512, %ecx ## imm = 0x200 1872; SSE42-NEXT: je LBB8_20 1873; SSE42-NEXT: LBB8_19: ## %cond.load33 1874; SSE42-NEXT: insertps $16, (%rsi), %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] 1875; SSE42-NEXT: addq $4, %rsi 1876; SSE42-NEXT: testl $1024, %ecx ## imm = 0x400 1877; SSE42-NEXT: je LBB8_22 1878; SSE42-NEXT: LBB8_21: ## %cond.load37 1879; SSE42-NEXT: insertps $32, (%rsi), %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] 1880; SSE42-NEXT: addq $4, %rsi 1881; SSE42-NEXT: testl $2048, %ecx ## imm = 0x800 1882; SSE42-NEXT: je LBB8_24 1883; SSE42-NEXT: LBB8_23: ## %cond.load41 1884; SSE42-NEXT: insertps $48, (%rsi), %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] 1885; SSE42-NEXT: addq $4, %rsi 1886; SSE42-NEXT: testl $4096, %ecx ## imm = 0x1000 1887; SSE42-NEXT: je LBB8_26 1888; SSE42-NEXT: LBB8_25: ## %cond.load45 1889; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1890; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm8[0,1],xmm3[2,3,4,5,6,7] 1891; SSE42-NEXT: addq $4, %rsi 1892; SSE42-NEXT: testl $8192, %ecx ## imm = 0x2000 1893; SSE42-NEXT: je LBB8_28 1894; SSE42-NEXT: LBB8_27: ## %cond.load49 1895; SSE42-NEXT: insertps $16, (%rsi), %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] 1896; SSE42-NEXT: addq $4, %rsi 1897; SSE42-NEXT: testl $16384, %ecx ## imm = 0x4000 1898; SSE42-NEXT: je LBB8_30 1899; SSE42-NEXT: LBB8_29: ## %cond.load53 1900; SSE42-NEXT: insertps $32, (%rsi), %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] 1901; SSE42-NEXT: addq $4, %rsi 1902; SSE42-NEXT: testw %cx, %cx 1903; SSE42-NEXT: jns LBB8_32 1904; SSE42-NEXT: LBB8_31: ## %cond.load57 1905; SSE42-NEXT: insertps $48, (%rsi), %xmm3 ## xmm3 = xmm3[0,1,2],mem[0] 1906; SSE42-NEXT: addq $4, %rsi 1907; SSE42-NEXT: testl $65536, %ecx ## imm = 0x10000 1908; SSE42-NEXT: je LBB8_34 1909; SSE42-NEXT: LBB8_33: ## %cond.load61 
1910; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1911; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm8[0,1],xmm4[2,3,4,5,6,7] 1912; SSE42-NEXT: addq $4, %rsi 1913; SSE42-NEXT: testl $131072, %ecx ## imm = 0x20000 1914; SSE42-NEXT: je LBB8_36 1915; SSE42-NEXT: LBB8_35: ## %cond.load65 1916; SSE42-NEXT: insertps $16, (%rsi), %xmm4 ## xmm4 = xmm4[0],mem[0],xmm4[2,3] 1917; SSE42-NEXT: addq $4, %rsi 1918; SSE42-NEXT: testl $262144, %ecx ## imm = 0x40000 1919; SSE42-NEXT: je LBB8_38 1920; SSE42-NEXT: LBB8_37: ## %cond.load69 1921; SSE42-NEXT: insertps $32, (%rsi), %xmm4 ## xmm4 = xmm4[0,1],mem[0],xmm4[3] 1922; SSE42-NEXT: addq $4, %rsi 1923; SSE42-NEXT: testl $524288, %ecx ## imm = 0x80000 1924; SSE42-NEXT: je LBB8_40 1925; SSE42-NEXT: LBB8_39: ## %cond.load73 1926; SSE42-NEXT: insertps $48, (%rsi), %xmm4 ## xmm4 = xmm4[0,1,2],mem[0] 1927; SSE42-NEXT: addq $4, %rsi 1928; SSE42-NEXT: testl $1048576, %ecx ## imm = 0x100000 1929; SSE42-NEXT: je LBB8_42 1930; SSE42-NEXT: LBB8_41: ## %cond.load77 1931; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1932; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm8[0,1],xmm5[2,3,4,5,6,7] 1933; SSE42-NEXT: addq $4, %rsi 1934; SSE42-NEXT: testl $2097152, %ecx ## imm = 0x200000 1935; SSE42-NEXT: je LBB8_44 1936; SSE42-NEXT: LBB8_43: ## %cond.load81 1937; SSE42-NEXT: insertps $16, (%rsi), %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] 1938; SSE42-NEXT: addq $4, %rsi 1939; SSE42-NEXT: testl $4194304, %ecx ## imm = 0x400000 1940; SSE42-NEXT: je LBB8_46 1941; SSE42-NEXT: LBB8_45: ## %cond.load85 1942; SSE42-NEXT: insertps $32, (%rsi), %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] 1943; SSE42-NEXT: addq $4, %rsi 1944; SSE42-NEXT: testl $8388608, %ecx ## imm = 0x800000 1945; SSE42-NEXT: je LBB8_48 1946; SSE42-NEXT: LBB8_47: ## %cond.load89 1947; SSE42-NEXT: insertps $48, (%rsi), %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] 1948; SSE42-NEXT: addq $4, %rsi 1949; SSE42-NEXT: testl $16777216, %ecx ## imm = 0x1000000 1950; SSE42-NEXT: je LBB8_50 1951; 
SSE42-NEXT: LBB8_49: ## %cond.load93 1952; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1953; SSE42-NEXT: pblendw {{.*#+}} xmm6 = xmm8[0,1],xmm6[2,3,4,5,6,7] 1954; SSE42-NEXT: addq $4, %rsi 1955; SSE42-NEXT: testl $33554432, %ecx ## imm = 0x2000000 1956; SSE42-NEXT: je LBB8_52 1957; SSE42-NEXT: LBB8_51: ## %cond.load97 1958; SSE42-NEXT: insertps $16, (%rsi), %xmm6 ## xmm6 = xmm6[0],mem[0],xmm6[2,3] 1959; SSE42-NEXT: addq $4, %rsi 1960; SSE42-NEXT: testl $67108864, %ecx ## imm = 0x4000000 1961; SSE42-NEXT: je LBB8_54 1962; SSE42-NEXT: LBB8_53: ## %cond.load101 1963; SSE42-NEXT: insertps $32, (%rsi), %xmm6 ## xmm6 = xmm6[0,1],mem[0],xmm6[3] 1964; SSE42-NEXT: addq $4, %rsi 1965; SSE42-NEXT: testl $134217728, %ecx ## imm = 0x8000000 1966; SSE42-NEXT: je LBB8_56 1967; SSE42-NEXT: LBB8_55: ## %cond.load105 1968; SSE42-NEXT: insertps $48, (%rsi), %xmm6 ## xmm6 = xmm6[0,1,2],mem[0] 1969; SSE42-NEXT: addq $4, %rsi 1970; SSE42-NEXT: testl $268435456, %ecx ## imm = 0x10000000 1971; SSE42-NEXT: je LBB8_58 1972; SSE42-NEXT: LBB8_57: ## %cond.load109 1973; SSE42-NEXT: movd (%rsi), %xmm8 ## xmm8 = mem[0],zero,zero,zero 1974; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm8[0,1],xmm7[2,3,4,5,6,7] 1975; SSE42-NEXT: addq $4, %rsi 1976; SSE42-NEXT: testl $536870912, %ecx ## imm = 0x20000000 1977; SSE42-NEXT: je LBB8_60 1978; SSE42-NEXT: LBB8_59: ## %cond.load113 1979; SSE42-NEXT: insertps $16, (%rsi), %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] 1980; SSE42-NEXT: addq $4, %rsi 1981; SSE42-NEXT: testl $1073741824, %ecx ## imm = 0x40000000 1982; SSE42-NEXT: je LBB8_62 1983; SSE42-NEXT: LBB8_61: ## %cond.load117 1984; SSE42-NEXT: insertps $32, (%rsi), %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] 1985; SSE42-NEXT: addq $4, %rsi 1986; SSE42-NEXT: testl $-2147483648, %ecx ## imm = 0x80000000 1987; SSE42-NEXT: jne LBB8_63 1988; SSE42-NEXT: jmp LBB8_64 1989; 1990; AVX1-LABEL: expandload_v32f32_v32i32: 1991; AVX1: ## %bb.0: 1992; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm8 1993; AVX1-NEXT: 
vpxor %xmm9, %xmm9, %xmm9 1994; AVX1-NEXT: vpcmpeqd %xmm9, %xmm8, %xmm8 1995; AVX1-NEXT: vpcmpeqd %xmm5, %xmm9, %xmm5 1996; AVX1-NEXT: vpackssdw %xmm8, %xmm5, %xmm5 1997; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8 1998; AVX1-NEXT: vpcmpeqd %xmm9, %xmm8, %xmm8 1999; AVX1-NEXT: vpcmpeqd %xmm4, %xmm9, %xmm4 2000; AVX1-NEXT: vpackssdw %xmm8, %xmm4, %xmm4 2001; AVX1-NEXT: vpacksswb %xmm5, %xmm4, %xmm4 2002; AVX1-NEXT: vpmovmskb %xmm4, %ecx 2003; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm4 2004; AVX1-NEXT: vpcmpeqd %xmm4, %xmm9, %xmm4 2005; AVX1-NEXT: vpcmpeqd %xmm7, %xmm9, %xmm5 2006; AVX1-NEXT: vpackssdw %xmm4, %xmm5, %xmm4 2007; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm5 2008; AVX1-NEXT: vpcmpeqd %xmm5, %xmm9, %xmm5 2009; AVX1-NEXT: vpcmpeqd %xmm6, %xmm9, %xmm6 2010; AVX1-NEXT: vpackssdw %xmm5, %xmm6, %xmm5 2011; AVX1-NEXT: vpacksswb %xmm4, %xmm5, %xmm4 2012; AVX1-NEXT: vpmovmskb %xmm4, %eax 2013; AVX1-NEXT: shll $16, %eax 2014; AVX1-NEXT: orl %ecx, %eax 2015; AVX1-NEXT: testb $1, %al 2016; AVX1-NEXT: jne LBB8_1 2017; AVX1-NEXT: ## %bb.2: ## %else 2018; AVX1-NEXT: testb $2, %al 2019; AVX1-NEXT: jne LBB8_3 2020; AVX1-NEXT: LBB8_4: ## %else2 2021; AVX1-NEXT: testb $4, %al 2022; AVX1-NEXT: jne LBB8_5 2023; AVX1-NEXT: LBB8_6: ## %else6 2024; AVX1-NEXT: testb $8, %al 2025; AVX1-NEXT: jne LBB8_7 2026; AVX1-NEXT: LBB8_8: ## %else10 2027; AVX1-NEXT: testb $16, %al 2028; AVX1-NEXT: jne LBB8_9 2029; AVX1-NEXT: LBB8_10: ## %else14 2030; AVX1-NEXT: testb $32, %al 2031; AVX1-NEXT: jne LBB8_11 2032; AVX1-NEXT: LBB8_12: ## %else18 2033; AVX1-NEXT: testb $64, %al 2034; AVX1-NEXT: jne LBB8_13 2035; AVX1-NEXT: LBB8_14: ## %else22 2036; AVX1-NEXT: testb %al, %al 2037; AVX1-NEXT: js LBB8_15 2038; AVX1-NEXT: LBB8_16: ## %else26 2039; AVX1-NEXT: testl $256, %eax ## imm = 0x100 2040; AVX1-NEXT: jne LBB8_17 2041; AVX1-NEXT: LBB8_18: ## %else30 2042; AVX1-NEXT: testl $512, %eax ## imm = 0x200 2043; AVX1-NEXT: jne LBB8_19 2044; AVX1-NEXT: LBB8_20: ## %else34 2045; AVX1-NEXT: testl $1024, %eax ## imm 
= 0x400 2046; AVX1-NEXT: jne LBB8_21 2047; AVX1-NEXT: LBB8_22: ## %else38 2048; AVX1-NEXT: testl $2048, %eax ## imm = 0x800 2049; AVX1-NEXT: jne LBB8_23 2050; AVX1-NEXT: LBB8_24: ## %else42 2051; AVX1-NEXT: testl $4096, %eax ## imm = 0x1000 2052; AVX1-NEXT: jne LBB8_25 2053; AVX1-NEXT: LBB8_26: ## %else46 2054; AVX1-NEXT: testl $8192, %eax ## imm = 0x2000 2055; AVX1-NEXT: jne LBB8_27 2056; AVX1-NEXT: LBB8_28: ## %else50 2057; AVX1-NEXT: testl $16384, %eax ## imm = 0x4000 2058; AVX1-NEXT: jne LBB8_29 2059; AVX1-NEXT: LBB8_30: ## %else54 2060; AVX1-NEXT: testw %ax, %ax 2061; AVX1-NEXT: js LBB8_31 2062; AVX1-NEXT: LBB8_32: ## %else58 2063; AVX1-NEXT: testl $65536, %eax ## imm = 0x10000 2064; AVX1-NEXT: jne LBB8_33 2065; AVX1-NEXT: LBB8_34: ## %else62 2066; AVX1-NEXT: testl $131072, %eax ## imm = 0x20000 2067; AVX1-NEXT: jne LBB8_35 2068; AVX1-NEXT: LBB8_36: ## %else66 2069; AVX1-NEXT: testl $262144, %eax ## imm = 0x40000 2070; AVX1-NEXT: jne LBB8_37 2071; AVX1-NEXT: LBB8_38: ## %else70 2072; AVX1-NEXT: testl $524288, %eax ## imm = 0x80000 2073; AVX1-NEXT: jne LBB8_39 2074; AVX1-NEXT: LBB8_40: ## %else74 2075; AVX1-NEXT: testl $1048576, %eax ## imm = 0x100000 2076; AVX1-NEXT: jne LBB8_41 2077; AVX1-NEXT: LBB8_42: ## %else78 2078; AVX1-NEXT: testl $2097152, %eax ## imm = 0x200000 2079; AVX1-NEXT: jne LBB8_43 2080; AVX1-NEXT: LBB8_44: ## %else82 2081; AVX1-NEXT: testl $4194304, %eax ## imm = 0x400000 2082; AVX1-NEXT: jne LBB8_45 2083; AVX1-NEXT: LBB8_46: ## %else86 2084; AVX1-NEXT: testl $8388608, %eax ## imm = 0x800000 2085; AVX1-NEXT: jne LBB8_47 2086; AVX1-NEXT: LBB8_48: ## %else90 2087; AVX1-NEXT: testl $16777216, %eax ## imm = 0x1000000 2088; AVX1-NEXT: jne LBB8_49 2089; AVX1-NEXT: LBB8_50: ## %else94 2090; AVX1-NEXT: testl $33554432, %eax ## imm = 0x2000000 2091; AVX1-NEXT: jne LBB8_51 2092; AVX1-NEXT: LBB8_52: ## %else98 2093; AVX1-NEXT: testl $67108864, %eax ## imm = 0x4000000 2094; AVX1-NEXT: jne LBB8_53 2095; AVX1-NEXT: LBB8_54: ## %else102 2096; AVX1-NEXT: 
testl $134217728, %eax ## imm = 0x8000000 2097; AVX1-NEXT: jne LBB8_55 2098; AVX1-NEXT: LBB8_56: ## %else106 2099; AVX1-NEXT: testl $268435456, %eax ## imm = 0x10000000 2100; AVX1-NEXT: jne LBB8_57 2101; AVX1-NEXT: LBB8_58: ## %else110 2102; AVX1-NEXT: testl $536870912, %eax ## imm = 0x20000000 2103; AVX1-NEXT: jne LBB8_59 2104; AVX1-NEXT: LBB8_60: ## %else114 2105; AVX1-NEXT: testl $1073741824, %eax ## imm = 0x40000000 2106; AVX1-NEXT: jne LBB8_61 2107; AVX1-NEXT: LBB8_62: ## %else118 2108; AVX1-NEXT: testl $-2147483648, %eax ## imm = 0x80000000 2109; AVX1-NEXT: jne LBB8_63 2110; AVX1-NEXT: LBB8_64: ## %else122 2111; AVX1-NEXT: retq 2112; AVX1-NEXT: LBB8_1: ## %cond.load 2113; AVX1-NEXT: vmovss (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2114; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0],ymm0[1,2,3,4,5,6,7] 2115; AVX1-NEXT: addq $4, %rdi 2116; AVX1-NEXT: testb $2, %al 2117; AVX1-NEXT: je LBB8_4 2118; AVX1-NEXT: LBB8_3: ## %cond.load1 2119; AVX1-NEXT: vinsertps $16, (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0],mem[0],xmm0[2,3] 2120; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 2121; AVX1-NEXT: addq $4, %rdi 2122; AVX1-NEXT: testb $4, %al 2123; AVX1-NEXT: je LBB8_6 2124; AVX1-NEXT: LBB8_5: ## %cond.load5 2125; AVX1-NEXT: vinsertps $32, (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0,1],mem[0],xmm0[3] 2126; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 2127; AVX1-NEXT: addq $4, %rdi 2128; AVX1-NEXT: testb $8, %al 2129; AVX1-NEXT: je LBB8_8 2130; AVX1-NEXT: LBB8_7: ## %cond.load9 2131; AVX1-NEXT: vinsertps $48, (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0,1,2],mem[0] 2132; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 2133; AVX1-NEXT: addq $4, %rdi 2134; AVX1-NEXT: testb $16, %al 2135; AVX1-NEXT: je LBB8_10 2136; AVX1-NEXT: LBB8_9: ## %cond.load13 2137; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2138; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4],ymm0[5,6,7] 2139; AVX1-NEXT: addq $4, %rdi 2140; AVX1-NEXT: testb $32, %al 2141; 
AVX1-NEXT: je LBB8_12 2142; AVX1-NEXT: LBB8_11: ## %cond.load17 2143; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2144; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5],ymm0[6,7] 2145; AVX1-NEXT: addq $4, %rdi 2146; AVX1-NEXT: testb $64, %al 2147; AVX1-NEXT: je LBB8_14 2148; AVX1-NEXT: LBB8_13: ## %cond.load21 2149; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2150; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm4[6],ymm0[7] 2151; AVX1-NEXT: addq $4, %rdi 2152; AVX1-NEXT: testb %al, %al 2153; AVX1-NEXT: jns LBB8_16 2154; AVX1-NEXT: LBB8_15: ## %cond.load25 2155; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2156; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm4[7] 2157; AVX1-NEXT: addq $4, %rdi 2158; AVX1-NEXT: testl $256, %eax ## imm = 0x100 2159; AVX1-NEXT: je LBB8_18 2160; AVX1-NEXT: LBB8_17: ## %cond.load29 2161; AVX1-NEXT: vmovss (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2162; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7] 2163; AVX1-NEXT: addq $4, %rdi 2164; AVX1-NEXT: testl $512, %eax ## imm = 0x200 2165; AVX1-NEXT: je LBB8_20 2166; AVX1-NEXT: LBB8_19: ## %cond.load33 2167; AVX1-NEXT: vinsertps $16, (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0],mem[0],xmm1[2,3] 2168; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 2169; AVX1-NEXT: addq $4, %rdi 2170; AVX1-NEXT: testl $1024, %eax ## imm = 0x400 2171; AVX1-NEXT: je LBB8_22 2172; AVX1-NEXT: LBB8_21: ## %cond.load37 2173; AVX1-NEXT: vinsertps $32, (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0,1],mem[0],xmm1[3] 2174; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 2175; AVX1-NEXT: addq $4, %rdi 2176; AVX1-NEXT: testl $2048, %eax ## imm = 0x800 2177; AVX1-NEXT: je LBB8_24 2178; AVX1-NEXT: LBB8_23: ## %cond.load41 2179; AVX1-NEXT: vinsertps $48, (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0,1,2],mem[0] 2180; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 2181; AVX1-NEXT: addq $4, %rdi 2182; AVX1-NEXT: testl $4096, %eax ## imm = 0x1000 2183; AVX1-NEXT: je 
LBB8_26 2184; AVX1-NEXT: LBB8_25: ## %cond.load45 2185; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2186; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm4[4],ymm1[5,6,7] 2187; AVX1-NEXT: addq $4, %rdi 2188; AVX1-NEXT: testl $8192, %eax ## imm = 0x2000 2189; AVX1-NEXT: je LBB8_28 2190; AVX1-NEXT: LBB8_27: ## %cond.load49 2191; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2192; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm4[5],ymm1[6,7] 2193; AVX1-NEXT: addq $4, %rdi 2194; AVX1-NEXT: testl $16384, %eax ## imm = 0x4000 2195; AVX1-NEXT: je LBB8_30 2196; AVX1-NEXT: LBB8_29: ## %cond.load53 2197; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2198; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6],ymm1[7] 2199; AVX1-NEXT: addq $4, %rdi 2200; AVX1-NEXT: testw %ax, %ax 2201; AVX1-NEXT: jns LBB8_32 2202; AVX1-NEXT: LBB8_31: ## %cond.load57 2203; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2204; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm4[7] 2205; AVX1-NEXT: addq $4, %rdi 2206; AVX1-NEXT: testl $65536, %eax ## imm = 0x10000 2207; AVX1-NEXT: je LBB8_34 2208; AVX1-NEXT: LBB8_33: ## %cond.load61 2209; AVX1-NEXT: vmovss (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2210; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0],ymm2[1,2,3,4,5,6,7] 2211; AVX1-NEXT: addq $4, %rdi 2212; AVX1-NEXT: testl $131072, %eax ## imm = 0x20000 2213; AVX1-NEXT: je LBB8_36 2214; AVX1-NEXT: LBB8_35: ## %cond.load65 2215; AVX1-NEXT: vinsertps $16, (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0],mem[0],xmm2[2,3] 2216; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 2217; AVX1-NEXT: addq $4, %rdi 2218; AVX1-NEXT: testl $262144, %eax ## imm = 0x40000 2219; AVX1-NEXT: je LBB8_38 2220; AVX1-NEXT: LBB8_37: ## %cond.load69 2221; AVX1-NEXT: vinsertps $32, (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0,1],mem[0],xmm2[3] 2222; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 2223; AVX1-NEXT: addq $4, %rdi 2224; AVX1-NEXT: testl $524288, %eax ## imm = 0x80000 2225; AVX1-NEXT: je LBB8_40 
2226; AVX1-NEXT: LBB8_39: ## %cond.load73 2227; AVX1-NEXT: vinsertps $48, (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0,1,2],mem[0] 2228; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 2229; AVX1-NEXT: addq $4, %rdi 2230; AVX1-NEXT: testl $1048576, %eax ## imm = 0x100000 2231; AVX1-NEXT: je LBB8_42 2232; AVX1-NEXT: LBB8_41: ## %cond.load77 2233; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2234; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4],ymm2[5,6,7] 2235; AVX1-NEXT: addq $4, %rdi 2236; AVX1-NEXT: testl $2097152, %eax ## imm = 0x200000 2237; AVX1-NEXT: je LBB8_44 2238; AVX1-NEXT: LBB8_43: ## %cond.load81 2239; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2240; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4],ymm4[5],ymm2[6,7] 2241; AVX1-NEXT: addq $4, %rdi 2242; AVX1-NEXT: testl $4194304, %eax ## imm = 0x400000 2243; AVX1-NEXT: je LBB8_46 2244; AVX1-NEXT: LBB8_45: ## %cond.load85 2245; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2246; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6],ymm2[7] 2247; AVX1-NEXT: addq $4, %rdi 2248; AVX1-NEXT: testl $8388608, %eax ## imm = 0x800000 2249; AVX1-NEXT: je LBB8_48 2250; AVX1-NEXT: LBB8_47: ## %cond.load89 2251; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2252; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5,6],ymm4[7] 2253; AVX1-NEXT: addq $4, %rdi 2254; AVX1-NEXT: testl $16777216, %eax ## imm = 0x1000000 2255; AVX1-NEXT: je LBB8_50 2256; AVX1-NEXT: LBB8_49: ## %cond.load93 2257; AVX1-NEXT: vmovss (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2258; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1,2,3,4,5,6,7] 2259; AVX1-NEXT: addq $4, %rdi 2260; AVX1-NEXT: testl $33554432, %eax ## imm = 0x2000000 2261; AVX1-NEXT: je LBB8_52 2262; AVX1-NEXT: LBB8_51: ## %cond.load97 2263; AVX1-NEXT: vinsertps $16, (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0],mem[0],xmm3[2,3] 2264; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 2265; AVX1-NEXT: addq $4, %rdi 2266; AVX1-NEXT: testl $67108864, %eax ## imm = 
0x4000000 2267; AVX1-NEXT: je LBB8_54 2268; AVX1-NEXT: LBB8_53: ## %cond.load101 2269; AVX1-NEXT: vinsertps $32, (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0,1],mem[0],xmm3[3] 2270; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 2271; AVX1-NEXT: addq $4, %rdi 2272; AVX1-NEXT: testl $134217728, %eax ## imm = 0x8000000 2273; AVX1-NEXT: je LBB8_56 2274; AVX1-NEXT: LBB8_55: ## %cond.load105 2275; AVX1-NEXT: vinsertps $48, (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0,1,2],mem[0] 2276; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 2277; AVX1-NEXT: addq $4, %rdi 2278; AVX1-NEXT: testl $268435456, %eax ## imm = 0x10000000 2279; AVX1-NEXT: je LBB8_58 2280; AVX1-NEXT: LBB8_57: ## %cond.load109 2281; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2282; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm4[4],ymm3[5,6,7] 2283; AVX1-NEXT: addq $4, %rdi 2284; AVX1-NEXT: testl $536870912, %eax ## imm = 0x20000000 2285; AVX1-NEXT: je LBB8_60 2286; AVX1-NEXT: LBB8_59: ## %cond.load113 2287; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2288; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4],ymm4[5],ymm3[6,7] 2289; AVX1-NEXT: addq $4, %rdi 2290; AVX1-NEXT: testl $1073741824, %eax ## imm = 0x40000000 2291; AVX1-NEXT: je LBB8_62 2292; AVX1-NEXT: LBB8_61: ## %cond.load117 2293; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2294; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm4[6],ymm3[7] 2295; AVX1-NEXT: addq $4, %rdi 2296; AVX1-NEXT: testl $-2147483648, %eax ## imm = 0x80000000 2297; AVX1-NEXT: je LBB8_64 2298; AVX1-NEXT: LBB8_63: ## %cond.load121 2299; AVX1-NEXT: vbroadcastss (%rdi), %ymm4 2300; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5,6],ymm4[7] 2301; AVX1-NEXT: retq 2302; 2303; AVX2-LABEL: expandload_v32f32_v32i32: 2304; AVX2: ## %bb.0: 2305; AVX2-NEXT: vpxor %xmm8, %xmm8, %xmm8 2306; AVX2-NEXT: vpcmpeqd %ymm7, %ymm8, %ymm7 2307; AVX2-NEXT: vpcmpeqd %ymm6, %ymm8, %ymm6 2308; AVX2-NEXT: vpackssdw %ymm7, %ymm6, %ymm6 2309; AVX2-NEXT: vpermq {{.*#+}} ymm6 = 
ymm6[0,2,1,3] 2310; AVX2-NEXT: vpcmpeqd %ymm5, %ymm8, %ymm5 2311; AVX2-NEXT: vpcmpeqd %ymm4, %ymm8, %ymm4 2312; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4 2313; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3] 2314; AVX2-NEXT: vpacksswb %ymm6, %ymm4, %ymm4 2315; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3] 2316; AVX2-NEXT: vpmovmskb %ymm4, %eax 2317; AVX2-NEXT: testb $1, %al 2318; AVX2-NEXT: jne LBB8_1 2319; AVX2-NEXT: ## %bb.2: ## %else 2320; AVX2-NEXT: testb $2, %al 2321; AVX2-NEXT: jne LBB8_3 2322; AVX2-NEXT: LBB8_4: ## %else2 2323; AVX2-NEXT: testb $4, %al 2324; AVX2-NEXT: jne LBB8_5 2325; AVX2-NEXT: LBB8_6: ## %else6 2326; AVX2-NEXT: testb $8, %al 2327; AVX2-NEXT: jne LBB8_7 2328; AVX2-NEXT: LBB8_8: ## %else10 2329; AVX2-NEXT: testb $16, %al 2330; AVX2-NEXT: jne LBB8_9 2331; AVX2-NEXT: LBB8_10: ## %else14 2332; AVX2-NEXT: testb $32, %al 2333; AVX2-NEXT: jne LBB8_11 2334; AVX2-NEXT: LBB8_12: ## %else18 2335; AVX2-NEXT: testb $64, %al 2336; AVX2-NEXT: jne LBB8_13 2337; AVX2-NEXT: LBB8_14: ## %else22 2338; AVX2-NEXT: testb %al, %al 2339; AVX2-NEXT: js LBB8_15 2340; AVX2-NEXT: LBB8_16: ## %else26 2341; AVX2-NEXT: testl $256, %eax ## imm = 0x100 2342; AVX2-NEXT: jne LBB8_17 2343; AVX2-NEXT: LBB8_18: ## %else30 2344; AVX2-NEXT: testl $512, %eax ## imm = 0x200 2345; AVX2-NEXT: jne LBB8_19 2346; AVX2-NEXT: LBB8_20: ## %else34 2347; AVX2-NEXT: testl $1024, %eax ## imm = 0x400 2348; AVX2-NEXT: jne LBB8_21 2349; AVX2-NEXT: LBB8_22: ## %else38 2350; AVX2-NEXT: testl $2048, %eax ## imm = 0x800 2351; AVX2-NEXT: jne LBB8_23 2352; AVX2-NEXT: LBB8_24: ## %else42 2353; AVX2-NEXT: testl $4096, %eax ## imm = 0x1000 2354; AVX2-NEXT: jne LBB8_25 2355; AVX2-NEXT: LBB8_26: ## %else46 2356; AVX2-NEXT: testl $8192, %eax ## imm = 0x2000 2357; AVX2-NEXT: jne LBB8_27 2358; AVX2-NEXT: LBB8_28: ## %else50 2359; AVX2-NEXT: testl $16384, %eax ## imm = 0x4000 2360; AVX2-NEXT: jne LBB8_29 2361; AVX2-NEXT: LBB8_30: ## %else54 2362; AVX2-NEXT: testw %ax, %ax 2363; AVX2-NEXT: js LBB8_31 2364; 
AVX2-NEXT: LBB8_32: ## %else58 2365; AVX2-NEXT: testl $65536, %eax ## imm = 0x10000 2366; AVX2-NEXT: jne LBB8_33 2367; AVX2-NEXT: LBB8_34: ## %else62 2368; AVX2-NEXT: testl $131072, %eax ## imm = 0x20000 2369; AVX2-NEXT: jne LBB8_35 2370; AVX2-NEXT: LBB8_36: ## %else66 2371; AVX2-NEXT: testl $262144, %eax ## imm = 0x40000 2372; AVX2-NEXT: jne LBB8_37 2373; AVX2-NEXT: LBB8_38: ## %else70 2374; AVX2-NEXT: testl $524288, %eax ## imm = 0x80000 2375; AVX2-NEXT: jne LBB8_39 2376; AVX2-NEXT: LBB8_40: ## %else74 2377; AVX2-NEXT: testl $1048576, %eax ## imm = 0x100000 2378; AVX2-NEXT: jne LBB8_41 2379; AVX2-NEXT: LBB8_42: ## %else78 2380; AVX2-NEXT: testl $2097152, %eax ## imm = 0x200000 2381; AVX2-NEXT: jne LBB8_43 2382; AVX2-NEXT: LBB8_44: ## %else82 2383; AVX2-NEXT: testl $4194304, %eax ## imm = 0x400000 2384; AVX2-NEXT: jne LBB8_45 2385; AVX2-NEXT: LBB8_46: ## %else86 2386; AVX2-NEXT: testl $8388608, %eax ## imm = 0x800000 2387; AVX2-NEXT: jne LBB8_47 2388; AVX2-NEXT: LBB8_48: ## %else90 2389; AVX2-NEXT: testl $16777216, %eax ## imm = 0x1000000 2390; AVX2-NEXT: jne LBB8_49 2391; AVX2-NEXT: LBB8_50: ## %else94 2392; AVX2-NEXT: testl $33554432, %eax ## imm = 0x2000000 2393; AVX2-NEXT: jne LBB8_51 2394; AVX2-NEXT: LBB8_52: ## %else98 2395; AVX2-NEXT: testl $67108864, %eax ## imm = 0x4000000 2396; AVX2-NEXT: jne LBB8_53 2397; AVX2-NEXT: LBB8_54: ## %else102 2398; AVX2-NEXT: testl $134217728, %eax ## imm = 0x8000000 2399; AVX2-NEXT: jne LBB8_55 2400; AVX2-NEXT: LBB8_56: ## %else106 2401; AVX2-NEXT: testl $268435456, %eax ## imm = 0x10000000 2402; AVX2-NEXT: jne LBB8_57 2403; AVX2-NEXT: LBB8_58: ## %else110 2404; AVX2-NEXT: testl $536870912, %eax ## imm = 0x20000000 2405; AVX2-NEXT: jne LBB8_59 2406; AVX2-NEXT: LBB8_60: ## %else114 2407; AVX2-NEXT: testl $1073741824, %eax ## imm = 0x40000000 2408; AVX2-NEXT: jne LBB8_61 2409; AVX2-NEXT: LBB8_62: ## %else118 2410; AVX2-NEXT: testl $-2147483648, %eax ## imm = 0x80000000 2411; AVX2-NEXT: jne LBB8_63 2412; AVX2-NEXT: LBB8_64: ## 
%else122 2413; AVX2-NEXT: retq 2414; AVX2-NEXT: LBB8_1: ## %cond.load 2415; AVX2-NEXT: vmovd (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2416; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0],ymm0[1,2,3,4,5,6,7] 2417; AVX2-NEXT: addq $4, %rdi 2418; AVX2-NEXT: testb $2, %al 2419; AVX2-NEXT: je LBB8_4 2420; AVX2-NEXT: LBB8_3: ## %cond.load1 2421; AVX2-NEXT: vinsertps $16, (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0],mem[0],xmm0[2,3] 2422; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 2423; AVX2-NEXT: addq $4, %rdi 2424; AVX2-NEXT: testb $4, %al 2425; AVX2-NEXT: je LBB8_6 2426; AVX2-NEXT: LBB8_5: ## %cond.load5 2427; AVX2-NEXT: vinsertps $32, (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0,1],mem[0],xmm0[3] 2428; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 2429; AVX2-NEXT: addq $4, %rdi 2430; AVX2-NEXT: testb $8, %al 2431; AVX2-NEXT: je LBB8_8 2432; AVX2-NEXT: LBB8_7: ## %cond.load9 2433; AVX2-NEXT: vinsertps $48, (%rdi), %xmm0, %xmm4 ## xmm4 = xmm0[0,1,2],mem[0] 2434; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] 2435; AVX2-NEXT: addq $4, %rdi 2436; AVX2-NEXT: testb $16, %al 2437; AVX2-NEXT: je LBB8_10 2438; AVX2-NEXT: LBB8_9: ## %cond.load13 2439; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2440; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4],ymm0[5,6,7] 2441; AVX2-NEXT: addq $4, %rdi 2442; AVX2-NEXT: testb $32, %al 2443; AVX2-NEXT: je LBB8_12 2444; AVX2-NEXT: LBB8_11: ## %cond.load17 2445; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2446; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5],ymm0[6,7] 2447; AVX2-NEXT: addq $4, %rdi 2448; AVX2-NEXT: testb $64, %al 2449; AVX2-NEXT: je LBB8_14 2450; AVX2-NEXT: LBB8_13: ## %cond.load21 2451; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2452; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm4[6],ymm0[7] 2453; AVX2-NEXT: addq $4, %rdi 2454; AVX2-NEXT: testb %al, %al 2455; AVX2-NEXT: jns LBB8_16 2456; AVX2-NEXT: LBB8_15: ## %cond.load25 2457; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 
2458; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm4[7] 2459; AVX2-NEXT: addq $4, %rdi 2460; AVX2-NEXT: testl $256, %eax ## imm = 0x100 2461; AVX2-NEXT: je LBB8_18 2462; AVX2-NEXT: LBB8_17: ## %cond.load29 2463; AVX2-NEXT: vmovd (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2464; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7] 2465; AVX2-NEXT: addq $4, %rdi 2466; AVX2-NEXT: testl $512, %eax ## imm = 0x200 2467; AVX2-NEXT: je LBB8_20 2468; AVX2-NEXT: LBB8_19: ## %cond.load33 2469; AVX2-NEXT: vinsertps $16, (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0],mem[0],xmm1[2,3] 2470; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 2471; AVX2-NEXT: addq $4, %rdi 2472; AVX2-NEXT: testl $1024, %eax ## imm = 0x400 2473; AVX2-NEXT: je LBB8_22 2474; AVX2-NEXT: LBB8_21: ## %cond.load37 2475; AVX2-NEXT: vinsertps $32, (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0,1],mem[0],xmm1[3] 2476; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 2477; AVX2-NEXT: addq $4, %rdi 2478; AVX2-NEXT: testl $2048, %eax ## imm = 0x800 2479; AVX2-NEXT: je LBB8_24 2480; AVX2-NEXT: LBB8_23: ## %cond.load41 2481; AVX2-NEXT: vinsertps $48, (%rdi), %xmm1, %xmm4 ## xmm4 = xmm1[0,1,2],mem[0] 2482; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] 2483; AVX2-NEXT: addq $4, %rdi 2484; AVX2-NEXT: testl $4096, %eax ## imm = 0x1000 2485; AVX2-NEXT: je LBB8_26 2486; AVX2-NEXT: LBB8_25: ## %cond.load45 2487; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2488; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm4[4],ymm1[5,6,7] 2489; AVX2-NEXT: addq $4, %rdi 2490; AVX2-NEXT: testl $8192, %eax ## imm = 0x2000 2491; AVX2-NEXT: je LBB8_28 2492; AVX2-NEXT: LBB8_27: ## %cond.load49 2493; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2494; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm4[5],ymm1[6,7] 2495; AVX2-NEXT: addq $4, %rdi 2496; AVX2-NEXT: testl $16384, %eax ## imm = 0x4000 2497; AVX2-NEXT: je LBB8_30 2498; AVX2-NEXT: LBB8_29: ## %cond.load53 2499; AVX2-NEXT: 
vbroadcastss (%rdi), %ymm4 2500; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6],ymm1[7] 2501; AVX2-NEXT: addq $4, %rdi 2502; AVX2-NEXT: testw %ax, %ax 2503; AVX2-NEXT: jns LBB8_32 2504; AVX2-NEXT: LBB8_31: ## %cond.load57 2505; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2506; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm4[7] 2507; AVX2-NEXT: addq $4, %rdi 2508; AVX2-NEXT: testl $65536, %eax ## imm = 0x10000 2509; AVX2-NEXT: je LBB8_34 2510; AVX2-NEXT: LBB8_33: ## %cond.load61 2511; AVX2-NEXT: vmovd (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2512; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2,3,4,5,6,7] 2513; AVX2-NEXT: addq $4, %rdi 2514; AVX2-NEXT: testl $131072, %eax ## imm = 0x20000 2515; AVX2-NEXT: je LBB8_36 2516; AVX2-NEXT: LBB8_35: ## %cond.load65 2517; AVX2-NEXT: vinsertps $16, (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0],mem[0],xmm2[2,3] 2518; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 2519; AVX2-NEXT: addq $4, %rdi 2520; AVX2-NEXT: testl $262144, %eax ## imm = 0x40000 2521; AVX2-NEXT: je LBB8_38 2522; AVX2-NEXT: LBB8_37: ## %cond.load69 2523; AVX2-NEXT: vinsertps $32, (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0,1],mem[0],xmm2[3] 2524; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 2525; AVX2-NEXT: addq $4, %rdi 2526; AVX2-NEXT: testl $524288, %eax ## imm = 0x80000 2527; AVX2-NEXT: je LBB8_40 2528; AVX2-NEXT: LBB8_39: ## %cond.load73 2529; AVX2-NEXT: vinsertps $48, (%rdi), %xmm2, %xmm4 ## xmm4 = xmm2[0,1,2],mem[0] 2530; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] 2531; AVX2-NEXT: addq $4, %rdi 2532; AVX2-NEXT: testl $1048576, %eax ## imm = 0x100000 2533; AVX2-NEXT: je LBB8_42 2534; AVX2-NEXT: LBB8_41: ## %cond.load77 2535; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2536; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4],ymm2[5,6,7] 2537; AVX2-NEXT: addq $4, %rdi 2538; AVX2-NEXT: testl $2097152, %eax ## imm = 0x200000 2539; AVX2-NEXT: je LBB8_44 2540; AVX2-NEXT: LBB8_43: ## 
%cond.load81 2541; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2542; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4],ymm4[5],ymm2[6,7] 2543; AVX2-NEXT: addq $4, %rdi 2544; AVX2-NEXT: testl $4194304, %eax ## imm = 0x400000 2545; AVX2-NEXT: je LBB8_46 2546; AVX2-NEXT: LBB8_45: ## %cond.load85 2547; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2548; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6],ymm2[7] 2549; AVX2-NEXT: addq $4, %rdi 2550; AVX2-NEXT: testl $8388608, %eax ## imm = 0x800000 2551; AVX2-NEXT: je LBB8_48 2552; AVX2-NEXT: LBB8_47: ## %cond.load89 2553; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2554; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5,6],ymm4[7] 2555; AVX2-NEXT: addq $4, %rdi 2556; AVX2-NEXT: testl $16777216, %eax ## imm = 0x1000000 2557; AVX2-NEXT: je LBB8_50 2558; AVX2-NEXT: LBB8_49: ## %cond.load93 2559; AVX2-NEXT: vmovd (%rdi), %xmm4 ## xmm4 = mem[0],zero,zero,zero 2560; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0],ymm3[1,2,3,4,5,6,7] 2561; AVX2-NEXT: addq $4, %rdi 2562; AVX2-NEXT: testl $33554432, %eax ## imm = 0x2000000 2563; AVX2-NEXT: je LBB8_52 2564; AVX2-NEXT: LBB8_51: ## %cond.load97 2565; AVX2-NEXT: vinsertps $16, (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0],mem[0],xmm3[2,3] 2566; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 2567; AVX2-NEXT: addq $4, %rdi 2568; AVX2-NEXT: testl $67108864, %eax ## imm = 0x4000000 2569; AVX2-NEXT: je LBB8_54 2570; AVX2-NEXT: LBB8_53: ## %cond.load101 2571; AVX2-NEXT: vinsertps $32, (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0,1],mem[0],xmm3[3] 2572; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 2573; AVX2-NEXT: addq $4, %rdi 2574; AVX2-NEXT: testl $134217728, %eax ## imm = 0x8000000 2575; AVX2-NEXT: je LBB8_56 2576; AVX2-NEXT: LBB8_55: ## %cond.load105 2577; AVX2-NEXT: vinsertps $48, (%rdi), %xmm3, %xmm4 ## xmm4 = xmm3[0,1,2],mem[0] 2578; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 2579; AVX2-NEXT: addq $4, %rdi 2580; AVX2-NEXT: testl $268435456, %eax 
## imm = 0x10000000 2581; AVX2-NEXT: je LBB8_58 2582; AVX2-NEXT: LBB8_57: ## %cond.load109 2583; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2584; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm4[4],ymm3[5,6,7] 2585; AVX2-NEXT: addq $4, %rdi 2586; AVX2-NEXT: testl $536870912, %eax ## imm = 0x20000000 2587; AVX2-NEXT: je LBB8_60 2588; AVX2-NEXT: LBB8_59: ## %cond.load113 2589; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2590; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4],ymm4[5],ymm3[6,7] 2591; AVX2-NEXT: addq $4, %rdi 2592; AVX2-NEXT: testl $1073741824, %eax ## imm = 0x40000000 2593; AVX2-NEXT: je LBB8_62 2594; AVX2-NEXT: LBB8_61: ## %cond.load117 2595; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2596; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm4[6],ymm3[7] 2597; AVX2-NEXT: addq $4, %rdi 2598; AVX2-NEXT: testl $-2147483648, %eax ## imm = 0x80000000 2599; AVX2-NEXT: je LBB8_64 2600; AVX2-NEXT: LBB8_63: ## %cond.load121 2601; AVX2-NEXT: vbroadcastss (%rdi), %ymm4 2602; AVX2-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5,6],ymm4[7] 2603; AVX2-NEXT: retq 2604; 2605; AVX512-LABEL: expandload_v32f32_v32i32: 2606; AVX512: ## %bb.0: 2607; AVX512-NEXT: vptestnmd %zmm3, %zmm3, %k2 2608; AVX512-NEXT: vptestnmd %zmm2, %zmm2, %k1 2609; AVX512-NEXT: kmovw %k1, %eax 2610; AVX512-NEXT: movl %eax, %ecx 2611; AVX512-NEXT: shrl %ecx 2612; AVX512-NEXT: andl $21845, %ecx ## imm = 0x5555 2613; AVX512-NEXT: subl %ecx, %eax 2614; AVX512-NEXT: movl %eax, %ecx 2615; AVX512-NEXT: andl $858993459, %ecx ## imm = 0x33333333 2616; AVX512-NEXT: shrl $2, %eax 2617; AVX512-NEXT: andl $858993459, %eax ## imm = 0x33333333 2618; AVX512-NEXT: addl %ecx, %eax 2619; AVX512-NEXT: movl %eax, %ecx 2620; AVX512-NEXT: shrl $4, %ecx 2621; AVX512-NEXT: addl %eax, %ecx 2622; AVX512-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F 2623; AVX512-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101 2624; AVX512-NEXT: shrl $24, %eax 2625; AVX512-NEXT: vexpandps (%rdi,%rax,4), %zmm1 {%k2} 2626; AVX512-NEXT: 
vexpandps (%rdi), %zmm0 {%k1} 2627; AVX512-NEXT: retq 2628 %mask = icmp eq <32 x i32> %trigger, zeroinitializer 2629 %res = call <32 x float> @llvm.masked.expandload.v32f32(ptr %base, <32 x i1> %mask, <32 x float> %src0) 2630 ret <32 x float> %res 2631} 2632 2633; 2634; vXi64 2635; 2636 2637define <2 x i64> @expandload_v2i64_const(ptr %base, <2 x i64> %src0) { 2638; SSE2-LABEL: expandload_v2i64_const: 2639; SSE2: ## %bb.0: 2640; SSE2-NEXT: movsd (%rdi), %xmm1 ## xmm1 = mem[0],zero 2641; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2642; SSE2-NEXT: retq 2643; 2644; SSE42-LABEL: expandload_v2i64_const: 2645; SSE42: ## %bb.0: 2646; SSE42-NEXT: pinsrq $1, (%rdi), %xmm0 2647; SSE42-NEXT: retq 2648; 2649; AVX1OR2-LABEL: expandload_v2i64_const: 2650; AVX1OR2: ## %bb.0: 2651; AVX1OR2-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0 2652; AVX1OR2-NEXT: retq 2653; 2654; AVX512F-LABEL: expandload_v2i64_const: 2655; AVX512F: ## %bb.0: 2656; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 2657; AVX512F-NEXT: movb $2, %al 2658; AVX512F-NEXT: kmovw %eax, %k1 2659; AVX512F-NEXT: vpexpandq (%rdi), %zmm0 {%k1} 2660; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2661; AVX512F-NEXT: vzeroupper 2662; AVX512F-NEXT: retq 2663; 2664; AVX512VLDQ-LABEL: expandload_v2i64_const: 2665; AVX512VLDQ: ## %bb.0: 2666; AVX512VLDQ-NEXT: movb $2, %al 2667; AVX512VLDQ-NEXT: kmovw %eax, %k1 2668; AVX512VLDQ-NEXT: vpexpandq (%rdi), %xmm0 {%k1} 2669; AVX512VLDQ-NEXT: retq 2670; 2671; AVX512VLBW-LABEL: expandload_v2i64_const: 2672; AVX512VLBW: ## %bb.0: 2673; AVX512VLBW-NEXT: movb $2, %al 2674; AVX512VLBW-NEXT: kmovd %eax, %k1 2675; AVX512VLBW-NEXT: vpexpandq (%rdi), %xmm0 {%k1} 2676; AVX512VLBW-NEXT: retq 2677 %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0) 2678 ret <2 x i64>%res 2679} 2680 2681; 2682; vXi32 2683; 2684 2685define <4 x i32> @expandload_v4i32_v4i32(ptr %base, <4 x i32> %src0, <4 x i32> %trigger) { 2686; 
SSE2-LABEL: expandload_v4i32_v4i32: 2687; SSE2: ## %bb.0: 2688; SSE2-NEXT: pxor %xmm2, %xmm2 2689; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 2690; SSE2-NEXT: movmskps %xmm2, %eax 2691; SSE2-NEXT: testb $1, %al 2692; SSE2-NEXT: jne LBB10_1 2693; SSE2-NEXT: ## %bb.2: ## %else 2694; SSE2-NEXT: testb $2, %al 2695; SSE2-NEXT: jne LBB10_3 2696; SSE2-NEXT: LBB10_4: ## %else2 2697; SSE2-NEXT: testb $4, %al 2698; SSE2-NEXT: jne LBB10_5 2699; SSE2-NEXT: LBB10_6: ## %else6 2700; SSE2-NEXT: testb $8, %al 2701; SSE2-NEXT: jne LBB10_7 2702; SSE2-NEXT: LBB10_8: ## %else10 2703; SSE2-NEXT: retq 2704; SSE2-NEXT: LBB10_1: ## %cond.load 2705; SSE2-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 2706; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 2707; SSE2-NEXT: addq $4, %rdi 2708; SSE2-NEXT: testb $2, %al 2709; SSE2-NEXT: je LBB10_4 2710; SSE2-NEXT: LBB10_3: ## %cond.load1 2711; SSE2-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 2712; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2713; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 2714; SSE2-NEXT: addq $4, %rdi 2715; SSE2-NEXT: movaps %xmm1, %xmm0 2716; SSE2-NEXT: testb $4, %al 2717; SSE2-NEXT: je LBB10_6 2718; SSE2-NEXT: LBB10_5: ## %cond.load5 2719; SSE2-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 2720; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 2721; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 2722; SSE2-NEXT: addq $4, %rdi 2723; SSE2-NEXT: testb $8, %al 2724; SSE2-NEXT: je LBB10_8 2725; SSE2-NEXT: LBB10_7: ## %cond.load9 2726; SSE2-NEXT: movss (%rdi), %xmm1 ## xmm1 = mem[0],zero,zero,zero 2727; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 2728; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 2729; SSE2-NEXT: retq 2730; 2731; SSE42-LABEL: expandload_v4i32_v4i32: 2732; SSE42: ## %bb.0: 2733; SSE42-NEXT: pxor %xmm2, %xmm2 2734; SSE42-NEXT: pcmpeqd %xmm1, %xmm2 2735; SSE42-NEXT: movmskps %xmm2, %eax 2736; SSE42-NEXT: testb $1, %al 2737; SSE42-NEXT: jne 
LBB10_1 2738; SSE42-NEXT: ## %bb.2: ## %else 2739; SSE42-NEXT: testb $2, %al 2740; SSE42-NEXT: jne LBB10_3 2741; SSE42-NEXT: LBB10_4: ## %else2 2742; SSE42-NEXT: testb $4, %al 2743; SSE42-NEXT: jne LBB10_5 2744; SSE42-NEXT: LBB10_6: ## %else6 2745; SSE42-NEXT: testb $8, %al 2746; SSE42-NEXT: jne LBB10_7 2747; SSE42-NEXT: LBB10_8: ## %else10 2748; SSE42-NEXT: retq 2749; SSE42-NEXT: LBB10_1: ## %cond.load 2750; SSE42-NEXT: pinsrd $0, (%rdi), %xmm0 2751; SSE42-NEXT: addq $4, %rdi 2752; SSE42-NEXT: testb $2, %al 2753; SSE42-NEXT: je LBB10_4 2754; SSE42-NEXT: LBB10_3: ## %cond.load1 2755; SSE42-NEXT: pinsrd $1, (%rdi), %xmm0 2756; SSE42-NEXT: addq $4, %rdi 2757; SSE42-NEXT: testb $4, %al 2758; SSE42-NEXT: je LBB10_6 2759; SSE42-NEXT: LBB10_5: ## %cond.load5 2760; SSE42-NEXT: pinsrd $2, (%rdi), %xmm0 2761; SSE42-NEXT: addq $4, %rdi 2762; SSE42-NEXT: testb $8, %al 2763; SSE42-NEXT: je LBB10_8 2764; SSE42-NEXT: LBB10_7: ## %cond.load9 2765; SSE42-NEXT: pinsrd $3, (%rdi), %xmm0 2766; SSE42-NEXT: retq 2767; 2768; AVX1OR2-LABEL: expandload_v4i32_v4i32: 2769; AVX1OR2: ## %bb.0: 2770; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 2771; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 2772; AVX1OR2-NEXT: vmovmskps %xmm1, %eax 2773; AVX1OR2-NEXT: testb $1, %al 2774; AVX1OR2-NEXT: jne LBB10_1 2775; AVX1OR2-NEXT: ## %bb.2: ## %else 2776; AVX1OR2-NEXT: testb $2, %al 2777; AVX1OR2-NEXT: jne LBB10_3 2778; AVX1OR2-NEXT: LBB10_4: ## %else2 2779; AVX1OR2-NEXT: testb $4, %al 2780; AVX1OR2-NEXT: jne LBB10_5 2781; AVX1OR2-NEXT: LBB10_6: ## %else6 2782; AVX1OR2-NEXT: testb $8, %al 2783; AVX1OR2-NEXT: jne LBB10_7 2784; AVX1OR2-NEXT: LBB10_8: ## %else10 2785; AVX1OR2-NEXT: retq 2786; AVX1OR2-NEXT: LBB10_1: ## %cond.load 2787; AVX1OR2-NEXT: vpinsrd $0, (%rdi), %xmm0, %xmm0 2788; AVX1OR2-NEXT: addq $4, %rdi 2789; AVX1OR2-NEXT: testb $2, %al 2790; AVX1OR2-NEXT: je LBB10_4 2791; AVX1OR2-NEXT: LBB10_3: ## %cond.load1 2792; AVX1OR2-NEXT: vpinsrd $1, (%rdi), %xmm0, %xmm0 2793; AVX1OR2-NEXT: addq $4, %rdi 2794; 
AVX1OR2-NEXT: testb $4, %al 2795; AVX1OR2-NEXT: je LBB10_6 2796; AVX1OR2-NEXT: LBB10_5: ## %cond.load5 2797; AVX1OR2-NEXT: vpinsrd $2, (%rdi), %xmm0, %xmm0 2798; AVX1OR2-NEXT: addq $4, %rdi 2799; AVX1OR2-NEXT: testb $8, %al 2800; AVX1OR2-NEXT: je LBB10_8 2801; AVX1OR2-NEXT: LBB10_7: ## %cond.load9 2802; AVX1OR2-NEXT: vpinsrd $3, (%rdi), %xmm0, %xmm0 2803; AVX1OR2-NEXT: retq 2804; 2805; AVX512F-LABEL: expandload_v4i32_v4i32: 2806; AVX512F: ## %bb.0: 2807; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 2808; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 2809; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k0 2810; AVX512F-NEXT: kshiftlw $12, %k0, %k0 2811; AVX512F-NEXT: kshiftrw $12, %k0, %k1 2812; AVX512F-NEXT: vpexpandd (%rdi), %zmm0 {%k1} 2813; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 2814; AVX512F-NEXT: vzeroupper 2815; AVX512F-NEXT: retq 2816; 2817; AVX512VL-LABEL: expandload_v4i32_v4i32: 2818; AVX512VL: ## %bb.0: 2819; AVX512VL-NEXT: vptestnmd %xmm1, %xmm1, %k1 2820; AVX512VL-NEXT: vpexpandd (%rdi), %xmm0 {%k1} 2821; AVX512VL-NEXT: retq 2822 %mask = icmp eq <4 x i32> %trigger, zeroinitializer 2823 %res = call <4 x i32> @llvm.masked.expandload.v4i32(ptr %base, <4 x i1> %mask, <4 x i32> %src0) 2824 ret <4 x i32>%res 2825} 2826 2827; 2828; vXi16 2829; 2830 2831define <8 x i16> @expandload_v8i16_v8i16(ptr %base, <8 x i16> %src0, <8 x i16> %trigger) { 2832; SSE-LABEL: expandload_v8i16_v8i16: 2833; SSE: ## %bb.0: 2834; SSE-NEXT: pxor %xmm2, %xmm2 2835; SSE-NEXT: pcmpeqw %xmm1, %xmm2 2836; SSE-NEXT: packsswb %xmm2, %xmm2 2837; SSE-NEXT: pmovmskb %xmm2, %eax 2838; SSE-NEXT: testb $1, %al 2839; SSE-NEXT: jne LBB11_1 2840; SSE-NEXT: ## %bb.2: ## %else 2841; SSE-NEXT: testb $2, %al 2842; SSE-NEXT: jne LBB11_3 2843; SSE-NEXT: LBB11_4: ## %else2 2844; SSE-NEXT: testb $4, %al 2845; SSE-NEXT: jne LBB11_5 2846; SSE-NEXT: LBB11_6: ## %else6 2847; SSE-NEXT: testb $8, %al 2848; SSE-NEXT: jne LBB11_7 2849; SSE-NEXT: LBB11_8: ## %else10 2850; SSE-NEXT: 
testb $16, %al 2851; SSE-NEXT: jne LBB11_9 2852; SSE-NEXT: LBB11_10: ## %else14 2853; SSE-NEXT: testb $32, %al 2854; SSE-NEXT: jne LBB11_11 2855; SSE-NEXT: LBB11_12: ## %else18 2856; SSE-NEXT: testb $64, %al 2857; SSE-NEXT: jne LBB11_13 2858; SSE-NEXT: LBB11_14: ## %else22 2859; SSE-NEXT: testb $-128, %al 2860; SSE-NEXT: jne LBB11_15 2861; SSE-NEXT: LBB11_16: ## %else26 2862; SSE-NEXT: retq 2863; SSE-NEXT: LBB11_1: ## %cond.load 2864; SSE-NEXT: pinsrw $0, (%rdi), %xmm0 2865; SSE-NEXT: addq $2, %rdi 2866; SSE-NEXT: testb $2, %al 2867; SSE-NEXT: je LBB11_4 2868; SSE-NEXT: LBB11_3: ## %cond.load1 2869; SSE-NEXT: pinsrw $1, (%rdi), %xmm0 2870; SSE-NEXT: addq $2, %rdi 2871; SSE-NEXT: testb $4, %al 2872; SSE-NEXT: je LBB11_6 2873; SSE-NEXT: LBB11_5: ## %cond.load5 2874; SSE-NEXT: pinsrw $2, (%rdi), %xmm0 2875; SSE-NEXT: addq $2, %rdi 2876; SSE-NEXT: testb $8, %al 2877; SSE-NEXT: je LBB11_8 2878; SSE-NEXT: LBB11_7: ## %cond.load9 2879; SSE-NEXT: pinsrw $3, (%rdi), %xmm0 2880; SSE-NEXT: addq $2, %rdi 2881; SSE-NEXT: testb $16, %al 2882; SSE-NEXT: je LBB11_10 2883; SSE-NEXT: LBB11_9: ## %cond.load13 2884; SSE-NEXT: pinsrw $4, (%rdi), %xmm0 2885; SSE-NEXT: addq $2, %rdi 2886; SSE-NEXT: testb $32, %al 2887; SSE-NEXT: je LBB11_12 2888; SSE-NEXT: LBB11_11: ## %cond.load17 2889; SSE-NEXT: pinsrw $5, (%rdi), %xmm0 2890; SSE-NEXT: addq $2, %rdi 2891; SSE-NEXT: testb $64, %al 2892; SSE-NEXT: je LBB11_14 2893; SSE-NEXT: LBB11_13: ## %cond.load21 2894; SSE-NEXT: pinsrw $6, (%rdi), %xmm0 2895; SSE-NEXT: addq $2, %rdi 2896; SSE-NEXT: testb $-128, %al 2897; SSE-NEXT: je LBB11_16 2898; SSE-NEXT: LBB11_15: ## %cond.load25 2899; SSE-NEXT: pinsrw $7, (%rdi), %xmm0 2900; SSE-NEXT: retq 2901; 2902; AVX1OR2-LABEL: expandload_v8i16_v8i16: 2903; AVX1OR2: ## %bb.0: 2904; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 2905; AVX1OR2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 2906; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1 2907; AVX1OR2-NEXT: vpmovmskb %xmm1, %eax 2908; AVX1OR2-NEXT: testb $1, %al 2909; 
AVX1OR2-NEXT: jne LBB11_1 2910; AVX1OR2-NEXT: ## %bb.2: ## %else 2911; AVX1OR2-NEXT: testb $2, %al 2912; AVX1OR2-NEXT: jne LBB11_3 2913; AVX1OR2-NEXT: LBB11_4: ## %else2 2914; AVX1OR2-NEXT: testb $4, %al 2915; AVX1OR2-NEXT: jne LBB11_5 2916; AVX1OR2-NEXT: LBB11_6: ## %else6 2917; AVX1OR2-NEXT: testb $8, %al 2918; AVX1OR2-NEXT: jne LBB11_7 2919; AVX1OR2-NEXT: LBB11_8: ## %else10 2920; AVX1OR2-NEXT: testb $16, %al 2921; AVX1OR2-NEXT: jne LBB11_9 2922; AVX1OR2-NEXT: LBB11_10: ## %else14 2923; AVX1OR2-NEXT: testb $32, %al 2924; AVX1OR2-NEXT: jne LBB11_11 2925; AVX1OR2-NEXT: LBB11_12: ## %else18 2926; AVX1OR2-NEXT: testb $64, %al 2927; AVX1OR2-NEXT: jne LBB11_13 2928; AVX1OR2-NEXT: LBB11_14: ## %else22 2929; AVX1OR2-NEXT: testb $-128, %al 2930; AVX1OR2-NEXT: jne LBB11_15 2931; AVX1OR2-NEXT: LBB11_16: ## %else26 2932; AVX1OR2-NEXT: retq 2933; AVX1OR2-NEXT: LBB11_1: ## %cond.load 2934; AVX1OR2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 2935; AVX1OR2-NEXT: addq $2, %rdi 2936; AVX1OR2-NEXT: testb $2, %al 2937; AVX1OR2-NEXT: je LBB11_4 2938; AVX1OR2-NEXT: LBB11_3: ## %cond.load1 2939; AVX1OR2-NEXT: vpinsrw $1, (%rdi), %xmm0, %xmm0 2940; AVX1OR2-NEXT: addq $2, %rdi 2941; AVX1OR2-NEXT: testb $4, %al 2942; AVX1OR2-NEXT: je LBB11_6 2943; AVX1OR2-NEXT: LBB11_5: ## %cond.load5 2944; AVX1OR2-NEXT: vpinsrw $2, (%rdi), %xmm0, %xmm0 2945; AVX1OR2-NEXT: addq $2, %rdi 2946; AVX1OR2-NEXT: testb $8, %al 2947; AVX1OR2-NEXT: je LBB11_8 2948; AVX1OR2-NEXT: LBB11_7: ## %cond.load9 2949; AVX1OR2-NEXT: vpinsrw $3, (%rdi), %xmm0, %xmm0 2950; AVX1OR2-NEXT: addq $2, %rdi 2951; AVX1OR2-NEXT: testb $16, %al 2952; AVX1OR2-NEXT: je LBB11_10 2953; AVX1OR2-NEXT: LBB11_9: ## %cond.load13 2954; AVX1OR2-NEXT: vpinsrw $4, (%rdi), %xmm0, %xmm0 2955; AVX1OR2-NEXT: addq $2, %rdi 2956; AVX1OR2-NEXT: testb $32, %al 2957; AVX1OR2-NEXT: je LBB11_12 2958; AVX1OR2-NEXT: LBB11_11: ## %cond.load17 2959; AVX1OR2-NEXT: vpinsrw $5, (%rdi), %xmm0, %xmm0 2960; AVX1OR2-NEXT: addq $2, %rdi 2961; AVX1OR2-NEXT: testb $64, %al 
2962; AVX1OR2-NEXT: je LBB11_14 2963; AVX1OR2-NEXT: LBB11_13: ## %cond.load21 2964; AVX1OR2-NEXT: vpinsrw $6, (%rdi), %xmm0, %xmm0 2965; AVX1OR2-NEXT: addq $2, %rdi 2966; AVX1OR2-NEXT: testb $-128, %al 2967; AVX1OR2-NEXT: je LBB11_16 2968; AVX1OR2-NEXT: LBB11_15: ## %cond.load25 2969; AVX1OR2-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 2970; AVX1OR2-NEXT: retq 2971; 2972; AVX512F-LABEL: expandload_v8i16_v8i16: 2973; AVX512F: ## %bb.0: 2974; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 2975; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 2976; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 2977; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 2978; AVX512F-NEXT: kmovw %k0, %eax 2979; AVX512F-NEXT: testb $1, %al 2980; AVX512F-NEXT: jne LBB11_1 2981; AVX512F-NEXT: ## %bb.2: ## %else 2982; AVX512F-NEXT: testb $2, %al 2983; AVX512F-NEXT: jne LBB11_3 2984; AVX512F-NEXT: LBB11_4: ## %else2 2985; AVX512F-NEXT: testb $4, %al 2986; AVX512F-NEXT: jne LBB11_5 2987; AVX512F-NEXT: LBB11_6: ## %else6 2988; AVX512F-NEXT: testb $8, %al 2989; AVX512F-NEXT: jne LBB11_7 2990; AVX512F-NEXT: LBB11_8: ## %else10 2991; AVX512F-NEXT: testb $16, %al 2992; AVX512F-NEXT: jne LBB11_9 2993; AVX512F-NEXT: LBB11_10: ## %else14 2994; AVX512F-NEXT: testb $32, %al 2995; AVX512F-NEXT: jne LBB11_11 2996; AVX512F-NEXT: LBB11_12: ## %else18 2997; AVX512F-NEXT: testb $64, %al 2998; AVX512F-NEXT: jne LBB11_13 2999; AVX512F-NEXT: LBB11_14: ## %else22 3000; AVX512F-NEXT: testb $-128, %al 3001; AVX512F-NEXT: jne LBB11_15 3002; AVX512F-NEXT: LBB11_16: ## %else26 3003; AVX512F-NEXT: vzeroupper 3004; AVX512F-NEXT: retq 3005; AVX512F-NEXT: LBB11_1: ## %cond.load 3006; AVX512F-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 3007; AVX512F-NEXT: addq $2, %rdi 3008; AVX512F-NEXT: testb $2, %al 3009; AVX512F-NEXT: je LBB11_4 3010; AVX512F-NEXT: LBB11_3: ## %cond.load1 3011; AVX512F-NEXT: vpinsrw $1, (%rdi), %xmm0, %xmm0 3012; AVX512F-NEXT: addq $2, %rdi 3013; AVX512F-NEXT: testb $4, %al 3014; AVX512F-NEXT: je LBB11_6 3015; AVX512F-NEXT: LBB11_5: ## 
%cond.load5 3016; AVX512F-NEXT: vpinsrw $2, (%rdi), %xmm0, %xmm0 3017; AVX512F-NEXT: addq $2, %rdi 3018; AVX512F-NEXT: testb $8, %al 3019; AVX512F-NEXT: je LBB11_8 3020; AVX512F-NEXT: LBB11_7: ## %cond.load9 3021; AVX512F-NEXT: vpinsrw $3, (%rdi), %xmm0, %xmm0 3022; AVX512F-NEXT: addq $2, %rdi 3023; AVX512F-NEXT: testb $16, %al 3024; AVX512F-NEXT: je LBB11_10 3025; AVX512F-NEXT: LBB11_9: ## %cond.load13 3026; AVX512F-NEXT: vpinsrw $4, (%rdi), %xmm0, %xmm0 3027; AVX512F-NEXT: addq $2, %rdi 3028; AVX512F-NEXT: testb $32, %al 3029; AVX512F-NEXT: je LBB11_12 3030; AVX512F-NEXT: LBB11_11: ## %cond.load17 3031; AVX512F-NEXT: vpinsrw $5, (%rdi), %xmm0, %xmm0 3032; AVX512F-NEXT: addq $2, %rdi 3033; AVX512F-NEXT: testb $64, %al 3034; AVX512F-NEXT: je LBB11_14 3035; AVX512F-NEXT: LBB11_13: ## %cond.load21 3036; AVX512F-NEXT: vpinsrw $6, (%rdi), %xmm0, %xmm0 3037; AVX512F-NEXT: addq $2, %rdi 3038; AVX512F-NEXT: testb $-128, %al 3039; AVX512F-NEXT: je LBB11_16 3040; AVX512F-NEXT: LBB11_15: ## %cond.load25 3041; AVX512F-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 3042; AVX512F-NEXT: vzeroupper 3043; AVX512F-NEXT: retq 3044; 3045; AVX512VLDQ-LABEL: expandload_v8i16_v8i16: 3046; AVX512VLDQ: ## %bb.0: 3047; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 3048; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 3049; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1 3050; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0 3051; AVX512VLDQ-NEXT: kmovw %k0, %eax 3052; AVX512VLDQ-NEXT: testb $1, %al 3053; AVX512VLDQ-NEXT: jne LBB11_1 3054; AVX512VLDQ-NEXT: ## %bb.2: ## %else 3055; AVX512VLDQ-NEXT: testb $2, %al 3056; AVX512VLDQ-NEXT: jne LBB11_3 3057; AVX512VLDQ-NEXT: LBB11_4: ## %else2 3058; AVX512VLDQ-NEXT: testb $4, %al 3059; AVX512VLDQ-NEXT: jne LBB11_5 3060; AVX512VLDQ-NEXT: LBB11_6: ## %else6 3061; AVX512VLDQ-NEXT: testb $8, %al 3062; AVX512VLDQ-NEXT: jne LBB11_7 3063; AVX512VLDQ-NEXT: LBB11_8: ## %else10 3064; AVX512VLDQ-NEXT: testb $16, %al 3065; AVX512VLDQ-NEXT: jne LBB11_9 3066; AVX512VLDQ-NEXT: LBB11_10: ## 
%else14 3067; AVX512VLDQ-NEXT: testb $32, %al 3068; AVX512VLDQ-NEXT: jne LBB11_11 3069; AVX512VLDQ-NEXT: LBB11_12: ## %else18 3070; AVX512VLDQ-NEXT: testb $64, %al 3071; AVX512VLDQ-NEXT: jne LBB11_13 3072; AVX512VLDQ-NEXT: LBB11_14: ## %else22 3073; AVX512VLDQ-NEXT: testb $-128, %al 3074; AVX512VLDQ-NEXT: jne LBB11_15 3075; AVX512VLDQ-NEXT: LBB11_16: ## %else26 3076; AVX512VLDQ-NEXT: vzeroupper 3077; AVX512VLDQ-NEXT: retq 3078; AVX512VLDQ-NEXT: LBB11_1: ## %cond.load 3079; AVX512VLDQ-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 3080; AVX512VLDQ-NEXT: addq $2, %rdi 3081; AVX512VLDQ-NEXT: testb $2, %al 3082; AVX512VLDQ-NEXT: je LBB11_4 3083; AVX512VLDQ-NEXT: LBB11_3: ## %cond.load1 3084; AVX512VLDQ-NEXT: vpinsrw $1, (%rdi), %xmm0, %xmm0 3085; AVX512VLDQ-NEXT: addq $2, %rdi 3086; AVX512VLDQ-NEXT: testb $4, %al 3087; AVX512VLDQ-NEXT: je LBB11_6 3088; AVX512VLDQ-NEXT: LBB11_5: ## %cond.load5 3089; AVX512VLDQ-NEXT: vpinsrw $2, (%rdi), %xmm0, %xmm0 3090; AVX512VLDQ-NEXT: addq $2, %rdi 3091; AVX512VLDQ-NEXT: testb $8, %al 3092; AVX512VLDQ-NEXT: je LBB11_8 3093; AVX512VLDQ-NEXT: LBB11_7: ## %cond.load9 3094; AVX512VLDQ-NEXT: vpinsrw $3, (%rdi), %xmm0, %xmm0 3095; AVX512VLDQ-NEXT: addq $2, %rdi 3096; AVX512VLDQ-NEXT: testb $16, %al 3097; AVX512VLDQ-NEXT: je LBB11_10 3098; AVX512VLDQ-NEXT: LBB11_9: ## %cond.load13 3099; AVX512VLDQ-NEXT: vpinsrw $4, (%rdi), %xmm0, %xmm0 3100; AVX512VLDQ-NEXT: addq $2, %rdi 3101; AVX512VLDQ-NEXT: testb $32, %al 3102; AVX512VLDQ-NEXT: je LBB11_12 3103; AVX512VLDQ-NEXT: LBB11_11: ## %cond.load17 3104; AVX512VLDQ-NEXT: vpinsrw $5, (%rdi), %xmm0, %xmm0 3105; AVX512VLDQ-NEXT: addq $2, %rdi 3106; AVX512VLDQ-NEXT: testb $64, %al 3107; AVX512VLDQ-NEXT: je LBB11_14 3108; AVX512VLDQ-NEXT: LBB11_13: ## %cond.load21 3109; AVX512VLDQ-NEXT: vpinsrw $6, (%rdi), %xmm0, %xmm0 3110; AVX512VLDQ-NEXT: addq $2, %rdi 3111; AVX512VLDQ-NEXT: testb $-128, %al 3112; AVX512VLDQ-NEXT: je LBB11_16 3113; AVX512VLDQ-NEXT: LBB11_15: ## %cond.load25 3114; AVX512VLDQ-NEXT: vpinsrw 
$7, (%rdi), %xmm0, %xmm0 3115; AVX512VLDQ-NEXT: vzeroupper 3116; AVX512VLDQ-NEXT: retq 3117; 3118; AVX512VLBW-LABEL: expandload_v8i16_v8i16: 3119; AVX512VLBW: ## %bb.0: 3120; AVX512VLBW-NEXT: vptestnmw %xmm1, %xmm1, %k0 3121; AVX512VLBW-NEXT: kmovd %k0, %eax 3122; AVX512VLBW-NEXT: testb $1, %al 3123; AVX512VLBW-NEXT: jne LBB11_1 3124; AVX512VLBW-NEXT: ## %bb.2: ## %else 3125; AVX512VLBW-NEXT: testb $2, %al 3126; AVX512VLBW-NEXT: jne LBB11_3 3127; AVX512VLBW-NEXT: LBB11_4: ## %else2 3128; AVX512VLBW-NEXT: testb $4, %al 3129; AVX512VLBW-NEXT: jne LBB11_5 3130; AVX512VLBW-NEXT: LBB11_6: ## %else6 3131; AVX512VLBW-NEXT: testb $8, %al 3132; AVX512VLBW-NEXT: jne LBB11_7 3133; AVX512VLBW-NEXT: LBB11_8: ## %else10 3134; AVX512VLBW-NEXT: testb $16, %al 3135; AVX512VLBW-NEXT: jne LBB11_9 3136; AVX512VLBW-NEXT: LBB11_10: ## %else14 3137; AVX512VLBW-NEXT: testb $32, %al 3138; AVX512VLBW-NEXT: jne LBB11_11 3139; AVX512VLBW-NEXT: LBB11_12: ## %else18 3140; AVX512VLBW-NEXT: testb $64, %al 3141; AVX512VLBW-NEXT: jne LBB11_13 3142; AVX512VLBW-NEXT: LBB11_14: ## %else22 3143; AVX512VLBW-NEXT: testb $-128, %al 3144; AVX512VLBW-NEXT: jne LBB11_15 3145; AVX512VLBW-NEXT: LBB11_16: ## %else26 3146; AVX512VLBW-NEXT: retq 3147; AVX512VLBW-NEXT: LBB11_1: ## %cond.load 3148; AVX512VLBW-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 3149; AVX512VLBW-NEXT: addq $2, %rdi 3150; AVX512VLBW-NEXT: testb $2, %al 3151; AVX512VLBW-NEXT: je LBB11_4 3152; AVX512VLBW-NEXT: LBB11_3: ## %cond.load1 3153; AVX512VLBW-NEXT: vpinsrw $1, (%rdi), %xmm0, %xmm0 3154; AVX512VLBW-NEXT: addq $2, %rdi 3155; AVX512VLBW-NEXT: testb $4, %al 3156; AVX512VLBW-NEXT: je LBB11_6 3157; AVX512VLBW-NEXT: LBB11_5: ## %cond.load5 3158; AVX512VLBW-NEXT: vpinsrw $2, (%rdi), %xmm0, %xmm0 3159; AVX512VLBW-NEXT: addq $2, %rdi 3160; AVX512VLBW-NEXT: testb $8, %al 3161; AVX512VLBW-NEXT: je LBB11_8 3162; AVX512VLBW-NEXT: LBB11_7: ## %cond.load9 3163; AVX512VLBW-NEXT: vpinsrw $3, (%rdi), %xmm0, %xmm0 3164; AVX512VLBW-NEXT: addq $2, %rdi 3165; 
AVX512VLBW-NEXT: testb $16, %al 3166; AVX512VLBW-NEXT: je LBB11_10 3167; AVX512VLBW-NEXT: LBB11_9: ## %cond.load13 3168; AVX512VLBW-NEXT: vpinsrw $4, (%rdi), %xmm0, %xmm0 3169; AVX512VLBW-NEXT: addq $2, %rdi 3170; AVX512VLBW-NEXT: testb $32, %al 3171; AVX512VLBW-NEXT: je LBB11_12 3172; AVX512VLBW-NEXT: LBB11_11: ## %cond.load17 3173; AVX512VLBW-NEXT: vpinsrw $5, (%rdi), %xmm0, %xmm0 3174; AVX512VLBW-NEXT: addq $2, %rdi 3175; AVX512VLBW-NEXT: testb $64, %al 3176; AVX512VLBW-NEXT: je LBB11_14 3177; AVX512VLBW-NEXT: LBB11_13: ## %cond.load21 3178; AVX512VLBW-NEXT: vpinsrw $6, (%rdi), %xmm0, %xmm0 3179; AVX512VLBW-NEXT: addq $2, %rdi 3180; AVX512VLBW-NEXT: testb $-128, %al 3181; AVX512VLBW-NEXT: je LBB11_16 3182; AVX512VLBW-NEXT: LBB11_15: ## %cond.load25 3183; AVX512VLBW-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 3184; AVX512VLBW-NEXT: retq 3185 %mask = icmp eq <8 x i16> %trigger, zeroinitializer 3186 %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %base, <8 x i1> %mask, <8 x i16> %src0) 3187 ret <8 x i16>%res 3188} 3189 3190; 3191; vXi8 3192; 3193 3194define <16 x i8> @expandload_v16i8_v16i8(ptr %base, <16 x i8> %src0, <16 x i8> %trigger) { 3195; SSE2-LABEL: expandload_v16i8_v16i8: 3196; SSE2: ## %bb.0: 3197; SSE2-NEXT: pxor %xmm2, %xmm2 3198; SSE2-NEXT: pcmpeqb %xmm1, %xmm2 3199; SSE2-NEXT: pmovmskb %xmm2, %eax 3200; SSE2-NEXT: testb $1, %al 3201; SSE2-NEXT: jne LBB12_1 3202; SSE2-NEXT: ## %bb.2: ## %else 3203; SSE2-NEXT: testb $2, %al 3204; SSE2-NEXT: jne LBB12_3 3205; SSE2-NEXT: LBB12_4: ## %else2 3206; SSE2-NEXT: testb $4, %al 3207; SSE2-NEXT: jne LBB12_5 3208; SSE2-NEXT: LBB12_6: ## %else6 3209; SSE2-NEXT: testb $8, %al 3210; SSE2-NEXT: jne LBB12_7 3211; SSE2-NEXT: LBB12_8: ## %else10 3212; SSE2-NEXT: testb $16, %al 3213; SSE2-NEXT: jne LBB12_9 3214; SSE2-NEXT: LBB12_10: ## %else14 3215; SSE2-NEXT: testb $32, %al 3216; SSE2-NEXT: jne LBB12_11 3217; SSE2-NEXT: LBB12_12: ## %else18 3218; SSE2-NEXT: testb $64, %al 3219; SSE2-NEXT: jne LBB12_13 3220; SSE2-NEXT: 
LBB12_14: ## %else22 3221; SSE2-NEXT: testb %al, %al 3222; SSE2-NEXT: js LBB12_15 3223; SSE2-NEXT: LBB12_16: ## %else26 3224; SSE2-NEXT: testl $256, %eax ## imm = 0x100 3225; SSE2-NEXT: jne LBB12_17 3226; SSE2-NEXT: LBB12_18: ## %else30 3227; SSE2-NEXT: testl $512, %eax ## imm = 0x200 3228; SSE2-NEXT: jne LBB12_19 3229; SSE2-NEXT: LBB12_20: ## %else34 3230; SSE2-NEXT: testl $1024, %eax ## imm = 0x400 3231; SSE2-NEXT: jne LBB12_21 3232; SSE2-NEXT: LBB12_22: ## %else38 3233; SSE2-NEXT: testl $2048, %eax ## imm = 0x800 3234; SSE2-NEXT: jne LBB12_23 3235; SSE2-NEXT: LBB12_24: ## %else42 3236; SSE2-NEXT: testl $4096, %eax ## imm = 0x1000 3237; SSE2-NEXT: jne LBB12_25 3238; SSE2-NEXT: LBB12_26: ## %else46 3239; SSE2-NEXT: testl $8192, %eax ## imm = 0x2000 3240; SSE2-NEXT: jne LBB12_27 3241; SSE2-NEXT: LBB12_28: ## %else50 3242; SSE2-NEXT: testl $16384, %eax ## imm = 0x4000 3243; SSE2-NEXT: jne LBB12_29 3244; SSE2-NEXT: LBB12_30: ## %else54 3245; SSE2-NEXT: testl $32768, %eax ## imm = 0x8000 3246; SSE2-NEXT: jne LBB12_31 3247; SSE2-NEXT: LBB12_32: ## %else58 3248; SSE2-NEXT: retq 3249; SSE2-NEXT: LBB12_1: ## %cond.load 3250; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3251; SSE2-NEXT: pand %xmm1, %xmm0 3252; SSE2-NEXT: movzbl (%rdi), %ecx 3253; SSE2-NEXT: movd %ecx, %xmm2 3254; SSE2-NEXT: pandn %xmm2, %xmm1 3255; SSE2-NEXT: por %xmm1, %xmm0 3256; SSE2-NEXT: incq %rdi 3257; SSE2-NEXT: testb $2, %al 3258; SSE2-NEXT: je LBB12_4 3259; SSE2-NEXT: LBB12_3: ## %cond.load1 3260; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 3261; SSE2-NEXT: pand %xmm1, %xmm0 3262; SSE2-NEXT: movzbl (%rdi), %ecx 3263; SSE2-NEXT: movd %ecx, %xmm2 3264; SSE2-NEXT: psllw $8, %xmm2 3265; SSE2-NEXT: pandn %xmm2, %xmm1 3266; SSE2-NEXT: por %xmm1, %xmm0 3267; SSE2-NEXT: incq %rdi 3268; SSE2-NEXT: testb $4, %al 3269; SSE2-NEXT: je LBB12_6 3270; SSE2-NEXT: LBB12_5: ## %cond.load5 3271; SSE2-NEXT: 
movdqa {{.*#+}} xmm1 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] 3272; SSE2-NEXT: pand %xmm1, %xmm0 3273; SSE2-NEXT: movzbl (%rdi), %ecx 3274; SSE2-NEXT: movd %ecx, %xmm2 3275; SSE2-NEXT: pslld $16, %xmm2 3276; SSE2-NEXT: pandn %xmm2, %xmm1 3277; SSE2-NEXT: por %xmm1, %xmm0 3278; SSE2-NEXT: incq %rdi 3279; SSE2-NEXT: testb $8, %al 3280; SSE2-NEXT: je LBB12_8 3281; SSE2-NEXT: LBB12_7: ## %cond.load9 3282; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] 3283; SSE2-NEXT: pand %xmm1, %xmm0 3284; SSE2-NEXT: movzbl (%rdi), %ecx 3285; SSE2-NEXT: movd %ecx, %xmm2 3286; SSE2-NEXT: pslld $24, %xmm2 3287; SSE2-NEXT: pandn %xmm2, %xmm1 3288; SSE2-NEXT: por %xmm1, %xmm0 3289; SSE2-NEXT: incq %rdi 3290; SSE2-NEXT: testb $16, %al 3291; SSE2-NEXT: je LBB12_10 3292; SSE2-NEXT: LBB12_9: ## %cond.load13 3293; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] 3294; SSE2-NEXT: pand %xmm1, %xmm0 3295; SSE2-NEXT: movzbl (%rdi), %ecx 3296; SSE2-NEXT: movd %ecx, %xmm2 3297; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] 3298; SSE2-NEXT: pandn %xmm2, %xmm1 3299; SSE2-NEXT: por %xmm1, %xmm0 3300; SSE2-NEXT: incq %rdi 3301; SSE2-NEXT: testb $32, %al 3302; SSE2-NEXT: je LBB12_12 3303; SSE2-NEXT: LBB12_11: ## %cond.load17 3304; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] 3305; SSE2-NEXT: pand %xmm1, %xmm0 3306; SSE2-NEXT: movzbl (%rdi), %ecx 3307; SSE2-NEXT: movd %ecx, %xmm2 3308; SSE2-NEXT: psllq $40, %xmm2 3309; SSE2-NEXT: pandn %xmm2, %xmm1 3310; SSE2-NEXT: por %xmm1, %xmm0 3311; SSE2-NEXT: incq %rdi 3312; SSE2-NEXT: testb $64, %al 3313; SSE2-NEXT: je LBB12_14 3314; SSE2-NEXT: LBB12_13: ## %cond.load21 3315; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] 3316; SSE2-NEXT: pand %xmm1, %xmm0 3317; SSE2-NEXT: movzbl (%rdi), %ecx 3318; SSE2-NEXT: movd %ecx, %xmm2 3319; 
SSE2-NEXT: psllq $48, %xmm2 3320; SSE2-NEXT: pandn %xmm2, %xmm1 3321; SSE2-NEXT: por %xmm1, %xmm0 3322; SSE2-NEXT: incq %rdi 3323; SSE2-NEXT: testb %al, %al 3324; SSE2-NEXT: jns LBB12_16 3325; SSE2-NEXT: LBB12_15: ## %cond.load25 3326; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] 3327; SSE2-NEXT: pand %xmm1, %xmm0 3328; SSE2-NEXT: movzbl (%rdi), %ecx 3329; SSE2-NEXT: movd %ecx, %xmm2 3330; SSE2-NEXT: psllq $56, %xmm2 3331; SSE2-NEXT: pandn %xmm2, %xmm1 3332; SSE2-NEXT: por %xmm1, %xmm0 3333; SSE2-NEXT: incq %rdi 3334; SSE2-NEXT: testl $256, %eax ## imm = 0x100 3335; SSE2-NEXT: je LBB12_18 3336; SSE2-NEXT: LBB12_17: ## %cond.load29 3337; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] 3338; SSE2-NEXT: pand %xmm1, %xmm0 3339; SSE2-NEXT: movzbl (%rdi), %ecx 3340; SSE2-NEXT: movd %ecx, %xmm2 3341; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] 3342; SSE2-NEXT: pandn %xmm2, %xmm1 3343; SSE2-NEXT: por %xmm1, %xmm0 3344; SSE2-NEXT: incq %rdi 3345; SSE2-NEXT: testl $512, %eax ## imm = 0x200 3346; SSE2-NEXT: je LBB12_20 3347; SSE2-NEXT: LBB12_19: ## %cond.load33 3348; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] 3349; SSE2-NEXT: pand %xmm1, %xmm0 3350; SSE2-NEXT: movzbl (%rdi), %ecx 3351; SSE2-NEXT: movd %ecx, %xmm2 3352; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] 3353; SSE2-NEXT: pandn %xmm2, %xmm1 3354; SSE2-NEXT: por %xmm1, %xmm0 3355; SSE2-NEXT: incq %rdi 3356; SSE2-NEXT: testl $1024, %eax ## imm = 0x400 3357; SSE2-NEXT: je LBB12_22 3358; SSE2-NEXT: LBB12_21: ## %cond.load37 3359; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] 3360; SSE2-NEXT: pand %xmm1, %xmm0 3361; SSE2-NEXT: movzbl (%rdi), %ecx 3362; SSE2-NEXT: movd %ecx, %xmm2 3363; SSE2-NEXT: pslldq {{.*#+}} xmm2 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5] 3364; SSE2-NEXT: pandn %xmm2, %xmm1 3365; SSE2-NEXT: por %xmm1, %xmm0 3366; SSE2-NEXT: incq %rdi 3367; SSE2-NEXT: testl $2048, %eax ## imm = 0x800 3368; SSE2-NEXT: je LBB12_24 3369; SSE2-NEXT: LBB12_23: ## %cond.load41 3370; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] 3371; SSE2-NEXT: pand %xmm1, %xmm0 3372; SSE2-NEXT: movzbl (%rdi), %ecx 3373; SSE2-NEXT: movd %ecx, %xmm2 3374; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] 3375; SSE2-NEXT: pandn %xmm2, %xmm1 3376; SSE2-NEXT: por %xmm1, %xmm0 3377; SSE2-NEXT: incq %rdi 3378; SSE2-NEXT: testl $4096, %eax ## imm = 0x1000 3379; SSE2-NEXT: je LBB12_26 3380; SSE2-NEXT: LBB12_25: ## %cond.load45 3381; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] 3382; SSE2-NEXT: pand %xmm1, %xmm0 3383; SSE2-NEXT: movzbl (%rdi), %ecx 3384; SSE2-NEXT: movd %ecx, %xmm2 3385; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 3386; SSE2-NEXT: pandn %xmm2, %xmm1 3387; SSE2-NEXT: por %xmm1, %xmm0 3388; SSE2-NEXT: incq %rdi 3389; SSE2-NEXT: testl $8192, %eax ## imm = 0x2000 3390; SSE2-NEXT: je LBB12_28 3391; SSE2-NEXT: LBB12_27: ## %cond.load49 3392; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] 3393; SSE2-NEXT: pand %xmm1, %xmm0 3394; SSE2-NEXT: movzbl (%rdi), %ecx 3395; SSE2-NEXT: movd %ecx, %xmm2 3396; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] 3397; SSE2-NEXT: pandn %xmm2, %xmm1 3398; SSE2-NEXT: por %xmm1, %xmm0 3399; SSE2-NEXT: incq %rdi 3400; SSE2-NEXT: testl $16384, %eax ## imm = 0x4000 3401; SSE2-NEXT: je LBB12_30 3402; SSE2-NEXT: LBB12_29: ## %cond.load53 3403; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] 3404; SSE2-NEXT: pand %xmm1, %xmm0 
3405; SSE2-NEXT: movzbl (%rdi), %ecx 3406; SSE2-NEXT: movd %ecx, %xmm2 3407; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] 3408; SSE2-NEXT: pandn %xmm2, %xmm1 3409; SSE2-NEXT: por %xmm1, %xmm0 3410; SSE2-NEXT: incq %rdi 3411; SSE2-NEXT: testl $32768, %eax ## imm = 0x8000 3412; SSE2-NEXT: je LBB12_32 3413; SSE2-NEXT: LBB12_31: ## %cond.load57 3414; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3415; SSE2-NEXT: movzbl (%rdi), %eax 3416; SSE2-NEXT: movd %eax, %xmm1 3417; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] 3418; SSE2-NEXT: por %xmm1, %xmm0 3419; SSE2-NEXT: retq 3420; 3421; SSE42-LABEL: expandload_v16i8_v16i8: 3422; SSE42: ## %bb.0: 3423; SSE42-NEXT: pxor %xmm2, %xmm2 3424; SSE42-NEXT: pcmpeqb %xmm1, %xmm2 3425; SSE42-NEXT: pmovmskb %xmm2, %eax 3426; SSE42-NEXT: testb $1, %al 3427; SSE42-NEXT: jne LBB12_1 3428; SSE42-NEXT: ## %bb.2: ## %else 3429; SSE42-NEXT: testb $2, %al 3430; SSE42-NEXT: jne LBB12_3 3431; SSE42-NEXT: LBB12_4: ## %else2 3432; SSE42-NEXT: testb $4, %al 3433; SSE42-NEXT: jne LBB12_5 3434; SSE42-NEXT: LBB12_6: ## %else6 3435; SSE42-NEXT: testb $8, %al 3436; SSE42-NEXT: jne LBB12_7 3437; SSE42-NEXT: LBB12_8: ## %else10 3438; SSE42-NEXT: testb $16, %al 3439; SSE42-NEXT: jne LBB12_9 3440; SSE42-NEXT: LBB12_10: ## %else14 3441; SSE42-NEXT: testb $32, %al 3442; SSE42-NEXT: jne LBB12_11 3443; SSE42-NEXT: LBB12_12: ## %else18 3444; SSE42-NEXT: testb $64, %al 3445; SSE42-NEXT: jne LBB12_13 3446; SSE42-NEXT: LBB12_14: ## %else22 3447; SSE42-NEXT: testb %al, %al 3448; SSE42-NEXT: js LBB12_15 3449; SSE42-NEXT: LBB12_16: ## %else26 3450; SSE42-NEXT: testl $256, %eax ## imm = 0x100 3451; SSE42-NEXT: jne LBB12_17 3452; SSE42-NEXT: LBB12_18: ## %else30 3453; SSE42-NEXT: testl $512, %eax ## imm = 0x200 3454; SSE42-NEXT: jne LBB12_19 3455; SSE42-NEXT: LBB12_20: ## %else34 3456; SSE42-NEXT: testl $1024, %eax ## imm = 
0x400 3457; SSE42-NEXT: jne LBB12_21 3458; SSE42-NEXT: LBB12_22: ## %else38 3459; SSE42-NEXT: testl $2048, %eax ## imm = 0x800 3460; SSE42-NEXT: jne LBB12_23 3461; SSE42-NEXT: LBB12_24: ## %else42 3462; SSE42-NEXT: testl $4096, %eax ## imm = 0x1000 3463; SSE42-NEXT: jne LBB12_25 3464; SSE42-NEXT: LBB12_26: ## %else46 3465; SSE42-NEXT: testl $8192, %eax ## imm = 0x2000 3466; SSE42-NEXT: jne LBB12_27 3467; SSE42-NEXT: LBB12_28: ## %else50 3468; SSE42-NEXT: testl $16384, %eax ## imm = 0x4000 3469; SSE42-NEXT: jne LBB12_29 3470; SSE42-NEXT: LBB12_30: ## %else54 3471; SSE42-NEXT: testl $32768, %eax ## imm = 0x8000 3472; SSE42-NEXT: jne LBB12_31 3473; SSE42-NEXT: LBB12_32: ## %else58 3474; SSE42-NEXT: retq 3475; SSE42-NEXT: LBB12_1: ## %cond.load 3476; SSE42-NEXT: pinsrb $0, (%rdi), %xmm0 3477; SSE42-NEXT: incq %rdi 3478; SSE42-NEXT: testb $2, %al 3479; SSE42-NEXT: je LBB12_4 3480; SSE42-NEXT: LBB12_3: ## %cond.load1 3481; SSE42-NEXT: pinsrb $1, (%rdi), %xmm0 3482; SSE42-NEXT: incq %rdi 3483; SSE42-NEXT: testb $4, %al 3484; SSE42-NEXT: je LBB12_6 3485; SSE42-NEXT: LBB12_5: ## %cond.load5 3486; SSE42-NEXT: pinsrb $2, (%rdi), %xmm0 3487; SSE42-NEXT: incq %rdi 3488; SSE42-NEXT: testb $8, %al 3489; SSE42-NEXT: je LBB12_8 3490; SSE42-NEXT: LBB12_7: ## %cond.load9 3491; SSE42-NEXT: pinsrb $3, (%rdi), %xmm0 3492; SSE42-NEXT: incq %rdi 3493; SSE42-NEXT: testb $16, %al 3494; SSE42-NEXT: je LBB12_10 3495; SSE42-NEXT: LBB12_9: ## %cond.load13 3496; SSE42-NEXT: pinsrb $4, (%rdi), %xmm0 3497; SSE42-NEXT: incq %rdi 3498; SSE42-NEXT: testb $32, %al 3499; SSE42-NEXT: je LBB12_12 3500; SSE42-NEXT: LBB12_11: ## %cond.load17 3501; SSE42-NEXT: pinsrb $5, (%rdi), %xmm0 3502; SSE42-NEXT: incq %rdi 3503; SSE42-NEXT: testb $64, %al 3504; SSE42-NEXT: je LBB12_14 3505; SSE42-NEXT: LBB12_13: ## %cond.load21 3506; SSE42-NEXT: pinsrb $6, (%rdi), %xmm0 3507; SSE42-NEXT: incq %rdi 3508; SSE42-NEXT: testb %al, %al 3509; SSE42-NEXT: jns LBB12_16 3510; SSE42-NEXT: LBB12_15: ## %cond.load25 3511; 
SSE42-NEXT: pinsrb $7, (%rdi), %xmm0 3512; SSE42-NEXT: incq %rdi 3513; SSE42-NEXT: testl $256, %eax ## imm = 0x100 3514; SSE42-NEXT: je LBB12_18 3515; SSE42-NEXT: LBB12_17: ## %cond.load29 3516; SSE42-NEXT: pinsrb $8, (%rdi), %xmm0 3517; SSE42-NEXT: incq %rdi 3518; SSE42-NEXT: testl $512, %eax ## imm = 0x200 3519; SSE42-NEXT: je LBB12_20 3520; SSE42-NEXT: LBB12_19: ## %cond.load33 3521; SSE42-NEXT: pinsrb $9, (%rdi), %xmm0 3522; SSE42-NEXT: incq %rdi 3523; SSE42-NEXT: testl $1024, %eax ## imm = 0x400 3524; SSE42-NEXT: je LBB12_22 3525; SSE42-NEXT: LBB12_21: ## %cond.load37 3526; SSE42-NEXT: pinsrb $10, (%rdi), %xmm0 3527; SSE42-NEXT: incq %rdi 3528; SSE42-NEXT: testl $2048, %eax ## imm = 0x800 3529; SSE42-NEXT: je LBB12_24 3530; SSE42-NEXT: LBB12_23: ## %cond.load41 3531; SSE42-NEXT: pinsrb $11, (%rdi), %xmm0 3532; SSE42-NEXT: incq %rdi 3533; SSE42-NEXT: testl $4096, %eax ## imm = 0x1000 3534; SSE42-NEXT: je LBB12_26 3535; SSE42-NEXT: LBB12_25: ## %cond.load45 3536; SSE42-NEXT: pinsrb $12, (%rdi), %xmm0 3537; SSE42-NEXT: incq %rdi 3538; SSE42-NEXT: testl $8192, %eax ## imm = 0x2000 3539; SSE42-NEXT: je LBB12_28 3540; SSE42-NEXT: LBB12_27: ## %cond.load49 3541; SSE42-NEXT: pinsrb $13, (%rdi), %xmm0 3542; SSE42-NEXT: incq %rdi 3543; SSE42-NEXT: testl $16384, %eax ## imm = 0x4000 3544; SSE42-NEXT: je LBB12_30 3545; SSE42-NEXT: LBB12_29: ## %cond.load53 3546; SSE42-NEXT: pinsrb $14, (%rdi), %xmm0 3547; SSE42-NEXT: incq %rdi 3548; SSE42-NEXT: testl $32768, %eax ## imm = 0x8000 3549; SSE42-NEXT: je LBB12_32 3550; SSE42-NEXT: LBB12_31: ## %cond.load57 3551; SSE42-NEXT: pinsrb $15, (%rdi), %xmm0 3552; SSE42-NEXT: retq 3553; 3554; AVX1OR2-LABEL: expandload_v16i8_v16i8: 3555; AVX1OR2: ## %bb.0: 3556; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 3557; AVX1OR2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 3558; AVX1OR2-NEXT: vpmovmskb %xmm1, %eax 3559; AVX1OR2-NEXT: testb $1, %al 3560; AVX1OR2-NEXT: jne LBB12_1 3561; AVX1OR2-NEXT: ## %bb.2: ## %else 3562; AVX1OR2-NEXT: testb $2, %al 3563; 
AVX1OR2-NEXT: jne LBB12_3 3564; AVX1OR2-NEXT: LBB12_4: ## %else2 3565; AVX1OR2-NEXT: testb $4, %al 3566; AVX1OR2-NEXT: jne LBB12_5 3567; AVX1OR2-NEXT: LBB12_6: ## %else6 3568; AVX1OR2-NEXT: testb $8, %al 3569; AVX1OR2-NEXT: jne LBB12_7 3570; AVX1OR2-NEXT: LBB12_8: ## %else10 3571; AVX1OR2-NEXT: testb $16, %al 3572; AVX1OR2-NEXT: jne LBB12_9 3573; AVX1OR2-NEXT: LBB12_10: ## %else14 3574; AVX1OR2-NEXT: testb $32, %al 3575; AVX1OR2-NEXT: jne LBB12_11 3576; AVX1OR2-NEXT: LBB12_12: ## %else18 3577; AVX1OR2-NEXT: testb $64, %al 3578; AVX1OR2-NEXT: jne LBB12_13 3579; AVX1OR2-NEXT: LBB12_14: ## %else22 3580; AVX1OR2-NEXT: testb %al, %al 3581; AVX1OR2-NEXT: js LBB12_15 3582; AVX1OR2-NEXT: LBB12_16: ## %else26 3583; AVX1OR2-NEXT: testl $256, %eax ## imm = 0x100 3584; AVX1OR2-NEXT: jne LBB12_17 3585; AVX1OR2-NEXT: LBB12_18: ## %else30 3586; AVX1OR2-NEXT: testl $512, %eax ## imm = 0x200 3587; AVX1OR2-NEXT: jne LBB12_19 3588; AVX1OR2-NEXT: LBB12_20: ## %else34 3589; AVX1OR2-NEXT: testl $1024, %eax ## imm = 0x400 3590; AVX1OR2-NEXT: jne LBB12_21 3591; AVX1OR2-NEXT: LBB12_22: ## %else38 3592; AVX1OR2-NEXT: testl $2048, %eax ## imm = 0x800 3593; AVX1OR2-NEXT: jne LBB12_23 3594; AVX1OR2-NEXT: LBB12_24: ## %else42 3595; AVX1OR2-NEXT: testl $4096, %eax ## imm = 0x1000 3596; AVX1OR2-NEXT: jne LBB12_25 3597; AVX1OR2-NEXT: LBB12_26: ## %else46 3598; AVX1OR2-NEXT: testl $8192, %eax ## imm = 0x2000 3599; AVX1OR2-NEXT: jne LBB12_27 3600; AVX1OR2-NEXT: LBB12_28: ## %else50 3601; AVX1OR2-NEXT: testl $16384, %eax ## imm = 0x4000 3602; AVX1OR2-NEXT: jne LBB12_29 3603; AVX1OR2-NEXT: LBB12_30: ## %else54 3604; AVX1OR2-NEXT: testl $32768, %eax ## imm = 0x8000 3605; AVX1OR2-NEXT: jne LBB12_31 3606; AVX1OR2-NEXT: LBB12_32: ## %else58 3607; AVX1OR2-NEXT: retq 3608; AVX1OR2-NEXT: LBB12_1: ## %cond.load 3609; AVX1OR2-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0 3610; AVX1OR2-NEXT: incq %rdi 3611; AVX1OR2-NEXT: testb $2, %al 3612; AVX1OR2-NEXT: je LBB12_4 3613; AVX1OR2-NEXT: LBB12_3: ## %cond.load1 3614; 
AVX1OR2-NEXT: vpinsrb $1, (%rdi), %xmm0, %xmm0 3615; AVX1OR2-NEXT: incq %rdi 3616; AVX1OR2-NEXT: testb $4, %al 3617; AVX1OR2-NEXT: je LBB12_6 3618; AVX1OR2-NEXT: LBB12_5: ## %cond.load5 3619; AVX1OR2-NEXT: vpinsrb $2, (%rdi), %xmm0, %xmm0 3620; AVX1OR2-NEXT: incq %rdi 3621; AVX1OR2-NEXT: testb $8, %al 3622; AVX1OR2-NEXT: je LBB12_8 3623; AVX1OR2-NEXT: LBB12_7: ## %cond.load9 3624; AVX1OR2-NEXT: vpinsrb $3, (%rdi), %xmm0, %xmm0 3625; AVX1OR2-NEXT: incq %rdi 3626; AVX1OR2-NEXT: testb $16, %al 3627; AVX1OR2-NEXT: je LBB12_10 3628; AVX1OR2-NEXT: LBB12_9: ## %cond.load13 3629; AVX1OR2-NEXT: vpinsrb $4, (%rdi), %xmm0, %xmm0 3630; AVX1OR2-NEXT: incq %rdi 3631; AVX1OR2-NEXT: testb $32, %al 3632; AVX1OR2-NEXT: je LBB12_12 3633; AVX1OR2-NEXT: LBB12_11: ## %cond.load17 3634; AVX1OR2-NEXT: vpinsrb $5, (%rdi), %xmm0, %xmm0 3635; AVX1OR2-NEXT: incq %rdi 3636; AVX1OR2-NEXT: testb $64, %al 3637; AVX1OR2-NEXT: je LBB12_14 3638; AVX1OR2-NEXT: LBB12_13: ## %cond.load21 3639; AVX1OR2-NEXT: vpinsrb $6, (%rdi), %xmm0, %xmm0 3640; AVX1OR2-NEXT: incq %rdi 3641; AVX1OR2-NEXT: testb %al, %al 3642; AVX1OR2-NEXT: jns LBB12_16 3643; AVX1OR2-NEXT: LBB12_15: ## %cond.load25 3644; AVX1OR2-NEXT: vpinsrb $7, (%rdi), %xmm0, %xmm0 3645; AVX1OR2-NEXT: incq %rdi 3646; AVX1OR2-NEXT: testl $256, %eax ## imm = 0x100 3647; AVX1OR2-NEXT: je LBB12_18 3648; AVX1OR2-NEXT: LBB12_17: ## %cond.load29 3649; AVX1OR2-NEXT: vpinsrb $8, (%rdi), %xmm0, %xmm0 3650; AVX1OR2-NEXT: incq %rdi 3651; AVX1OR2-NEXT: testl $512, %eax ## imm = 0x200 3652; AVX1OR2-NEXT: je LBB12_20 3653; AVX1OR2-NEXT: LBB12_19: ## %cond.load33 3654; AVX1OR2-NEXT: vpinsrb $9, (%rdi), %xmm0, %xmm0 3655; AVX1OR2-NEXT: incq %rdi 3656; AVX1OR2-NEXT: testl $1024, %eax ## imm = 0x400 3657; AVX1OR2-NEXT: je LBB12_22 3658; AVX1OR2-NEXT: LBB12_21: ## %cond.load37 3659; AVX1OR2-NEXT: vpinsrb $10, (%rdi), %xmm0, %xmm0 3660; AVX1OR2-NEXT: incq %rdi 3661; AVX1OR2-NEXT: testl $2048, %eax ## imm = 0x800 3662; AVX1OR2-NEXT: je LBB12_24 3663; AVX1OR2-NEXT: 
LBB12_23: ## %cond.load41 3664; AVX1OR2-NEXT: vpinsrb $11, (%rdi), %xmm0, %xmm0 3665; AVX1OR2-NEXT: incq %rdi 3666; AVX1OR2-NEXT: testl $4096, %eax ## imm = 0x1000 3667; AVX1OR2-NEXT: je LBB12_26 3668; AVX1OR2-NEXT: LBB12_25: ## %cond.load45 3669; AVX1OR2-NEXT: vpinsrb $12, (%rdi), %xmm0, %xmm0 3670; AVX1OR2-NEXT: incq %rdi 3671; AVX1OR2-NEXT: testl $8192, %eax ## imm = 0x2000 3672; AVX1OR2-NEXT: je LBB12_28 3673; AVX1OR2-NEXT: LBB12_27: ## %cond.load49 3674; AVX1OR2-NEXT: vpinsrb $13, (%rdi), %xmm0, %xmm0 3675; AVX1OR2-NEXT: incq %rdi 3676; AVX1OR2-NEXT: testl $16384, %eax ## imm = 0x4000 3677; AVX1OR2-NEXT: je LBB12_30 3678; AVX1OR2-NEXT: LBB12_29: ## %cond.load53 3679; AVX1OR2-NEXT: vpinsrb $14, (%rdi), %xmm0, %xmm0 3680; AVX1OR2-NEXT: incq %rdi 3681; AVX1OR2-NEXT: testl $32768, %eax ## imm = 0x8000 3682; AVX1OR2-NEXT: je LBB12_32 3683; AVX1OR2-NEXT: LBB12_31: ## %cond.load57 3684; AVX1OR2-NEXT: vpinsrb $15, (%rdi), %xmm0, %xmm0 3685; AVX1OR2-NEXT: retq 3686; 3687; AVX512F-LABEL: expandload_v16i8_v16i8: 3688; AVX512F: ## %bb.0: 3689; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 3690; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 3691; AVX512F-NEXT: vpmovmskb %xmm1, %eax 3692; AVX512F-NEXT: testb $1, %al 3693; AVX512F-NEXT: jne LBB12_1 3694; AVX512F-NEXT: ## %bb.2: ## %else 3695; AVX512F-NEXT: testb $2, %al 3696; AVX512F-NEXT: jne LBB12_3 3697; AVX512F-NEXT: LBB12_4: ## %else2 3698; AVX512F-NEXT: testb $4, %al 3699; AVX512F-NEXT: jne LBB12_5 3700; AVX512F-NEXT: LBB12_6: ## %else6 3701; AVX512F-NEXT: testb $8, %al 3702; AVX512F-NEXT: jne LBB12_7 3703; AVX512F-NEXT: LBB12_8: ## %else10 3704; AVX512F-NEXT: testb $16, %al 3705; AVX512F-NEXT: jne LBB12_9 3706; AVX512F-NEXT: LBB12_10: ## %else14 3707; AVX512F-NEXT: testb $32, %al 3708; AVX512F-NEXT: jne LBB12_11 3709; AVX512F-NEXT: LBB12_12: ## %else18 3710; AVX512F-NEXT: testb $64, %al 3711; AVX512F-NEXT: jne LBB12_13 3712; AVX512F-NEXT: LBB12_14: ## %else22 3713; AVX512F-NEXT: testb %al, %al 3714; AVX512F-NEXT: js LBB12_15 
3715; AVX512F-NEXT: LBB12_16: ## %else26 3716; AVX512F-NEXT: testl $256, %eax ## imm = 0x100 3717; AVX512F-NEXT: jne LBB12_17 3718; AVX512F-NEXT: LBB12_18: ## %else30 3719; AVX512F-NEXT: testl $512, %eax ## imm = 0x200 3720; AVX512F-NEXT: jne LBB12_19 3721; AVX512F-NEXT: LBB12_20: ## %else34 3722; AVX512F-NEXT: testl $1024, %eax ## imm = 0x400 3723; AVX512F-NEXT: jne LBB12_21 3724; AVX512F-NEXT: LBB12_22: ## %else38 3725; AVX512F-NEXT: testl $2048, %eax ## imm = 0x800 3726; AVX512F-NEXT: jne LBB12_23 3727; AVX512F-NEXT: LBB12_24: ## %else42 3728; AVX512F-NEXT: testl $4096, %eax ## imm = 0x1000 3729; AVX512F-NEXT: jne LBB12_25 3730; AVX512F-NEXT: LBB12_26: ## %else46 3731; AVX512F-NEXT: testl $8192, %eax ## imm = 0x2000 3732; AVX512F-NEXT: jne LBB12_27 3733; AVX512F-NEXT: LBB12_28: ## %else50 3734; AVX512F-NEXT: testl $16384, %eax ## imm = 0x4000 3735; AVX512F-NEXT: jne LBB12_29 3736; AVX512F-NEXT: LBB12_30: ## %else54 3737; AVX512F-NEXT: testl $32768, %eax ## imm = 0x8000 3738; AVX512F-NEXT: jne LBB12_31 3739; AVX512F-NEXT: LBB12_32: ## %else58 3740; AVX512F-NEXT: retq 3741; AVX512F-NEXT: LBB12_1: ## %cond.load 3742; AVX512F-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0 3743; AVX512F-NEXT: incq %rdi 3744; AVX512F-NEXT: testb $2, %al 3745; AVX512F-NEXT: je LBB12_4 3746; AVX512F-NEXT: LBB12_3: ## %cond.load1 3747; AVX512F-NEXT: vpinsrb $1, (%rdi), %xmm0, %xmm0 3748; AVX512F-NEXT: incq %rdi 3749; AVX512F-NEXT: testb $4, %al 3750; AVX512F-NEXT: je LBB12_6 3751; AVX512F-NEXT: LBB12_5: ## %cond.load5 3752; AVX512F-NEXT: vpinsrb $2, (%rdi), %xmm0, %xmm0 3753; AVX512F-NEXT: incq %rdi 3754; AVX512F-NEXT: testb $8, %al 3755; AVX512F-NEXT: je LBB12_8 3756; AVX512F-NEXT: LBB12_7: ## %cond.load9 3757; AVX512F-NEXT: vpinsrb $3, (%rdi), %xmm0, %xmm0 3758; AVX512F-NEXT: incq %rdi 3759; AVX512F-NEXT: testb $16, %al 3760; AVX512F-NEXT: je LBB12_10 3761; AVX512F-NEXT: LBB12_9: ## %cond.load13 3762; AVX512F-NEXT: vpinsrb $4, (%rdi), %xmm0, %xmm0 3763; AVX512F-NEXT: incq %rdi 3764; 
AVX512F-NEXT: testb $32, %al 3765; AVX512F-NEXT: je LBB12_12 3766; AVX512F-NEXT: LBB12_11: ## %cond.load17 3767; AVX512F-NEXT: vpinsrb $5, (%rdi), %xmm0, %xmm0 3768; AVX512F-NEXT: incq %rdi 3769; AVX512F-NEXT: testb $64, %al 3770; AVX512F-NEXT: je LBB12_14 3771; AVX512F-NEXT: LBB12_13: ## %cond.load21 3772; AVX512F-NEXT: vpinsrb $6, (%rdi), %xmm0, %xmm0 3773; AVX512F-NEXT: incq %rdi 3774; AVX512F-NEXT: testb %al, %al 3775; AVX512F-NEXT: jns LBB12_16 3776; AVX512F-NEXT: LBB12_15: ## %cond.load25 3777; AVX512F-NEXT: vpinsrb $7, (%rdi), %xmm0, %xmm0 3778; AVX512F-NEXT: incq %rdi 3779; AVX512F-NEXT: testl $256, %eax ## imm = 0x100 3780; AVX512F-NEXT: je LBB12_18 3781; AVX512F-NEXT: LBB12_17: ## %cond.load29 3782; AVX512F-NEXT: vpinsrb $8, (%rdi), %xmm0, %xmm0 3783; AVX512F-NEXT: incq %rdi 3784; AVX512F-NEXT: testl $512, %eax ## imm = 0x200 3785; AVX512F-NEXT: je LBB12_20 3786; AVX512F-NEXT: LBB12_19: ## %cond.load33 3787; AVX512F-NEXT: vpinsrb $9, (%rdi), %xmm0, %xmm0 3788; AVX512F-NEXT: incq %rdi 3789; AVX512F-NEXT: testl $1024, %eax ## imm = 0x400 3790; AVX512F-NEXT: je LBB12_22 3791; AVX512F-NEXT: LBB12_21: ## %cond.load37 3792; AVX512F-NEXT: vpinsrb $10, (%rdi), %xmm0, %xmm0 3793; AVX512F-NEXT: incq %rdi 3794; AVX512F-NEXT: testl $2048, %eax ## imm = 0x800 3795; AVX512F-NEXT: je LBB12_24 3796; AVX512F-NEXT: LBB12_23: ## %cond.load41 3797; AVX512F-NEXT: vpinsrb $11, (%rdi), %xmm0, %xmm0 3798; AVX512F-NEXT: incq %rdi 3799; AVX512F-NEXT: testl $4096, %eax ## imm = 0x1000 3800; AVX512F-NEXT: je LBB12_26 3801; AVX512F-NEXT: LBB12_25: ## %cond.load45 3802; AVX512F-NEXT: vpinsrb $12, (%rdi), %xmm0, %xmm0 3803; AVX512F-NEXT: incq %rdi 3804; AVX512F-NEXT: testl $8192, %eax ## imm = 0x2000 3805; AVX512F-NEXT: je LBB12_28 3806; AVX512F-NEXT: LBB12_27: ## %cond.load49 3807; AVX512F-NEXT: vpinsrb $13, (%rdi), %xmm0, %xmm0 3808; AVX512F-NEXT: incq %rdi 3809; AVX512F-NEXT: testl $16384, %eax ## imm = 0x4000 3810; AVX512F-NEXT: je LBB12_30 3811; AVX512F-NEXT: LBB12_29: ## 
%cond.load53 3812; AVX512F-NEXT: vpinsrb $14, (%rdi), %xmm0, %xmm0 3813; AVX512F-NEXT: incq %rdi 3814; AVX512F-NEXT: testl $32768, %eax ## imm = 0x8000 3815; AVX512F-NEXT: je LBB12_32 3816; AVX512F-NEXT: LBB12_31: ## %cond.load57 3817; AVX512F-NEXT: vpinsrb $15, (%rdi), %xmm0, %xmm0 3818; AVX512F-NEXT: retq 3819; 3820; AVX512VLDQ-LABEL: expandload_v16i8_v16i8: 3821; AVX512VLDQ: ## %bb.0: 3822; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 3823; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 3824; AVX512VLDQ-NEXT: vpmovmskb %xmm1, %eax 3825; AVX512VLDQ-NEXT: testb $1, %al 3826; AVX512VLDQ-NEXT: jne LBB12_1 3827; AVX512VLDQ-NEXT: ## %bb.2: ## %else 3828; AVX512VLDQ-NEXT: testb $2, %al 3829; AVX512VLDQ-NEXT: jne LBB12_3 3830; AVX512VLDQ-NEXT: LBB12_4: ## %else2 3831; AVX512VLDQ-NEXT: testb $4, %al 3832; AVX512VLDQ-NEXT: jne LBB12_5 3833; AVX512VLDQ-NEXT: LBB12_6: ## %else6 3834; AVX512VLDQ-NEXT: testb $8, %al 3835; AVX512VLDQ-NEXT: jne LBB12_7 3836; AVX512VLDQ-NEXT: LBB12_8: ## %else10 3837; AVX512VLDQ-NEXT: testb $16, %al 3838; AVX512VLDQ-NEXT: jne LBB12_9 3839; AVX512VLDQ-NEXT: LBB12_10: ## %else14 3840; AVX512VLDQ-NEXT: testb $32, %al 3841; AVX512VLDQ-NEXT: jne LBB12_11 3842; AVX512VLDQ-NEXT: LBB12_12: ## %else18 3843; AVX512VLDQ-NEXT: testb $64, %al 3844; AVX512VLDQ-NEXT: jne LBB12_13 3845; AVX512VLDQ-NEXT: LBB12_14: ## %else22 3846; AVX512VLDQ-NEXT: testb %al, %al 3847; AVX512VLDQ-NEXT: js LBB12_15 3848; AVX512VLDQ-NEXT: LBB12_16: ## %else26 3849; AVX512VLDQ-NEXT: testl $256, %eax ## imm = 0x100 3850; AVX512VLDQ-NEXT: jne LBB12_17 3851; AVX512VLDQ-NEXT: LBB12_18: ## %else30 3852; AVX512VLDQ-NEXT: testl $512, %eax ## imm = 0x200 3853; AVX512VLDQ-NEXT: jne LBB12_19 3854; AVX512VLDQ-NEXT: LBB12_20: ## %else34 3855; AVX512VLDQ-NEXT: testl $1024, %eax ## imm = 0x400 3856; AVX512VLDQ-NEXT: jne LBB12_21 3857; AVX512VLDQ-NEXT: LBB12_22: ## %else38 3858; AVX512VLDQ-NEXT: testl $2048, %eax ## imm = 0x800 3859; AVX512VLDQ-NEXT: jne LBB12_23 3860; AVX512VLDQ-NEXT: LBB12_24: ## 
%else42 3861; AVX512VLDQ-NEXT: testl $4096, %eax ## imm = 0x1000 3862; AVX512VLDQ-NEXT: jne LBB12_25 3863; AVX512VLDQ-NEXT: LBB12_26: ## %else46 3864; AVX512VLDQ-NEXT: testl $8192, %eax ## imm = 0x2000 3865; AVX512VLDQ-NEXT: jne LBB12_27 3866; AVX512VLDQ-NEXT: LBB12_28: ## %else50 3867; AVX512VLDQ-NEXT: testl $16384, %eax ## imm = 0x4000 3868; AVX512VLDQ-NEXT: jne LBB12_29 3869; AVX512VLDQ-NEXT: LBB12_30: ## %else54 3870; AVX512VLDQ-NEXT: testl $32768, %eax ## imm = 0x8000 3871; AVX512VLDQ-NEXT: jne LBB12_31 3872; AVX512VLDQ-NEXT: LBB12_32: ## %else58 3873; AVX512VLDQ-NEXT: retq 3874; AVX512VLDQ-NEXT: LBB12_1: ## %cond.load 3875; AVX512VLDQ-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0 3876; AVX512VLDQ-NEXT: incq %rdi 3877; AVX512VLDQ-NEXT: testb $2, %al 3878; AVX512VLDQ-NEXT: je LBB12_4 3879; AVX512VLDQ-NEXT: LBB12_3: ## %cond.load1 3880; AVX512VLDQ-NEXT: vpinsrb $1, (%rdi), %xmm0, %xmm0 3881; AVX512VLDQ-NEXT: incq %rdi 3882; AVX512VLDQ-NEXT: testb $4, %al 3883; AVX512VLDQ-NEXT: je LBB12_6 3884; AVX512VLDQ-NEXT: LBB12_5: ## %cond.load5 3885; AVX512VLDQ-NEXT: vpinsrb $2, (%rdi), %xmm0, %xmm0 3886; AVX512VLDQ-NEXT: incq %rdi 3887; AVX512VLDQ-NEXT: testb $8, %al 3888; AVX512VLDQ-NEXT: je LBB12_8 3889; AVX512VLDQ-NEXT: LBB12_7: ## %cond.load9 3890; AVX512VLDQ-NEXT: vpinsrb $3, (%rdi), %xmm0, %xmm0 3891; AVX512VLDQ-NEXT: incq %rdi 3892; AVX512VLDQ-NEXT: testb $16, %al 3893; AVX512VLDQ-NEXT: je LBB12_10 3894; AVX512VLDQ-NEXT: LBB12_9: ## %cond.load13 3895; AVX512VLDQ-NEXT: vpinsrb $4, (%rdi), %xmm0, %xmm0 3896; AVX512VLDQ-NEXT: incq %rdi 3897; AVX512VLDQ-NEXT: testb $32, %al 3898; AVX512VLDQ-NEXT: je LBB12_12 3899; AVX512VLDQ-NEXT: LBB12_11: ## %cond.load17 3900; AVX512VLDQ-NEXT: vpinsrb $5, (%rdi), %xmm0, %xmm0 3901; AVX512VLDQ-NEXT: incq %rdi 3902; AVX512VLDQ-NEXT: testb $64, %al 3903; AVX512VLDQ-NEXT: je LBB12_14 3904; AVX512VLDQ-NEXT: LBB12_13: ## %cond.load21 3905; AVX512VLDQ-NEXT: vpinsrb $6, (%rdi), %xmm0, %xmm0 3906; AVX512VLDQ-NEXT: incq %rdi 3907; AVX512VLDQ-NEXT: 
testb %al, %al 3908; AVX512VLDQ-NEXT: jns LBB12_16 3909; AVX512VLDQ-NEXT: LBB12_15: ## %cond.load25 3910; AVX512VLDQ-NEXT: vpinsrb $7, (%rdi), %xmm0, %xmm0 3911; AVX512VLDQ-NEXT: incq %rdi 3912; AVX512VLDQ-NEXT: testl $256, %eax ## imm = 0x100 3913; AVX512VLDQ-NEXT: je LBB12_18 3914; AVX512VLDQ-NEXT: LBB12_17: ## %cond.load29 3915; AVX512VLDQ-NEXT: vpinsrb $8, (%rdi), %xmm0, %xmm0 3916; AVX512VLDQ-NEXT: incq %rdi 3917; AVX512VLDQ-NEXT: testl $512, %eax ## imm = 0x200 3918; AVX512VLDQ-NEXT: je LBB12_20 3919; AVX512VLDQ-NEXT: LBB12_19: ## %cond.load33 3920; AVX512VLDQ-NEXT: vpinsrb $9, (%rdi), %xmm0, %xmm0 3921; AVX512VLDQ-NEXT: incq %rdi 3922; AVX512VLDQ-NEXT: testl $1024, %eax ## imm = 0x400 3923; AVX512VLDQ-NEXT: je LBB12_22 3924; AVX512VLDQ-NEXT: LBB12_21: ## %cond.load37 3925; AVX512VLDQ-NEXT: vpinsrb $10, (%rdi), %xmm0, %xmm0 3926; AVX512VLDQ-NEXT: incq %rdi 3927; AVX512VLDQ-NEXT: testl $2048, %eax ## imm = 0x800 3928; AVX512VLDQ-NEXT: je LBB12_24 3929; AVX512VLDQ-NEXT: LBB12_23: ## %cond.load41 3930; AVX512VLDQ-NEXT: vpinsrb $11, (%rdi), %xmm0, %xmm0 3931; AVX512VLDQ-NEXT: incq %rdi 3932; AVX512VLDQ-NEXT: testl $4096, %eax ## imm = 0x1000 3933; AVX512VLDQ-NEXT: je LBB12_26 3934; AVX512VLDQ-NEXT: LBB12_25: ## %cond.load45 3935; AVX512VLDQ-NEXT: vpinsrb $12, (%rdi), %xmm0, %xmm0 3936; AVX512VLDQ-NEXT: incq %rdi 3937; AVX512VLDQ-NEXT: testl $8192, %eax ## imm = 0x2000 3938; AVX512VLDQ-NEXT: je LBB12_28 3939; AVX512VLDQ-NEXT: LBB12_27: ## %cond.load49 3940; AVX512VLDQ-NEXT: vpinsrb $13, (%rdi), %xmm0, %xmm0 3941; AVX512VLDQ-NEXT: incq %rdi 3942; AVX512VLDQ-NEXT: testl $16384, %eax ## imm = 0x4000 3943; AVX512VLDQ-NEXT: je LBB12_30 3944; AVX512VLDQ-NEXT: LBB12_29: ## %cond.load53 3945; AVX512VLDQ-NEXT: vpinsrb $14, (%rdi), %xmm0, %xmm0 3946; AVX512VLDQ-NEXT: incq %rdi 3947; AVX512VLDQ-NEXT: testl $32768, %eax ## imm = 0x8000 3948; AVX512VLDQ-NEXT: je LBB12_32 3949; AVX512VLDQ-NEXT: LBB12_31: ## %cond.load57 3950; AVX512VLDQ-NEXT: vpinsrb $15, (%rdi), %xmm0, %xmm0 
3951; AVX512VLDQ-NEXT: retq 3952; 3953; AVX512VLBW-LABEL: expandload_v16i8_v16i8: 3954; AVX512VLBW: ## %bb.0: 3955; AVX512VLBW-NEXT: vptestnmb %xmm1, %xmm1, %k0 3956; AVX512VLBW-NEXT: kmovd %k0, %eax 3957; AVX512VLBW-NEXT: testb $1, %al 3958; AVX512VLBW-NEXT: jne LBB12_1 3959; AVX512VLBW-NEXT: ## %bb.2: ## %else 3960; AVX512VLBW-NEXT: testb $2, %al 3961; AVX512VLBW-NEXT: jne LBB12_3 3962; AVX512VLBW-NEXT: LBB12_4: ## %else2 3963; AVX512VLBW-NEXT: testb $4, %al 3964; AVX512VLBW-NEXT: jne LBB12_5 3965; AVX512VLBW-NEXT: LBB12_6: ## %else6 3966; AVX512VLBW-NEXT: testb $8, %al 3967; AVX512VLBW-NEXT: jne LBB12_7 3968; AVX512VLBW-NEXT: LBB12_8: ## %else10 3969; AVX512VLBW-NEXT: testb $16, %al 3970; AVX512VLBW-NEXT: jne LBB12_9 3971; AVX512VLBW-NEXT: LBB12_10: ## %else14 3972; AVX512VLBW-NEXT: testb $32, %al 3973; AVX512VLBW-NEXT: jne LBB12_11 3974; AVX512VLBW-NEXT: LBB12_12: ## %else18 3975; AVX512VLBW-NEXT: testb $64, %al 3976; AVX512VLBW-NEXT: jne LBB12_13 3977; AVX512VLBW-NEXT: LBB12_14: ## %else22 3978; AVX512VLBW-NEXT: testb %al, %al 3979; AVX512VLBW-NEXT: js LBB12_15 3980; AVX512VLBW-NEXT: LBB12_16: ## %else26 3981; AVX512VLBW-NEXT: testl $256, %eax ## imm = 0x100 3982; AVX512VLBW-NEXT: jne LBB12_17 3983; AVX512VLBW-NEXT: LBB12_18: ## %else30 3984; AVX512VLBW-NEXT: testl $512, %eax ## imm = 0x200 3985; AVX512VLBW-NEXT: jne LBB12_19 3986; AVX512VLBW-NEXT: LBB12_20: ## %else34 3987; AVX512VLBW-NEXT: testl $1024, %eax ## imm = 0x400 3988; AVX512VLBW-NEXT: jne LBB12_21 3989; AVX512VLBW-NEXT: LBB12_22: ## %else38 3990; AVX512VLBW-NEXT: testl $2048, %eax ## imm = 0x800 3991; AVX512VLBW-NEXT: jne LBB12_23 3992; AVX512VLBW-NEXT: LBB12_24: ## %else42 3993; AVX512VLBW-NEXT: testl $4096, %eax ## imm = 0x1000 3994; AVX512VLBW-NEXT: jne LBB12_25 3995; AVX512VLBW-NEXT: LBB12_26: ## %else46 3996; AVX512VLBW-NEXT: testl $8192, %eax ## imm = 0x2000 3997; AVX512VLBW-NEXT: jne LBB12_27 3998; AVX512VLBW-NEXT: LBB12_28: ## %else50 3999; AVX512VLBW-NEXT: testl $16384, %eax ## imm = 
0x4000 4000; AVX512VLBW-NEXT: jne LBB12_29 4001; AVX512VLBW-NEXT: LBB12_30: ## %else54 4002; AVX512VLBW-NEXT: testl $32768, %eax ## imm = 0x8000 4003; AVX512VLBW-NEXT: jne LBB12_31 4004; AVX512VLBW-NEXT: LBB12_32: ## %else58 4005; AVX512VLBW-NEXT: retq 4006; AVX512VLBW-NEXT: LBB12_1: ## %cond.load 4007; AVX512VLBW-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0 4008; AVX512VLBW-NEXT: incq %rdi 4009; AVX512VLBW-NEXT: testb $2, %al 4010; AVX512VLBW-NEXT: je LBB12_4 4011; AVX512VLBW-NEXT: LBB12_3: ## %cond.load1 4012; AVX512VLBW-NEXT: vpinsrb $1, (%rdi), %xmm0, %xmm0 4013; AVX512VLBW-NEXT: incq %rdi 4014; AVX512VLBW-NEXT: testb $4, %al 4015; AVX512VLBW-NEXT: je LBB12_6 4016; AVX512VLBW-NEXT: LBB12_5: ## %cond.load5 4017; AVX512VLBW-NEXT: vpinsrb $2, (%rdi), %xmm0, %xmm0 4018; AVX512VLBW-NEXT: incq %rdi 4019; AVX512VLBW-NEXT: testb $8, %al 4020; AVX512VLBW-NEXT: je LBB12_8 4021; AVX512VLBW-NEXT: LBB12_7: ## %cond.load9 4022; AVX512VLBW-NEXT: vpinsrb $3, (%rdi), %xmm0, %xmm0 4023; AVX512VLBW-NEXT: incq %rdi 4024; AVX512VLBW-NEXT: testb $16, %al 4025; AVX512VLBW-NEXT: je LBB12_10 4026; AVX512VLBW-NEXT: LBB12_9: ## %cond.load13 4027; AVX512VLBW-NEXT: vpinsrb $4, (%rdi), %xmm0, %xmm0 4028; AVX512VLBW-NEXT: incq %rdi 4029; AVX512VLBW-NEXT: testb $32, %al 4030; AVX512VLBW-NEXT: je LBB12_12 4031; AVX512VLBW-NEXT: LBB12_11: ## %cond.load17 4032; AVX512VLBW-NEXT: vpinsrb $5, (%rdi), %xmm0, %xmm0 4033; AVX512VLBW-NEXT: incq %rdi 4034; AVX512VLBW-NEXT: testb $64, %al 4035; AVX512VLBW-NEXT: je LBB12_14 4036; AVX512VLBW-NEXT: LBB12_13: ## %cond.load21 4037; AVX512VLBW-NEXT: vpinsrb $6, (%rdi), %xmm0, %xmm0 4038; AVX512VLBW-NEXT: incq %rdi 4039; AVX512VLBW-NEXT: testb %al, %al 4040; AVX512VLBW-NEXT: jns LBB12_16 4041; AVX512VLBW-NEXT: LBB12_15: ## %cond.load25 4042; AVX512VLBW-NEXT: vpinsrb $7, (%rdi), %xmm0, %xmm0 4043; AVX512VLBW-NEXT: incq %rdi 4044; AVX512VLBW-NEXT: testl $256, %eax ## imm = 0x100 4045; AVX512VLBW-NEXT: je LBB12_18 4046; AVX512VLBW-NEXT: LBB12_17: ## %cond.load29 4047; 
AVX512VLBW-NEXT: vpinsrb $8, (%rdi), %xmm0, %xmm0 4048; AVX512VLBW-NEXT: incq %rdi 4049; AVX512VLBW-NEXT: testl $512, %eax ## imm = 0x200 4050; AVX512VLBW-NEXT: je LBB12_20 4051; AVX512VLBW-NEXT: LBB12_19: ## %cond.load33 4052; AVX512VLBW-NEXT: vpinsrb $9, (%rdi), %xmm0, %xmm0 4053; AVX512VLBW-NEXT: incq %rdi 4054; AVX512VLBW-NEXT: testl $1024, %eax ## imm = 0x400 4055; AVX512VLBW-NEXT: je LBB12_22 4056; AVX512VLBW-NEXT: LBB12_21: ## %cond.load37 4057; AVX512VLBW-NEXT: vpinsrb $10, (%rdi), %xmm0, %xmm0 4058; AVX512VLBW-NEXT: incq %rdi 4059; AVX512VLBW-NEXT: testl $2048, %eax ## imm = 0x800 4060; AVX512VLBW-NEXT: je LBB12_24 4061; AVX512VLBW-NEXT: LBB12_23: ## %cond.load41 4062; AVX512VLBW-NEXT: vpinsrb $11, (%rdi), %xmm0, %xmm0 4063; AVX512VLBW-NEXT: incq %rdi 4064; AVX512VLBW-NEXT: testl $4096, %eax ## imm = 0x1000 4065; AVX512VLBW-NEXT: je LBB12_26 4066; AVX512VLBW-NEXT: LBB12_25: ## %cond.load45 4067; AVX512VLBW-NEXT: vpinsrb $12, (%rdi), %xmm0, %xmm0 4068; AVX512VLBW-NEXT: incq %rdi 4069; AVX512VLBW-NEXT: testl $8192, %eax ## imm = 0x2000 4070; AVX512VLBW-NEXT: je LBB12_28 4071; AVX512VLBW-NEXT: LBB12_27: ## %cond.load49 4072; AVX512VLBW-NEXT: vpinsrb $13, (%rdi), %xmm0, %xmm0 4073; AVX512VLBW-NEXT: incq %rdi 4074; AVX512VLBW-NEXT: testl $16384, %eax ## imm = 0x4000 4075; AVX512VLBW-NEXT: je LBB12_30 4076; AVX512VLBW-NEXT: LBB12_29: ## %cond.load53 4077; AVX512VLBW-NEXT: vpinsrb $14, (%rdi), %xmm0, %xmm0 4078; AVX512VLBW-NEXT: incq %rdi 4079; AVX512VLBW-NEXT: testl $32768, %eax ## imm = 0x8000 4080; AVX512VLBW-NEXT: je LBB12_32 4081; AVX512VLBW-NEXT: LBB12_31: ## %cond.load57 4082; AVX512VLBW-NEXT: vpinsrb $15, (%rdi), %xmm0, %xmm0 4083; AVX512VLBW-NEXT: retq 4084 %mask = icmp eq <16 x i8> %trigger, zeroinitializer 4085 %res = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %base, <16 x i1> %mask, <16 x i8> %src0) 4086 ret <16 x i8>%res 4087} 4088 4089declare <16 x double> @llvm.masked.expandload.v16f64(ptr, <16 x i1>, <16 x double>) 4090declare <8 x double> 
@llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) 4091declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) 4092declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) 4093declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) 4094 4095declare <32 x float> @llvm.masked.expandload.v32f32(ptr, <32 x i1>, <32 x float>) 4096declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) 4097declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) 4098declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) 4099declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) 4100 4101declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) 4102declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) 4103declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) 4104declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) 4105 4106declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) 4107declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) 4108declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) 4109declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) 4110 4111declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) 4112declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) 4113declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) 4114declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) 4115 4116declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) 4117declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) 4118declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) 4119declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x 
i1>, <8 x i8>) 4120