; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64
; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32

; These tests exercise i1 and i8 values that are defined in one basic block
; and used as AVX-512 masks in another. The interesting part of each check is
; whether the value crosses the branch in a mask (k) register or in a GPR.

; Both fcmp results are materialized directly into k-registers, so the phi'd
; i1 feeds the masked vmovss without a GPR round trip.
define void @test_fcmp_storefloat(i1 %cond, ptr %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
; X86-64-LABEL: test_fcmp_storefloat:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB0_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
; X86-64-NEXT:    jmp .LBB0_3
; X86-64-NEXT:  .LBB0_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm5, %xmm4, %k1
; X86-64-NEXT:  .LBB0_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storefloat:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB0_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:    jmp .LBB0_3
; X86-32-NEXT:  .LBB0_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:  .LBB0_3: # %exit
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f3, %f4
  br label %exit

else:
  %cmp2 = fcmp oeq float %f5, %f6
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, ptr %fptr
  ret void
}

; The i1 compare result is stored to memory straight from the k-register
; with kmovb.
define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float %f2, float %f3, float %f4) {
; X86-64-LABEL: test_fcmp_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB1_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
; X86-64-NEXT:  .LBB1_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB1_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
; X86-32-NEXT:  .LBB1_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f1, %f2
  br label %exit

else:
  %cmp2 = fcmp oeq float %f3, %f4
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  store i1 %val, ptr %iptr
  ret void
}

; The i1 add is performed in a GPR; the result only moves into a k-register
; (kmovd) at the masked use in the exit block.
define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f1, float %f2) {
; X86-64-LABEL: test_load_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB2_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movzbl (%rdx), %eax
; X86-64-NEXT:    addb (%rcx), %al
; X86-64-NEXT:    jmp .LBB2_3
; X86-64-NEXT:  .LBB2_2: # %else
; X86-64-NEXT:    movzbl (%rcx), %eax
; X86-64-NEXT:  .LBB2_3: # %exit
; X86-64-NEXT:    kmovd %eax, %k1
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB2_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    movzbl (%edx), %edx
; X86-32-NEXT:    addb (%ecx), %dl
; X86-32-NEXT:    jmp .LBB2_3
; X86-32-NEXT:  .LBB2_2: # %else
; X86-32-NEXT:    movzbl (%ecx), %edx
; X86-32-NEXT:  .LBB2_3: # %exit
; X86-32-NEXT:    kmovd %edx, %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, ptr %iptr1
  %loaded2if = load i1, ptr %iptr2
  %added = add i1 %loaded1, %loaded2if
  br label %exit

else:
  %loaded2else = load i1, ptr %iptr2
  br label %exit

exit:
  %val = phi i1 [%added, %if], [%loaded2else, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, ptr %fptr
  ret void
}

; i1 loads that feed a mask use are loaded directly into a k-register with
; kmovb.
define void @test_load_i1(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f1, float %f2) {
; X86-64-LABEL: test_load_i1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB3_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    jmp .LBB3_3
; X86-64-NEXT:  .LBB3_2: # %else
; X86-64-NEXT:    kmovb (%rcx), %k1
; X86-64-NEXT:  .LBB3_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_i1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB3_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB3_3
; X86-32-NEXT:  .LBB3_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB3_3: # %exit
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, ptr %iptr1
  br label %exit

else:
  %loaded2 = load i1, ptr %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, ptr %fptr
  ret void
}

; An i1 value that is only loaded and stored back stays in GPRs; no
; k-register is needed.
define void @test_loadi1_storei1(i1 %cond, ptr %iptr1, ptr %iptr2, ptr %iptr3) {
; X86-64-LABEL: test_loadi1_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB4_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movzbl (%rsi), %eax
; X86-64-NEXT:    jmp .LBB4_3
; X86-64-NEXT:  .LBB4_2: # %else
; X86-64-NEXT:    movzbl (%rdx), %eax
; X86-64-NEXT:  .LBB4_3: # %exit
; X86-64-NEXT:    andb $1, %al
; X86-64-NEXT:    movb %al, (%rcx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_loadi1_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB4_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB4_3
; X86-32-NEXT:  .LBB4_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB4_3: # %exit
; X86-32-NEXT:    movzbl (%ecx), %ecx
; X86-32-NEXT:    andb $1, %cl
; X86-32-NEXT:    movb %cl, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, ptr %iptr1
  br label %exit

else:
  %loaded2 = load i1, ptr %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  store i1 %val, ptr %iptr3
  ret void
}

; A shl by 1 on a mask value is lowered to kaddb (adding a mask to itself
; doubles it).
define void @test_shl1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shl1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB5_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kaddb %k0, %k0, %k1
; X86-64-NEXT:    jmp .LBB5_3
; X86-64-NEXT:  .LBB5_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB5_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB5_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kaddb %k0, %k0, %k1
; X86-32-NEXT:    jmp .LBB5_3
; X86-32-NEXT:  .LBB5_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB5_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = shl i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; A lshr on a mask value is lowered to kshiftrb.
define void @test_shr1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shr1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB6_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftrb $1, %k0, %k1
; X86-64-NEXT:    jmp .LBB6_3
; X86-64-NEXT:  .LBB6_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB6_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB6_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftrb $1, %k0, %k1
; X86-32-NEXT:    jmp .LBB6_3
; X86-32-NEXT:  .LBB6_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB6_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = lshr i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; Same as test_shr1, but shifting by 2.
define void @test_shr2(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shr2:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB7_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftrb $2, %k0, %k1
; X86-64-NEXT:    jmp .LBB7_3
; X86-64-NEXT:  .LBB7_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB7_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr2:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB7_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftrb $2, %k0, %k1
; X86-32-NEXT:    jmp .LBB7_3
; X86-32-NEXT:  .LBB7_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB7_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = lshr i8 %loaded1, 2
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; A shl by more than 1 on a mask value is lowered to kshiftlb.
define void @test_shl(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shl:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB8_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftlb $6, %k0, %k1
; X86-64-NEXT:    jmp .LBB8_3
; X86-64-NEXT:  .LBB8_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB8_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB8_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftlb $6, %k0, %k1
; X86-32-NEXT:    jmp .LBB8_3
; X86-32-NEXT:  .LBB8_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB8_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = shl i8 %loaded1, 6
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; Both operands are loaded into k-registers in the entry block, so the and
; and the add are performed there with kandb/kaddb.
define void @test_add(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB9_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kandb %k1, %k0, %k1
; X86-64-NEXT:    jmp .LBB9_3
; X86-64-NEXT:  .LBB9_2: # %else
; X86-64-NEXT:    kaddb %k1, %k0, %k1
; X86-64-NEXT:  .LBB9_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    kmovb (%edx), %k0
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB9_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    kandb %k1, %k0, %k1
; X86-32-NEXT:    jmp .LBB9_3
; X86-32-NEXT:  .LBB9_2: # %else
; X86-32-NEXT:    kaddb %k1, %k0, %k1
; X86-32-NEXT:  .LBB9_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  %loaded1 = load i8, ptr %ptr1
  %loaded2 = load i8, ptr %ptr2
  br i1 %cond, label %if, label %else

if:
  %and = and i8 %loaded1, %loaded2
  br label %exit

else:
  %add = add i8 %loaded1, %loaded2
  br label %exit

exit:
  %val = phi i8 [%and, %if], [%add, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}