; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s

; Codegen tests for the 64-bit form of the llvm.amdgcn.ballot intrinsic.
; Covered inputs: constants, a truncated integer, integer and float compares,
; and an 'and' of two compares. Covered uses: returning the mask, taking its
; population count, and branching on the mask compared against zero or a
; constant. The expected assembly below each 'define' is machine-generated;
; regenerate it with the script named on the first line instead of editing it
; by hand.

declare i64 @llvm.amdgcn.ballot.i64(i1)
declare i64 @llvm.ctpop.i64(i64)

; Test ballot(0)
; The expected assembly writes 0 to both halves of the result (s0 and s1).

define amdgpu_cs i64 @constant_false() {
; CHECK-LABEL: constant_false:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_mov_b32 s1, 0
; CHECK-NEXT: ; return to shader part epilog
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0)
  ret i64 %ballot
}

; Test ballot(1)
; The expected assembly copies exec_lo/exec_hi into s0/s1.

define amdgpu_cs i64 @constant_true() {
; CHECK-LABEL: constant_true:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s0, exec_lo
; CHECK-NEXT: s_mov_b32 s1, exec_hi
; CHECK-NEXT: ; return to shader part epilog
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
  ret i64 %ballot
}

; Test ballot of a non-comparison operation
; The i1 input comes from a trunc. The expected assembly masks bit 0 of the
; argument and compares it against zero to produce the mask in s[0:1].

define amdgpu_cs i64 @non_compare(i32 %x) {
; CHECK-LABEL: non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v0
; CHECK-NEXT: ; return to shader part epilog
  %trunc = trunc i32 %x to i1
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc)
  ret i64 %ballot
}

; Test ballot of comparisons
; In each of the next three functions the expected assembly is a single vector
; compare that writes its result straight into s[0:1].

; Integer equality of two arguments.
define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
; CHECK-LABEL: compare_ints:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], v0, v1
; CHECK-NEXT: ; return to shader part epilog
  %cmp = icmp eq i32 %x, %y
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
  ret i64 %ballot
}

; Signed 'x >= 99'. The expected assembly expresses it as '98 < x', with the
; constant 0x62 (98) first moved into s0.
define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
; CHECK-LABEL: compare_int_with_constant:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_movk_i32 s0, 0x62
; CHECK-NEXT: v_cmp_lt_i32_e64 s[0:1], s0, v0
; CHECK-NEXT: ; return to shader part epilog
  %cmp = icmp sge i32 %x, 99
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
  ret i64 %ballot
}

; Ordered greater-than on two floats.
define amdgpu_cs i64 @compare_floats(float %x, float %y) {
; CHECK-LABEL: compare_floats:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1
; CHECK-NEXT: ; return to shader part epilog
  %cmp = fcmp ogt float %x, %y
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
  ret i64 %ballot
}

; Population count of a ballot. The expected assembly leaves the compare result
; in vcc, counts its set bits with s_bcnt1_i32_b64 into s0, and zeroes s1.
define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) {
; CHECK-LABEL: ctpop_of_ballot:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1
; CHECK-NEXT: s_bcnt1_i32_b64 s0, vcc
; CHECK-NEXT: s_mov_b32 s1, 0
; CHECK-NEXT: ; return to shader part epilog
  %cmp = fcmp ogt float %x, %y
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
  %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot)
  ret i64 %bcnt
}

; The remaining functions in this group branch on the ballot result and return
; 42 from %true or 33 from %false. The 'divergent' variants take plain
; arguments (VGPRs in the expected assembly); the 'uniform' variants take
; 'inreg' arguments (SGPRs in the expected assembly).

; ballot(trunc) != 0, divergent input. The expected assembly branches on vcc
; directly; no 64-bit compare against zero appears.
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_vccz .LBB7_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB7_3
; CHECK-NEXT: .LBB7_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB7_3
; CHECK-NEXT: .LBB7_3:
  %c = trunc i32 %v to i1
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(trunc) != 0, uniform input. The bit is masked with a scalar 'and' and
; then compared into vcc.
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0
; CHECK-NEXT: s_cbranch_vccz .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB8_3
; CHECK-NEXT: .LBB8_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB8_3
; CHECK-NEXT: .LBB8_3:
  %c = trunc i32 %v to i1
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(trunc) == 0, divergent input. Same compare and branch instruction as
; the '!= 0' variant, but the %true and %false blocks trade places.
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_vccz .LBB9_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB9_3
; CHECK-NEXT: .LBB9_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB9_3
; CHECK-NEXT: .LBB9_3:
  %c = trunc i32 %v to i1
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(trunc) == 0, uniform input.
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0
; CHECK-NEXT: s_cbranch_vccz .LBB10_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
  %c = trunc i32 %v to i1
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(v < 12) != 0, divergent input. The unsigned compare result in vcc
; feeds the branch directly.
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
; CHECK-NEXT: s_cbranch_vccz .LBB11_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB11_3
; CHECK-NEXT: .LBB11_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB11_3
; CHECK-NEXT: .LBB11_3:
  %c = icmp ult i32 %v, 12
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(v < 12) != 0, uniform input.
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12
; CHECK-NEXT: s_cbranch_vccz .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB12_3
; CHECK-NEXT: .LBB12_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB12_3
; CHECK-NEXT: .LBB12_3:
  %c = icmp ult i32 %v, 12
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(v < 12) == 0, divergent input.
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
; CHECK-NEXT: s_cbranch_vccz .LBB13_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB13_3
; CHECK-NEXT: .LBB13_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB13_3
; CHECK-NEXT: .LBB13_3:
  %c = icmp ult i32 %v, 12
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot(v < 12) == 0, uniform input.
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12
; CHECK-NEXT: s_cbranch_vccz .LBB14_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
  %c = icmp ult i32 %v, 12
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot((v1 < 12) & (v2 > 34)) != 0, divergent inputs. The expected assembly
; issues two vector compares, combines them with s_and_b64 into vcc, and
; branches on vcc.
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1
; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1]
; CHECK-NEXT: s_cbranch_vccz .LBB15_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB15_3
; CHECK-NEXT: .LBB15_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB15_3
; CHECK-NEXT: .LBB15_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot((v1 < 12) & (v2 > 34)) != 0, uniform inputs. The expected assembly
; uses scalar compares with s_cselect_b64, ands the two masks, ands the result
; with exec, and branches on scc.
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB16_3
; CHECK-NEXT: .LBB16_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB16_3
; CHECK-NEXT: .LBB16_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot((v1 < 12) & (v2 > 34)) == 0, divergent inputs.
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1
; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1]
; CHECK-NEXT: s_cbranch_vccz .LBB17_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB17_3
; CHECK-NEXT: .LBB17_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB17_3
; CHECK-NEXT: .LBB17_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; ballot((v1 < 12) & (v2 > 34)) == 0, uniform inputs.
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; The ballot is compared with a non-zero constant (signed '> 22'). Here the
; expected assembly does materialize the mask in s[0:1] and performs a 64-bit
; signed compare on it ('< 23', branching to %false when that holds).
define amdgpu_cs i32 @branch_uniform_ballot_sgt_N_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_sgt_N_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, 12
; CHECK-NEXT: v_cmp_lt_i64_e64 vcc, s[0:1], 23
; CHECK-NEXT: s_cbranch_vccnz .LBB19_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB19_3
; CHECK-NEXT: .LBB19_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB19_3
; CHECK-NEXT: .LBB19_3:
  %c = icmp ult i32 %v, 12
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %bc = icmp sgt i64 %ballot, 22
  br i1 %bc, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; The four 'simulated_negated' functions below do not call the ballot
; intrinsic. They call llvm.amdgcn.icmp on the i1 condition and the constant 0
; with predicate 32 (ICMP_EQ, per the inline comments), i.e. they test the
; condition for being false. Compared with the matching '_and' functions above,
; the expected assembly keeps the same compares and switches the branch from
; vccz/scc0 to vccnz/scc1.
declare i64 @llvm.amdgcn.icmp.i64(i1, i1, i32)

; Negated condition, result != 0, divergent inputs.
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1
; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1]
; CHECK-NEXT: s_cbranch_vccnz .LBB20_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB20_3
; CHECK-NEXT: .LBB20_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB20_3
; CHECK-NEXT: .LBB20_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; Negated condition, result != 0, uniform inputs.
define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc1 .LBB21_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB21_3
; CHECK-NEXT: .LBB21_2: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB21_3
; CHECK-NEXT: .LBB21_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  br i1 %ballot_ne_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; Negated condition, result == 0, divergent inputs.
define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1
; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1]
; CHECK-NEXT: s_cbranch_vccnz .LBB22_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB22_3
; CHECK-NEXT: .LBB22_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB22_3
; CHECK-NEXT: .LBB22_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; Negated condition, result == 0, uniform inputs.
define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_simulated_negated_ballot_eq_zero_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc1 .LBB23_2
; CHECK-NEXT: ; %bb.1: ; %false
; CHECK-NEXT: s_mov_b32 s0, 33
; CHECK-NEXT: s_branch .LBB23_3
; CHECK-NEXT: .LBB23_2: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB23_3
; CHECK-NEXT: .LBB23_3:
  %v1c = icmp ult i32 %v1, 12
  %v2c = icmp ugt i32 %v2, 34
  %c = and i1 %v1c, %v2c
  %ballot = call i64 @llvm.amdgcn.icmp.i64(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32
  %ballot_eq_zero = icmp eq i64 %ballot, 0
  br i1 %ballot_eq_zero, label %true, label %false
true:
  ret i32 42
false:
  ret i32 33
}

; Input that is not constant or direct result of a compare.
; Tests setting 0 to inactive lanes.
; The ballot input is a phi of two compares computed in different blocks:
; %A (taken when %cond == 0) supplies 'tid >= 1' and %B supplies 'tid < 2'.
; In the expected assembly the merged condition is accumulated in s[0:1] across
; the two exec-masked regions, turned into a per-lane 0/1 with v_cndmask after
; exec is restored, compared against zero again to form the mask, and stored as
; 64 bits to %out.
define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
; CHECK-LABEL: non_cst_non_compare_input:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: ; implicit-def: $sgpr0_sgpr1
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: ; %bb.1: ; %B
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 2, v2
; CHECK-NEXT: s_and_b64 s[0:1], vcc, exec
; CHECK-NEXT: ; implicit-def: $vgpr2
; CHECK-NEXT: ; %bb.2: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 s[2:3], s[2:3]
; CHECK-NEXT: ; %bb.3: ; %A
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_and_b64 s[4:5], vcc, exec
; CHECK-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; CHECK-NEXT: ; %bb.4: ; %exit
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2
; CHECK-NEXT: v_mov_b32_e32 v3, s1
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; CHECK-NEXT: s_endpgm
entry:
  ; Lanes with %cond == 0 go to %A, all others to %B.
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %A, label %B

A:
  %val_A = icmp uge i32 %tid, 1
  br label %exit

B:
  %val_B = icmp ult i32 %tid, 2
  br label %exit

exit:
  ; The i1 reaching the ballot is neither a constant nor a compare in this block.
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %phi)
  store i64 %ballot, ptr addrspace(1) %out
  ret void
}