; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s --check-prefixes=CHECK,MODULE
; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s --check-prefixes=CHECK,CGSCC
; REQUIRES: amdgpu-registered-target

; The two opt invocations above run the openmp-opt pass and the
; openmp-opt-cgscc pass over this file. Assertions shared by both runs use the
; common prefix; assertions that differ between the runs use the second prefix
; given on the respective invocation.

target triple = "amdgcn-amd-amdhsa"

; External functions and intrinsics called by the test functions below.
; @aligned_barrier is the only declaration that carries the
; "llvm.assume"="ompx_aligned_barrier" attribute.
declare void @useI32(i32)
declare void @unknown()
declare void @aligned_barrier() "llvm.assume"="ompx_aligned_barrier"
declare void @llvm.nvvm.barrier0()
declare i32 @llvm.nvvm.barrier0.and(i32)
declare i32 @llvm.nvvm.barrier0.or(i32)
declare i32 @llvm.nvvm.barrier0.popc(i32)
declare void @llvm.amdgcn.s.barrier()
declare void @llvm.assume(i1)

; Expected global variable definitions in the output (presumably generated
; because of the --check-globals argument above; confirm before hand-editing).
;.
; CHECK: @GC1 = constant i32 42
; CHECK: @GC2 = addrspace(4) global i32 0
; CHECK: @GPtr4 = addrspace(4) global ptr addrspace(4) null
; CHECK: @G = global i32 42
; CHECK: @GS = addrspace(3) global i32 0
; CHECK: @GPtr = global ptr null
; CHECK: @PG1 = thread_local global i32 42
; CHECK: @PG2 = addrspace(5) global i32 0
; CHECK: @GPtr5 = global ptr addrspace(5) null
; CHECK: @G1 = global i32 42
; CHECK: @G2 = addrspace(1) global i32 0
;.
; Kernel whose only non-assume call is @unknown, annotated at the call site
; with "llvm.assume"="ompx_aligned_barrier". With the MODULE prefix the
; expected body is just `ret void`; with the CGSCC prefix all three calls are
; expected to remain.
define amdgpu_kernel void @pos_empty_1(i1 %c) "kernel" {
; MODULE-LABEL: define {{[^@]+}}@pos_empty_1
; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
; MODULE-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@pos_empty_1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
; CGSCC-NEXT: call void @llvm.assume(i1 [[C]])
; CGSCC-NEXT: call void @unknown() #[[ATTR0:[0-9]+]]
; CGSCC-NEXT: call void @llvm.assume(i1 [[C]])
; CGSCC-NEXT: ret void
;
  call void @llvm.assume(i1 %c)
  call void @unknown() "llvm.assume"="ompx_aligned_barrier"
  call void @llvm.assume(i1 %c)
  ret void
}
; Kernel containing only a call to @aligned_barrier; the expected output
; contains no call.
define amdgpu_kernel void @pos_empty_2() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_2
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: ret void
;
  call void @aligned_barrier()
  ret void
}
; Kernel containing only a call to @llvm.nvvm.barrier0; the expected output
; contains no call.
define amdgpu_kernel void @pos_empty_3() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_3
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
;
  call void @llvm.nvvm.barrier0()
  ret void
}
; Kernel containing only a call to @llvm.nvvm.barrier0.and whose result is
; unused; the expected output contains no call.
define amdgpu_kernel void @pos_empty_4() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_4
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
;
  call i32 @llvm.nvvm.barrier0.and(i32 0)
  ret void
}
; Kernel containing only a call to @llvm.nvvm.barrier0.or whose result is
; unused; the expected output contains no call.
define amdgpu_kernel void @pos_empty_5() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_5
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
;
  call i32 @llvm.nvvm.barrier0.or(i32 0)
  ret void
}
; Kernel containing only a call to @llvm.nvvm.barrier0.popc whose result is
; unused; the expected output contains no call.
define amdgpu_kernel void @pos_empty_6() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_6
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: ret void
;
  call i32 @llvm.nvvm.barrier0.popc(i32 0)
  ret void
}
; A barrier at the start of the kernel followed by a call to @unknown; the
; expected output keeps only the call to @unknown.
define amdgpu_kernel void @pos_empty_7a() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_7a
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: ret void
;
  call void @llvm.amdgcn.s.barrier()
  call void @unknown()
  ret void
}
; FIXME: We should remove the barrier.
; The barrier sits between a `nosync readnone` call to @unknown and a plain
; call to @unknown; the current assertions expect it to stay.
define amdgpu_kernel void @pos_empty_7b() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: ret void
;
  call void @unknown() nosync readnone
  call void @llvm.amdgcn.s.barrier()
  call void @unknown()
  ret void
}
; A release fence and an s.barrier call annotated as an aligned barrier, both
; inside one branch arm; the expected output contains neither.
define amdgpu_kernel void @pos_empty_8(i1 %c) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_8
; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
; CHECK: t:
; CHECK-NEXT: br label [[F]]
; CHECK: f:
; CHECK-NEXT: ret void
;
  br i1 %c, label %t, label %f
t:
  fence release
  call void @llvm.amdgcn.s.barrier() "llvm.assume"="ompx_aligned_barrier"
  br label %f
f:
  ret void
}
; A call to @unknown precedes the barrier; both are expected to stay.
define amdgpu_kernel void @neg_empty_8() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_empty_8
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: ret void
;
  call void @unknown()
  call void @llvm.amdgcn.s.barrier()
  ret void
}
; Fence / barrier / fence in both branch arms and in the merge block. All
; three barriers are expected to stay. Blocks t and f are expected to end up
; with the barrier followed by a single fence; block m is unchanged.
define amdgpu_kernel void @neg_empty_9(i1 %c) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@neg_empty_9
; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
; CHECK: t:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence release
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: f:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence release
; CHECK-NEXT: br label [[M]]
; CHECK: m:
; CHECK-NEXT: fence release
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence release
; CHECK-NEXT: ret void
;
  br i1 %c, label %t, label %f
t:
  fence release
  call void @llvm.amdgcn.s.barrier()
  fence release
  br label %m
f:
  fence release
  call void @llvm.amdgcn.s.barrier()
  fence release
  br label %m
m:
  fence release
  call void @llvm.amdgcn.s.barrier()
  fence release
  ret void
}
; FIXME: We should remove the barrier.
; The only barrier is in a block reached by an unconditional branch from the
; entry block; the current assertions expect it to stay.
define amdgpu_kernel void @pos_empty_10() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_10
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: m:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: ret void
;
  br label %m
m:
  call void @llvm.amdgcn.s.barrier()
  ret void
}
; Same control flow, but block m holds a call to @aligned_barrier followed by
; an s.barrier call; the expected output contains neither.
define amdgpu_kernel void @pos_empty_11() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@pos_empty_11
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: m:
; CHECK-NEXT: ret void
;
  br label %m
m:
  call void @aligned_barrier()
  call void @llvm.amdgcn.s.barrier()
  ret void
}
; Helper with an empty body; it is called from @neg_empty_12.
define void @empty() {
; CHECK-LABEL: define {{[^@]+}}@empty() {
; CHECK-NEXT: ret void
;
  ret void
}
; FIXME: We should remove the barrier in the end but not the first one.
209define amdgpu_kernel void @neg_empty_12(i1 %c) "kernel" { 210; MODULE-LABEL: define {{[^@]+}}@neg_empty_12 211; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { 212; MODULE-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] 213; MODULE: t: 214; MODULE-NEXT: call void @llvm.amdgcn.s.barrier() 215; MODULE-NEXT: br label [[M:%.*]] 216; MODULE: f: 217; MODULE-NEXT: br label [[M]] 218; MODULE: m: 219; MODULE-NEXT: call void @llvm.amdgcn.s.barrier() 220; MODULE-NEXT: ret void 221; 222; CGSCC-LABEL: define {{[^@]+}}@neg_empty_12 223; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { 224; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] 225; CGSCC: t: 226; CGSCC-NEXT: call void @empty() 227; CGSCC-NEXT: call void @llvm.amdgcn.s.barrier() 228; CGSCC-NEXT: br label [[M:%.*]] 229; CGSCC: f: 230; CGSCC-NEXT: call void @empty() 231; CGSCC-NEXT: br label [[M]] 232; CGSCC: m: 233; CGSCC-NEXT: call void @llvm.amdgcn.s.barrier() 234; CGSCC-NEXT: ret void 235; 236 br i1 %c, label %t, label %f 237t: 238 call void @empty() 239 call void @llvm.amdgcn.s.barrier() 240 br label %m 241f: 242 call void @empty() 243 br label %m 244m: 245 call void @llvm.amdgcn.s.barrier() 246 ret void 247} 248define void @neg_empty_1() "kernel" { 249; CHECK-LABEL: define {{[^@]+}}@neg_empty_1 250; CHECK-SAME: () #[[ATTR4]] { 251; CHECK-NEXT: call void @unknown() 252; CHECK-NEXT: ret void 253; 254 call void @unknown() 255 ret void 256} 257define void @neg_empty_2() "kernel" { 258; CHECK-LABEL: define {{[^@]+}}@neg_empty_2 259; CHECK-SAME: () #[[ATTR4]] { 260; CHECK-NEXT: ret void 261; 262 call void @aligned_barrier() 263 ret void 264} 265 266@GC1 = constant i32 42 267@GC2 = addrspace(4) global i32 0 268@GPtr4 = addrspace(4) global ptr addrspace(4) null 269define amdgpu_kernel void @pos_constant_loads() "kernel" { 270; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads 271; CHECK-SAME: () #[[ATTR4]] { 272; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8 273; CHECK-NEXT: [[B:%.*]] = 
load i32, ptr addrspace(4) @GC2, align 4 274; CHECK-NEXT: [[C:%.*]] = load i32, ptr addrspace(4) [[ARG]], align 4 275; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]] 276; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]] 277; CHECK-NEXT: call void @useI32(i32 [[E]]) 278; CHECK-NEXT: ret void 279; 280 %GPtr4c = addrspacecast ptr addrspace(4) @GPtr4 to ptr 281 %arg = load ptr addrspace(4), ptr %GPtr4c 282 %a = load i32, ptr @GC1 283 call void @aligned_barrier() 284 %GC2c = addrspacecast ptr addrspace(4) @GC2 to ptr 285 %b = load i32, ptr %GC2c 286 call void @aligned_barrier() 287 %argc = addrspacecast ptr addrspace(4) %arg to ptr 288 %c = load i32, ptr %argc 289 call void @aligned_barrier() 290 %d = add i32 %a, %b 291 %e = add i32 %d, %c 292 call void @useI32(i32 %e) 293 ret void 294} 295@G = global i32 42 296@GS = addrspace(3) global i32 0 297@GPtr = global ptr null 298; TODO: We could remove some of the barriers due to the lack of write effects. 299define amdgpu_kernel void @neg_loads() "kernel" { 300; CHECK-LABEL: define {{[^@]+}}@neg_loads 301; CHECK-SAME: () #[[ATTR4]] { 302; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 303; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G, align 4 304; CHECK-NEXT: call void @aligned_barrier() 305; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(3) @GS, align 4 306; CHECK-NEXT: call void @aligned_barrier() 307; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARG]], align 4 308; CHECK-NEXT: call void @aligned_barrier() 309; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[B]] 310; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]] 311; CHECK-NEXT: call void @useI32(i32 [[E]]) 312; CHECK-NEXT: ret void 313; 314 %arg = load ptr, ptr @GPtr 315 %a = load i32, ptr @G 316 call void @aligned_barrier() 317 %GSc = addrspacecast ptr addrspace(3) @GS to ptr 318 %b = load i32, ptr %GSc 319 call void @aligned_barrier() 320 %c = load i32, ptr %arg 321 call void @aligned_barrier() 322 %d = add i32 %a, %b 323 %e = add i32 %d, %c 324 call void @useI32(i32 %e) 325 ret void 
326} 327@PG1 = thread_local global i32 42 328@PG2 = addrspace(5) global i32 0 329@GPtr5 = global ptr addrspace(5) null 330define amdgpu_kernel void @pos_priv_mem() "kernel" { 331; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem 332; CHECK-SAME: () #[[ATTR4]] { 333; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 4 334; CHECK-NEXT: [[LOC:%.*]] = alloca i32, align 4, addrspace(5) 335; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4 336; CHECK-NEXT: store i32 [[A]], ptr addrspace(5) [[LOC]], align 4 337; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(5) @PG2, align 4 338; CHECK-NEXT: store i32 [[B]], ptr addrspace(5) [[ARG]], align 4 339; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(5) [[LOC]], align 4 340; CHECK-NEXT: store i32 [[V]], ptr @PG1, align 4 341; CHECK-NEXT: ret void 342; 343 %arg = load ptr addrspace(5), ptr @GPtr5 344 %loc = alloca i32, addrspace(5) 345 %a = load i32, ptr @PG1 346 call void @aligned_barrier() 347 store i32 %a, ptr addrspace(5) %loc 348 %PG2c = addrspacecast ptr addrspace(5) @PG2 to ptr 349 %b = load i32, ptr %PG2c 350 call void @aligned_barrier() 351 %argc = addrspacecast ptr addrspace(5) %arg to ptr 352 store i32 %b, ptr %argc 353 call void @aligned_barrier() 354 %v = load i32, ptr addrspace(5) %loc 355 store i32 %v, ptr @PG1 356 call void @aligned_barrier() 357 ret void 358} 359@G1 = global i32 42 360@G2 = addrspace(1) global i32 0 361define amdgpu_kernel void @neg_mem() "kernel" { 362; CHECK-LABEL: define {{[^@]+}}@neg_mem 363; CHECK-SAME: () #[[ATTR4]] { 364; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 365; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G1, align 4 366; CHECK-NEXT: fence seq_cst 367; CHECK-NEXT: call void @aligned_barrier() 368; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4 369; CHECK-NEXT: fence release 370; CHECK-NEXT: call void @aligned_barrier() 371; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(1) @G2, align 4 372; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4 373; CHECK-NEXT: 
fence acquire 374; CHECK-NEXT: ret void 375; 376 %arg = load ptr, ptr @GPtr 377 %a = load i32, ptr @G1 378 fence seq_cst 379 call void @aligned_barrier() 380 store i32 %a, ptr %arg 381 fence release 382 call void @aligned_barrier() 383 %G2c = addrspacecast ptr addrspace(1) @G2 to ptr 384 %b = load i32, ptr %G2c 385 store i32 %b, ptr @G1 386 fence acquire 387 call void @aligned_barrier() 388 ret void 389} 390 391define amdgpu_kernel void @pos_multiple() "kernel" { 392; CHECK-LABEL: define {{[^@]+}}@pos_multiple 393; CHECK-SAME: () #[[ATTR4]] { 394; CHECK-NEXT: ret void 395; 396 call void @llvm.nvvm.barrier0() 397 call void @aligned_barrier() 398 call void @aligned_barrier() 399 call void @llvm.amdgcn.s.barrier() 400 call void @aligned_barrier() 401 call void @llvm.nvvm.barrier0() 402 call void @aligned_barrier() 403 call void @aligned_barrier() 404 ret void 405} 406 407define amdgpu_kernel void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" { 408; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1 409; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] { 410; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] 411; CHECK: t0: 412; CHECK-NEXT: br label [[T0B:%.*]] 413; CHECK: t0b: 414; CHECK-NEXT: br label [[M:%.*]] 415; CHECK: f0: 416; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] 417; CHECK: t1: 418; CHECK-NEXT: br label [[M]] 419; CHECK: f1: 420; CHECK-NEXT: br label [[M]] 421; CHECK: m: 422; CHECK-NEXT: ret void 423; 424 fence acquire 425 call void @llvm.nvvm.barrier0() 426 fence release 427 call void @aligned_barrier() 428 fence seq_cst 429 br i1 %c0, label %t0, label %f0 430t0: 431 fence seq_cst 432 call void @aligned_barrier() 433 fence seq_cst 434 br label %t0b 435t0b: 436 fence seq_cst 437 call void @aligned_barrier() 438 fence seq_cst 439 br label %m 440f0: 441 fence release 442 call void @aligned_barrier() 443 fence acquire 444 call void @llvm.nvvm.barrier0() 445 fence acquire 446 br i1 %c1, label %t1, label %f1 447t1: 
448 fence acquire 449 call void @aligned_barrier() 450 fence seq_cst 451 br label %m 452f1: 453 fence seq_cst 454 call void @aligned_barrier() 455 fence acquire 456 br label %m 457m: 458 fence seq_cst 459 call void @aligned_barrier() 460 fence seq_cst 461 ret void 462} 463 464define amdgpu_kernel void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" { 465; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2 466; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { 467; CHECK-NEXT: store i32 4, ptr [[P]], align 4 468; CHECK-NEXT: call void @aligned_barrier() 469; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] 470; CHECK: t0: 471; CHECK-NEXT: br label [[T0B:%.*]] 472; CHECK: t0b: 473; CHECK-NEXT: br label [[M:%.*]] 474; CHECK: f0: 475; CHECK-NEXT: store i32 4, ptr [[P]], align 4 476; CHECK-NEXT: call void @llvm.nvvm.barrier0() 477; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] 478; CHECK: t1: 479; CHECK-NEXT: br label [[M]] 480; CHECK: f1: 481; CHECK-NEXT: br label [[M]] 482; CHECK: m: 483; CHECK-NEXT: store i32 4, ptr [[P]], align 4 484; CHECK-NEXT: ret void 485; 486 call void @llvm.nvvm.barrier0() 487 store i32 4, ptr %p 488 call void @aligned_barrier() 489 br i1 %c0, label %t0, label %f0 490t0: 491 call void @aligned_barrier() 492 br label %t0b 493t0b: 494 call void @aligned_barrier() 495 br label %m 496f0: 497 call void @aligned_barrier() 498 store i32 4, ptr %p 499 call void @llvm.nvvm.barrier0() 500 br i1 %c1, label %t1, label %f1 501t1: 502 call void @aligned_barrier() 503 br label %m 504f1: 505 call void @aligned_barrier() 506 br label %m 507m: 508 store i32 4, ptr %p 509 call void @aligned_barrier() 510 ret void 511} 512 513define void @multiple_blocks_non_kernel_1(i1 %c0, i1 %c1) "kernel" { 514; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_1 515; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] { 516; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] 517; CHECK: t0: 518; 
CHECK-NEXT: br label [[T0B:%.*]] 519; CHECK: t0b: 520; CHECK-NEXT: br label [[M:%.*]] 521; CHECK: f0: 522; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] 523; CHECK: t1: 524; CHECK-NEXT: br label [[M]] 525; CHECK: f1: 526; CHECK-NEXT: br label [[M]] 527; CHECK: m: 528; CHECK-NEXT: ret void 529; 530 call void @llvm.nvvm.barrier0() 531 call void @aligned_barrier() 532 br i1 %c0, label %t0, label %f0 533t0: 534 call void @aligned_barrier() 535 br label %t0b 536t0b: 537 call void @aligned_barrier() 538 br label %m 539f0: 540 call void @aligned_barrier() 541 call void @llvm.nvvm.barrier0() 542 br i1 %c1, label %t1, label %f1 543t1: 544 call void @aligned_barrier() 545 br label %m 546f1: 547 call void @aligned_barrier() 548 br label %m 549m: 550 call void @aligned_barrier() 551 ret void 552} 553 554define void @multiple_blocks_non_kernel_2(i1 %c0, i1 %c1) "kernel" { 555; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_2 556; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] { 557; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] 558; CHECK: t0: 559; CHECK-NEXT: br label [[T0B:%.*]] 560; CHECK: t0b: 561; CHECK-NEXT: br label [[M:%.*]] 562; CHECK: f0: 563; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] 564; CHECK: t1: 565; CHECK-NEXT: br label [[M]] 566; CHECK: f1: 567; CHECK-NEXT: br label [[M]] 568; CHECK: m: 569; CHECK-NEXT: ret void 570; 571 br i1 %c0, label %t0, label %f0 572t0: 573 call void @aligned_barrier() 574 br label %t0b 575t0b: 576 call void @aligned_barrier() 577 br label %m 578f0: 579 call void @aligned_barrier() 580 call void @llvm.nvvm.barrier0() 581 br i1 %c1, label %t1, label %f1 582t1: 583 call void @aligned_barrier() 584 br label %m 585f1: 586 call void @aligned_barrier() 587 br label %m 588m: 589 call void @aligned_barrier() 590 ret void 591} 592 593define void @multiple_blocks_non_kernel_3(i1 %c0, i1 %c1) "kernel" { 594; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_3 595; CHECK-SAME: 
(i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] { 596; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] 597; CHECK: t0: 598; CHECK-NEXT: br label [[T0B:%.*]] 599; CHECK: t0b: 600; CHECK-NEXT: br label [[M:%.*]] 601; CHECK: f0: 602; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] 603; CHECK: t1: 604; CHECK-NEXT: br label [[M]] 605; CHECK: f1: 606; CHECK-NEXT: br label [[M]] 607; CHECK: m: 608; CHECK-NEXT: ret void 609; 610 br i1 %c0, label %t0, label %f0 611t0: 612 br label %t0b 613t0b: 614 br label %m 615f0: 616 call void @aligned_barrier() 617 call void @llvm.nvvm.barrier0() 618 br i1 %c1, label %t1, label %f1 619t1: 620 call void @aligned_barrier() 621 br label %m 622f1: 623 call void @aligned_barrier() 624 br label %m 625m: 626 call void @aligned_barrier() 627 ret void 628} 629 630define void @multiple_blocks_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) "kernel" { 631; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_effects_1 632; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { 633; CHECK-NEXT: store i32 0, ptr [[P]], align 4 634; CHECK-NEXT: call void @aligned_barrier() 635; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] 636; CHECK: t0: 637; CHECK-NEXT: store i32 1, ptr [[P]], align 4 638; CHECK-NEXT: br label [[T0B:%.*]] 639; CHECK: t0b: 640; CHECK-NEXT: call void @aligned_barrier() 641; CHECK-NEXT: br label [[M:%.*]] 642; CHECK: f0: 643; CHECK-NEXT: store i32 2, ptr [[P]], align 4 644; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] 645; CHECK: t1: 646; CHECK-NEXT: call void @aligned_barrier() 647; CHECK-NEXT: br label [[M]] 648; CHECK: f1: 649; CHECK-NEXT: call void @aligned_barrier() 650; CHECK-NEXT: br label [[M]] 651; CHECK: m: 652; CHECK-NEXT: store i32 3, ptr [[P]], align 4 653; CHECK-NEXT: ret void 654; 655 call void @aligned_barrier() 656 store i32 0, ptr %p 657 call void @aligned_barrier() 658 br i1 %c0, label %t0, label %f0 659t0: 660 call void @aligned_barrier() 661 store 
i32 1, ptr %p 662 br label %t0b 663t0b: 664 call void @aligned_barrier() 665 br label %m 666f0: 667 call void @aligned_barrier() 668 call void @llvm.nvvm.barrier0() 669 store i32 2, ptr %p 670 br i1 %c1, label %t1, label %f1 671t1: 672 call void @aligned_barrier() 673 br label %m 674f1: 675 call void @aligned_barrier() 676 br label %m 677m: 678 call void @aligned_barrier() 679 store i32 3, ptr %p 680 call void @aligned_barrier() 681 ret void 682} 683 684define internal void @write_then_barrier0(ptr %p) { 685; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0 686; CHECK-SAME: (ptr [[P:%.*]]) { 687; CHECK-NEXT: store i32 0, ptr [[P]], align 4 688; CHECK-NEXT: call void @aligned_barrier() 689; CHECK-NEXT: ret void 690; 691 store i32 0, ptr %p 692 call void @aligned_barrier() 693 ret void 694} 695define internal void @barrier_then_write0(ptr %p) { 696; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0 697; MODULE-SAME: (ptr [[P:%.*]]) { 698; MODULE-NEXT: store i32 0, ptr [[P]], align 4 699; MODULE-NEXT: ret void 700; 701; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0 702; CGSCC-SAME: (ptr [[P:%.*]]) { 703; CGSCC-NEXT: call void @aligned_barrier() 704; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 705; CGSCC-NEXT: ret void 706; 707 call void @aligned_barrier() 708 store i32 0, ptr %p 709 ret void 710} 711define internal void @barrier_then_write_then_barrier0(ptr %p) { 712; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0 713; MODULE-SAME: (ptr [[P:%.*]]) { 714; MODULE-NEXT: store i32 0, ptr [[P]], align 4 715; MODULE-NEXT: call void @aligned_barrier() 716; MODULE-NEXT: ret void 717; 718; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0 719; CGSCC-SAME: (ptr [[P:%.*]]) { 720; CGSCC-NEXT: call void @aligned_barrier() 721; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 722; CGSCC-NEXT: call void @aligned_barrier() 723; CGSCC-NEXT: ret void 724; 725 call void @aligned_barrier() 726 store i32 0, ptr %p 727 call void @aligned_barrier() 728 ret 
void 729} 730define amdgpu_kernel void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" { 731; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0 732; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { 733; MODULE-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]]) 734; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] 735; MODULE: t03: 736; MODULE-NEXT: call void @barrier_then_write0(ptr [[P]]) 737; MODULE-NEXT: br label [[T0B3:%.*]] 738; MODULE: t0b3: 739; MODULE-NEXT: br label [[M3:%.*]] 740; MODULE: f03: 741; MODULE-NEXT: call void @barrier_then_write0(ptr [[P]]) 742; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] 743; MODULE: t13: 744; MODULE-NEXT: br label [[M3]] 745; MODULE: f13: 746; MODULE-NEXT: br label [[M3]] 747; MODULE: m3: 748; MODULE-NEXT: call void @write_then_barrier0(ptr [[P]]) 749; MODULE-NEXT: ret void 750; 751; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0 752; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] { 753; CGSCC-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]]) 754; CGSCC-NEXT: call void @aligned_barrier() 755; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] 756; CGSCC: t03: 757; CGSCC-NEXT: call void @barrier_then_write0(ptr [[P]]) 758; CGSCC-NEXT: br label [[T0B3:%.*]] 759; CGSCC: t0b3: 760; CGSCC-NEXT: call void @aligned_barrier() 761; CGSCC-NEXT: br label [[M3:%.*]] 762; CGSCC: f03: 763; CGSCC-NEXT: call void @barrier_then_write0(ptr [[P]]) 764; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] 765; CGSCC: t13: 766; CGSCC-NEXT: call void @aligned_barrier() 767; CGSCC-NEXT: br label [[M3]] 768; CGSCC: f13: 769; CGSCC-NEXT: call void @aligned_barrier() 770; CGSCC-NEXT: br label [[M3]] 771; CGSCC: m3: 772; CGSCC-NEXT: call void @write_then_barrier0(ptr [[P]]) 773; CGSCC-NEXT: ret void 774; 775 call void 
@barrier_then_write_then_barrier0(ptr %p) 776 call void @aligned_barrier() 777 br i1 %c0, label %t03, label %f03 778t03: 779 call void @barrier_then_write0(ptr %p) 780 br label %t0b3 781t0b3: 782 call void @aligned_barrier() 783 br label %m3 784f03: 785 call void @aligned_barrier() 786 call void @barrier_then_write0(ptr %p) 787 br i1 %c1, label %t13, label %f13 788t13: 789 call void @aligned_barrier() 790 br label %m3 791f13: 792 call void @aligned_barrier() 793 br label %m3 794m3: 795 call void @aligned_barrier() 796 call void @write_then_barrier0(ptr %p) 797 ret void 798} 799define internal void @write_then_barrier1(ptr %p) { 800; CHECK-LABEL: define {{[^@]+}}@write_then_barrier1 801; CHECK-SAME: (ptr [[P:%.*]]) { 802; CHECK-NEXT: store i32 0, ptr [[P]], align 4 803; CHECK-NEXT: call void @aligned_barrier() 804; CHECK-NEXT: ret void 805; 806 store i32 0, ptr %p 807 call void @aligned_barrier() 808 ret void 809} 810define internal void @barrier_then_write1(ptr %p) { 811; MODULE-LABEL: define {{[^@]+}}@barrier_then_write1 812; MODULE-SAME: (ptr [[P:%.*]]) { 813; MODULE-NEXT: store i32 0, ptr [[P]], align 4 814; MODULE-NEXT: ret void 815; 816; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write1 817; CGSCC-SAME: (ptr [[P:%.*]]) { 818; CGSCC-NEXT: call void @aligned_barrier() 819; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 820; CGSCC-NEXT: ret void 821; 822 call void @aligned_barrier() 823 store i32 0, ptr %p 824 ret void 825} 826define internal void @barrier_then_write_then_barrier1(ptr %p) { 827; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier1 828; CHECK-SAME: (ptr [[P:%.*]]) { 829; CHECK-NEXT: call void @aligned_barrier() 830; CHECK-NEXT: store i32 0, ptr [[P]], align 4 831; CHECK-NEXT: call void @aligned_barrier() 832; CHECK-NEXT: ret void 833; 834 call void @aligned_barrier() 835 store i32 0, ptr %p 836 call void @aligned_barrier() 837 ret void 838} 839define void @multiple_blocks_functions_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) { 840; 
MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1 841; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { 842; MODULE-NEXT: call void @barrier_then_write_then_barrier1(ptr [[P]]) 843; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] 844; MODULE: t03: 845; MODULE-NEXT: call void @barrier_then_write1(ptr [[P]]) 846; MODULE-NEXT: br label [[T0B3:%.*]] 847; MODULE: t0b3: 848; MODULE-NEXT: call void @aligned_barrier() 849; MODULE-NEXT: br label [[M3:%.*]] 850; MODULE: f03: 851; MODULE-NEXT: call void @barrier_then_write1(ptr [[P]]) 852; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] 853; MODULE: t13: 854; MODULE-NEXT: call void @aligned_barrier() 855; MODULE-NEXT: br label [[M3]] 856; MODULE: f13: 857; MODULE-NEXT: call void @aligned_barrier() 858; MODULE-NEXT: br label [[M3]] 859; MODULE: m3: 860; MODULE-NEXT: call void @write_then_barrier1(ptr [[P]]) 861; MODULE-NEXT: ret void 862; 863; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1 864; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { 865; CGSCC-NEXT: call void @barrier_then_write_then_barrier1(ptr [[P]]) 866; CGSCC-NEXT: call void @aligned_barrier() 867; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] 868; CGSCC: t03: 869; CGSCC-NEXT: call void @barrier_then_write1(ptr [[P]]) 870; CGSCC-NEXT: br label [[T0B3:%.*]] 871; CGSCC: t0b3: 872; CGSCC-NEXT: call void @aligned_barrier() 873; CGSCC-NEXT: br label [[M3:%.*]] 874; CGSCC: f03: 875; CGSCC-NEXT: call void @barrier_then_write1(ptr [[P]]) 876; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] 877; CGSCC: t13: 878; CGSCC-NEXT: call void @aligned_barrier() 879; CGSCC-NEXT: br label [[M3]] 880; CGSCC: f13: 881; CGSCC-NEXT: call void @aligned_barrier() 882; CGSCC-NEXT: br label [[M3]] 883; CGSCC: m3: 884; CGSCC-NEXT: call void @write_then_barrier1(ptr [[P]]) 885; CGSCC-NEXT: ret void 886; 887 call void 
@barrier_then_write_then_barrier1(ptr %p)
  call void @aligned_barrier()
  br i1 %c0, label %t03, label %f03
t03:
  call void @barrier_then_write1(ptr %p)
  br label %t0b3
t0b3:
  call void @aligned_barrier()
  br label %m3
f03:
  call void @aligned_barrier()
  call void @barrier_then_write1(ptr %p)
  br i1 %c1, label %t13, label %f13
t13:
  call void @aligned_barrier()
  br label %m3
f13:
  call void @aligned_barrier()
  br label %m3
m3:
  call void @aligned_barrier()
  call void @write_then_barrier1(ptr %p)
  ret void
}

; Internal helper that stores 0 through %p and then executes an aligned
; barrier. Both runs expect the body to come out unchanged.
define internal void @write_then_barrier2(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@write_then_barrier2
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    ret void
;
  store i32 0, ptr %p
  call void @aligned_barrier()
  ret void
}
; Internal helper that executes an aligned barrier and then stores 0 through
; %p. Both runs expect the body to come out unchanged.
define internal void @barrier_then_write2(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@barrier_then_write2
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
  call void @aligned_barrier()
  store i32 0, ptr %p
  ret void
}
; Internal helper with the shape barrier, store, barrier. The module-pass run
; expects the leading barrier to be gone, while the cgscc run expects all three
; instructions to remain.
define internal void @barrier_then_write_then_barrier2(ptr %p) {
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
; MODULE-SAME: (ptr [[P:%.*]]) {
; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
; MODULE-NEXT:    call void @aligned_barrier()
; MODULE-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
; CGSCC-SAME: (ptr [[P:%.*]]) {
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    ret void
;
  call void @aligned_barrier()
  store i32 0, ptr %p
  call void @aligned_barrier()
  ret void
}
; Multi-block function carrying the "kernel" attribute. It mixes direct stores
; through %p, calls to the three internal helpers above, and aligned barriers
; spread over a diamond-shaped control flow that joins in %m3.
; Expected results differ per run. The module-pass run drops the barrier that
; follows the first helper call in the entry block and the barrier at the top
; of %m3. The cgscc run keeps the entry-block barrier and drops only the one at
; the top of %m3. All barriers in %t0b3, %f03, %t13 and %f13 stay in both runs.
define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
; MODULE-NEXT:    call void @barrier_then_write_then_barrier2(ptr [[P]])
; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
; MODULE:       t03:
; MODULE-NEXT:    call void @barrier_then_write2(ptr [[P]])
; MODULE-NEXT:    br label [[T0B3:%.*]]
; MODULE:       t0b3:
; MODULE-NEXT:    call void @aligned_barrier()
; MODULE-NEXT:    br label [[M3:%.*]]
; MODULE:       f03:
; MODULE-NEXT:    call void @aligned_barrier()
; MODULE-NEXT:    call void @barrier_then_write2(ptr [[P]])
; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
; MODULE:       t13:
; MODULE-NEXT:    call void @aligned_barrier()
; MODULE-NEXT:    br label [[M3]]
; MODULE:       f13:
; MODULE-NEXT:    call void @aligned_barrier()
; MODULE-NEXT:    br label [[M3]]
; MODULE:       m3:
; MODULE-NEXT:    call void @write_then_barrier2(ptr [[P]])
; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
; MODULE-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
; CGSCC-NEXT:    call void @barrier_then_write_then_barrier2(ptr [[P]])
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
; CGSCC:       t03:
; CGSCC-NEXT:    call void @barrier_then_write2(ptr [[P]])
; CGSCC-NEXT:    br label [[T0B3:%.*]]
; CGSCC:       t0b3:
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    br label [[M3:%.*]]
; CGSCC:       f03:
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    call void @barrier_then_write2(ptr [[P]])
; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
; CGSCC:       t13:
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    br label [[M3]]
; CGSCC:       f13:
; CGSCC-NEXT:    call void @aligned_barrier()
; CGSCC-NEXT:    br label [[M3]]
; CGSCC:       m3:
; CGSCC-NEXT:    call void @write_then_barrier2(ptr [[P]])
; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
; CGSCC-NEXT:    ret void
;
  call void @barrier_then_write_then_barrier2(ptr %p)
  call void @aligned_barrier()
  store i32 0, ptr %p
  br i1 %c0, label %t03, label %f03
t03:
  call void @barrier_then_write2(ptr %p)
  br label %t0b3
t0b3:
  call void @aligned_barrier()
  br label %m3
f03:
  call void @aligned_barrier()
  call void @barrier_then_write2(ptr %p)
  br i1 %c1, label %t13, label %f13
t13:
  call void @aligned_barrier()
  br label %m3
f13:
  call void @aligned_barrier()
  br label %m3
m3:
  call void @aligned_barrier()
  call void @write_then_barrier2(ptr %p)
  store i32 0, ptr %p
  ret void
}

; Verify we do not remove the barrier in the callee.
; Internal helper whose entire body is a single aligned barrier. Both runs
; expect that barrier to still be present in the output.
define internal void @callee_barrier() {
; CHECK-LABEL: define {{[^@]+}}@callee_barrier() {
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    ret void
;
  call void @aligned_barrier()
  ret void
}
; Kernel that brackets a call to @callee_barrier with one aligned barrier on
; each side. The expected output keeps only the call, so both barriers written
; directly in the kernel are removed.
define amdgpu_kernel void @caller_barrier1() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@caller_barrier1
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:    call void @callee_barrier()
; CHECK-NEXT:    ret void
;
  call void @aligned_barrier()
  call void @callee_barrier()
  call void @aligned_barrier()
  ret void
}
; Kernel that brackets a call to @callee_barrier with calls to @unknown
; instead of barriers. The expected output is identical to the input.
define amdgpu_kernel void @caller_barrier2() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@caller_barrier2
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @callee_barrier()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    ret void
;
  call void @unknown()
  call void @callee_barrier()
  call void @unknown()
  ret void
}

; Kernel with a counted loop (%i starts at 0, the loop is left once %i.next
; equals 128) whose body calls @unknown and then an aligned barrier. The
; expected output is identical to the input, so the in-loop barrier stays.
define amdgpu_kernel void @loop_barrier() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  call void @unknown()
  call void @aligned_barrier()
  %i.next = add nuw nsw i32 %i, 1
  %cond = icmp ne i32 %i.next, 128
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}

; Same loop as @loop_barrier, followed by four consecutive aligned barriers in
; the exit block. The expected output keeps the in-loop barrier and reduces the
; exit block to a bare return.
define amdgpu_kernel void @loop_barrier_end_barriers() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  call void @unknown()
  call void @aligned_barrier()
  %i.next = add nuw nsw i32 %i, 1
  %cond = icmp ne i32 %i.next, 128
  br i1 %cond, label %loop, label %exit

exit:
  call void @aligned_barrier()
  call void @aligned_barrier()
  call void @aligned_barrier()
  call void @aligned_barrier()
  ret void
}

; Same loop again, but the exit block is two barriers, a call to @unknown, and
; two more barriers. The expected output keeps the in-loop barrier and leaves
; only the @unknown call before the return in the exit block.
define amdgpu_kernel void @loop_barrier_end_barriers_unknown() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  call void @unknown()
  call void @aligned_barrier()
  %i.next = add nuw nsw i32 %i, 1
  %cond = icmp ne i32 %i.next, 128
  br i1 %cond, label %loop, label %exit

exit:
  call void @aligned_barrier()
  call void @aligned_barrier()
  call void @unknown()
  call void @aligned_barrier()
  call void @aligned_barrier()
  ret void
}

; Variant of @loop_barrier in which the loop body stores %i to @G1 instead of
; calling @unknown. The expected output is identical to the input apart from
; the explicit alignment on the store, so the in-loop barrier stays.
define amdgpu_kernel void @loop_barrier_store() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    store i32 [[I]], ptr @G1, align 4
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  store i32 %i, ptr @G1
  call void @aligned_barrier()
  %i.next = add nuw nsw i32 %i, 1
  %cond = icmp ne i32 %i.next, 128
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}

; Store-based loop as in @loop_barrier_store, with an exit block made of two
; barriers, a store of %i.next to @G1, and two more barriers. The expected
; output keeps the in-loop barrier and leaves only the store before the return
; in the exit block.
define amdgpu_kernel void @loop_barrier_end_barriers_store() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    store i32 [[I]], ptr @G1, align 4
; CHECK-NEXT:    call void @aligned_barrier()
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    store i32 [[I_NEXT]], ptr @G1, align 4
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  store i32 %i, ptr @G1
  call void @aligned_barrier()
  %i.next = add nuw nsw i32 %i, 1
  %cond = icmp ne i32 %i.next, 128
  br i1 %cond, label %loop, label %exit

exit:
  call void @aligned_barrier()
  call void @aligned_barrier()
  store i32 %i.next, ptr @G1
  call void @aligned_barrier()
  call void @aligned_barrier()
  ret void
}

; Module flags listing the "openmp-device" entry (!16) before the "openmp"
; entry (!15), both with value 50. The metadata checks at the end of the file
; expect them in that same order.
!llvm.module.flags = !{!16,!15}

!15 = !{i32 7, !"openmp", i32 50}
!16 = !{i32 7, !"openmp-device", i32 50}
;.
; MODULE: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
; MODULE: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; MODULE: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; MODULE: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
; MODULE: attributes #[[ATTR4]] = { "kernel" }
; MODULE: attributes #[[ATTR5]] = { nosync memory(none) }
;.
; CGSCC: attributes #[[ATTR0]] = { "llvm.assume"="ompx_aligned_barrier" }
; CGSCC: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; CGSCC: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
; CGSCC: attributes #[[ATTR4]] = { "kernel" }
; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) }
;.
; MODULE: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; MODULE: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
;.