; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC

; These tests exercise which loads from internal addrspace(3) globals are
; replaced by constants, and which stores are removed, when "kernel" functions
; are run through openmp-opt (module pass and CGSCC pass variants).
; The kernels differ in whether stores and loads are separated by calls to
; @sync (declared without attributes) or to @barrier (declared with
; "llvm.assume"="ompx_aligned_barrier").

target triple = "amdgcn-amd-amdhsa"

%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }

; Globals used by the tests: @G and @H by @kernel, @X by @kernel2 and @kernel3,
; and one dedicated global for each of the remaining kernels.
@G = internal addrspace(3) global i32 undef, align 4
@H = internal addrspace(3) global i32 undef, align 4
@X = internal addrspace(3) global i32 undef, align 4
@QA1 = internal addrspace(3) global i32 undef, align 4
@QB1 = internal addrspace(3) global i32 undef, align 4
@QC1 = internal addrspace(3) global i32 undef, align 4
@QD1 = internal addrspace(3) global i32 undef, align 4
@QA2 = internal addrspace(3) global i32 undef, align 4
@QB2 = internal addrspace(3) global i32 undef, align 4
@QC2 = internal addrspace(3) global i32 undef, align 4
@QD2 = internal addrspace(3) global i32 undef, align 4
@QA3 = internal addrspace(3) global i32 undef, align 4
@QB3 = internal addrspace(3) global i32 undef, align 4
@QC3 = internal addrspace(3) global i32 undef, align 4
@QD3 = internal addrspace(3) global i32 undef, align 4
@UAA1 = internal addrspace(3) global i32 undef, align 4
@UAA2 = internal addrspace(3) global i32 undef, align 4
@UAA3 = internal addrspace(3) global i32 undef, align 4
@UANA1 = internal addrspace(3) global i32 undef, align 4
; @str is only referenced by @test_assume.
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
; Kernel environment passed to @__kmpc_target_init in @kernel.
@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }

; Make sure we do not delete the stores to @G without also replacing the load with `1`.
;.
; CHECK: @G = internal addrspace(3) global i32 undef, align 4
; CHECK: @H = internal addrspace(3) global i32 undef, align 4
; CHECK: @X = internal addrspace(3) global i32 undef, align 4
; CHECK: @QA1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QB1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QC1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QD1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QA2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QB2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QC2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QD2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QA3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QB3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QC3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QD3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UAA1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UAA2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UAA3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UANA1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; CHECK: @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
;.
; In the expected output below, all stores to @G and @H are gone: the load of
; @G is replaced by 1, the compare of the @H load folds to `true`, and the
; reload of @H in if.merge is replaced by 2.
define amdgpu_kernel void @kernel(ptr %dyn) "kernel" {
;
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel
; TUNIT-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
; TUNIT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; TUNIT:       if.then:
; TUNIT-NEXT:    br label [[IF_MERGE:%.*]]
; TUNIT:       if.else:
; TUNIT-NEXT:    call void @barrier() #[[ATTR6:[0-9]+]]
; TUNIT-NEXT:    call void @use1(i32 1) #[[ATTR7:[0-9]+]]
; TUNIT-NEXT:    call void @llvm.assume(i1 true)
; TUNIT-NEXT:    call void @barrier() #[[ATTR6]]
; TUNIT-NEXT:    br label [[IF_MERGE]]
; TUNIT:       if.merge:
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; TUNIT:       if.then2:
; TUNIT-NEXT:    call void @barrier() #[[ATTR6]]
; TUNIT-NEXT:    br label [[IF_END]]
; TUNIT:       if.end:
; TUNIT-NEXT:    call void @__kmpc_target_deinit()
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel
; CGSCC-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CGSCC-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
; CGSCC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CGSCC:       if.then:
; CGSCC-NEXT:    br label [[IF_MERGE:%.*]]
; CGSCC:       if.else:
; CGSCC-NEXT:    call void @barrier() #[[ATTR6:[0-9]+]]
; CGSCC-NEXT:    call void @use1(i32 1) #[[ATTR6]]
; CGSCC-NEXT:    call void @llvm.assume(i1 true)
; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
; CGSCC-NEXT:    br label [[IF_MERGE]]
; CGSCC:       if.merge:
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; CGSCC:       if.then2:
; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
; CGSCC-NEXT:    br label [[IF_END]]
; CGSCC:       if.end:
; CGSCC-NEXT:    call void @__kmpc_target_deinit()
; CGSCC-NEXT:    ret void
;
  %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn)
  %cmp = icmp eq i32 %call, -1
  br i1 %cmp, label %if.then, label %if.else
if.then:
  store i32 1, ptr addrspace(3) @G
  store i32 2, ptr addrspace(3) @H
  br label %if.merge
if.else:
  call void @barrier();
  %l = load i32, ptr addrspace(3) @G
  call void @use1(i32 %l)
  %hv = load i32, ptr addrspace(3) @H
  %hc = icmp eq i32 %hv, 2
  call void @llvm.assume(i1 %hc)
  call void @barrier();
  br label %if.merge
if.merge:
  %hreload = load i32, ptr addrspace(3) @H
  call void @use1(i32 %hreload)
  br i1 %cmp, label %if.then2, label %if.end
if.then2:
  store i32 2, ptr addrspace(3) @G
  call void @barrier();
  br label %if.end
if.end:
  call void @__kmpc_target_deinit()
  ret void
}

; An assume on a constant-expression comparison involving @str; the expected
; output is identical to the input.
define void @test_assume() {
; CHECK-LABEL: define {{[^@]+}}@test_assume() {
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    ret void
;
  %cmp = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
  call void @llvm.assume(i1 %cmp)
  ret void
}

; We can't ignore the sync, hence this might store 2 into %p
define amdgpu_kernel void @kernel2(ptr %p) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
; CHECK-NEXT:    call void @sync()
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; CHECK-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
; CHECK-NEXT:    store i32 [[V]], ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, ptr addrspace(3) @X
  call void @sync()
  %v = load i32, ptr addrspace(3) @X
  store i32 2, ptr addrspace(3) @X
  store i32 %v, ptr %p
  ret void
}

; We can't ignore the sync, hence this might store 2 into %p
; Same body as @kernel2, but @sync is reached through the wrapper @sync_def.
define amdgpu_kernel void @kernel3(ptr %p) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel3
; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
; TUNIT-NEXT:    call void @sync_def.internalized()
; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
; TUNIT-NEXT:    store i32 [[V]], ptr [[P]], align 4
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel3
; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
; CGSCC-NEXT:    call void @sync_def()
; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
; CGSCC-NEXT:    store i32 [[V]], ptr [[P]], align 4
; CGSCC-NEXT:    ret void
;
  store i32 1, ptr addrspace(3) @X
  call void @sync_def()
  %v = load i32, ptr addrspace(3) @X
  store i32 2, ptr addrspace(3) @X
  store i32 %v, ptr %p
  ret void
}

; Wrapper whose only effect is to call @sync; used by @kernel3.
define void @sync_def() {
; CHECK-LABEL: define {{[^@]+}}@sync_def() {
; CHECK-NEXT:    call void @sync()
; CHECK-NEXT:    ret void
;
  call void @sync()
  ret void
}

; Initial store of 0, then either (L) @sync + load or (S) store of 2 + @sync.
; The expected output keeps both stores and the load.
define amdgpu_kernel void @kernel4a1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QA1, align 4
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QA1, align 4
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4a1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QA1, align 4
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QA1, align 4
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) @QA1
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QA1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QA1
  call void @sync();
  ret void
}

; We should not replace the load or delete the second store.
define amdgpu_kernel void @kernel4b1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) @QB1
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QB1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QB1
  ret void
}

; No initial store: path L does @sync + load, path S does store of 2 + @sync.
; The expected output replaces the load by 2 and drops the store.
define amdgpu_kernel void @kernel4a2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4a2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QA2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QA2
  call void @sync();
  ret void
}

; FIXME: We should not replace the load with undef.
; NOTE(review): the expected output for @kernel4b2 shows the load replaced by
; 2, not undef, so this FIXME may be stale -- confirm before removing it.
define amdgpu_kernel void @kernel4b2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    ret void
;
  ; Path L loads after @sync; path S only stores 2 (no @sync after the store).
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QB2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QB2
  ret void
}

; Same shape as @kernel4a1 but with four @sync calls after the store of 2.
; The expected output keeps both stores and the load.
define amdgpu_kernel void @kernel4a3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QA3, align 4
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QA3, align 4
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4a3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QA3, align 4
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QA3, align 4
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) @QA3
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QA3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QA3
  call void @sync();
  call void @sync();
  call void @sync();
  call void @sync();
  ret void
}

; The load of QB3 should not be simplified to 0.
define amdgpu_kernel void @kernel4b3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QB3, align 4
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QB3, align 4
; TUNIT-NEXT:    call void @use1(i32 0) #[[ATTR7]]
; TUNIT-NEXT:    call void @use1(i32 1) #[[ATTR7]]
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    call void @use1(i32 3) #[[ATTR7]]
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QB3, align 4
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QB3, align 4
; CGSCC-NEXT:    call void @use1(i32 0) #[[ATTR6]]
; CGSCC-NEXT:    call void @use1(i32 1) #[[ATTR6]]
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    call void @use1(i32 3) #[[ATTR6]]
; CGSCC-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) @QB3
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QB3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QB3
  call void @use1(i32 0)
  call void @use1(i32 1)
  call void @use1(i32 2)
  call void @use1(i32 3)
  ret void
}


; Same shape as @kernel4a1 but using @barrier instead of @sync. The expected
; output drops both stores and both barrier calls and replaces the load by 0.
define amdgpu_kernel void @kernel4c1(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @use1(i32 0) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4c1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @use1(i32 0) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) @QC1
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QC1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QC1
  call void @barrier();
  ret void
}

; We should not replace the load or delete the second store.
define amdgpu_kernel void @kernel4d1(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QD1, align 4
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QD1, align 4
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4d1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QD1, align 4
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QD1, align 4
; CGSCC-NEXT:    ret void
;
  ; Path L loads after @barrier; path S stores 2 without a following barrier.
  store i32 0, ptr addrspace(3) @QD1
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QD1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QD1
  ret void
}

; No initial store; both paths call @barrier. The expected output replaces the
; load by undef and drops the store and both barrier calls.
define amdgpu_kernel void @kernel4c2(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @use1(i32 undef) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4c2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QC2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QC2
  call void @barrier();
  ret void
}

; We should not replace the load with undef.
define amdgpu_kernel void @kernel4d2(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4d2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QD2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QD2
  ret void
}

; Same shape as @kernel4c2, using @QC3; the expected output is the same too.
define amdgpu_kernel void @kernel4c3(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @use1(i32 undef) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4c3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QC3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QC3
  call void @barrier();
  ret void
}

; We should not replace the load with undef.
define amdgpu_kernel void @kernel4d3(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4d3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QD3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QD3
  ret void
}

; Path L: @barrier then load. Path S: @sync, store of 2, @barrier, @sync.
; The expected output replaces the load by 2, drops the store and drops the
; barrier on path L.
define amdgpu_kernel void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @UAA1
  call void @use1(i32 %v)
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UAA1
  call void @barrier();
  call void @sync();
  ret void
}

; Path L: @sync then load. Path S: @sync, store of 2, @barrier, @sync.
; The expected output replaces the load by 2 and drops the store.
define amdgpu_kernel void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @UAA2
  call void @use1(i32 %v)
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UAA2
  call void @barrier();
  call void @sync();
  ret void
}

; Path L: @sync, load, use, @barrier. Path S: @sync, store of 2, @sync.
; The expected output replaces the load by 2 and drops the store.
define amdgpu_kernel void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @UAA3
  call void @use1(i32 %v)
  call void @barrier();
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UAA3
  call void @sync();
  ret void
}

; Only @sync is used (no @barrier). Path L: @sync then load. Path S: @sync,
; store of 2, @sync. The expected output replaces the load by 2 and drops the
; store.
define amdgpu_kernel void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT:       L:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT:    ret void
; TUNIT:       S:
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    call void @sync()
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC:       L:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT:    ret void
; CGSCC:       S:
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    call void @sync()
; CGSCC-NEXT:    ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @UANA1
  call void @use1(i32 %v)
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UANA1
  call void @sync();
  ret void
}

; External declarations used by the tests. @sync carries no attributes, while
; @barrier is annotated with "llvm.assume"="ompx_aligned_barrier".
declare void @sync()
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
declare void @use1(i32) nosync norecurse nounwind nocallback
declare i32 @__kmpc_target_init(ptr, ptr) nocallback
declare void @__kmpc_target_deinit() nocallback
declare void @llvm.assume(i1)

!llvm.module.flags = !{!0, !1}

!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}

;.
; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
; TUNIT: attributes #[[ATTR1]] = { "kernel" }
; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback }
; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
; TUNIT: attributes #[[ATTR6]] = { nounwind "llvm.assume"="ompx_aligned_barrier" }
; TUNIT: attributes #[[ATTR7]] = { nounwind }
;.
; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
; CGSCC: attributes #[[ATTR1]] = { "kernel" }
; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback }
; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
; CGSCC: attributes #[[ATTR6]] = { nounwind }
;.
; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
;.