; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s

; Check that we don't replace `atomicrmw <op> LHS, 0` with `load atomic LHS`.
; Doing that would lose the store semantic of the `atomicrmw` operation, and
; losing the store could in turn enable optimizations (e.g., dropping a fence)
; that are illegal while the store semantic is present.

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_add_zero(ptr %addr) {
; CHECK-LABEL: @atomic_add_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw add ptr %addr, i32 0 monotonic
  ret i32 %res
}

define i32 @atomic_or_zero(ptr %addr) {
; CHECK-LABEL: @atomic_or_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw or ptr %addr, i32 0 monotonic
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_sub_zero(ptr %addr) {
; CHECK-LABEL: @atomic_sub_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw sub ptr %addr, i32 0 monotonic
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_and_allones(ptr %addr) {
; CHECK-LABEL: @atomic_and_allones(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw and ptr %addr, i32 -1 monotonic
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_umin_uint_max(ptr %addr) {
; CHECK-LABEL: @atomic_umin_uint_max(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umin ptr %addr, i32 -1 monotonic
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_umax_zero(ptr %addr) {
; CHECK-LABEL: @atomic_umax_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umax ptr %addr, i32 0 monotonic
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i8 @atomic_min_smax_char(ptr %addr) {
; CHECK-LABEL: @atomic_min_smax_char(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw min ptr %addr, i8 127 monotonic
  ret i8 %res
}

; Idempotent atomicrmw are still canonicalized.
define i8 @atomic_max_smin_char(ptr %addr) {
; CHECK-LABEL: @atomic_max_smin_char(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw max ptr %addr, i8 -128 monotonic
  ret i8 %res
}
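; A summary of why each case above is idempotent (two's complement integer
; arithmetic):
;   add x, 0 == x       sub x, 0 == x       or x, 0 == x
;   and x, -1 == x      umin x, -1 == x     (-1 is UINT_MAX)
;   umax x, 0 == x      min x, 127 == x     (SCHAR_MAX for i8)
;   max x, -128 == x    (SCHAR_MIN for i8)
; As the CHECK lines show, all of these fold into the single canonical form
; `atomicrmw or ptr %addr, <ty> 0`, e.g.:
;   %res = atomicrmw add ptr %addr, i32 0 monotonic
; =>
;   %res = atomicrmw or ptr %addr, i32 0 monotonic, align 4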
; Idempotent atomicrmw are still canonicalized.
define float @atomic_fsub_zero(ptr %addr) {
; CHECK-LABEL: @atomic_fsub_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 monotonic, align 4
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fsub ptr %addr, float 0.0 monotonic
  ret float %res
}

define float @atomic_fadd_zero(ptr %addr) {
; CHECK-LABEL: @atomic_fadd_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 monotonic, align 4
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fadd ptr %addr, float -0.0 monotonic
  ret float %res
}

; Idempotent atomicrmw are still canonicalized.
define float @atomic_fsub_canon(ptr %addr) {
; CHECK-LABEL: @atomic_fsub_canon(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 release, align 4
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fsub ptr %addr, float 0.0 release
  ret float %res
}

define float @atomic_fadd_canon(ptr %addr) {
; CHECK-LABEL: @atomic_fadd_canon(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 release, align 4
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fadd ptr %addr, float -0.0 release
  ret float %res
}

; Can't replace a volatile w/ a load; this would eliminate a volatile store.
define i64 @atomic_sub_zero_volatile(ptr %addr) {
; CHECK-LABEL: @atomic_sub_zero_volatile(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw volatile sub ptr [[ADDR:%.*]], i64 0 acquire, align 8
; CHECK-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw volatile sub ptr %addr, i64 0 acquire
  ret i64 %res
}


; Check that the transformation properly preserves the syncscope.
; Idempotent atomicrmw are still canonicalized.
define i16 @atomic_syncscope(ptr %addr) {
; CHECK-LABEL: @atomic_syncscope(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 syncscope("some_syncscope") acquire, align 2
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw or ptr %addr, i16 0 syncscope("some_syncscope") acquire
  ret i16 %res
}

; Eliminating the store part of the atomicrmw would drop the release half of
; the seq_cst ordering, which is incorrect. We can still canonicalize the
; operation.
define i16 @atomic_seq_cst(ptr %addr) {
; CHECK-LABEL: @atomic_seq_cst(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 seq_cst, align 2
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw add ptr %addr, i16 0 seq_cst
  ret i16 %res
}

; Check that the transformation does not apply when the value is changed by
; the atomic operation (non-zero constant).
define i16 @atomic_add_non_zero(ptr %addr) {
; CHECK-LABEL: @atomic_add_non_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add ptr [[ADDR:%.*]], i16 2 monotonic, align 2
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw add ptr %addr, i16 2 monotonic
  ret i16 %res
}

; Idempotent atomicrmw are still canonicalized.
define i16 @atomic_xor_zero(ptr %addr) {
; CHECK-LABEL: @atomic_xor_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 monotonic, align 2
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw xor ptr %addr, i16 0 monotonic
  ret i16 %res
}
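; Note for the release and acq_rel tests below: besides losing the store,
; there is no load form to canonicalize to, since the LangRef only allows
; unordered, monotonic, acquire, or seq_cst orderings on an atomic load.
; For example, this is not valid IR at all:
;   %res = load atomic i16, ptr %addr release, align 2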
; Check that the transformation does not apply when the ordering is
; incompatible with a load (release). Do canonicalize.
define i16 @atomic_release(ptr %addr) {
; CHECK-LABEL: @atomic_release(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 release, align 2
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw sub ptr %addr, i16 0 release
  ret i16 %res
}

; Check that the transformation does not apply when the ordering is
; incompatible with a load (acq_rel). Do canonicalize.
define i16 @atomic_acq_rel(ptr %addr) {
; CHECK-LABEL: @atomic_acq_rel(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 acq_rel, align 2
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw xor ptr %addr, i16 0 acq_rel
  ret i16 %res
}

define i32 @sat_or_allones(ptr %addr) {
; CHECK-LABEL: @sat_or_allones(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw or ptr %addr, i32 -1 monotonic
  ret i32 %res
}

define i32 @sat_and_zero(ptr %addr) {
; CHECK-LABEL: @sat_and_zero(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw and ptr %addr, i32 0 monotonic
  ret i32 %res
}

define i32 @sat_umin_uint_min(ptr %addr) {
; CHECK-LABEL: @sat_umin_uint_min(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umin ptr %addr, i32 0 monotonic
  ret i32 %res
}

define i32 @sat_umax_uint_max(ptr %addr) {
; CHECK-LABEL: @sat_umax_uint_max(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umax ptr %addr, i32 -1 monotonic
  ret i32 %res
}

define i8 @sat_min_smin_char(ptr %addr) {
; CHECK-LABEL: @sat_min_smin_char(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i8 -128 monotonic, align 1
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw min ptr %addr, i8 -128 monotonic
  ret i8 %res
}

define i8 @sat_max_smax_char(ptr %addr) {
; CHECK-LABEL: @sat_max_smax_char(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i8 127 monotonic, align 1
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw max ptr %addr, i8 127 monotonic
  ret i8 %res
}

define double @sat_fadd_nan(ptr %addr) {
; CHECK-LABEL: @sat_fadd_nan(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF00000FFFFFFFF release, align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fadd ptr %addr, double 0x7FF00000FFFFFFFF release
  ret double %res
}

define double @sat_fsub_nan(ptr %addr) {
; CHECK-LABEL: @sat_fsub_nan(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF00000FFFFFFFF release, align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fsub ptr %addr, double 0x7FF00000FFFFFFFF release
  ret double %res
}

define void @sat_fsub_nan_unused(ptr %addr) {
; CHECK-LABEL: @sat_fsub_nan_unused(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF00000FFFFFFFF monotonic, align 8
; CHECK-NEXT:    ret void
;
  atomicrmw fsub ptr %addr, double 0x7FF00000FFFFFFFF monotonic
  ret void
}

define void @xchg_unused_monotonic(ptr %addr) {
; CHECK-LABEL: @xchg_unused_monotonic(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 0 monotonic
  ret void
}

define void @xchg_unused_release(ptr %addr) {
; CHECK-LABEL: @xchg_unused_release(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 release, align 4
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 -1 release
  ret void
}

define void @xchg_unused_under_aligned(ptr %addr) {
; CHECK-LABEL: @xchg_unused_under_aligned(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 release, align 1
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 -1 release, align 1
  ret void
}

define void @xchg_unused_over_aligned(ptr %addr) {
; CHECK-LABEL: @xchg_unused_over_aligned(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 release, align 8
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 -1 release, align 8
  ret void
}

define void @xchg_unused_seq_cst(ptr %addr) {
; CHECK-LABEL: @xchg_unused_seq_cst(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 seq_cst, align 4
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 0 seq_cst
  ret void
}

define void @xchg_unused_volatile(ptr %addr) {
; CHECK-LABEL: @xchg_unused_volatile(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw volatile xchg ptr [[ADDR:%.*]], i32 0 monotonic, align 4
; CHECK-NEXT:    ret void
;
  atomicrmw volatile xchg ptr %addr, i32 0 monotonic
  ret void
}

define void @sat_or_allones_unused(ptr %addr) {
; CHECK-LABEL: @sat_or_allones_unused(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 monotonic, align 4
; CHECK-NEXT:    ret void
;
  atomicrmw or ptr %addr, i32 -1 monotonic
  ret void
}

define void @undef_operand_unused(ptr %addr) {
; CHECK-LABEL: @undef_operand_unused(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 undef monotonic, align 4
; CHECK-NEXT:    ret void
;
  atomicrmw or ptr %addr, i32 undef monotonic
  ret void
}

define i32 @undef_operand_used(ptr %addr) {
; CHECK-LABEL: @undef_operand_used(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 undef monotonic, align 4
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw or ptr %addr, i32 undef monotonic
  ret i32 %res
}

define double @sat_fmax_inf(ptr %addr) {
; CHECK-LABEL: @sat_fmax_inf(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF0000000000000 monotonic, align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmax ptr %addr, double 0x7FF0000000000000 monotonic
  ret double %res
}

define double @no_sat_fmax_inf(ptr %addr) {
; CHECK-LABEL: @no_sat_fmax_inf(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr [[ADDR:%.*]], double 1.000000e-01 monotonic, align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmax ptr %addr, double 1.000000e-01 monotonic
  ret double %res
}

define double @sat_fmin_inf(ptr %addr) {
; CHECK-LABEL: @sat_fmin_inf(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0xFFF0000000000000 monotonic, align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmin ptr %addr, double 0xFFF0000000000000 monotonic
  ret double %res
}

define double @no_sat_fmin_inf(ptr %addr) {
; CHECK-LABEL: @no_sat_fmin_inf(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fmin ptr [[ADDR:%.*]], double 1.000000e-01 monotonic, align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmin ptr %addr, double 1.000000e-01 monotonic
  ret double %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_add_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_add_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0:![0-9]+]], !amdgpu.no.fine.grained.host.memory [[META1:![0-9]+]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw add ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

define i32 @atomic_or_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_or_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw or ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_sub_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_sub_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw sub ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_and_allones_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_and_allones_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw and ptr %addr, i32 -1 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_umin_uint_max_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_umin_uint_max_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umin ptr %addr, i32 -1 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}
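; Reminder for the *_preserve_md variants in this half of the file: they
; repeat the earlier cases with AMDGPU memory metadata attached, and the
; shared [[META0]]/[[META1]] captures in the CHECK lines verify that !mmra,
; !amdgpu.no.fine.grained.host.memory, and !amdgpu.no.remote.memory.access
; survive the rewrite.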
; Idempotent atomicrmw are still canonicalized.
define i32 @atomic_umax_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_umax_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umax ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

; Idempotent atomicrmw are still canonicalized.
define i8 @atomic_min_smax_char_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_min_smax_char_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 syncscope("agent") monotonic, align 1, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw min ptr %addr, i8 127 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i8 %res
}

; Idempotent atomicrmw are still canonicalized.
define i8 @atomic_max_smin_char_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_max_smin_char_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 syncscope("agent") monotonic, align 1, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw max ptr %addr, i8 -128 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i8 %res
}

; Idempotent atomicrmw are still canonicalized.
define float @atomic_fsub_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_fsub_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fsub ptr %addr, float 0.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret float %res
}

define float @atomic_fadd_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_fadd_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fadd ptr %addr, float -0.0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret float %res
}
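; Why -0.0 (not +0.0) is the canonical idempotent fadd operand: under
; IEEE 754, x + (-0.0) returns x for every x (NaNs stay NaN), while
; x + (+0.0) would rewrite -0.0 to +0.0. The same identity is why
; `fsub x, 0.0` (i.e. x - (+0.0), which equals x for all x) can be
; canonicalized to `fadd x, -0.0`.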
; Idempotent atomicrmw are still canonicalized.
define float @atomic_fsub_canon_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_fsub_canon_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 release, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fsub ptr %addr, float 0.0 release, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret float %res
}

define float @atomic_fadd_canon_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_fadd_canon_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 release, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fadd ptr %addr, float -0.0 release, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret float %res
}

; Can't replace a volatile w/ a load; this would eliminate a volatile store.
define i64 @atomic_sub_zero_volatile_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_sub_zero_volatile_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw volatile sub ptr [[ADDR:%.*]], i64 0 acquire, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i64 [[RES]]
;
  %res = atomicrmw volatile sub ptr %addr, i64 0 acquire, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i64 %res
}


; Check that the transformation properly preserves the syncscope.
; Idempotent atomicrmw are still canonicalized.
define i16 @atomic_syncscope_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_syncscope_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 syncscope("some_syncscope") acquire, align 2, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw or ptr %addr, i16 0 syncscope("some_syncscope") acquire, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i16 %res
}

; Eliminating the store part of the atomicrmw would drop the release half of
; the seq_cst ordering, which is incorrect. We can still canonicalize the
; operation.
define i16 @atomic_seq_cst_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_seq_cst_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 seq_cst, align 2, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw add ptr %addr, i16 0 seq_cst, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i16 %res
}
; Check that the transformation does not apply when the value is changed by
; the atomic operation (non-zero constant).
define i16 @atomic_add_non_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_add_non_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add ptr [[ADDR:%.*]], i16 2 syncscope("agent") monotonic, align 2, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw add ptr %addr, i16 2 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i16 %res
}

; Idempotent atomicrmw are still canonicalized.
define i16 @atomic_xor_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_xor_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 syncscope("agent") monotonic, align 2, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw xor ptr %addr, i16 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i16 %res
}

; Check that the transformation does not apply when the ordering is
; incompatible with a load (release). Do canonicalize.
define i16 @atomic_release_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_release_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 release, align 2, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw sub ptr %addr, i16 0 release, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i16 %res
}
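; The sat_*_preserve_md tests below exercise the complementary fold: when the
; constant operand makes the result independent of the loaded value (or -1,
; and 0, umin 0, umax -1, min SMIN, max SMAX, fadd/fsub NaN, fmax +inf,
; fmin -inf), the operation is rewritten to `atomicrmw xchg` of that
; constant, e.g.:
;   %res = atomicrmw or ptr %addr, i32 -1 monotonic
; =>
;   %res = atomicrmw xchg ptr %addr, i32 -1 monotonic, align 4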
; Check that the transformation does not apply when the ordering is
; incompatible with a load (acq_rel). Do canonicalize.
define i16 @atomic_acq_rel_preserve_md(ptr %addr) {
; CHECK-LABEL: @atomic_acq_rel_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i16 0 acq_rel, align 2, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i16 [[RES]]
;
  %res = atomicrmw xor ptr %addr, i16 0 acq_rel, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i16 %res
}

define i32 @sat_or_allones_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_or_allones_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw or ptr %addr, i32 -1 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

define i32 @sat_and_zero_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_and_zero_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw and ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

define i32 @sat_umin_uint_min_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_umin_uint_min_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umin ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

define i32 @sat_umax_uint_max_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_umax_uint_max_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw umax ptr %addr, i32 -1 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

define i8 @sat_min_smin_char_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_min_smin_char_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i8 -128 syncscope("agent") monotonic, align 1, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw min ptr %addr, i8 -128 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i8 %res
}

define i8 @sat_max_smax_char_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_max_smax_char_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i8 127 syncscope("agent") monotonic, align 1, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i8 [[RES]]
;
  %res = atomicrmw max ptr %addr, i8 127 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i8 %res
}

define double @sat_fadd_nan_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_fadd_nan_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF00000FFFFFFFF release, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fadd ptr %addr, double 0x7FF00000FFFFFFFF release, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret double %res
}

define double @sat_fsub_nan_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_fsub_nan_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF00000FFFFFFFF release, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fsub ptr %addr, double 0x7FF00000FFFFFFFF release, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret double %res
}

define void @sat_fsub_nan_unused_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_fsub_nan_unused_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF00000FFFFFFFF syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw fsub ptr %addr, double 0x7FF00000FFFFFFFF syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @xchg_unused_monotonic_preserve_md(ptr %addr) {
; CHECK-LABEL: @xchg_unused_monotonic_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @xchg_unused_release_preserve_md(ptr %addr) {
; CHECK-LABEL: @xchg_unused_release_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 syncscope("agent") release, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 -1 syncscope("agent") release, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @xchg_unused_under_aligned_preserve_md(ptr %addr) {
; CHECK-LABEL: @xchg_unused_under_aligned_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 syncscope("agent") release, align 1, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 -1 syncscope("agent") release, align 1, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @xchg_unused_over_aligned_preserve_md(ptr %addr) {
; CHECK-LABEL: @xchg_unused_over_aligned_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 syncscope("agent") release, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 -1 syncscope("agent") release, align 8, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @xchg_unused_seq_cst_preserve_md(ptr %addr) {
; CHECK-LABEL: @xchg_unused_seq_cst_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 0 syncscope("agent") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw xchg ptr %addr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @xchg_unused_volatile_preserve_md(ptr %addr) {
; CHECK-LABEL: @xchg_unused_volatile_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw volatile xchg ptr [[ADDR:%.*]], i32 0 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw volatile xchg ptr %addr, i32 0 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @sat_or_allones_unused_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_or_allones_unused_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], i32 -1 syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw or ptr %addr, i32 -1 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define void @undef_operand_unused_preserve_md(ptr %addr) {
; CHECK-LABEL: @undef_operand_unused_preserve_md(
; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 undef syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret void
;
  atomicrmw or ptr %addr, i32 undef syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret void
}

define i32 @undef_operand_used_preserve_md(ptr %addr) {
; CHECK-LABEL: @undef_operand_used_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i32 undef syncscope("agent") monotonic, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = atomicrmw or ptr %addr, i32 undef syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret i32 %res
}

define double @sat_fmax_inf_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_fmax_inf_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0x7FF0000000000000 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmax ptr %addr, double 0x7FF0000000000000 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret double %res
}

define double @no_sat_fmax_inf_preserve_md(ptr %addr) {
; CHECK-LABEL: @no_sat_fmax_inf_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr [[ADDR:%.*]], double 1.000000e-01 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmax ptr %addr, double 1.000000e-01 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret double %res
}

define double @sat_fmin_inf_preserve_md(ptr %addr) {
; CHECK-LABEL: @sat_fmin_inf_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw xchg ptr [[ADDR:%.*]], double 0xFFF0000000000000 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmin ptr %addr, double 0xFFF0000000000000 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret double %res
}

define double @no_sat_fmin_inf_preserve_md(ptr %addr) {
; CHECK-LABEL: @no_sat_fmin_inf_preserve_md(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw fmin ptr [[ADDR:%.*]], double 1.000000e-01 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]]
; CHECK-NEXT:    ret double [[RES]]
;
  %res = atomicrmw fmin ptr %addr, double 1.000000e-01 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1
  ret double %res
}

!0 = !{}
!1 = !{!"foo", !"bar"}