1; RUN: not --crash llc > /dev/null < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt 2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s 3 4; Test that atomic loads are assembled properly. 5 6target triple = "wasm32-unknown-unknown" 7 8;===---------------------------------------------------------------------------- 9; Atomic loads: 32-bit 10;===---------------------------------------------------------------------------- 11 12; Basic load. 13 14; CHECK-LABEL: load_i32_no_offset: 15; CHECK: i32.atomic.load $push0=, 0($0){{$}} 16; CHECK-NEXT: return $pop0{{$}} 17define i32 @load_i32_no_offset(ptr %p) { 18 %v = load atomic i32, ptr %p seq_cst, align 4 19 ret i32 %v 20} 21 22; With an nuw add, we can fold an offset. 23 24; CHECK-LABEL: load_i32_with_folded_offset: 25; CHECK: i32.atomic.load $push0=, 24($0){{$}} 26define i32 @load_i32_with_folded_offset(ptr %p) { 27 %q = ptrtoint ptr %p to i32 28 %r = add nuw i32 %q, 24 29 %s = inttoptr i32 %r to ptr 30 %t = load atomic i32, ptr %s seq_cst, align 4 31 ret i32 %t 32} 33 34; With an inbounds gep, we can fold an offset. 35 36; CHECK-LABEL: load_i32_with_folded_gep_offset: 37; CHECK: i32.atomic.load $push0=, 24($0){{$}} 38define i32 @load_i32_with_folded_gep_offset(ptr %p) { 39 %s = getelementptr inbounds i32, ptr %p, i32 6 40 %t = load atomic i32, ptr %s seq_cst, align 4 41 ret i32 %t 42} 43 44; We can't fold a negative offset though, even with an inbounds gep. 45 46; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset: 47; CHECK: i32.const $push0=, -24{{$}} 48; CHECK: i32.add $push1=, $0, $pop0{{$}} 49; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} 50define i32 @load_i32_with_unfolded_gep_negative_offset(ptr %p) { 51 %s = getelementptr inbounds i32, ptr %p, i32 -6 52 %t = load atomic i32, ptr %s seq_cst, align 4 53 ret i32 %t 54} 55 56; Without nuw, and even with nsw, we can't fold an offset. 57 58; CHECK-LABEL: load_i32_with_unfolded_offset: 59; CHECK: i32.const $push0=, 24{{$}} 60; CHECK: i32.add $push1=, $0, $pop0{{$}} 61; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} 62define i32 @load_i32_with_unfolded_offset(ptr %p) { 63 %q = ptrtoint ptr %p to i32 64 %r = add nsw i32 %q, 24 65 %s = inttoptr i32 %r to ptr 66 %t = load atomic i32, ptr %s seq_cst, align 4 67 ret i32 %t 68} 69 70; Without inbounds, we can't fold a gep offset. 71 72; CHECK-LABEL: load_i32_with_unfolded_gep_offset: 73; CHECK: i32.const $push0=, 24{{$}} 74; CHECK: i32.add $push1=, $0, $pop0{{$}} 75; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} 76define i32 @load_i32_with_unfolded_gep_offset(ptr %p) { 77 %s = getelementptr i32, ptr %p, i32 6 78 %t = load atomic i32, ptr %s seq_cst, align 4 79 ret i32 %t 80} 81 82; When loading from a fixed address, materialize a zero. 83 84; CHECK-LABEL: load_i32_from_numeric_address 85; CHECK: i32.const $push0=, 0{{$}} 86; CHECK: i32.atomic.load $push1=, 42($pop0){{$}} 87define i32 @load_i32_from_numeric_address() { 88 %s = inttoptr i32 42 to ptr 89 %t = load atomic i32, ptr %s seq_cst, align 4 90 ret i32 %t 91} 92 93; CHECK-LABEL: load_i32_from_global_address 94; CHECK: i32.const $push0=, 0{{$}} 95; CHECK: i32.atomic.load $push1=, gv($pop0){{$}} 96@gv = global i32 0 97define i32 @load_i32_from_global_address() { 98 %t = load atomic i32, ptr @gv seq_cst, align 4 99 ret i32 %t 100} 101 102;===---------------------------------------------------------------------------- 103; Atomic loads: 64-bit 104;===---------------------------------------------------------------------------- 105 106; Basic load. 107 108; CHECK-LABEL: load_i64_no_offset: 109; CHECK: i64.atomic.load $push0=, 0($0){{$}} 110; CHECK-NEXT: return $pop0{{$}} 111define i64 @load_i64_no_offset(ptr %p) { 112 %v = load atomic i64, ptr %p seq_cst, align 8 113 ret i64 %v 114} 115 116; With an nuw add, we can fold an offset. 117 118; CHECK-LABEL: load_i64_with_folded_offset: 119; CHECK: i64.atomic.load $push0=, 24($0){{$}} 120define i64 @load_i64_with_folded_offset(ptr %p) { 121 %q = ptrtoint ptr %p to i32 122 %r = add nuw i32 %q, 24 123 %s = inttoptr i32 %r to ptr 124 %t = load atomic i64, ptr %s seq_cst, align 8 125 ret i64 %t 126} 127 128; With an inbounds gep, we can fold an offset. 129 130; CHECK-LABEL: load_i64_with_folded_gep_offset: 131; CHECK: i64.atomic.load $push0=, 24($0){{$}} 132define i64 @load_i64_with_folded_gep_offset(ptr %p) { 133 %s = getelementptr inbounds i64, ptr %p, i32 3 134 %t = load atomic i64, ptr %s seq_cst, align 8 135 ret i64 %t 136} 137 138; We can't fold a negative offset though, even with an inbounds gep. 139 140; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset: 141; CHECK: i32.const $push0=, -24{{$}} 142; CHECK: i32.add $push1=, $0, $pop0{{$}} 143; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} 144define i64 @load_i64_with_unfolded_gep_negative_offset(ptr %p) { 145 %s = getelementptr inbounds i64, ptr %p, i32 -3 146 %t = load atomic i64, ptr %s seq_cst, align 8 147 ret i64 %t 148} 149 150; Without nuw, and even with nsw, we can't fold an offset. 151 152; CHECK-LABEL: load_i64_with_unfolded_offset: 153; CHECK: i32.const $push0=, 24{{$}} 154; CHECK: i32.add $push1=, $0, $pop0{{$}} 155; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} 156define i64 @load_i64_with_unfolded_offset(ptr %p) { 157 %q = ptrtoint ptr %p to i32 158 %r = add nsw i32 %q, 24 159 %s = inttoptr i32 %r to ptr 160 %t = load atomic i64, ptr %s seq_cst, align 8 161 ret i64 %t 162} 163 164; Without inbounds, we can't fold a gep offset. 165 166; CHECK-LABEL: load_i64_with_unfolded_gep_offset: 167; CHECK: i32.const $push0=, 24{{$}} 168; CHECK: i32.add $push1=, $0, $pop0{{$}} 169; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} 170define i64 @load_i64_with_unfolded_gep_offset(ptr %p) { 171 %s = getelementptr i64, ptr %p, i32 3 172 %t = load atomic i64, ptr %s seq_cst, align 8 173 ret i64 %t 174} 175 176;===---------------------------------------------------------------------------- 177; Atomic stores: 32-bit 178;===---------------------------------------------------------------------------- 179 180; Basic store. 181 182; CHECK-LABEL: store_i32_no_offset: 183; CHECK-NEXT: .functype store_i32_no_offset (i32, i32) -> (){{$}} 184; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}} 185; CHECK-NEXT: return{{$}} 186define void @store_i32_no_offset(ptr %p, i32 %v) { 187 store atomic i32 %v, ptr %p seq_cst, align 4 188 ret void 189} 190 191; With an nuw add, we can fold an offset. 192 193; CHECK-LABEL: store_i32_with_folded_offset: 194; CHECK: i32.atomic.store 24($0), $pop0{{$}} 195define void @store_i32_with_folded_offset(ptr %p) { 196 %q = ptrtoint ptr %p to i32 197 %r = add nuw i32 %q, 24 198 %s = inttoptr i32 %r to ptr 199 store atomic i32 0, ptr %s seq_cst, align 4 200 ret void 201} 202 203; With an inbounds gep, we can fold an offset. 204 205; CHECK-LABEL: store_i32_with_folded_gep_offset: 206; CHECK: i32.atomic.store 24($0), $pop0{{$}} 207define void @store_i32_with_folded_gep_offset(ptr %p) { 208 %s = getelementptr inbounds i32, ptr %p, i32 6 209 store atomic i32 0, ptr %s seq_cst, align 4 210 ret void 211} 212 213; We can't fold a negative offset though, even with an inbounds gep. 214 215; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset: 216; CHECK: i32.const $push0=, -24{{$}} 217; CHECK: i32.add $push1=, $0, $pop0{{$}} 218; CHECK: i32.atomic.store 0($pop1), $pop2{{$}} 219define void @store_i32_with_unfolded_gep_negative_offset(ptr %p) { 220 %s = getelementptr inbounds i32, ptr %p, i32 -6 221 store atomic i32 0, ptr %s seq_cst, align 4 222 ret void 223} 224 225; Without nuw, and even with nsw, we can't fold an offset. 226 227; CHECK-LABEL: store_i32_with_unfolded_offset: 228; CHECK: i32.const $push0=, 24{{$}} 229; CHECK: i32.add $push1=, $0, $pop0{{$}} 230; CHECK: i32.atomic.store 0($pop1), $pop2{{$}} 231define void @store_i32_with_unfolded_offset(ptr %p) { 232 %q = ptrtoint ptr %p to i32 233 %r = add nsw i32 %q, 24 234 %s = inttoptr i32 %r to ptr 235 store atomic i32 0, ptr %s seq_cst, align 4 236 ret void 237} 238 239; Without inbounds, we can't fold a gep offset. 240 241; CHECK-LABEL: store_i32_with_unfolded_gep_offset: 242; CHECK: i32.const $push0=, 24{{$}} 243; CHECK: i32.add $push1=, $0, $pop0{{$}} 244; CHECK: i32.atomic.store 0($pop1), $pop2{{$}} 245define void @store_i32_with_unfolded_gep_offset(ptr %p) { 246 %s = getelementptr i32, ptr %p, i32 6 247 store atomic i32 0, ptr %s seq_cst, align 4 248 ret void 249} 250 251; When storing from a fixed address, materialize a zero. 252 253; CHECK-LABEL: store_i32_to_numeric_address: 254; CHECK: i32.const $push0=, 0{{$}} 255; CHECK-NEXT: i32.const $push1=, 0{{$}} 256; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}} 257define void @store_i32_to_numeric_address() { 258 %s = inttoptr i32 42 to ptr 259 store atomic i32 0, ptr %s seq_cst, align 4 260 ret void 261} 262 263; CHECK-LABEL: store_i32_to_global_address: 264; CHECK: i32.const $push0=, 0{{$}} 265; CHECK: i32.const $push1=, 0{{$}} 266; CHECK: i32.atomic.store gv($pop0), $pop1{{$}} 267define void @store_i32_to_global_address() { 268 store atomic i32 0, ptr @gv seq_cst, align 4 269 ret void 270} 271 272;===---------------------------------------------------------------------------- 273; Atomic stores: 64-bit 274;===---------------------------------------------------------------------------- 275 276; Basic store. 277 278; CHECK-LABEL: store_i64_no_offset: 279; CHECK-NEXT: .functype store_i64_no_offset (i32, i64) -> (){{$}} 280; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}} 281; CHECK-NEXT: return{{$}} 282define void @store_i64_no_offset(ptr %p, i64 %v) { 283 store atomic i64 %v, ptr %p seq_cst, align 8 284 ret void 285} 286 287; With an nuw add, we can fold an offset. 288 289; CHECK-LABEL: store_i64_with_folded_offset: 290; CHECK: i64.atomic.store 24($0), $pop0{{$}} 291define void @store_i64_with_folded_offset(ptr %p) { 292 %q = ptrtoint ptr %p to i32 293 %r = add nuw i32 %q, 24 294 %s = inttoptr i32 %r to ptr 295 store atomic i64 0, ptr %s seq_cst, align 8 296 ret void 297} 298 299; With an inbounds gep, we can fold an offset. 300 301; CHECK-LABEL: store_i64_with_folded_gep_offset: 302; CHECK: i64.atomic.store 24($0), $pop0{{$}} 303define void @store_i64_with_folded_gep_offset(ptr %p) { 304 %s = getelementptr inbounds i64, ptr %p, i32 3 305 store atomic i64 0, ptr %s seq_cst, align 8 306 ret void 307} 308 309; We can't fold a negative offset though, even with an inbounds gep. 310 311; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset: 312; CHECK: i32.const $push0=, -24{{$}} 313; CHECK: i32.add $push1=, $0, $pop0{{$}} 314; CHECK: i64.atomic.store 0($pop1), $pop2{{$}} 315define void @store_i64_with_unfolded_gep_negative_offset(ptr %p) { 316 %s = getelementptr inbounds i64, ptr %p, i32 -3 317 store atomic i64 0, ptr %s seq_cst, align 8 318 ret void 319} 320 321; Without nuw, and even with nsw, we can't fold an offset. 322 323; CHECK-LABEL: store_i64_with_unfolded_offset: 324; CHECK: i32.const $push0=, 24{{$}} 325; CHECK: i32.add $push1=, $0, $pop0{{$}} 326; CHECK: i64.atomic.store 0($pop1), $pop2{{$}} 327define void @store_i64_with_unfolded_offset(ptr %p) { 328 %q = ptrtoint ptr %p to i32 329 %r = add nsw i32 %q, 24 330 %s = inttoptr i32 %r to ptr 331 store atomic i64 0, ptr %s seq_cst, align 8 332 ret void 333} 334 335; Without inbounds, we can't fold a gep offset. 336 337; CHECK-LABEL: store_i64_with_unfolded_gep_offset: 338; CHECK: i32.const $push0=, 24{{$}} 339; CHECK: i32.add $push1=, $0, $pop0{{$}} 340; CHECK: i64.atomic.store 0($pop1), $pop2{{$}} 341define void @store_i64_with_unfolded_gep_offset(ptr %p) { 342 %s = getelementptr i64, ptr %p, i32 3 343 store atomic i64 0, ptr %s seq_cst, align 8 344 ret void 345} 346 347;===---------------------------------------------------------------------------- 348; Atomic sign-extending loads 349;===---------------------------------------------------------------------------- 350 351; Fold an offset into a sign-extending load. 352 353; CHECK-LABEL: load_i8_i32_s_with_folded_offset: 354; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} 355; CHECK-NEXT: i32.extend8_s $push1=, $pop0 356define i32 @load_i8_i32_s_with_folded_offset(ptr %p) { 357 %q = ptrtoint ptr %p to i32 358 %r = add nuw i32 %q, 24 359 %s = inttoptr i32 %r to ptr 360 %t = load atomic i8, ptr %s seq_cst, align 1 361 %u = sext i8 %t to i32 362 ret i32 %u 363} 364 365; 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s 366; CHECK-LABEL: load_i32_i64_s_with_folded_offset: 367; CHECK: i32.atomic.load $push0=, 24($0){{$}} 368; CHECK-NEXT: i64.extend_i32_s $push1=, $pop0{{$}} 369define i64 @load_i32_i64_s_with_folded_offset(ptr %p) { 370 %q = ptrtoint ptr %p to i32 371 %r = add nuw i32 %q, 24 372 %s = inttoptr i32 %r to ptr 373 %t = load atomic i32, ptr %s seq_cst, align 4 374 %u = sext i32 %t to i64 375 ret i64 %u 376} 377 378; Fold a gep offset into a sign-extending load. 379 380; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset: 381; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} 382; CHECK-NEXT: i32.extend8_s $push1=, $pop0 383define i32 @load_i8_i32_s_with_folded_gep_offset(ptr %p) { 384 %s = getelementptr inbounds i8, ptr %p, i32 24 385 %t = load atomic i8, ptr %s seq_cst, align 1 386 %u = sext i8 %t to i32 387 ret i32 %u 388} 389 390; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset: 391; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}} 392; CHECK-NEXT: i32.extend16_s $push1=, $pop0 393define i32 @load_i16_i32_s_with_folded_gep_offset(ptr %p) { 394 %s = getelementptr inbounds i16, ptr %p, i32 24 395 %t = load atomic i16, ptr %s seq_cst, align 2 396 %u = sext i16 %t to i32 397 ret i32 %u 398} 399 400; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset: 401; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}} 402; CHECK-NEXT: i64.extend16_s $push1=, $pop0 403define i64 @load_i16_i64_s_with_folded_gep_offset(ptr %p) { 404 %s = getelementptr inbounds i16, ptr %p, i32 24 405 %t = load atomic i16, ptr %s seq_cst, align 2 406 %u = sext i16 %t to i64 407 ret i64 %u 408} 409 410; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 411; an 'add' if the or'ed bits are known to be zero. 412 413; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset: 414; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} 415; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} 416define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) { 417 %and = and i32 %x, -4 418 %t0 = inttoptr i32 %and to ptr 419 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 420 %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1 421 %conv = sext i8 %t1 to i32 422 ret i32 %conv 423} 424 425; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset: 426; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} 427; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} 428define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) { 429 %and = and i32 %x, -4 430 %t0 = inttoptr i32 %and to ptr 431 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 432 %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1 433 %conv = sext i8 %t1 to i64 434 ret i64 %conv 435} 436 437; When loading from a fixed address, materialize a zero. 438 439; CHECK-LABEL: load_i16_i32_s_from_numeric_address 440; CHECK: i32.const $push0=, 0{{$}} 441; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}} 442; CHECK-NEXT: i32.extend16_s $push2=, $pop1 443define i32 @load_i16_i32_s_from_numeric_address() { 444 %s = inttoptr i32 42 to ptr 445 %t = load atomic i16, ptr %s seq_cst, align 2 446 %u = sext i16 %t to i32 447 ret i32 %u 448} 449 450; CHECK-LABEL: load_i8_i32_s_from_global_address 451; CHECK: i32.const $push0=, 0{{$}} 452; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}} 453; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} 454@gv8 = global i8 0 455define i32 @load_i8_i32_s_from_global_address() { 456 %t = load atomic i8, ptr @gv8 seq_cst, align 1 457 %u = sext i8 %t to i32 458 ret i32 %u 459} 460 461;===---------------------------------------------------------------------------- 462; Atomic zero-extending loads 463;===---------------------------------------------------------------------------- 464 465; Fold an offset into a zero-extending load. 466 467; CHECK-LABEL: load_i8_i32_z_with_folded_offset: 468; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} 469define i32 @load_i8_i32_z_with_folded_offset(ptr %p) { 470 %q = ptrtoint ptr %p to i32 471 %r = add nuw i32 %q, 24 472 %s = inttoptr i32 %r to ptr 473 %t = load atomic i8, ptr %s seq_cst, align 1 474 %u = zext i8 %t to i32 475 ret i32 %u 476} 477 478; CHECK-LABEL: load_i32_i64_z_with_folded_offset: 479; CHECK: i64.atomic.load32_u $push0=, 24($0){{$}} 480define i64 @load_i32_i64_z_with_folded_offset(ptr %p) { 481 %q = ptrtoint ptr %p to i32 482 %r = add nuw i32 %q, 24 483 %s = inttoptr i32 %r to ptr 484 %t = load atomic i32, ptr %s seq_cst, align 4 485 %u = zext i32 %t to i64 486 ret i64 %u 487} 488 489; Fold a gep offset into a zero-extending load. 490 491; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset: 492; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} 493define i32 @load_i8_i32_z_with_folded_gep_offset(ptr %p) { 494 %s = getelementptr inbounds i8, ptr %p, i32 24 495 %t = load atomic i8, ptr %s seq_cst, align 1 496 %u = zext i8 %t to i32 497 ret i32 %u 498} 499 500; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset: 501; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}} 502define i32 @load_i16_i32_z_with_folded_gep_offset(ptr %p) { 503 %s = getelementptr inbounds i16, ptr %p, i32 24 504 %t = load atomic i16, ptr %s seq_cst, align 2 505 %u = zext i16 %t to i32 506 ret i32 %u 507} 508 509; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset: 510; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}} 511define i64 @load_i16_i64_z_with_folded_gep_offset(ptr %p) { 512 %s = getelementptr inbounds i16, ptr %p, i64 24 513 %t = load atomic i16, ptr %s seq_cst, align 2 514 %u = zext i16 %t to i64 515 ret i64 %u 516} 517 518; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 519; an 'add' if the or'ed bits are known to be zero. 520 521; CHECK-LABEL: load_i8_i32_z_with_folded_or_offset: 522; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} 523define i32 @load_i8_i32_z_with_folded_or_offset(i32 %x) { 524 %and = and i32 %x, -4 525 %t0 = inttoptr i32 %and to ptr 526 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 527 %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1 528 %conv = zext i8 %t1 to i32 529 ret i32 %conv 530} 531 532; CHECK-LABEL: load_i8_i64_z_with_folded_or_offset: 533; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} 534define i64 @load_i8_i64_z_with_folded_or_offset(i32 %x) { 535 %and = and i32 %x, -4 536 %t0 = inttoptr i32 %and to ptr 537 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 538 %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1 539 %conv = zext i8 %t1 to i64 540 ret i64 %conv 541} 542 543; When loading from a fixed address, materialize a zero. 544 545; CHECK-LABEL: load_i16_i32_z_from_numeric_address 546; CHECK: i32.const $push0=, 0{{$}} 547; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}} 548define i32 @load_i16_i32_z_from_numeric_address() { 549 %s = inttoptr i32 42 to ptr 550 %t = load atomic i16, ptr %s seq_cst, align 2 551 %u = zext i16 %t to i32 552 ret i32 %u 553} 554 555; CHECK-LABEL: load_i8_i32_z_from_global_address 556; CHECK: i32.const $push0=, 0{{$}} 557; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}} 558define i32 @load_i8_i32_z_from_global_address() { 559 %t = load atomic i8, ptr @gv8 seq_cst, align 1 560 %u = zext i8 %t to i32 561 ret i32 %u 562} 563 564; i8 return value should test anyext loads 565 566; CHECK-LABEL: load_i8_i32_retvalue: 567; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}} 568; CHECK-NEXT: return $pop0{{$}} 569define i8 @load_i8_i32_retvalue(ptr %p) { 570 %v = load atomic i8, ptr %p seq_cst, align 1 571 ret i8 %v 572} 573 574;===---------------------------------------------------------------------------- 575; Atomic truncating stores 576;===---------------------------------------------------------------------------- 577 578; Fold an offset into a truncating store. 579 580; CHECK-LABEL: store_i8_i32_with_folded_offset: 581; CHECK: i32.atomic.store8 24($0), $1{{$}} 582define void @store_i8_i32_with_folded_offset(ptr %p, i32 %v) { 583 %q = ptrtoint ptr %p to i32 584 %r = add nuw i32 %q, 24 585 %s = inttoptr i32 %r to ptr 586 %t = trunc i32 %v to i8 587 store atomic i8 %t, ptr %s seq_cst, align 1 588 ret void 589} 590 591; CHECK-LABEL: store_i32_i64_with_folded_offset: 592; CHECK: i64.atomic.store32 24($0), $1{{$}} 593define void @store_i32_i64_with_folded_offset(ptr %p, i64 %v) { 594 %q = ptrtoint ptr %p to i32 595 %r = add nuw i32 %q, 24 596 %s = inttoptr i32 %r to ptr 597 %t = trunc i64 %v to i32 598 store atomic i32 %t, ptr %s seq_cst, align 4 599 ret void 600} 601 602; Fold a gep offset into a truncating store. 603 604; CHECK-LABEL: store_i8_i32_with_folded_gep_offset: 605; CHECK: i32.atomic.store8 24($0), $1{{$}} 606define void @store_i8_i32_with_folded_gep_offset(ptr %p, i32 %v) { 607 %s = getelementptr inbounds i8, ptr %p, i32 24 608 %t = trunc i32 %v to i8 609 store atomic i8 %t, ptr %s seq_cst, align 1 610 ret void 611} 612 613; CHECK-LABEL: store_i16_i32_with_folded_gep_offset: 614; CHECK: i32.atomic.store16 48($0), $1{{$}} 615define void @store_i16_i32_with_folded_gep_offset(ptr %p, i32 %v) { 616 %s = getelementptr inbounds i16, ptr %p, i32 24 617 %t = trunc i32 %v to i16 618 store atomic i16 %t, ptr %s seq_cst, align 2 619 ret void 620} 621 622; CHECK-LABEL: store_i16_i64_with_folded_gep_offset: 623; CHECK: i64.atomic.store16 48($0), $1{{$}} 624define void @store_i16_i64_with_folded_gep_offset(ptr %p, i64 %v) { 625 %s = getelementptr inbounds i16, ptr %p, i32 24 626 %t = trunc i64 %v to i16 627 store atomic i16 %t, ptr %s seq_cst, align 2 628 ret void 629} 630 631; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 632; an 'add' if the or'ed bits are known to be zero. 633 634; CHECK-LABEL: store_i8_i32_with_folded_or_offset: 635; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $1{{$}} 636define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) { 637 %and = and i32 %x, -4 638 %p = inttoptr i32 %and to ptr 639 %arrayidx = getelementptr inbounds i8, ptr %p, i32 2 640 %t = trunc i32 %v to i8 641 store atomic i8 %t, ptr %arrayidx seq_cst, align 1 642 ret void 643} 644 645; CHECK-LABEL: store_i8_i64_with_folded_or_offset: 646; CHECK: i64.atomic.store8 2($pop{{[0-9]+}}), $1{{$}} 647define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) { 648 %and = and i32 %x, -4 649 %p = inttoptr i32 %and to ptr 650 %arrayidx = getelementptr inbounds i8, ptr %p, i32 2 651 %t = trunc i64 %v to i8 652 store atomic i8 %t, ptr %arrayidx seq_cst, align 1 653 ret void 654} 655 656;===---------------------------------------------------------------------------- 657; Atomic binary read-modify-writes: 32-bit 658;===---------------------------------------------------------------------------- 659 660; There are several RMW instructions, but here we only test 'add' as an example. 661 662; Basic RMW. 663 664; CHECK-LABEL: rmw_add_i32_no_offset: 665; CHECK-NEXT: .functype rmw_add_i32_no_offset (i32, i32) -> (i32){{$}} 666; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}} 667; CHECK-NEXT: return $pop0{{$}} 668define i32 @rmw_add_i32_no_offset(ptr %p, i32 %v) { 669 %old = atomicrmw add ptr %p, i32 %v seq_cst 670 ret i32 %old 671} 672 673; With an nuw add, we can fold an offset. 674 675; CHECK-LABEL: rmw_add_i32_with_folded_offset: 676; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}} 677define i32 @rmw_add_i32_with_folded_offset(ptr %p, i32 %v) { 678 %q = ptrtoint ptr %p to i32 679 %r = add nuw i32 %q, 24 680 %s = inttoptr i32 %r to ptr 681 %old = atomicrmw add ptr %s, i32 %v seq_cst 682 ret i32 %old 683} 684 685; With an inbounds gep, we can fold an offset. 686 687; CHECK-LABEL: rmw_add_i32_with_folded_gep_offset: 688; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}} 689define i32 @rmw_add_i32_with_folded_gep_offset(ptr %p, i32 %v) { 690 %s = getelementptr inbounds i32, ptr %p, i32 6 691 %old = atomicrmw add ptr %s, i32 %v seq_cst 692 ret i32 %old 693} 694 695; We can't fold a negative offset though, even with an inbounds gep. 696 697; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_negative_offset: 698; CHECK: i32.const $push0=, -24{{$}} 699; CHECK: i32.add $push1=, $0, $pop0{{$}} 700; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}} 701define i32 @rmw_add_i32_with_unfolded_gep_negative_offset(ptr %p, i32 %v) { 702 %s = getelementptr inbounds i32, ptr %p, i32 -6 703 %old = atomicrmw add ptr %s, i32 %v seq_cst 704 ret i32 %old 705} 706 707; Without nuw, and even with nsw, we can't fold an offset. 708 709; CHECK-LABEL: rmw_add_i32_with_unfolded_offset: 710; CHECK: i32.const $push0=, 24{{$}} 711; CHECK: i32.add $push1=, $0, $pop0{{$}} 712; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}} 713define i32 @rmw_add_i32_with_unfolded_offset(ptr %p, i32 %v) { 714 %q = ptrtoint ptr %p to i32 715 %r = add nsw i32 %q, 24 716 %s = inttoptr i32 %r to ptr 717 %old = atomicrmw add ptr %s, i32 %v seq_cst 718 ret i32 %old 719} 720 721; Without inbounds, we can't fold a gep offset. 722 723; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_offset: 724; CHECK: i32.const $push0=, 24{{$}} 725; CHECK: i32.add $push1=, $0, $pop0{{$}} 726; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}} 727define i32 @rmw_add_i32_with_unfolded_gep_offset(ptr %p, i32 %v) { 728 %s = getelementptr i32, ptr %p, i32 6 729 %old = atomicrmw add ptr %s, i32 %v seq_cst 730 ret i32 %old 731} 732 733; When loading from a fixed address, materialize a zero. 734 735; CHECK-LABEL: rmw_add_i32_from_numeric_address 736; CHECK: i32.const $push0=, 0{{$}} 737; CHECK: i32.atomic.rmw.add $push1=, 42($pop0), $0{{$}} 738define i32 @rmw_add_i32_from_numeric_address(i32 %v) { 739 %s = inttoptr i32 42 to ptr 740 %old = atomicrmw add ptr %s, i32 %v seq_cst 741 ret i32 %old 742} 743 744; CHECK-LABEL: rmw_add_i32_from_global_address 745; CHECK: i32.const $push0=, 0{{$}} 746; CHECK: i32.atomic.rmw.add $push1=, gv($pop0), $0{{$}} 747define i32 @rmw_add_i32_from_global_address(i32 %v) { 748 %old = atomicrmw add ptr @gv, i32 %v seq_cst 749 ret i32 %old 750} 751 752;===---------------------------------------------------------------------------- 753; Atomic binary read-modify-writes: 64-bit 754;===---------------------------------------------------------------------------- 755 756; Basic RMW. 757 758; CHECK-LABEL: rmw_add_i64_no_offset: 759; CHECK-NEXT: .functype rmw_add_i64_no_offset (i32, i64) -> (i64){{$}} 760; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}} 761; CHECK-NEXT: return $pop0{{$}} 762define i64 @rmw_add_i64_no_offset(ptr %p, i64 %v) { 763 %old = atomicrmw add ptr %p, i64 %v seq_cst 764 ret i64 %old 765} 766 767; With an nuw add, we can fold an offset. 768 769; CHECK-LABEL: rmw_add_i64_with_folded_offset: 770; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}} 771define i64 @rmw_add_i64_with_folded_offset(ptr %p, i64 %v) { 772 %q = ptrtoint ptr %p to i32 773 %r = add nuw i32 %q, 24 774 %s = inttoptr i32 %r to ptr 775 %old = atomicrmw add ptr %s, i64 %v seq_cst 776 ret i64 %old 777} 778 779; With an inbounds gep, we can fold an offset. 780 781; CHECK-LABEL: rmw_add_i64_with_folded_gep_offset: 782; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}} 783define i64 @rmw_add_i64_with_folded_gep_offset(ptr %p, i64 %v) { 784 %s = getelementptr inbounds i64, ptr %p, i32 3 785 %old = atomicrmw add ptr %s, i64 %v seq_cst 786 ret i64 %old 787} 788 789; We can't fold a negative offset though, even with an inbounds gep. 790 791; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_negative_offset: 792; CHECK: i32.const $push0=, -24{{$}} 793; CHECK: i32.add $push1=, $0, $pop0{{$}} 794; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}} 795define i64 @rmw_add_i64_with_unfolded_gep_negative_offset(ptr %p, i64 %v) { 796 %s = getelementptr inbounds i64, ptr %p, i32 -3 797 %old = atomicrmw add ptr %s, i64 %v seq_cst 798 ret i64 %old 799} 800 801; Without nuw, and even with nsw, we can't fold an offset. 802 803; CHECK-LABEL: rmw_add_i64_with_unfolded_offset: 804; CHECK: i32.const $push0=, 24{{$}} 805; CHECK: i32.add $push1=, $0, $pop0{{$}} 806; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}} 807define i64 @rmw_add_i64_with_unfolded_offset(ptr %p, i64 %v) { 808 %q = ptrtoint ptr %p to i32 809 %r = add nsw i32 %q, 24 810 %s = inttoptr i32 %r to ptr 811 %old = atomicrmw add ptr %s, i64 %v seq_cst 812 ret i64 %old 813} 814 815; Without inbounds, we can't fold a gep offset. 816 817; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_offset: 818; CHECK: i32.const $push0=, 24{{$}} 819; CHECK: i32.add $push1=, $0, $pop0{{$}} 820; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}} 821define i64 @rmw_add_i64_with_unfolded_gep_offset(ptr %p, i64 %v) { 822 %s = getelementptr i64, ptr %p, i32 3 823 %old = atomicrmw add ptr %s, i64 %v seq_cst 824 ret i64 %old 825} 826 827;===---------------------------------------------------------------------------- 828; Atomic truncating & sign-extending binary RMWs 829;===---------------------------------------------------------------------------- 830 831; Fold an offset into a sign-extending rmw. 832 833; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_offset: 834; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}} 835; CHECK-NEXT: i32.extend8_s $push1=, $pop0 836define i32 @rmw_add_i8_i32_s_with_folded_offset(ptr %p, i32 %v) { 837 %q = ptrtoint ptr %p to i32 838 %r = add nuw i32 %q, 24 839 %s = inttoptr i32 %r to ptr 840 %t = trunc i32 %v to i8 841 %old = atomicrmw add ptr %s, i8 %t seq_cst 842 %u = sext i8 %old to i32 843 ret i32 %u 844} 845 846; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64.extend_i32_s 847; CHECK-LABEL: rmw_add_i32_i64_s_with_folded_offset: 848; CHECK: i32.wrap_i64 $push0=, $1 849; CHECK-NEXT: i32.atomic.rmw.add $push1=, 24($0), $pop0{{$}} 850; CHECK-NEXT: i64.extend_i32_s $push2=, $pop1{{$}} 851define i64 @rmw_add_i32_i64_s_with_folded_offset(ptr %p, i64 %v) { 852 %q = ptrtoint ptr %p to i32 853 %r = add nuw i32 %q, 24 854 %s = inttoptr i32 %r to ptr 855 %t = trunc i64 %v to i32 856 %old = atomicrmw add ptr %s, i32 %t seq_cst 857 %u = sext i32 %old to i64 858 ret i64 %u 859} 860 861; Fold a gep offset into a sign-extending rmw. 862 863; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_gep_offset: 864; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}} 865; CHECK-NEXT: i32.extend8_s $push1=, $pop0 866define i32 @rmw_add_i8_i32_s_with_folded_gep_offset(ptr %p, i32 %v) { 867 %s = getelementptr inbounds i8, ptr %p, i32 24 868 %t = trunc i32 %v to i8 869 %old = atomicrmw add ptr %s, i8 %t seq_cst 870 %u = sext i8 %old to i32 871 ret i32 %u 872} 873 874; CHECK-LABEL: rmw_add_i16_i32_s_with_folded_gep_offset: 875; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}} 876; CHECK-NEXT: i32.extend16_s $push1=, $pop0 877define i32 @rmw_add_i16_i32_s_with_folded_gep_offset(ptr %p, i32 %v) { 878 %s = getelementptr inbounds i16, ptr %p, i32 24 879 %t = trunc i32 %v to i16 880 %old = atomicrmw add ptr %s, i16 %t seq_cst 881 %u = sext i16 %old to i32 882 ret i32 %u 883} 884 885; CHECK-LABEL: rmw_add_i16_i64_s_with_folded_gep_offset: 886; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}} 887; CHECK-NEXT: i64.extend16_s $push1=, $pop0 888define i64 @rmw_add_i16_i64_s_with_folded_gep_offset(ptr %p, i64 %v) { 889 %s = getelementptr inbounds i16, ptr %p, i32 24 890 %t = trunc i64 %v to i16 891 %old = atomicrmw add ptr %s, i16 %t seq_cst 892 %u = sext i16 %old to i64 893 ret i64 %u 894} 895 896; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 897; an 'add' if the or'ed bits are known to be zero. 898 899; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_or_offset: 900; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} 901; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} 902define i32 @rmw_add_i8_i32_s_with_folded_or_offset(i32 %x, i32 %v) { 903 %and = and i32 %x, -4 904 %t0 = inttoptr i32 %and to ptr 905 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 906 %t = trunc i32 %v to i8 907 %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst 908 %conv = sext i8 %old to i32 909 ret i32 %conv 910} 911 912; CHECK-LABEL: rmw_add_i8_i64_s_with_folded_or_offset: 913; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} 914; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} 915define i64 @rmw_add_i8_i64_s_with_folded_or_offset(i32 %x, i64 %v) { 916 %and = and i32 %x, -4 917 %t0 = inttoptr i32 %and to ptr 918 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 919 %t = trunc i64 %v to i8 920 %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst 921 %conv = sext i8 %old to i64 922 ret i64 %conv 923} 924 925; When loading from a fixed address, materialize a zero. 926 927; CHECK-LABEL: rmw_add_i16_i32_s_from_numeric_address 928; CHECK: i32.const $push0=, 0{{$}} 929; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}} 930; CHECK-NEXT: i32.extend16_s $push2=, $pop1 931define i32 @rmw_add_i16_i32_s_from_numeric_address(i32 %v) { 932 %s = inttoptr i32 42 to ptr 933 %t = trunc i32 %v to i16 934 %old = atomicrmw add ptr %s, i16 %t seq_cst 935 %u = sext i16 %old to i32 936 ret i32 %u 937} 938 939; CHECK-LABEL: rmw_add_i8_i32_s_from_global_address 940; CHECK: i32.const $push0=, 0{{$}} 941; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}} 942; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} 943define i32 @rmw_add_i8_i32_s_from_global_address(i32 %v) { 944 %t = trunc i32 %v to i8 945 %old = atomicrmw add ptr @gv8, i8 %t seq_cst 946 %u = sext i8 %old to i32 947 ret i32 %u 948} 949 950;===---------------------------------------------------------------------------- 951; Atomic truncating & zero-extending binary RMWs 952;===---------------------------------------------------------------------------- 953 954; Fold an offset into a zero-extending rmw. 955 956; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_offset: 957; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}} 958define i32 @rmw_add_i8_i32_z_with_folded_offset(ptr %p, i32 %v) { 959 %q = ptrtoint ptr %p to i32 960 %r = add nuw i32 %q, 24 961 %s = inttoptr i32 %r to ptr 962 %t = trunc i32 %v to i8 963 %old = atomicrmw add ptr %s, i8 %t seq_cst 964 %u = zext i8 %old to i32 965 ret i32 %u 966} 967 968; CHECK-LABEL: rmw_add_i32_i64_z_with_folded_offset: 969; CHECK: i64.atomic.rmw32.add_u $push0=, 24($0), $1{{$}} 970define i64 @rmw_add_i32_i64_z_with_folded_offset(ptr %p, i64 %v) { 971 %q = ptrtoint ptr %p to i32 972 %r = add nuw i32 %q, 24 973 %s = inttoptr i32 %r to ptr 974 %t = trunc i64 %v to i32 975 %old = atomicrmw add ptr %s, i32 %t seq_cst 976 %u = zext i32 %old to i64 977 ret i64 %u 978} 979 980; Fold a gep offset into a zero-extending rmw. 981 982; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_gep_offset: 983; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}} 984define i32 @rmw_add_i8_i32_z_with_folded_gep_offset(ptr %p, i32 %v) { 985 %s = getelementptr inbounds i8, ptr %p, i32 24 986 %t = trunc i32 %v to i8 987 %old = atomicrmw add ptr %s, i8 %t seq_cst 988 %u = zext i8 %old to i32 989 ret i32 %u 990} 991 992; CHECK-LABEL: rmw_add_i16_i32_z_with_folded_gep_offset: 993; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}} 994define i32 @rmw_add_i16_i32_z_with_folded_gep_offset(ptr %p, i32 %v) { 995 %s = getelementptr inbounds i16, ptr %p, i32 24 996 %t = trunc i32 %v to i16 997 %old = atomicrmw add ptr %s, i16 %t seq_cst 998 %u = zext i16 %old to i32 999 ret i32 %u 1000} 1001 1002; CHECK-LABEL: rmw_add_i16_i64_z_with_folded_gep_offset: 1003; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}} 1004define i64 @rmw_add_i16_i64_z_with_folded_gep_offset(ptr %p, i64 %v) { 1005 %s = getelementptr inbounds i16, ptr %p, i32 24 1006 %t = trunc i64 %v to i16 1007 %old = atomicrmw add ptr %s, i16 %t seq_cst 1008 %u = zext i16 %old to i64 1009 ret i64 %u 1010} 1011 1012; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 1013; an 'add' if the or'ed bits are known to be zero. 1014 1015; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_or_offset: 1016; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} 1017define i32 @rmw_add_i8_i32_z_with_folded_or_offset(i32 %x, i32 %v) { 1018 %and = and i32 %x, -4 1019 %t0 = inttoptr i32 %and to ptr 1020 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 1021 %t = trunc i32 %v to i8 1022 %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst 1023 %conv = zext i8 %old to i32 1024 ret i32 %conv 1025} 1026 1027; CHECK-LABEL: rmw_add_i8_i64_z_with_folded_or_offset: 1028; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}} 1029define i64 @rmw_add_i8_i64_z_with_folded_or_offset(i32 %x, i64 %v) { 1030 %and = and i32 %x, -4 1031 %t0 = inttoptr i32 %and to ptr 1032 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 1033 %t = trunc i64 %v to i8 1034 %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst 1035 %conv = zext i8 %old to i64 1036 ret i64 %conv 1037} 1038 1039; When loading from a fixed address, materialize a zero. 1040 1041; CHECK-LABEL: rmw_add_i16_i32_z_from_numeric_address 1042; CHECK: i32.const $push0=, 0{{$}} 1043; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}} 1044define i32 @rmw_add_i16_i32_z_from_numeric_address(i32 %v) { 1045 %s = inttoptr i32 42 to ptr 1046 %t = trunc i32 %v to i16 1047 %old = atomicrmw add ptr %s, i16 %t seq_cst 1048 %u = zext i16 %old to i32 1049 ret i32 %u 1050} 1051 1052; CHECK-LABEL: rmw_add_i8_i32_z_from_global_address 1053; CHECK: i32.const $push0=, 0{{$}} 1054; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}} 1055define i32 @rmw_add_i8_i32_z_from_global_address(i32 %v) { 1056 %t = trunc i32 %v to i8 1057 %old = atomicrmw add ptr @gv8, i8 %t seq_cst 1058 %u = zext i8 %old to i32 1059 ret i32 %u 1060} 1061 1062; i8 return value should test anyext RMWs 1063 1064; CHECK-LABEL: rmw_add_i8_i32_retvalue: 1065; CHECK: i32.atomic.rmw8.add_u $push0=, 0($0), $1{{$}} 1066; CHECK-NEXT: return $pop0{{$}} 1067define i8 @rmw_add_i8_i32_retvalue(ptr %p, i32 %v) { 1068 %t = trunc i32 %v to i8 1069 %old = atomicrmw add ptr %p, i8 %t seq_cst 1070 ret i8 %old 1071} 1072 1073;===---------------------------------------------------------------------------- 1074; Atomic ternary read-modify-writes: 32-bit 1075;===---------------------------------------------------------------------------- 1076 1077; Basic RMW. 1078 1079; CHECK-LABEL: cmpxchg_i32_no_offset: 1080; CHECK-NEXT: .functype cmpxchg_i32_no_offset (i32, i32, i32) -> (i32){{$}} 1081; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} 1082; CHECK-NEXT: return $pop0{{$}} 1083define i32 @cmpxchg_i32_no_offset(ptr %p, i32 %exp, i32 %new) { 1084 %pair = cmpxchg ptr %p, i32 %exp, i32 %new seq_cst seq_cst 1085 %old = extractvalue { i32, i1 } %pair, 0 1086 ret i32 %old 1087} 1088 1089; With an nuw add, we can fold an offset. 1090 1091; CHECK-LABEL: cmpxchg_i32_with_folded_offset: 1092; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} 1093define i32 @cmpxchg_i32_with_folded_offset(ptr %p, i32 %exp, i32 %new) { 1094 %q = ptrtoint ptr %p to i32 1095 %r = add nuw i32 %q, 24 1096 %s = inttoptr i32 %r to ptr 1097 %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst 1098 %old = extractvalue { i32, i1 } %pair, 0 1099 ret i32 %old 1100} 1101 1102; With an inbounds gep, we can fold an offset. 1103 1104; CHECK-LABEL: cmpxchg_i32_with_folded_gep_offset: 1105; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} 1106define i32 @cmpxchg_i32_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) { 1107 %s = getelementptr inbounds i32, ptr %p, i32 6 1108 %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst 1109 %old = extractvalue { i32, i1 } %pair, 0 1110 ret i32 %old 1111} 1112 1113; We can't fold a negative offset though, even with an inbounds gep. 1114 1115; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_negative_offset: 1116; CHECK: i32.const $push0=, -24{{$}} 1117; CHECK: i32.add $push1=, $0, $pop0{{$}} 1118; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} 1119define i32 @cmpxchg_i32_with_unfolded_gep_negative_offset(ptr %p, i32 %exp, i32 %new) { 1120 %s = getelementptr inbounds i32, ptr %p, i32 -6 1121 %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst 1122 %old = extractvalue { i32, i1 } %pair, 0 1123 ret i32 %old 1124} 1125 1126; Without nuw, and even with nsw, we can't fold an offset. 1127 1128; CHECK-LABEL: cmpxchg_i32_with_unfolded_offset: 1129; CHECK: i32.const $push0=, 24{{$}} 1130; CHECK: i32.add $push1=, $0, $pop0{{$}} 1131; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} 1132define i32 @cmpxchg_i32_with_unfolded_offset(ptr %p, i32 %exp, i32 %new) { 1133 %q = ptrtoint ptr %p to i32 1134 %r = add nsw i32 %q, 24 1135 %s = inttoptr i32 %r to ptr 1136 %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst 1137 %old = extractvalue { i32, i1 } %pair, 0 1138 ret i32 %old 1139} 1140 1141; Without inbounds, we can't fold a gep offset. 1142 1143; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_offset: 1144; CHECK: i32.const $push0=, 24{{$}} 1145; CHECK: i32.add $push1=, $0, $pop0{{$}} 1146; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} 1147define i32 @cmpxchg_i32_with_unfolded_gep_offset(ptr %p, i32 %exp, i32 %new) { 1148 %s = getelementptr i32, ptr %p, i32 6 1149 %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst 1150 %old = extractvalue { i32, i1 } %pair, 0 1151 ret i32 %old 1152} 1153 1154; When loading from a fixed address, materialize a zero. 1155 1156; CHECK-LABEL: cmpxchg_i32_from_numeric_address 1157; CHECK: i32.const $push0=, 0{{$}} 1158; CHECK: i32.atomic.rmw.cmpxchg $push1=, 42($pop0), $0, $1{{$}} 1159define i32 @cmpxchg_i32_from_numeric_address(i32 %exp, i32 %new) { 1160 %s = inttoptr i32 42 to ptr 1161 %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst 1162 %old = extractvalue { i32, i1 } %pair, 0 1163 ret i32 %old 1164} 1165 1166; CHECK-LABEL: cmpxchg_i32_from_global_address 1167; CHECK: i32.const $push0=, 0{{$}} 1168; CHECK: i32.atomic.rmw.cmpxchg $push1=, gv($pop0), $0, $1{{$}} 1169define i32 @cmpxchg_i32_from_global_address(i32 %exp, i32 %new) { 1170 %pair = cmpxchg ptr @gv, i32 %exp, i32 %new seq_cst seq_cst 1171 %old = extractvalue { i32, i1 } %pair, 0 1172 ret i32 %old 1173} 1174 1175;===---------------------------------------------------------------------------- 1176; Atomic ternary read-modify-writes: 64-bit 1177;===---------------------------------------------------------------------------- 1178 1179; Basic RMW. 1180 1181; CHECK-LABEL: cmpxchg_i64_no_offset: 1182; CHECK-NEXT: .functype cmpxchg_i64_no_offset (i32, i64, i64) -> (i64){{$}} 1183; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} 1184; CHECK-NEXT: return $pop0{{$}} 1185define i64 @cmpxchg_i64_no_offset(ptr %p, i64 %exp, i64 %new) { 1186 %pair = cmpxchg ptr %p, i64 %exp, i64 %new seq_cst seq_cst 1187 %old = extractvalue { i64, i1 } %pair, 0 1188 ret i64 %old 1189} 1190 1191; With an nuw add, we can fold an offset. 1192 1193; CHECK-LABEL: cmpxchg_i64_with_folded_offset: 1194; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} 1195define i64 @cmpxchg_i64_with_folded_offset(ptr %p, i64 %exp, i64 %new) { 1196 %q = ptrtoint ptr %p to i32 1197 %r = add nuw i32 %q, 24 1198 %s = inttoptr i32 %r to ptr 1199 %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst 1200 %old = extractvalue { i64, i1 } %pair, 0 1201 ret i64 %old 1202} 1203 1204; With an inbounds gep, we can fold an offset. 1205 1206; CHECK-LABEL: cmpxchg_i64_with_folded_gep_offset: 1207; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} 1208define i64 @cmpxchg_i64_with_folded_gep_offset(ptr %p, i64 %exp, i64 %new) { 1209 %s = getelementptr inbounds i64, ptr %p, i32 3 1210 %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst 1211 %old = extractvalue { i64, i1 } %pair, 0 1212 ret i64 %old 1213} 1214 1215; We can't fold a negative offset though, even with an inbounds gep. 1216 1217; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_negative_offset: 1218; CHECK: i32.const $push0=, -24{{$}} 1219; CHECK: i32.add $push1=, $0, $pop0{{$}} 1220; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} 1221define i64 @cmpxchg_i64_with_unfolded_gep_negative_offset(ptr %p, i64 %exp, i64 %new) { 1222 %s = getelementptr inbounds i64, ptr %p, i32 -3 1223 %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst 1224 %old = extractvalue { i64, i1 } %pair, 0 1225 ret i64 %old 1226} 1227 1228; Without nuw, and even with nsw, we can't fold an offset. 1229 1230; CHECK-LABEL: cmpxchg_i64_with_unfolded_offset: 1231; CHECK: i32.const $push0=, 24{{$}} 1232; CHECK: i32.add $push1=, $0, $pop0{{$}} 1233; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} 1234define i64 @cmpxchg_i64_with_unfolded_offset(ptr %p, i64 %exp, i64 %new) { 1235 %q = ptrtoint ptr %p to i32 1236 %r = add nsw i32 %q, 24 1237 %s = inttoptr i32 %r to ptr 1238 %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst 1239 %old = extractvalue { i64, i1 } %pair, 0 1240 ret i64 %old 1241} 1242 1243; Without inbounds, we can't fold a gep offset. 1244 1245; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_offset: 1246; CHECK: i32.const $push0=, 24{{$}} 1247; CHECK: i32.add $push1=, $0, $pop0{{$}} 1248; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} 1249define i64 @cmpxchg_i64_with_unfolded_gep_offset(ptr %p, i64 %exp, i64 %new) { 1250 %s = getelementptr i64, ptr %p, i32 3 1251 %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst 1252 %old = extractvalue { i64, i1 } %pair, 0 1253 ret i64 %old 1254} 1255 1256;===---------------------------------------------------------------------------- 1257; Atomic truncating & sign-extending ternary RMWs 1258;===---------------------------------------------------------------------------- 1259 1260; Fold an offset into a sign-extending rmw. 1261 1262; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_offset: 1263; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}} 1264; CHECK-NEXT: i32.extend8_s $push1=, $pop0 1265define i32 @cmpxchg_i8_i32_s_with_folded_offset(ptr %p, i32 %exp, i32 %new) { 1266 %q = ptrtoint ptr %p to i32 1267 %r = add nuw i32 %q, 24 1268 %s = inttoptr i32 %r to ptr 1269 %exp_t = trunc i32 %exp to i8 1270 %new_t = trunc i32 %new to i8 1271 %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst 1272 %old = extractvalue { i8, i1 } %pair, 0 1273 %u = sext i8 %old to i32 1274 ret i32 %u 1275} 1276 1277; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64.extend_i32_s 1278; CHECK-LABEL: cmpxchg_i32_i64_s_with_folded_offset: 1279; CHECK: i32.wrap_i64 $push1=, $1 1280; CHECK-NEXT: i32.wrap_i64 $push0=, $2 1281; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 24($0), $pop1, $pop0{{$}} 1282; CHECK-NEXT: i64.extend_i32_s $push3=, $pop2{{$}} 1283define i64 @cmpxchg_i32_i64_s_with_folded_offset(ptr %p, i64 %exp, i64 %new) { 1284 %q = ptrtoint ptr %p to i32 1285 %r = add nuw i32 %q, 24 1286 %s = inttoptr i32 %r to ptr 1287 %exp_t = trunc i64 %exp to i32 1288 %new_t = trunc i64 %new to i32 1289 %pair = cmpxchg ptr %s, i32 %exp_t, i32 %new_t seq_cst seq_cst 1290 %old = extractvalue { i32, i1 } %pair, 0 1291 %u = sext i32 %old to i64 1292 ret i64 %u 1293} 1294 1295; Fold a gep offset into a sign-extending rmw. 1296 1297; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_gep_offset: 1298; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}} 1299; CHECK-NEXT: i32.extend8_s $push1=, $pop0 1300define i32 @cmpxchg_i8_i32_s_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) { 1301 %s = getelementptr inbounds i8, ptr %p, i32 24 1302 %exp_t = trunc i32 %exp to i8 1303 %new_t = trunc i32 %new to i8 1304 %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst 1305 %old = extractvalue { i8, i1 } %pair, 0 1306 %u = sext i8 %old to i32 1307 ret i32 %u 1308} 1309 1310; CHECK-LABEL: cmpxchg_i16_i32_s_with_folded_gep_offset: 1311; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}} 1312; CHECK-NEXT: i32.extend16_s $push1=, $pop0 1313define i32 @cmpxchg_i16_i32_s_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) { 1314 %s = getelementptr inbounds i16, ptr %p, i32 24 1315 %exp_t = trunc i32 %exp to i16 1316 %new_t = trunc i32 %new to i16 1317 %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst 1318 %old = extractvalue { i16, i1 } %pair, 0 1319 %u = sext i16 %old to i32 1320 ret i32 %u 1321} 1322 1323; CHECK-LABEL: cmpxchg_i16_i64_s_with_folded_gep_offset: 1324; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}} 1325; CHECK-NEXT: i64.extend16_s $push1=, $pop0 1326define i64 @cmpxchg_i16_i64_s_with_folded_gep_offset(ptr %p, i64 %exp, i64 %new) { 1327 %s = getelementptr inbounds i16, ptr %p, i32 24 1328 %exp_t = trunc i64 %exp to i16 1329 %new_t = trunc i64 %new to i16 1330 %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst 1331 %old = extractvalue { i16, i1 } %pair, 0 1332 %u = sext i16 %old to i64 1333 ret i64 %u 1334} 1335 1336; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 1337; an 'add' if the or'ed bits are known to be zero. 1338 1339; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_or_offset: 1340; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} 1341; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} 1342define i32 @cmpxchg_i8_i32_s_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) { 1343 %and = and i32 %x, -4 1344 %t0 = inttoptr i32 %and to ptr 1345 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 1346 %exp_t = trunc i32 %exp to i8 1347 %new_t = trunc i32 %new to i8 1348 %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst 1349 %old = extractvalue { i8, i1 } %pair, 0 1350 %conv = sext i8 %old to i32 1351 ret i32 %conv 1352} 1353 1354; CHECK-LABEL: cmpxchg_i8_i64_s_with_folded_or_offset: 1355; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} 1356; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} 1357define i64 @cmpxchg_i8_i64_s_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) { 1358 %and = and i32 %x, -4 1359 %t0 = inttoptr i32 %and to ptr 1360 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 1361 %exp_t = trunc i64 %exp to i8 1362 %new_t = trunc i64 %new to i8 1363 %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst 1364 %old = extractvalue { i8, i1 } %pair, 0 1365 %conv = sext i8 %old to i64 1366 ret i64 %conv 1367} 1368 1369; When loading from a fixed address, materialize a zero. 1370 1371; CHECK-LABEL: cmpxchg_i16_i32_s_from_numeric_address 1372; CHECK: i32.const $push0=, 0{{$}} 1373; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}} 1374; CHECK-NEXT: i32.extend16_s $push2=, $pop1 1375define i32 @cmpxchg_i16_i32_s_from_numeric_address(i32 %exp, i32 %new) { 1376 %s = inttoptr i32 42 to ptr 1377 %exp_t = trunc i32 %exp to i16 1378 %new_t = trunc i32 %new to i16 1379 %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst 1380 %old = extractvalue { i16, i1 } %pair, 0 1381 %u = sext i16 %old to i32 1382 ret i32 %u 1383} 1384 1385; CHECK-LABEL: cmpxchg_i8_i32_s_from_global_address 1386; CHECK: i32.const $push0=, 0{{$}} 1387; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}} 1388; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} 1389define i32 @cmpxchg_i8_i32_s_from_global_address(i32 %exp, i32 %new) { 1390 %exp_t = trunc i32 %exp to i8 1391 %new_t = trunc i32 %new to i8 1392 %pair = cmpxchg ptr @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst 1393 %old = extractvalue { i8, i1 } %pair, 0 1394 %u = sext i8 %old to i32 1395 ret i32 %u 1396} 1397 1398;===---------------------------------------------------------------------------- 1399; Atomic truncating & zero-extending ternary RMWs 1400;===---------------------------------------------------------------------------- 1401 1402; Fold an offset into a sign-extending rmw. 1403 1404; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_offset: 1405; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}} 1406define i32 @cmpxchg_i8_i32_z_with_folded_offset(ptr %p, i32 %exp, i32 %new) { 1407 %q = ptrtoint ptr %p to i32 1408 %r = add nuw i32 %q, 24 1409 %s = inttoptr i32 %r to ptr 1410 %exp_t = trunc i32 %exp to i8 1411 %new_t = trunc i32 %new to i8 1412 %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst 1413 %old = extractvalue { i8, i1 } %pair, 0 1414 %u = zext i8 %old to i32 1415 ret i32 %u 1416} 1417 1418; CHECK-LABEL: cmpxchg_i32_i64_z_with_folded_offset: 1419; CHECK: i64.atomic.rmw32.cmpxchg_u $push0=, 24($0), $1, $2{{$}} 1420define i64 @cmpxchg_i32_i64_z_with_folded_offset(ptr %p, i64 %exp, i64 %new) { 1421 %q = ptrtoint ptr %p to i32 1422 %r = add nuw i32 %q, 24 1423 %s = inttoptr i32 %r to ptr 1424 %exp_t = trunc i64 %exp to i32 1425 %new_t = trunc i64 %new to i32 1426 %pair = cmpxchg ptr %s, i32 %exp_t, i32 %new_t seq_cst seq_cst 1427 %old = extractvalue { i32, i1 } %pair, 0 1428 %u = zext i32 %old to i64 1429 ret i64 %u 1430} 1431 1432; Fold a gep offset into a sign-extending rmw. 1433 1434; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_gep_offset: 1435; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}} 1436define i32 @cmpxchg_i8_i32_z_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) { 1437 %s = getelementptr inbounds i8, ptr %p, i32 24 1438 %exp_t = trunc i32 %exp to i8 1439 %new_t = trunc i32 %new to i8 1440 %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst 1441 %old = extractvalue { i8, i1 } %pair, 0 1442 %u = zext i8 %old to i32 1443 ret i32 %u 1444} 1445 1446; CHECK-LABEL: cmpxchg_i16_i32_z_with_folded_gep_offset: 1447; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}} 1448define i32 @cmpxchg_i16_i32_z_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) { 1449 %s = getelementptr inbounds i16, ptr %p, i32 24 1450 %exp_t = trunc i32 %exp to i16 1451 %new_t = trunc i32 %new to i16 1452 %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst 1453 %old = extractvalue { i16, i1 } %pair, 0 1454 %u = zext i16 %old to i32 1455 ret i32 %u 1456} 1457 1458; CHECK-LABEL: cmpxchg_i16_i64_z_with_folded_gep_offset: 1459; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}} 1460define i64 @cmpxchg_i16_i64_z_with_folded_gep_offset(ptr %p, i64 %exp, i64 %new) { 1461 %s = getelementptr inbounds i16, ptr %p, i32 24 1462 %exp_t = trunc i64 %exp to i16 1463 %new_t = trunc i64 %new to i16 1464 %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst 1465 %old = extractvalue { i16, i1 } %pair, 0 1466 %u = zext i16 %old to i64 1467 ret i64 %u 1468} 1469 1470; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as 1471; an 'add' if the or'ed bits are known to be zero. 1472 1473; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_or_offset: 1474; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} 1475define i32 @cmpxchg_i8_i32_z_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) { 1476 %and = and i32 %x, -4 1477 %t0 = inttoptr i32 %and to ptr 1478 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 1479 %exp_t = trunc i32 %exp to i8 1480 %new_t = trunc i32 %new to i8 1481 %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst 1482 %old = extractvalue { i8, i1 } %pair, 0 1483 %conv = zext i8 %old to i32 1484 ret i32 %conv 1485} 1486 1487; CHECK-LABEL: cmpxchg_i8_i64_z_with_folded_or_offset: 1488; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} 1489define i64 @cmpxchg_i8_i64_z_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) { 1490 %and = and i32 %x, -4 1491 %t0 = inttoptr i32 %and to ptr 1492 %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2 1493 %exp_t = trunc i64 %exp to i8 1494 %new_t = trunc i64 %new to i8 1495 %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst 1496 %old = extractvalue { i8, i1 } %pair, 0 1497 %conv = zext i8 %old to i64 1498 ret i64 %conv 1499} 1500 1501; When loading from a fixed address, materialize a zero. 1502 1503; CHECK-LABEL: cmpxchg_i16_i32_z_from_numeric_address 1504; CHECK: i32.const $push0=, 0{{$}} 1505; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}} 1506define i32 @cmpxchg_i16_i32_z_from_numeric_address(i32 %exp, i32 %new) { 1507 %s = inttoptr i32 42 to ptr 1508 %exp_t = trunc i32 %exp to i16 1509 %new_t = trunc i32 %new to i16 1510 %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst 1511 %old = extractvalue { i16, i1 } %pair, 0 1512 %u = zext i16 %old to i32 1513 ret i32 %u 1514} 1515 1516; CHECK-LABEL: cmpxchg_i8_i32_z_from_global_address 1517; CHECK: i32.const $push0=, 0{{$}} 1518; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}} 1519define i32 @cmpxchg_i8_i32_z_from_global_address(i32 %exp, i32 %new) { 1520 %exp_t = trunc i32 %exp to i8 1521 %new_t = trunc i32 %new to i8 1522 %pair = cmpxchg ptr @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst 1523 %old = extractvalue { i8, i1 } %pair, 0 1524 %u = zext i8 %old to i32 1525 ret i32 %u 1526} 1527 1528;===---------------------------------------------------------------------------- 1529; Waits: 32-bit 1530;===---------------------------------------------------------------------------- 1531 1532declare i32 @llvm.wasm.memory.atomic.wait32(ptr, i32, i64) 1533 1534; Basic wait. 1535 1536; CHECK-LABEL: wait32_no_offset: 1537; CHECK: memory.atomic.wait32 $push0=, 0($0), $1, $2{{$}} 1538; CHECK-NEXT: return $pop0{{$}} 1539define i32 @wait32_no_offset(ptr %p, i32 %exp, i64 %timeout) { 1540 %v = call i32 @llvm.wasm.memory.atomic.wait32(ptr %p, i32 %exp, i64 %timeout) 1541 ret i32 %v 1542} 1543 1544; With an nuw add, we can fold an offset. 1545 1546; CHECK-LABEL: wait32_with_folded_offset: 1547; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}} 1548define i32 @wait32_with_folded_offset(ptr %p, i32 %exp, i64 %timeout) { 1549 %q = ptrtoint ptr %p to i32 1550 %r = add nuw i32 %q, 24 1551 %s = inttoptr i32 %r to ptr 1552 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout) 1553 ret i32 %t 1554} 1555 1556; With an inbounds gep, we can fold an offset. 1557 1558; CHECK-LABEL: wait32_with_folded_gep_offset: 1559; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}} 1560define i32 @wait32_with_folded_gep_offset(ptr %p, i32 %exp, i64 %timeout) { 1561 %s = getelementptr inbounds i32, ptr %p, i32 6 1562 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout) 1563 ret i32 %t 1564} 1565 1566; We can't fold a negative offset though, even with an inbounds gep. 1567 1568; CHECK-LABEL: wait32_with_unfolded_gep_negative_offset: 1569; CHECK: i32.const $push0=, -24{{$}} 1570; CHECK: i32.add $push1=, $0, $pop0{{$}} 1571; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}} 1572define i32 @wait32_with_unfolded_gep_negative_offset(ptr %p, i32 %exp, i64 %timeout) { 1573 %s = getelementptr inbounds i32, ptr %p, i32 -6 1574 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout) 1575 ret i32 %t 1576} 1577 1578; Without nuw, and even with nsw, we can't fold an offset. 1579 1580; CHECK-LABEL: wait32_with_unfolded_offset: 1581; CHECK: i32.const $push0=, 24{{$}} 1582; CHECK: i32.add $push1=, $0, $pop0{{$}} 1583; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}} 1584define i32 @wait32_with_unfolded_offset(ptr %p, i32 %exp, i64 %timeout) { 1585 %q = ptrtoint ptr %p to i32 1586 %r = add nsw i32 %q, 24 1587 %s = inttoptr i32 %r to ptr 1588 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout) 1589 ret i32 %t 1590} 1591 1592; Without inbounds, we can't fold a gep offset. 1593 1594; CHECK-LABEL: wait32_with_unfolded_gep_offset: 1595; CHECK: i32.const $push0=, 24{{$}} 1596; CHECK: i32.add $push1=, $0, $pop0{{$}} 1597; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}} 1598define i32 @wait32_with_unfolded_gep_offset(ptr %p, i32 %exp, i64 %timeout) { 1599 %s = getelementptr i32, ptr %p, i32 6 1600 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout) 1601 ret i32 %t 1602} 1603 1604; When waiting from a fixed address, materialize a zero. 1605 1606; CHECK-LABEL: wait32_from_numeric_address 1607; CHECK: i32.const $push0=, 0{{$}} 1608; CHECK: memory.atomic.wait32 $push1=, 42($pop0), $0, $1{{$}} 1609define i32 @wait32_from_numeric_address(i32 %exp, i64 %timeout) { 1610 %s = inttoptr i32 42 to ptr 1611 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout) 1612 ret i32 %t 1613} 1614 1615; CHECK-LABEL: wait32_from_global_address 1616; CHECK: i32.const $push0=, 0{{$}} 1617; CHECK: memory.atomic.wait32 $push1=, gv($pop0), $0, $1{{$}} 1618define i32 @wait32_from_global_address(i32 %exp, i64 %timeout) { 1619 %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr @gv, i32 %exp, i64 %timeout) 1620 ret i32 %t 1621} 1622 1623;===---------------------------------------------------------------------------- 1624; Waits: 64-bit 1625;===---------------------------------------------------------------------------- 1626 1627declare i32 @llvm.wasm.memory.atomic.wait64(ptr, i64, i64) 1628 1629; Basic wait. 1630 1631; CHECK-LABEL: wait64_no_offset: 1632; CHECK: memory.atomic.wait64 $push0=, 0($0), $1, $2{{$}} 1633; CHECK-NEXT: return $pop0{{$}} 1634define i32 @wait64_no_offset(ptr %p, i64 %exp, i64 %timeout) { 1635 %v = call i32 @llvm.wasm.memory.atomic.wait64(ptr %p, i64 %exp, i64 %timeout) 1636 ret i32 %v 1637} 1638 1639; With an nuw add, we can fold an offset. 1640 1641; CHECK-LABEL: wait64_with_folded_offset: 1642; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}} 1643define i32 @wait64_with_folded_offset(ptr %p, i64 %exp, i64 %timeout) { 1644 %q = ptrtoint ptr %p to i32 1645 %r = add nuw i32 %q, 24 1646 %s = inttoptr i32 %r to ptr 1647 %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout) 1648 ret i32 %t 1649} 1650 1651; With an inbounds gep, we can fold an offset. 1652 1653; CHECK-LABEL: wait64_with_folded_gep_offset: 1654; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}} 1655define i32 @wait64_with_folded_gep_offset(ptr %p, i64 %exp, i64 %timeout) { 1656 %s = getelementptr inbounds i64, ptr %p, i32 3 1657 %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout) 1658 ret i32 %t 1659} 1660 1661; We can't fold a negative offset though, even with an inbounds gep. 1662 1663; CHECK-LABEL: wait64_with_unfolded_gep_negative_offset: 1664; CHECK: i32.const $push0=, -24{{$}} 1665; CHECK: i32.add $push1=, $0, $pop0{{$}} 1666; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}} 1667define i32 @wait64_with_unfolded_gep_negative_offset(ptr %p, i64 %exp, i64 %timeout) { 1668 %s = getelementptr inbounds i64, ptr %p, i32 -3 1669 %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout) 1670 ret i32 %t 1671} 1672 1673; Without nuw, and even with nsw, we can't fold an offset. 1674 1675; CHECK-LABEL: wait64_with_unfolded_offset: 1676; CHECK: i32.const $push0=, 24{{$}} 1677; CHECK: i32.add $push1=, $0, $pop0{{$}} 1678; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}} 1679define i32 @wait64_with_unfolded_offset(ptr %p, i64 %exp, i64 %timeout) { 1680 %q = ptrtoint ptr %p to i32 1681 %r = add nsw i32 %q, 24 1682 %s = inttoptr i32 %r to ptr 1683 %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout) 1684 ret i32 %t 1685} 1686 1687; Without inbounds, we can't fold a gep offset. 1688 1689; CHECK-LABEL: wait64_with_unfolded_gep_offset: 1690; CHECK: i32.const $push0=, 24{{$}} 1691; CHECK: i32.add $push1=, $0, $pop0{{$}} 1692; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}} 1693define i32 @wait64_with_unfolded_gep_offset(ptr %p, i64 %exp, i64 %timeout) { 1694 %s = getelementptr i64, ptr %p, i32 3 1695 %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout) 1696 ret i32 %t 1697} 1698 1699;===---------------------------------------------------------------------------- 1700; Notifies 1701;===---------------------------------------------------------------------------- 1702 1703declare i32 @llvm.wasm.memory.atomic.notify(ptr, i32) 1704 1705; Basic notify. 1706 1707; CHECK-LABEL: notify_no_offset: 1708; CHECK: memory.atomic.notify $push0=, 0($0), $1{{$}} 1709; CHECK-NEXT: return $pop0{{$}} 1710define i32 @notify_no_offset(ptr %p, i32 %notify_count) { 1711 %v = call i32 @llvm.wasm.memory.atomic.notify(ptr %p, i32 %notify_count) 1712 ret i32 %v 1713} 1714 1715; With an nuw add, we can fold an offset. 1716 1717; CHECK-LABEL: notify_with_folded_offset: 1718; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}} 1719define i32 @notify_with_folded_offset(ptr %p, i32 %notify_count) { 1720 %q = ptrtoint ptr %p to i32 1721 %r = add nuw i32 %q, 24 1722 %s = inttoptr i32 %r to ptr 1723 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count) 1724 ret i32 %t 1725} 1726 1727; With an inbounds gep, we can fold an offset. 1728 1729; CHECK-LABEL: notify_with_folded_gep_offset: 1730; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}} 1731define i32 @notify_with_folded_gep_offset(ptr %p, i32 %notify_count) { 1732 %s = getelementptr inbounds i32, ptr %p, i32 6 1733 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count) 1734 ret i32 %t 1735} 1736 1737; We can't fold a negative offset though, even with an inbounds gep. 1738 1739; CHECK-LABEL: notify_with_unfolded_gep_negative_offset: 1740; CHECK: i32.const $push0=, -24{{$}} 1741; CHECK: i32.add $push1=, $0, $pop0{{$}} 1742; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}} 1743define i32 @notify_with_unfolded_gep_negative_offset(ptr %p, i32 %notify_count) { 1744 %s = getelementptr inbounds i32, ptr %p, i32 -6 1745 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count) 1746 ret i32 %t 1747} 1748 1749; Without nuw, and even with nsw, we can't fold an offset. 1750 1751; CHECK-LABEL: notify_with_unfolded_offset: 1752; CHECK: i32.const $push0=, 24{{$}} 1753; CHECK: i32.add $push1=, $0, $pop0{{$}} 1754; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}} 1755define i32 @notify_with_unfolded_offset(ptr %p, i32 %notify_count) { 1756 %q = ptrtoint ptr %p to i32 1757 %r = add nsw i32 %q, 24 1758 %s = inttoptr i32 %r to ptr 1759 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count) 1760 ret i32 %t 1761} 1762 1763; Without inbounds, we can't fold a gep offset. 1764 1765; CHECK-LABEL: notify_with_unfolded_gep_offset: 1766; CHECK: i32.const $push0=, 24{{$}} 1767; CHECK: i32.add $push1=, $0, $pop0{{$}} 1768; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}} 1769define i32 @notify_with_unfolded_gep_offset(ptr %p, i32 %notify_count) { 1770 %s = getelementptr i32, ptr %p, i32 6 1771 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count) 1772 ret i32 %t 1773} 1774 1775; When notifying from a fixed address, materialize a zero. 1776 1777; CHECK-LABEL: notify_from_numeric_address 1778; CHECK: i32.const $push0=, 0{{$}} 1779; CHECK: memory.atomic.notify $push1=, 42($pop0), $0{{$}} 1780define i32 @notify_from_numeric_address(i32 %notify_count) { 1781 %s = inttoptr i32 42 to ptr 1782 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count) 1783 ret i32 %t 1784} 1785 1786; CHECK-LABEL: notify_from_global_address 1787; CHECK: i32.const $push0=, 0{{$}} 1788; CHECK: memory.atomic.notify $push1=, gv($pop0), $0{{$}} 1789define i32 @notify_from_global_address(i32 %notify_count) { 1790 %t = call i32 @llvm.wasm.memory.atomic.notify(ptr @gv, i32 %notify_count) 1791 ret i32 %t 1792} 1793