; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=NOLSE %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=LSE %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=-lse,-fp-armv8 -O1 < %s | FileCheck -check-prefix=SOFTFP-NOLSE %s

; The same IR is compiled three ways (see the llc command lines above):
; with default features, with +lse, and with both lse and fp-armv8 disabled.
; Each function below carries one group of expected-assembly checks per
; configuration.

; FIXME: Restore test of fp128 case

; atomicrmw fmax on a scalar half with seq_cst ordering and align 2; the
; function returns the atomicrmw result. The expected assembly performs the
; maximum in single precision: fcvt / fmaxnm / fcvt in the two hardware-FP
; configurations, and __gnu_h2f_ieee / fmaxf / __gnu_f2h_ieee calls when
; fp-armv8 is disabled.
define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    fcvt s1, h0
; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxrh w8, [x0]
; NOLSE-NEXT:    fmov s0, w8
; NOLSE-NEXT:    fcvt s2, h0
; NOLSE-NEXT:    fmaxnm s2, s2, s1
; NOLSE-NEXT:    fcvt h2, s2
; NOLSE-NEXT:    fmov w8, s2
; NOLSE-NEXT:    stlxrh w9, w8, [x0]
; NOLSE-NEXT:    cbnz w9, .LBB0_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
; LSE:       // %bb.0:
; LSE-NEXT:    fcvt s1, h0
; LSE-NEXT:    ldr h0, [x0]
; LSE-NEXT:  .LBB0_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    fcvt s2, h0
; LSE-NEXT:    fmov w8, s0
; LSE-NEXT:    mov w10, w8
; LSE-NEXT:    fmaxnm s2, s2, s1
; LSE-NEXT:    fcvt h2, s2
; LSE-NEXT:    fmov w9, s2
; LSE-NEXT:    casalh w10, w9, [x0]
; LSE-NEXT:    fmov s0, w10
; LSE-NEXT:    cmp w10, w8, uxth
; LSE-NEXT:    b.ne .LBB0_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT:    mov w20, w1
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    b .LBB0_2
; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov w22, w0
; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w21, w0
; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w1, w21
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT:    mov w8, w0
; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
  ret half %res
}

; Same operation on a scalar half, but the atomicrmw is marked align 4. The
; expected instruction sequences match the align 2 function apart from the
; basic-block label numbers.
define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    fcvt s1, h0
; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxrh w8, [x0]
; NOLSE-NEXT:    fmov s0, w8
; NOLSE-NEXT:    fcvt s2, h0
; NOLSE-NEXT:    fmaxnm s2, s2, s1
; NOLSE-NEXT:    fcvt h2, s2
; NOLSE-NEXT:    fmov w8, s2
; NOLSE-NEXT:    stlxrh w9, w8, [x0]
; NOLSE-NEXT:    cbnz w9, .LBB1_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
; LSE:       // %bb.0:
; LSE-NEXT:    fcvt s1, h0
; LSE-NEXT:    ldr h0, [x0]
; LSE-NEXT:  .LBB1_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    fcvt s2, h0
; LSE-NEXT:    fmov w8, s0
; LSE-NEXT:    mov w10, w8
; LSE-NEXT:    fmaxnm s2, s2, s1
; LSE-NEXT:    fcvt h2, s2
; LSE-NEXT:    fmov w9, s2
; LSE-NEXT:    casalh w10, w9, [x0]
; LSE-NEXT:    fmov s0, w10
; LSE-NEXT:    cmp w10, w8, uxth
; LSE-NEXT:    b.ne .LBB1_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT:    mov w20, w1
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    b .LBB1_2
; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov w22, w0
; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w21, w0
; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w1, w21
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT:    mov w8, w0
; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, half %value seq_cst, align 4
  ret half %res
}

; atomicrmw fmax on a scalar bfloat with seq_cst ordering and align 2; the
; function returns the atomicrmw result. In the expected assembly the bfloat
; operands are moved into the upper half of a 32-bit value by a 16-bit left
; shift (shll / lsl #16) before the single-precision maximum. The result is
; narrowed again with an integer ubfx / add / lsr sequence, or with a
; __truncsfbf2 call when fp-armv8 is disabled.
define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $d0
; NOLSE-NEXT:    shll v1.4s, v0.4h, #16
; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxrh w9, [x0]
; NOLSE-NEXT:    fmov s0, w9
; NOLSE-NEXT:    shll v2.4s, v0.4h, #16
; NOLSE-NEXT:    fmaxnm s2, s2, s1
; NOLSE-NEXT:    fmov w9, s2
; NOLSE-NEXT:    ubfx w10, w9, #16, #1
; NOLSE-NEXT:    add w9, w9, w8
; NOLSE-NEXT:    add w9, w10, w9
; NOLSE-NEXT:    lsr w9, w9, #16
; NOLSE-NEXT:    stlxrh w10, w9, [x0]
; NOLSE-NEXT:    cbnz w10, .LBB2_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $h0 killed $h0 def $d0
; LSE-NEXT:    shll v1.4s, v0.4h, #16
; LSE-NEXT:    mov w8, #32767 // =0x7fff
; LSE-NEXT:    ldr h0, [x0]
; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    shll v2.4s, v0.4h, #16
; LSE-NEXT:    fmaxnm s2, s2, s1
; LSE-NEXT:    fmov w9, s2
; LSE-NEXT:    ubfx w10, w9, #16, #1
; LSE-NEXT:    add w9, w9, w8
; LSE-NEXT:    add w9, w10, w9
; LSE-NEXT:    lsr w9, w9, #16
; LSE-NEXT:    fmov s2, w9
; LSE-NEXT:    fmov w9, s0
; LSE-NEXT:    fmov w10, s2
; LSE-NEXT:    mov w11, w9
; LSE-NEXT:    casalh w11, w10, [x0]
; LSE-NEXT:    fmov s0, w11
; LSE-NEXT:    cmp w11, w9, uxth
; LSE-NEXT:    b.ne .LBB2_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT:    b .LBB2_2
; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov w21, w0
; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT:    mov w1, w20
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
; SOFTFP-NOLSE-NEXT:    mov w8, w0
; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
  ret bfloat %res
}

; atomicrmw fmax on a scalar bfloat with seq_cst ordering, marked align 4; the
; function returns the atomicrmw result. The expected assembly shifts the
; bfloat operands left by 16 bits before the single-precision maximum and
; narrows the result with an integer ubfx / add / lsr sequence, or with a
; __truncsfbf2 call when fp-armv8 is disabled.
define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $d0
; NOLSE-NEXT:    shll v1.4s, v0.4h, #16
; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxrh w9, [x0]
; NOLSE-NEXT:    fmov s0, w9
; NOLSE-NEXT:    shll v2.4s, v0.4h, #16
; NOLSE-NEXT:    fmaxnm s2, s2, s1
; NOLSE-NEXT:    fmov w9, s2
; NOLSE-NEXT:    ubfx w10, w9, #16, #1
; NOLSE-NEXT:    add w9, w9, w8
; NOLSE-NEXT:    add w9, w10, w9
; NOLSE-NEXT:    lsr w9, w9, #16
; NOLSE-NEXT:    stlxrh w10, w9, [x0]
; NOLSE-NEXT:    cbnz w10, .LBB3_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $h0 killed $h0 def $d0
; LSE-NEXT:    shll v1.4s, v0.4h, #16
; LSE-NEXT:    mov w8, #32767 // =0x7fff
; LSE-NEXT:    ldr h0, [x0]
; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    shll v2.4s, v0.4h, #16
; LSE-NEXT:    fmaxnm s2, s2, s1
; LSE-NEXT:    fmov w9, s2
; LSE-NEXT:    ubfx w10, w9, #16, #1
; LSE-NEXT:    add w9, w9, w8
; LSE-NEXT:    add w9, w10, w9
; LSE-NEXT:    lsr w9, w9, #16
; LSE-NEXT:    fmov s2, w9
; LSE-NEXT:    fmov w9, s0
; LSE-NEXT:    fmov w10, s2
; LSE-NEXT:    mov w11, w9
; LSE-NEXT:    casalh w11, w10, [x0]
; LSE-NEXT:    fmov s0, w11
; LSE-NEXT:    cmp w11, w9, uxth
; LSE-NEXT:    b.ne .LBB3_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
; SOFTFP-NOLSE-NEXT:    b .LBB3_2
; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov w21, w0
; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT:    mov w1, w20
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
; SOFTFP-NOLSE-NEXT:    mov w8, w0
; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 4
  ret bfloat %res
}

; atomicrmw fmax on a scalar float with seq_cst ordering and align 4; the
; function returns the atomicrmw result. The expected assembly applies fmaxnm
; directly to the loaded value in the two hardware-FP configurations and
; calls fmaxf when fp-armv8 is disabled.
define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align4:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxr w8, [x0]
; NOLSE-NEXT:    fmov s1, w8
; NOLSE-NEXT:    fmaxnm s2, s1, s0
; NOLSE-NEXT:    fmov w8, s2
; NOLSE-NEXT:    stlxr w9, w8, [x0]
; NOLSE-NEXT:    cbnz w9, .LBB4_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    fmov s0, s1
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align4:
; LSE:       // %bb.0:
; LSE-NEXT:    ldr s1, [x0]
; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    fmaxnm s2, s1, s0
; LSE-NEXT:    fmov w8, s1
; LSE-NEXT:    mov w10, w8
; LSE-NEXT:    fmov w9, s2
; LSE-NEXT:    casal w10, w9, [x0]
; LSE-NEXT:    fmov s1, w10
; LSE-NEXT:    cmp w10, w8
; LSE-NEXT:    b.ne .LBB4_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    fmov s0, s1
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align4:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
; SOFTFP-NOLSE-NEXT:    mov w20, w1
; SOFTFP-NOLSE-NEXT:    b .LBB4_2
; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov w1, w20
; SOFTFP-NOLSE-NEXT:    mov w21, w0
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    mov w8, w0
; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
; SOFTFP-NOLSE-NEXT:    cmp w0, w21
; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
  ret float %res
}

; atomicrmw fmax on a scalar double with seq_cst ordering and align 8; the
; function returns the atomicrmw result. The expected assembly uses the
; 64-bit forms (x / d registers) and calls fmax when fp-armv8 is disabled.
; NOTE(review): the function name says "f32" although the operand type is
; double - presumably "f64" was intended. The name is left as is because the
; generated label checks below are derived from it; confirm before renaming.
define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxr x8, [x0]
; NOLSE-NEXT:    fmov d1, x8
; NOLSE-NEXT:    fmaxnm d2, d1, d0
; NOLSE-NEXT:    fmov x8, d2
; NOLSE-NEXT:    stlxr w9, x8, [x0]
; NOLSE-NEXT:    cbnz w9, .LBB5_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    fmov d0, d1
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
; LSE:       // %bb.0:
; LSE-NEXT:    ldr d1, [x0]
; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    fmaxnm d2, d1, d0
; LSE-NEXT:    fmov x8, d1
; LSE-NEXT:    mov x10, x8
; LSE-NEXT:    fmov x9, d2
; LSE-NEXT:    casal x10, x9, [x0]
; LSE-NEXT:    fmov d1, x10
; LSE-NEXT:    cmp x10, x8
; LSE-NEXT:    b.ne .LBB5_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    fmov d0, d1
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    mov x20, x1
; SOFTFP-NOLSE-NEXT:    b .LBB5_2
; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w9, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    mov x21, x8
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov x0, x21
; SOFTFP-NOLSE-NEXT:    mov x1, x20
; SOFTFP-NOLSE-NEXT:    bl fmax
; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
; SOFTFP-NOLSE-NEXT:    cmp x8, x21
; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    mov x21, x8
; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    mov x0, x21
; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
  ret double %res
}

; atomicrmw fmax on a <2 x half> vector with seq_cst ordering and align 4; the
; function returns the atomicrmw result. In the expected assembly each of the
; two lanes gets its own single-precision maximum (two fmaxnm instructions,
; or two fmaxf calls when fp-armv8 is disabled), while the memory update is
; done on one 32-bit word (ldaxr / stlxr or casal on w registers).
define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
; NOLSE-NEXT:    mov h1, v0.h[1]
; NOLSE-NEXT:    fcvt s0, h0
; NOLSE-NEXT:    fcvt s1, h1
; NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxr w8, [x0]
; NOLSE-NEXT:    fmov s2, w8
; NOLSE-NEXT:    mov h3, v2.h[1]
; NOLSE-NEXT:    fcvt s2, h2
; NOLSE-NEXT:    fcvt s3, h3
; NOLSE-NEXT:    fmaxnm s2, s2, s0
; NOLSE-NEXT:    fmaxnm s3, s3, s1
; NOLSE-NEXT:    fcvt h2, s2
; NOLSE-NEXT:    fcvt h3, s3
; NOLSE-NEXT:    mov v2.h[1], v3.h[0]
; NOLSE-NEXT:    fmov w9, s2
; NOLSE-NEXT:    stlxr w10, w9, [x0]
; NOLSE-NEXT:    cbnz w10, .LBB6_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    fmov d0, x8
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
; LSE-NEXT:    mov h1, v0.h[1]
; LSE-NEXT:    fcvt s2, h0
; LSE-NEXT:    ldr s0, [x0]
; LSE-NEXT:    fcvt s1, h1
; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov h3, v0.h[1]
; LSE-NEXT:    fcvt s4, h0
; LSE-NEXT:    fmov w8, s0
; LSE-NEXT:    mov w10, w8
; LSE-NEXT:    fcvt s3, h3
; LSE-NEXT:    fmaxnm s4, s4, s2
; LSE-NEXT:    fmaxnm s3, s3, s1
; LSE-NEXT:    fcvt h4, s4
; LSE-NEXT:    fcvt h3, s3
; LSE-NEXT:    mov v4.h[1], v3.h[0]
; LSE-NEXT:    fmov w9, s4
; LSE-NEXT:    casal w10, w9, [x0]
; LSE-NEXT:    fmov s0, w10
; LSE-NEXT:    cmp w10, w8
; LSE-NEXT:    b.ne .LBB6_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    stp x30, x25, [sp, #-64]! // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldrh w23, [x0, #2]
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT:    mov w21, w1
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov w19, w2
; SOFTFP-NOLSE-NEXT:    mov x20, x0
; SOFTFP-NOLSE-NEXT:    b .LBB6_2
; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB6_6
; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w24, w0
; SOFTFP-NOLSE-NEXT:    and w0, w23, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w1, w24
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT:    mov w24, w0
; SOFTFP-NOLSE-NEXT:    and w0, w21, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w25, w0
; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
; SOFTFP-NOLSE-NEXT:    mov w1, w25
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
; SOFTFP-NOLSE-NEXT:    mov w8, w22
; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
; SOFTFP-NOLSE-NEXT:  .LBB6_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
; SOFTFP-NOLSE-NEXT:    cmp w22, w8
; SOFTFP-NOLSE-NEXT:    b.ne .LBB6_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB6_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB6_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_2
; SOFTFP-NOLSE-NEXT:  .LBB6_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    mov w0, w22
; SOFTFP-NOLSE-NEXT:    mov w1, w23
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x30, x25, [sp], #64 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fmax ptr %ptr, <2 x half> %value seq_cst, align 4
  ret <2 x half> %res
}

define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
; NOLSE-NEXT:    dup v1.4h, v0.h[1]
; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
; NOLSE-NEXT:    shll v0.4s, v0.4h, #16
; NOLSE-NEXT:    shll v1.4s, v1.4h, #16
; NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxr w9, [x0]
; NOLSE-NEXT:    fmov s2, w9
; NOLSE-NEXT:    dup v3.4h, v2.h[1]
; NOLSE-NEXT:    shll v2.4s, v2.4h, #16
; NOLSE-NEXT:    fmaxnm s2, s2, s0
; NOLSE-NEXT:    shll v3.4s, v3.4h, #16
; NOLSE-NEXT:    fmaxnm s3, s3, s1
; NOLSE-NEXT:    fmov w11, s2
; NOLSE-NEXT:    ubfx w13, w11, #16, #1
; NOLSE-NEXT:    add w11, w11, w8
; NOLSE-NEXT:    fmov w10, s3
; NOLSE-NEXT:    add w11, w13, w11
; NOLSE-NEXT:    lsr w11, w11, #16
; NOLSE-NEXT:    ubfx w12, w10, #16, #1
; NOLSE-NEXT:    add w10, w10, w8
; NOLSE-NEXT:    fmov s3, w11
; NOLSE-NEXT:    add w10, w12, w10
; NOLSE-NEXT:    lsr w10, w10, #16
; NOLSE-NEXT:    fmov s2, w10
; NOLSE-NEXT:    mov v3.h[1], v2.h[0]
; NOLSE-NEXT:    fmov w10, s3
; NOLSE-NEXT:    stlxr w11, w10, [x0]
; NOLSE-NEXT:    cbnz w11, .LBB7_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    fmov d0, x9
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
; LSE-NEXT:    dup v1.4h, v0.h[1]
; LSE-NEXT:    shll v2.4s, v0.4h, #16
; LSE-NEXT:    mov w8, #32767 // =0x7fff
; LSE-NEXT:    ldr s0, [x0]
; LSE-NEXT:    shll v1.4s, v1.4h, #16
; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    dup v3.4h, v0.h[1]
; LSE-NEXT:    shll v4.4s, v0.4h, #16
; LSE-NEXT:    fmaxnm s4, s4, s2
; LSE-NEXT:    shll v3.4s, v3.4h, #16
; LSE-NEXT:    fmaxnm s3, s3, s1
; LSE-NEXT:    fmov w10, s4
; LSE-NEXT:    ubfx w12, w10, #16, #1
; LSE-NEXT:    add w10, w10, w8
; LSE-NEXT:    fmov w9, s3
; LSE-NEXT:    add w10, w12, w10
; LSE-NEXT:    lsr w10, w10, #16
; LSE-NEXT:    ubfx w11, w9, #16, #1
; LSE-NEXT:    add w9, w9, w8
; LSE-NEXT:    fmov s4, w10
; LSE-NEXT:    add w9, w11, w9
; LSE-NEXT:    lsr w9, w9, #16
; LSE-NEXT:    fmov s3, w9
; LSE-NEXT:    fmov w9, s0
; LSE-NEXT:    mov v4.h[1], v3.h[0]
; LSE-NEXT:    mov w11, w9
; LSE-NEXT:    fmov w10, s4
; LSE-NEXT:    casal w11, w10, [x0]
; LSE-NEXT:    fmov s0, w11
; LSE-NEXT:    cmp w11, w9
; LSE-NEXT:    b.ne .LBB7_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov w8, w1
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldrh w1, [x0, #2]
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT:    lsl w20, w2, #16
; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    b .LBB7_2
; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
; SOFTFP-NOLSE-NEXT:    mov w1, w20
; SOFTFP-NOLSE-NEXT:    mov w0, w23
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
; SOFTFP-NOLSE-NEXT:    mov w24, w0
; SOFTFP-NOLSE-NEXT:    lsl w0, w22, #16
; SOFTFP-NOLSE-NEXT:    mov w1, w21
; SOFTFP-NOLSE-NEXT:    bl fmaxf
; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
;
SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1 754; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 755; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19] 756; SOFTFP-NOLSE-NEXT: cmp w22, w23 757; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1 758; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 759; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2 760; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19] 761; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3 762; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1 763; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 764; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16 765; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2 766; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end 767; SOFTFP-NOLSE-NEXT: mov w0, w22 768; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload 769; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload 770; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload 771; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload 772; SOFTFP-NOLSE-NEXT: ret 773 %res = atomicrmw fmax ptr %ptr, <2 x bfloat> %value seq_cst, align 4 774 ret <2 x bfloat> %res 775} 776 777define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x float> %value) #0 { 778; NOLSE-LABEL: test_atomicrmw_fmax_v2f32_seq_cst_align8: 779; NOLSE: // %bb.0: 780; NOLSE-NEXT: .LBB8_1: // %atomicrmw.start 781; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 782; NOLSE-NEXT: ldaxr x8, [x0] 783; NOLSE-NEXT: fmov d1, x8 784; NOLSE-NEXT: fmaxnm v2.2s, v1.2s, v0.2s 785; NOLSE-NEXT: fmov x8, d2 786; NOLSE-NEXT: stlxr w9, x8, [x0] 787; NOLSE-NEXT: cbnz w9, .LBB8_1 788; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 789; NOLSE-NEXT: fmov d0, d1 790; NOLSE-NEXT: ret 791; 792; LSE-LABEL: test_atomicrmw_fmax_v2f32_seq_cst_align8: 793; LSE: // %bb.0: 794; LSE-NEXT: ldr d1, [x0] 795; LSE-NEXT: .LBB8_1: // %atomicrmw.start 796; LSE-NEXT: // =>This Inner Loop Header: Depth=1 797; LSE-NEXT: fmaxnm v2.2s, v1.2s, v0.2s 798; LSE-NEXT: fmov x8, d1 799; LSE-NEXT: mov 
x10, x8 800; LSE-NEXT: fmov x9, d2 801; LSE-NEXT: casal x10, x9, [x0] 802; LSE-NEXT: fmov d1, x10 803; LSE-NEXT: cmp x10, x8 804; LSE-NEXT: b.ne .LBB8_1 805; LSE-NEXT: // %bb.2: // %atomicrmw.end 806; LSE-NEXT: fmov d0, d1 807; LSE-NEXT: ret 808; 809; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f32_seq_cst_align8: 810; SOFTFP-NOLSE: // %bb.0: 811; SOFTFP-NOLSE-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill 812; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill 813; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill 814; SOFTFP-NOLSE-NEXT: mov w21, w1 815; SOFTFP-NOLSE-NEXT: ldp w22, w23, [x0] 816; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill 817; SOFTFP-NOLSE-NEXT: mov w19, w2 818; SOFTFP-NOLSE-NEXT: mov x20, x0 819; SOFTFP-NOLSE-NEXT: b .LBB8_2 820; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore 821; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1 822; SOFTFP-NOLSE-NEXT: mov w8, wzr 823; SOFTFP-NOLSE-NEXT: clrex 824; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 825; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6 826; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start 827; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 828; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2 829; SOFTFP-NOLSE-NEXT: mov w0, w23 830; SOFTFP-NOLSE-NEXT: mov w1, w19 831; SOFTFP-NOLSE-NEXT: bl fmaxf 832; SOFTFP-NOLSE-NEXT: mov w24, w0 833; SOFTFP-NOLSE-NEXT: mov w0, w22 834; SOFTFP-NOLSE-NEXT: mov w1, w21 835; SOFTFP-NOLSE-NEXT: bl fmaxf 836; SOFTFP-NOLSE-NEXT: mov w8, w0 837; SOFTFP-NOLSE-NEXT: mov w9, w22 838; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23 839; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32 840; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32 841; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start 842; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1 843; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 844; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20] 845; SOFTFP-NOLSE-NEXT: cmp x22, x9 846; SOFTFP-NOLSE-NEXT: b.ne 
.LBB8_1 847; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 848; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2 849; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20] 850; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3 851; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1 852; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 853; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32 854; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2 855; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end 856; SOFTFP-NOLSE-NEXT: mov w0, w22 857; SOFTFP-NOLSE-NEXT: mov w1, w23 858; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload 859; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload 860; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload 861; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload 862; SOFTFP-NOLSE-NEXT: ret 863 %res = atomicrmw fmax ptr %ptr, <2 x float> %value seq_cst, align 8 864 ret <2 x float> %res 865} 866 867define <2 x double> @test_atomicrmw_fmax_v2f64_seq_cst_align8(ptr %ptr, <2 x double> %value) #0 { 868; NOLSE-LABEL: test_atomicrmw_fmax_v2f64_seq_cst_align8: 869; NOLSE: // %bb.0: 870; NOLSE-NEXT: .LBB9_1: // %atomicrmw.start 871; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 872; NOLSE-NEXT: ldaxp x8, x9, [x0] 873; NOLSE-NEXT: fmov d1, x8 874; NOLSE-NEXT: mov v1.d[1], x9 875; NOLSE-NEXT: fmaxnm v2.2d, v1.2d, v0.2d 876; NOLSE-NEXT: mov x8, v2.d[1] 877; NOLSE-NEXT: fmov x9, d2 878; NOLSE-NEXT: stlxp w10, x9, x8, [x0] 879; NOLSE-NEXT: cbnz w10, .LBB9_1 880; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 881; NOLSE-NEXT: mov v0.16b, v1.16b 882; NOLSE-NEXT: ret 883; 884; LSE-LABEL: test_atomicrmw_fmax_v2f64_seq_cst_align8: 885; LSE: // %bb.0: 886; LSE-NEXT: ldr q1, [x0] 887; LSE-NEXT: .LBB9_1: // %atomicrmw.start 888; LSE-NEXT: // =>This Inner Loop Header: Depth=1 889; LSE-NEXT: fmaxnm v2.2d, v1.2d, v0.2d 890; LSE-NEXT: mov x3, v1.d[1] 891; LSE-NEXT: fmov x2, d1 892; LSE-NEXT: mov x7, x3 893; LSE-NEXT: mov x5, v2.d[1] 894; LSE-NEXT: mov x6, x2 
895; LSE-NEXT: fmov x4, d2 896; LSE-NEXT: caspal x6, x7, x4, x5, [x0] 897; LSE-NEXT: fmov d1, x6 898; LSE-NEXT: cmp x7, x3 899; LSE-NEXT: ccmp x6, x2, #0, eq 900; LSE-NEXT: mov v1.d[1], x7 901; LSE-NEXT: b.ne .LBB9_1 902; LSE-NEXT: // %bb.2: // %atomicrmw.end 903; LSE-NEXT: mov v0.16b, v1.16b 904; LSE-NEXT: ret 905; 906; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f64_seq_cst_align8: 907; SOFTFP-NOLSE: // %bb.0: 908; SOFTFP-NOLSE-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill 909; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill 910; SOFTFP-NOLSE-NEXT: mov x20, x0 911; SOFTFP-NOLSE-NEXT: mov x19, x3 912; SOFTFP-NOLSE-NEXT: ldp x0, x1, [x0] 913; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill 914; SOFTFP-NOLSE-NEXT: mov x21, x2 915; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill 916; SOFTFP-NOLSE-NEXT: b .LBB9_2 917; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start 918; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1 919; SOFTFP-NOLSE-NEXT: cmp x1, x22 920; SOFTFP-NOLSE-NEXT: ccmp x0, x23, #0, eq 921; SOFTFP-NOLSE-NEXT: b.eq .LBB9_6 922; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start 923; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 924; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2 925; SOFTFP-NOLSE-NEXT: mov x22, x1 926; SOFTFP-NOLSE-NEXT: mov x23, x0 927; SOFTFP-NOLSE-NEXT: mov x0, x1 928; SOFTFP-NOLSE-NEXT: mov x1, x19 929; SOFTFP-NOLSE-NEXT: bl fmax 930; SOFTFP-NOLSE-NEXT: mov x24, x0 931; SOFTFP-NOLSE-NEXT: mov x0, x23 932; SOFTFP-NOLSE-NEXT: mov x1, x21 933; SOFTFP-NOLSE-NEXT: bl fmax 934; SOFTFP-NOLSE-NEXT: mov x8, x0 935; SOFTFP-NOLSE-NEXT: .LBB9_3: // %atomicrmw.start 936; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1 937; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 938; SOFTFP-NOLSE-NEXT: ldaxp x0, x1, [x20] 939; SOFTFP-NOLSE-NEXT: cmp x0, x23 940; SOFTFP-NOLSE-NEXT: cset w9, ne 941; SOFTFP-NOLSE-NEXT: cmp x1, x22 942; SOFTFP-NOLSE-NEXT: cinc w9, w9, ne 943; 
SOFTFP-NOLSE-NEXT: cbz w9, .LBB9_5 944; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start 945; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2 946; SOFTFP-NOLSE-NEXT: stlxp w9, x0, x1, [x20] 947; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB9_3 948; SOFTFP-NOLSE-NEXT: b .LBB9_1 949; SOFTFP-NOLSE-NEXT: .LBB9_5: // %atomicrmw.start 950; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2 951; SOFTFP-NOLSE-NEXT: stlxp w9, x8, x24, [x20] 952; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB9_3 953; SOFTFP-NOLSE-NEXT: b .LBB9_1 954; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end 955; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload 956; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload 957; SOFTFP-NOLSE-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload 958; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload 959; SOFTFP-NOLSE-NEXT: ret 960 %res = atomicrmw fmax ptr %ptr, <2 x double> %value seq_cst, align 16 961 ret <2 x double> %res 962} 963 964attributes #0 = { nounwind } 965