; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-ATOM

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf

define i8 @add8(ptr %p) {
; X64-LABEL: add8:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: add8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movzbl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: add8:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    xorl %eax, %eax
; X86-SLM-NEXT:    lock xaddb %al, (%ecx)
; X86-SLM-NEXT:    # kill: def $al killed $al killed $eax
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: add8:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    xorl %eax, %eax
; X86-ATOM-NEXT:    lock xaddb %al, (%ecx)
; X86-ATOM-NEXT:    # kill: def $al killed $al killed $eax
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw add ptr %p, i8 0 monotonic
  ret i8 %1
}

define i16 @or16(ptr %p) {
; X64-LABEL: or16:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: or16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: or16:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    movzwl (%ecx), %eax
; X86-SLM-NEXT:    .p2align 4
; X86-SLM-NEXT:  .LBB1_1: # %atomicrmw.start
; X86-SLM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT:    lock cmpxchgw %ax, (%ecx)
; X86-SLM-NEXT:    jne .LBB1_1
; X86-SLM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: or16:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movzwl (%ecx), %eax
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB1_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    lock cmpxchgw %ax, (%ecx)
; X86-ATOM-NEXT:    jne .LBB1_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw or ptr %p, i16 0 acquire
  ret i16 %1
}

define i32 @xor32(ptr %p) {
; X64-LABEL: xor32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: xor32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: xor32:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    movl (%ecx), %eax
; X86-SLM-NEXT:    .p2align 4
; X86-SLM-NEXT:  .LBB2_1: # %atomicrmw.start
; X86-SLM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT:    jne .LBB2_1
; X86-SLM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: xor32:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl (%ecx), %eax
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB2_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT:    jne .LBB2_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw xor ptr %p, i32 0 release
  ret i32 %1
}

define i64 @sub64(ptr %p) {
; X64-LABEL: sub64:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    retq
;
; X86-LABEL: sub64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB3_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB3_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = atomicrmw sub ptr %p, i64 0 seq_cst
  ret i64 %1
}

define i128 @or128(ptr %p) {
; X64-LABEL: or128:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    callq __atomic_fetch_or_16@PLT
; X64-NEXT:    popq %rcx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or128:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    pushl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_offset 8
; X86-GENERIC-NEXT:    .cfi_offset %ebp, -8
; X86-GENERIC-NEXT:    movl %esp, %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_register %ebp
; X86-GENERIC-NEXT:    pushl %ebx
; X86-GENERIC-NEXT:    pushl %edi
; X86-GENERIC-NEXT:    pushl %esi
; X86-GENERIC-NEXT:    andl $-16, %esp
; X86-GENERIC-NEXT:    subl $48, %esp
; X86-GENERIC-NEXT:    .cfi_offset %esi, -20
; X86-GENERIC-NEXT:    .cfi_offset %edi, -16
; X86-GENERIC-NEXT:    .cfi_offset %ebx, -12
; X86-GENERIC-NEXT:    movl 12(%ebp), %edi
; X86-GENERIC-NEXT:    movl 12(%edi), %ecx
; X86-GENERIC-NEXT:    movl 8(%edi), %edx
; X86-GENERIC-NEXT:    movl (%edi), %ebx
; X86-GENERIC-NEXT:    movl 4(%edi), %esi
; X86-GENERIC-NEXT:    .p2align 4
; X86-GENERIC-NEXT:  .LBB4_1: # %atomicrmw.start
; X86-GENERIC-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-GENERIC-NEXT:    movl %ebx, (%esp)
; X86-GENERIC-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    pushl $0
; X86-GENERIC-NEXT:    pushl $0
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    pushl %edi
; X86-GENERIC-NEXT:    pushl $16
; X86-GENERIC-NEXT:    calll __atomic_compare_exchange@PLT
; X86-GENERIC-NEXT:    addl $24, %esp
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-GENERIC-NEXT:    movl (%esp), %ebx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-GENERIC-NEXT:    testb %al, %al
; X86-GENERIC-NEXT:    je .LBB4_1
; X86-GENERIC-NEXT:  # %bb.2: # %atomicrmw.end
; X86-GENERIC-NEXT:    movl 8(%ebp), %eax
; X86-GENERIC-NEXT:    movl %ebx, (%eax)
; X86-GENERIC-NEXT:    movl %esi, 4(%eax)
; X86-GENERIC-NEXT:    movl %edx, 8(%eax)
; X86-GENERIC-NEXT:    movl %ecx, 12(%eax)
; X86-GENERIC-NEXT:    leal -12(%ebp), %esp
; X86-GENERIC-NEXT:    popl %esi
; X86-GENERIC-NEXT:    popl %edi
; X86-GENERIC-NEXT:    popl %ebx
; X86-GENERIC-NEXT:    popl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa %esp, 4
; X86-GENERIC-NEXT:    retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    pushl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_offset 8
; X86-ATOM-NEXT:    .cfi_offset %ebp, -8
; X86-ATOM-NEXT:    movl %esp, %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT:    pushl %ebx
; X86-ATOM-NEXT:    pushl %edi
; X86-ATOM-NEXT:    pushl %esi
; X86-ATOM-NEXT:    andl $-16, %esp
; X86-ATOM-NEXT:    leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    .cfi_offset %esi, -20
; X86-ATOM-NEXT:    .cfi_offset %edi, -16
; X86-ATOM-NEXT:    .cfi_offset %ebx, -12
; X86-ATOM-NEXT:    movl 12(%ebp), %edi
; X86-ATOM-NEXT:    movl 12(%edi), %ecx
; X86-ATOM-NEXT:    movl 8(%edi), %edx
; X86-ATOM-NEXT:    movl (%edi), %esi
; X86-ATOM-NEXT:    movl 4(%edi), %ebx
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB4_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    movl %esi, (%esp)
; X86-ATOM-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    pushl $0
; X86-ATOM-NEXT:    pushl $0
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT:    pushl %eax
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT:    pushl %eax
; X86-ATOM-NEXT:    pushl %edi
; X86-ATOM-NEXT:    pushl $16
; X86-ATOM-NEXT:    calll __atomic_compare_exchange@PLT
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT:    testb %al, %al
; X86-ATOM-NEXT:    movl (%esp), %esi
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-ATOM-NEXT:    je .LBB4_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    movl 8(%ebp), %eax
; X86-ATOM-NEXT:    movl %esi, (%eax)
; X86-ATOM-NEXT:    movl %ebx, 4(%eax)
; X86-ATOM-NEXT:    movl %edx, 8(%eax)
; X86-ATOM-NEXT:    movl %ecx, 12(%eax)
; X86-ATOM-NEXT:    leal -12(%ebp), %esp
; X86-ATOM-NEXT:    popl %esi
; X86-ATOM-NEXT:    popl %edi
; X86-ATOM-NEXT:    popl %ebx
; X86-ATOM-NEXT:    popl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT:    retl $4
  %1 = atomicrmw or ptr %p, i128 0 monotonic
  ret i128 %1
}

; For 'and', the idempotent value is (-1)
define i32 @and32 (ptr %p) {
; X64-LABEL: and32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: and32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: and32:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    movl (%ecx), %eax
; X86-SLM-NEXT:    .p2align 4
; X86-SLM-NEXT:  .LBB5_1: # %atomicrmw.start
; X86-SLM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT:    jne .LBB5_1
; X86-SLM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: and32:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl (%ecx), %eax
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB5_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT:    jne .LBB5_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw and ptr %p, i32 -1 acq_rel
  ret i32 %1
}

define void @or32_nouse_monotonic(ptr %p) {
; X64-LABEL: or32_nouse_monotonic:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_monotonic:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_monotonic:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 monotonic
  ret void
}


define void @or32_nouse_acquire(ptr %p) {
; X64-LABEL: or32_nouse_acquire:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_acquire:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_acquire:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 acquire
  ret void
}

define void @or32_nouse_release(ptr %p) {
; X64-LABEL: or32_nouse_release:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_release:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_release:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 release
  ret void
}

define void @or32_nouse_acq_rel(ptr %p) {
; X64-LABEL: or32_nouse_acq_rel:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_acq_rel:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_acq_rel:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 acq_rel
  ret void
}

define void @or32_nouse_seq_cst(ptr %p) {
; X64-LABEL: or32_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    lock orl $0, (%esp)
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    lock orl $0, (%esp)
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 seq_cst
  ret void
}

; TODO: The value isn't used on 32 bit, so the cmpxchg8b is unneeded
define void @or64_nouse_seq_cst(ptr %p) {
; X64-LABEL: or64_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-LABEL: or64_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB11_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB11_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  atomicrmw or ptr %p, i64 0 seq_cst
  ret void
}

; TODO: Don't need to lower as sync_and_fetch call
define void @or128_nouse_seq_cst(ptr %p) {
; X64-LABEL: or128_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl $5, %ecx
; X64-NEXT:    callq __atomic_fetch_or_16@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or128_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    pushl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_offset 8
; X86-GENERIC-NEXT:    .cfi_offset %ebp, -8
; X86-GENERIC-NEXT:    movl %esp, %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_register %ebp
; X86-GENERIC-NEXT:    pushl %ebx
; X86-GENERIC-NEXT:    pushl %edi
; X86-GENERIC-NEXT:    pushl %esi
; X86-GENERIC-NEXT:    andl $-16, %esp
; X86-GENERIC-NEXT:    subl $48, %esp
; X86-GENERIC-NEXT:    .cfi_offset %esi, -20
; X86-GENERIC-NEXT:    .cfi_offset %edi, -16
; X86-GENERIC-NEXT:    .cfi_offset %ebx, -12
; X86-GENERIC-NEXT:    movl 8(%ebp), %esi
; X86-GENERIC-NEXT:    movl 12(%esi), %ecx
; X86-GENERIC-NEXT:    movl 8(%esi), %edi
; X86-GENERIC-NEXT:    movl (%esi), %edx
; X86-GENERIC-NEXT:    movl 4(%esi), %ebx
; X86-GENERIC-NEXT:    .p2align 4
; X86-GENERIC-NEXT:  .LBB12_1: # %atomicrmw.start
; X86-GENERIC-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-GENERIC-NEXT:    movl %edx, (%esp)
; X86-GENERIC-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    pushl $5
; X86-GENERIC-NEXT:    pushl $5
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    pushl %esi
; X86-GENERIC-NEXT:    pushl $16
; X86-GENERIC-NEXT:    calll __atomic_compare_exchange@PLT
; X86-GENERIC-NEXT:    addl $24, %esp
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-GENERIC-NEXT:    movl (%esp), %edx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-GENERIC-NEXT:    testb %al, %al
; X86-GENERIC-NEXT:    je .LBB12_1
; X86-GENERIC-NEXT:  # %bb.2: # %atomicrmw.end
; X86-GENERIC-NEXT:    leal -12(%ebp), %esp
; X86-GENERIC-NEXT:    popl %esi
; X86-GENERIC-NEXT:    popl %edi
; X86-GENERIC-NEXT:    popl %ebx
; X86-GENERIC-NEXT:    popl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa %esp, 4
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    pushl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_offset 8
; X86-ATOM-NEXT:    .cfi_offset %ebp, -8
; X86-ATOM-NEXT:    movl %esp, %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT:    pushl %ebx
; X86-ATOM-NEXT:    pushl %edi
; X86-ATOM-NEXT:    pushl %esi
; X86-ATOM-NEXT:    andl $-16, %esp
; X86-ATOM-NEXT:    leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    .cfi_offset %esi, -20
; X86-ATOM-NEXT:    .cfi_offset %edi, -16
; X86-ATOM-NEXT:    .cfi_offset %ebx, -12
; X86-ATOM-NEXT:    movl 8(%ebp), %esi
; X86-ATOM-NEXT:    movl %esp, %ebx
; X86-ATOM-NEXT:    movl 12(%esi), %ecx
; X86-ATOM-NEXT:    movl 8(%esi), %edx
; X86-ATOM-NEXT:    movl (%esi), %eax
; X86-ATOM-NEXT:    movl 4(%esi), %edi
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB12_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    movl %eax, (%esp)
; X86-ATOM-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    pushl $5
; X86-ATOM-NEXT:    pushl $5
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT:    pushl %eax
; X86-ATOM-NEXT:    pushl %ebx
; X86-ATOM-NEXT:    pushl %esi
; X86-ATOM-NEXT:    pushl $16
; X86-ATOM-NEXT:    calll __atomic_compare_exchange@PLT
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    testb %al, %al
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT:    movl (%esp), %eax
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-ATOM-NEXT:    je .LBB12_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    leal -12(%ebp), %esp
; X86-ATOM-NEXT:    popl %esi
; X86-ATOM-NEXT:    popl %edi
; X86-ATOM-NEXT:    popl %ebx
; X86-ATOM-NEXT:    popl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i128 0 seq_cst
  ret void
}


define void @or16_nouse_seq_cst(ptr %p) {
; X64-LABEL: or16_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or16_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    lock orl $0, (%esp)
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or16_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    lock orl $0, (%esp)
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i16 0 seq_cst
  ret void
}

define void @or8_nouse_seq_cst(ptr %p) {
; X64-LABEL: or8_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or8_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    lock orl $0, (%esp)
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or8_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    lock orl $0, (%esp)
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i8 0 seq_cst
  ret void
}