; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefixes=X86,X86-SSE1
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX

; Note: This test checks that the lowering for atomics matches what we
; currently emit for non-atomics, plus the atomic restriction. The presence
; of a particular lowering detail in these tests should not be read as
; requiring that detail for correctness unless it's related to the atomicity
; itself. (Specifically, there were reviewer questions about the lowering for
; half values and their calling convention which remain unresolved.)

define void @store_half(ptr %fptr, half %v) {
; X86-SSE1-LABEL: store_half:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movw %ax, (%ecx)
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: store_half:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movw %cx, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: store_half:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movw %cx, (%eax)
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: store_half:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movw %ax, (%ecx)
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: store_half:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pextrw $0, %xmm0, %eax
; X64-SSE-NEXT: movw %ax, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_half:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT: movw %ax, (%rdi)
; X64-AVX-NEXT: retq
  store atomic half %v, ptr %fptr unordered, align 2
  ret void
}

define void @store_float(ptr %fptr, float %v) {
; X86-LABEL: store_float:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: retl
;
; X64-SSE-LABEL: store_float:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  store atomic float %v, ptr %fptr unordered, align 4
  ret void
}
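
; Note: a naturally aligned store of up to register width is a single,
; non-tearing instruction on x86, so the unordered atomic stores above are
; expected to use the same movw/movl/movss a plain store would, with no
; fence or lock prefix added.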

define void @store_double(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%eax)
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  store atomic double %v, ptr %fptr unordered, align 8
  ret void
}

define half @load_half(ptr %fptr) {
; X86-SSE1-LABEL: load_half:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movzwl (%eax), %eax
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: load_half:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movzwl (%eax), %eax
; X86-SSE2-NEXT: pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: load_half:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movzwl (%eax), %eax
; X86-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: load_half:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movzwl (%eax), %eax
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: load_half:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzwl (%rdi), %eax
; X64-SSE-NEXT: pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_half:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movzwl (%rdi), %eax
; X64-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %v = load atomic half, ptr %fptr unordered, align 2
  ret half %v
}
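
; Note: for the 8-byte store_double above, a pair of movl's on a 32-bit
; target could tear, so the value is written with a single 64-bit access
; instead: movlps with SSE, or fildll/fistpll through the x87 unit when no
; SSE is available.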

define float @load_float(ptr %fptr) {
; X86-SSE1-LABEL: load_float:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl (%eax), %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: flds (%esp)
; X86-SSE1-NEXT: popl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: load_float:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: flds (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: load_float:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: load_float:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl (%eax), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: popl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: load_float:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: retq
  %v = load atomic float, ptr %fptr unordered, align 4
  ret float %v
}

define double @load_double(ptr %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movss %xmm0, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: load_double:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: subl $12, %esp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 16
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%esp)
; X86-SSE2-NEXT: fldl (%esp)
; X86-SSE2-NEXT: addl $12, %esp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: load_double:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: subl $12, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 16
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%esp)
; X86-AVX-NEXT: fldl (%esp)
; X86-AVX-NEXT: addl $12, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fldl (%esp)
; X86-NOSSE-NEXT: addl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: load_double:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: retq
  %v = load atomic double, ptr %fptr unordered, align 8
  ret double %v
}
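
; The unordered loads mirror the stores: a single aligned load of the full
; width. The extra stack traffic in the 32-bit variants comes from the
; return convention (float and double are returned in st(0), so the value
; is spilled and reloaded with flds/fldl), not from the atomicity.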

define half @exchange_half(ptr %fptr, half %x) {
; X86-SSE1-LABEL: exchange_half:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: xchgw %ax, (%ecx)
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: exchange_half:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: xchgw %cx, (%eax)
; X86-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: exchange_half:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: xchgw %cx, (%eax)
; X86-AVX-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: exchange_half:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: xchgw %ax, (%ecx)
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: exchange_half:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pextrw $0, %xmm0, %eax
; X64-SSE-NEXT: xchgw %ax, (%rdi)
; X64-SSE-NEXT: pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: exchange_half:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT: xchgw %ax, (%rdi)
; X64-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %v = atomicrmw xchg ptr %fptr, half %x monotonic, align 2
  ret half %v
}
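
; Note: on x86-64 the half argument arrives in %xmm0, so exchange_half
; shuttles it to a GPR with pextrw for the xchgw and reinserts the old
; value with pinsrw; the exchange itself is the same one an integer type
; would use.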

define float @exchange_float(ptr %fptr, float %x) {
; X86-SSE1-LABEL: exchange_float:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: xchgl %ecx, (%eax)
; X86-SSE1-NEXT: movl %ecx, (%esp)
; X86-SSE1-NEXT: flds (%esp)
; X86-SSE1-NEXT: popl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: exchange_float:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: xchgl %ecx, (%eax)
; X86-SSE2-NEXT: movd %ecx, %xmm0
; X86-SSE2-NEXT: movd %xmm0, (%esp)
; X86-SSE2-NEXT: flds (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: exchange_float:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: xchgl %ecx, (%eax)
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: exchange_float:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: xchgl %ecx, (%eax)
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: popl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: exchange_float:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: xchgl %eax, (%rdi)
; X64-SSE-NEXT: movd %eax, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: exchange_float:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xchgl %eax, (%rdi)
; X64-AVX-NEXT: vmovd %eax, %xmm0
; X64-AVX-NEXT: retq
  %v = atomicrmw xchg ptr %fptr, float %x monotonic, align 4
  ret float %v
}
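
; Note: xchg with a memory operand is implicitly locked, so a register-width
; atomicrmw xchg needs no explicit lock prefix and no retry loop; only the
; 8-byte case on 32-bit targets (below) falls back to a lock cmpxchg8b loop.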

define double @exchange_double(ptr %fptr, double %x) {
; X86-SSE1-LABEL: exchange_double:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
; X86-SSE1-NEXT: subl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 24
; X86-SSE1-NEXT: .cfi_offset %esi, -12
; X86-SSE1-NEXT: .cfi_offset %ebx, -8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movl (%esi), %eax
; X86-SSE1-NEXT: movl 4(%esi), %edx
; X86-SSE1-NEXT: .p2align 4
; X86-SSE1-NEXT: .LBB8_1: # %atomicrmw.start
; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
; X86-SSE1-NEXT: jne .LBB8_1
; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
; X86-SSE1-NEXT: popl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: popl %ebx
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: exchange_double:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: .cfi_def_cfa_offset 12
; X86-SSE2-NEXT: subl $12, %esp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 24
; X86-SSE2-NEXT: .cfi_offset %esi, -12
; X86-SSE2-NEXT: .cfi_offset %ebx, -8
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl (%esi), %eax
; X86-SSE2-NEXT: movl 4(%esi), %edx
; X86-SSE2-NEXT: .p2align 4
; X86-SSE2-NEXT: .LBB8_1: # %atomicrmw.start
; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT: lock cmpxchg8b (%esi)
; X86-SSE2-NEXT: jne .LBB8_1
; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT: movd %eax, %xmm0
; X86-SSE2-NEXT: movd %edx, %xmm1
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: movq %xmm0, (%esp)
; X86-SSE2-NEXT: fldl (%esp)
; X86-SSE2-NEXT: addl $12, %esp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 12
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: exchange_double:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 12
; X86-AVX-NEXT: subl $12, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 24
; X86-AVX-NEXT: .cfi_offset %esi, -12
; X86-AVX-NEXT: .cfi_offset %ebx, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl (%esi), %eax
; X86-AVX-NEXT: movl 4(%esi), %edx
; X86-AVX-NEXT: .p2align 4
; X86-AVX-NEXT: .LBB8_1: # %atomicrmw.start
; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT: lock cmpxchg8b (%esi)
; X86-AVX-NEXT: jne .LBB8_1
; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT: vmovd %eax, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esp)
; X86-AVX-NEXT: fldl (%esp)
; X86-AVX-NEXT: addl $12, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 12
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: exchange_double:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT: .cfi_offset %esi, -12
; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esi), %eax
; X86-NOSSE-NEXT: movl 4(%esi), %edx
; X86-NOSSE-NEXT: .p2align 4
; X86-NOSSE-NEXT: .LBB8_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT: jne .LBB8_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl (%esp)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: exchange_double:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %rax
; X64-SSE-NEXT: xchgq %rax, (%rdi)
; X64-SSE-NEXT: movq %rax, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: exchange_double:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovq %xmm0, %rax
; X64-AVX-NEXT: xchgq %rax, (%rdi)
; X64-AVX-NEXT: vmovq %rax, %xmm0
; X64-AVX-NEXT: retq
  %v = atomicrmw xchg ptr %fptr, double %x monotonic, align 8
  ret double %v
}

; Check the seq_cst lowering, since that's the interesting one from an
; ordering perspective on x86.
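; A seq_cst store must act as a full barrier, so it is expected to lower to
; an implicitly locked xchg rather than mov+mfence; where the store can't
; be folded into an xchg (the 8-byte 32-bit cases), a 'lock orl $0, (%esp)'
; stands in as a cheaper fence than mfence. seq_cst loads should match the
; unordered ones: x86's strong ordering makes them free as long as every
; seq_cst store carries the barrier.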

define void @store_float_seq_cst(ptr %fptr, float %v) {
; X86-LABEL: store_float_seq_cst:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xchgl %ecx, (%eax)
; X86-NEXT: retl
;
; X64-SSE-LABEL: store_float_seq_cst:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: xchgl %eax, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_float_seq_cst:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xchgl %eax, (%rdi)
; X64-AVX-NEXT: retq
  store atomic float %v, ptr %fptr seq_cst, align 4
  ret void
}

define void @store_double_seq_cst(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%eax)
; X86-SSE1-NEXT: lock orl $0, (%esp)
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: lock orl $0, (%esp)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: lock orl $0, (%esp)
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: lock orl $0, (%esp)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %rax
; X64-SSE-NEXT: xchgq %rax, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_double_seq_cst:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovq %xmm0, %rax
; X64-AVX-NEXT: xchgq %rax, (%rdi)
; X64-AVX-NEXT: retq
  store atomic double %v, ptr %fptr seq_cst, align 8
  ret void
}

define float @load_float_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_float_seq_cst:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl (%eax), %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: flds (%esp)
; X86-SSE1-NEXT: popl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: load_float_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: flds (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: load_float_seq_cst:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: load_float_seq_cst:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl (%eax), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: popl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: retq
  %v = load atomic float, ptr %fptr seq_cst, align 4
  ret float %v
}

define double @load_double_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movss %xmm0, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: load_double_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: subl $12, %esp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 16
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%esp)
; X86-SSE2-NEXT: fldl (%esp)
; X86-SSE2-NEXT: addl $12, %esp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: load_double_seq_cst:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: subl $12, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 16
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%esp)
; X86-AVX-NEXT: fldl (%esp)
; X86-AVX-NEXT: addl $12, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fldl (%esp)
; X86-NOSSE-NEXT: addl $20, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: retq
  %v = load atomic double, ptr %fptr seq_cst, align 8
  ret double %v
}
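
; Finally, bfloat. Where the argument arrives as a float (the 32-bit SSE1
; and no-SSE configurations here), the store must first shrink it with the
; __truncsfbf2 libcall before the 16-bit movw; the remaining configurations
; handle it just like half.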

define void @store_bfloat(ptr %fptr, bfloat %v) {
; X86-SSE1-LABEL: store_bfloat:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
; X86-SSE1-NEXT: .cfi_offset %esi, -8
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: movss %xmm0, (%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE1-NEXT: calll __truncsfbf2
; X86-SSE1-NEXT: movw %ax, (%esi)
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: popl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: store_bfloat:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movw %cx, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: store_bfloat:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movw %cx, (%eax)
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: store_bfloat:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT: .cfi_offset %esi, -8
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstps (%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: calll __truncsfbf2
; X86-NOSSE-NEXT: movw %ax, (%esi)
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: store_bfloat:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pextrw $0, %xmm0, %eax
; X64-SSE-NEXT: movw %ax, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_bfloat:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT: movw %ax, (%rdi)
; X64-AVX-NEXT: retq
  store atomic bfloat %v, ptr %fptr unordered, align 2
  ret void
}

define bfloat @load_bfloat(ptr %fptr) {
; X86-SSE1-LABEL: load_bfloat:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movzwl (%eax), %eax
; X86-SSE1-NEXT: shll $16, %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: flds (%esp)
; X86-SSE1-NEXT: popl %eax
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: load_bfloat:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movzwl (%eax), %eax
; X86-SSE2-NEXT: pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: load_bfloat:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movzwl (%eax), %eax
; X86-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: load_bfloat:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movzwl (%eax), %eax
; X86-NOSSE-NEXT: shll $16, %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: popl %eax
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: load_bfloat:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzwl (%rdi), %eax
; X64-SSE-NEXT: pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_bfloat:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movzwl (%rdi), %eax
; X64-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %v = load atomic bfloat, ptr %fptr unordered, align 2
  ret bfloat %v
}