; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX

; ----- FADD -----

define dso_local void @fadd_32r(ptr %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fadd_32r:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl (%eax), %ecx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: fadds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, (%eax)
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_32r:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl (%eax), %ecx
; X86-SSE1-NEXT: movl %ecx, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movl %ecx, (%eax)
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_32r:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: addss (%eax), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32r:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vaddss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addss (%rdi), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %1 = load atomic i32, ptr %loc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %add = fadd float %2, %val
  %3 = bitcast float %add to i32
  store atomic i32 %3, ptr %loc release, align 4
  ret void
}

define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fadd_64r:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_64r:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_64r:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_64r:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_64r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addsd (%rdi), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %1 = load atomic i64, ptr %loc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, ptr %loc release, align 8
  ret void
}

@glob32 = dso_local global float 0.000000e+00, align 4
@glob64 = dso_local global double 0.000000e+00, align 8

; Floating-point add to a global using an immediate.
define dso_local void @fadd_32g() nounwind {
; X86-NOSSE-LABEL: fadd_32g:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl glob32, %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fadds (%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, glob32
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_32g:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl glob32, %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, glob32
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss glob32, %xmm0
; X86-SSE2-NEXT: movss %xmm0, glob32
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, glob32
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss glob32(%rip), %xmm0
; X64-SSE-NEXT: movss %xmm0, glob32(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT: retq
  %i = load atomic i32, ptr @glob32 monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr @glob32 monotonic, align 4
  ret void
}

define dso_local void @fadd_64g() nounwind {
; X86-NOSSE-LABEL: fadd_64g:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_64g:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_64g:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_64g:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd glob64(%rip), %xmm0
; X64-SSE-NEXT: movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT: retq
  %i = load atomic i64, ptr @glob64 monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr @glob64 monotonic, align 8
  ret void
}

; Floating-point add to a hard-coded immediate location using an immediate.
define dso_local void @fadd_32imm() nounwind {
; X86-NOSSE-LABEL: fadd_32imm:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl -559038737, %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fadds (%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, -559038737
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_32imm:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl -559038737, %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, -559038737
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss -559038737, %xmm0
; X86-SSE2-NEXT: movss %xmm0, -559038737
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, -559038737
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss (%rax), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rax)
; X64-AVX-NEXT: retq
  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  ret void
}

define dso_local void @fadd_64imm() nounwind {
; X86-NOSSE-LABEL: fadd_64imm:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_64imm:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_64imm:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_64imm:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd (%rax), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
; X64-AVX-NEXT: retq
  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  ret void
}

; Floating-point add to a stack location.
define dso_local void @fadd_32stack() nounwind {
; X86-NOSSE-LABEL: fadd_32stack:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fadds (%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_32stack:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $12, %esp
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss (%esp), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
  %ptr = alloca i32, align 4
  %load = load atomic i32, ptr %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, ptr %ptr release, align 4
  ret void
}

define dso_local void @fadd_64stack() nounwind {
; X86-NOSSE-LABEL: fadd_64stack:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $40, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_64stack:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_64stack:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_64stack:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $16, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
  %ptr = alloca i64, align 8
  %load = load atomic i64, ptr %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, ptr %ptr release, align 8
  ret void
}

define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-LABEL: fadd_array:
; X86-NOSSE: # %bb.0: # %bb
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $40, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fadd_array:
; X86-SSE1: # %bb.0: # %bb
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: faddl 12(%ebp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fadd_array:
; X86-SSE2: # %bb.0: # %bb
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movl 20(%ebp), %eax
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_array:
; X86-AVX: # %bb.0: # %bb
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: movl 20(%ebp), %eax
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_array:
; X64-SSE: # %bb.0: # %bb
; X64-SSE-NEXT: addsd (%rdi,%rsi,8), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rdi,%rsi,8)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_array:
; X64-AVX: # %bb.0: # %bb
; X64-AVX-NEXT: vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT: retq
bb:
  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fadd double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
  ret void
}

; ----- FSUB -----

define dso_local void @fsub_32r(ptr %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fsub_32r:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl (%eax), %ecx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: fsubs {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, (%eax)
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_32r:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl (%eax), %ecx
; X86-SSE1-NEXT: movl %ecx, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: subss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movl %ecx, (%eax)
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_32r:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: subss {{[0-9]+}}(%esp), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_32r:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_32r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: subss %xmm0, %xmm1
; X64-SSE-NEXT: movss %xmm1, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_32r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %1 = load atomic i32, ptr %loc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %sub = fsub float %2, %val
  %3 = bitcast float %sub to i32
  store atomic i32 %3, ptr %loc release, align 4
  ret void
}

define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fsub_64r:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fsubl 12(%ebp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_64r:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fsubl 12(%ebp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%eax)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_64r:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_64r:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_64r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-SSE-NEXT: subsd %xmm0, %xmm1
; X64-SSE-NEXT: movsd %xmm1, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_64r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %1 = load atomic i64, ptr %loc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %sub = fsub double %2, %val
  %3 = bitcast double %sub to i64
  store atomic i64 %3, ptr %loc release, align 8
  ret void
}

; Floating-point sub to a global using an immediate.
define dso_local void @fsub_32g() nounwind {
; X86-NOSSE-LABEL: fsub_32g:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl glob32, %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fchs
; X86-NOSSE-NEXT: fadds (%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, glob32
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_32g:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl glob32, %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, glob32
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_32g:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss glob32, %xmm0
; X86-SSE2-NEXT: movss %xmm0, glob32
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_32g:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, glob32
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_32g:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss glob32(%rip), %xmm0
; X64-SSE-NEXT: movss %xmm0, glob32(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_32g:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT: retq
  %i = load atomic i32, ptr @glob32 monotonic, align 4
  %f = bitcast i32 %i to float
  %sub = fsub float %f, 1.000000e+00
  %s = bitcast float %sub to i32
  store atomic i32 %s, ptr @glob32 monotonic, align 4
  ret void
}

define dso_local void @fsub_64g() nounwind {
; X86-NOSSE-LABEL: fsub_64g:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll glob64
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fchs
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_64g:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fchs
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, glob64
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_64g:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_64g:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_64g:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd glob64(%rip), %xmm0
; X64-SSE-NEXT: movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_64g:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT: retq
  %i = load atomic i64, ptr @glob64 monotonic, align 8
  %f = bitcast i64 %i to double
  %sub = fsub double %f, 1.000000e+00
  %s = bitcast double %sub to i64
  store atomic i64 %s, ptr @glob64 monotonic, align 8
  ret void
}

; Floating-point sub to a hard-coded immediate location using an immediate.
define dso_local void @fsub_32imm() nounwind {
; X86-NOSSE-LABEL: fsub_32imm:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl -559038737, %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fchs
; X86-NOSSE-NEXT: fadds (%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, -559038737
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_32imm:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl -559038737, %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, -559038737
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_32imm:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss -559038737, %xmm0
; X86-SSE2-NEXT: movss %xmm0, -559038737
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_32imm:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, -559038737
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_32imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss (%rax), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_32imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rax)
; X64-AVX-NEXT: retq
  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  %f = bitcast i32 %i to float
  %sub = fsub float %f, 1.000000e+00
  %s = bitcast float %sub to i32
  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  ret void
}

define dso_local void @fsub_64imm() nounwind {
; X86-NOSSE-LABEL: fsub_64imm:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll -559038737
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fchs
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_64imm:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fchs
; X86-SSE1-NEXT: faddl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, -559038737
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_64imm:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_64imm:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_64imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd (%rax), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_64imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
; X64-AVX-NEXT: retq
  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  %f = bitcast i64 %i to double
  %sub = fsub double %f, 1.000000e+00
  %s = bitcast double %sub to i64
  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  ret void
}

; Floating-point sub to a stack location.
define dso_local void @fsub_32stack() nounwind {
; X86-NOSSE-LABEL: fsub_32stack:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fsubs (%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_32stack:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $12, %esp
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE1-NEXT: subss (%esp), %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: addl $12, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_32stack:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: subss (%esp), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_32stack:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vsubss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_32stack:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: subss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_32stack:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vsubss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
  %ptr = alloca i32, align 4
  %load = load atomic i32, ptr %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fsub = fsub float 1.000000e+00, %bc0
  %bc1 = bitcast float %fsub to i32
  store atomic i32 %bc1, ptr %ptr release, align 4
  ret void
}

define dso_local void @fsub_64stack() nounwind {
; X86-NOSSE-LABEL: fsub_64stack:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $40, %esp
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: fsubl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_64stack:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $24, %esp
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fld1
; X86-SSE1-NEXT: fsubl (%esp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_64stack:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-SSE2-NEXT: subsd %xmm0, %xmm1
; X86-SSE2-NEXT: movsd %xmm1, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_64stack:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $16, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_64stack:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: subsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_64stack:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vsubsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
  %ptr = alloca i64, align 8
  %load = load atomic i64, ptr %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fsub = fsub double 1.000000e+00, %bc0
  %bc1 = bitcast double %fsub to i64
  store atomic i64 %bc1, ptr %ptr release, align 8
  ret void
}

define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-LABEL: fsub_array:
; X86-NOSSE: # %bb.0: # %bb
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $40, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fsubl 12(%ebp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fsub_array:
; X86-SSE1: # %bb.0: # %bb
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 20(%ebp), %eax
; X86-SSE1-NEXT: movl 8(%ebp), %ecx
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
; X86-SSE1-NEXT: xorps %xmm1, %xmm1
; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT: movss %xmm1, (%esp)
; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: fldl (%esp)
; X86-SSE1-NEXT: fsubl 12(%ebp)
; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8)
; X86-SSE1-NEXT: movl %ebp, %esp
; X86-SSE1-NEXT: popl %ebp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fsub_array:
; X86-SSE2: # %bb.0: # %bb
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movl 20(%ebp), %eax
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: subsd 12(%ebp), %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fsub_array:
; X86-AVX: # %bb.0: # %bb
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: movl 20(%ebp), %eax
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vsubsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fsub_array:
; X64-SSE: # %bb.0: # %bb
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-SSE-NEXT: subsd %xmm0, %xmm1
; X64-SSE-NEXT: movsd %xmm1, (%rdi,%rsi,8)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fsub_array:
; X64-AVX: # %bb.0: # %bb
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT: retq
bb:
  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fsub double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
  ret void
}

; ----- FMUL -----

define dso_local void @fmul_32r(ptr %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fmul_32r:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: subl $8, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl (%eax), %ecx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: flds (%esp)
; X86-NOSSE-NEXT: fmuls {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, (%eax)
; X86-NOSSE-NEXT: addl $8, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fmul_32r:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: subl $8, %esp
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl (%eax), %ecx
; X86-SSE1-NEXT: movl %ecx, (%esp)
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT: mulss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movl %ecx, (%eax)
; X86-SSE1-NEXT: addl $8, %esp
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: fmul_32r:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: mulss (%eax), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fmul_32r:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmulss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fmul_32r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: mulss (%rdi), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fmul_32r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %1 = load atomic i32, ptr %loc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %mul = fmul float %2, %val
  %3 = bitcast float %mul to i32
  store atomic i32 %3, ptr %loc release, align 4
  ret void
}

define dso_local void @fmul_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fmul_64r:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fmull 12(%ebp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE1-LABEL: fmul_64r:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp
; X86-SSE1-NEXT: movl %esp, %ebp
; X86-SSE1-NEXT: andl $-8, %esp
; X86-SSE1-NEXT: subl $16, %esp
; X86-SSE1-NEXT: movl 8(%ebp), %eax
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
1663; X86-SSE1-NEXT: xorps %xmm1, %xmm1 1664; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 1665; X86-SSE1-NEXT: movss %xmm1, (%esp) 1666; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 1667; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 1668; X86-SSE1-NEXT: fldl (%esp) 1669; X86-SSE1-NEXT: fmull 12(%ebp) 1670; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 1671; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1672; X86-SSE1-NEXT: movlps %xmm0, (%eax) 1673; X86-SSE1-NEXT: movl %ebp, %esp 1674; X86-SSE1-NEXT: popl %ebp 1675; X86-SSE1-NEXT: retl 1676; 1677; X86-SSE2-LABEL: fmul_64r: 1678; X86-SSE2: # %bb.0: 1679; X86-SSE2-NEXT: pushl %ebp 1680; X86-SSE2-NEXT: movl %esp, %ebp 1681; X86-SSE2-NEXT: andl $-8, %esp 1682; X86-SSE2-NEXT: subl $8, %esp 1683; X86-SSE2-NEXT: movl 8(%ebp), %eax 1684; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1685; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0 1686; X86-SSE2-NEXT: movsd %xmm0, (%esp) 1687; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1688; X86-SSE2-NEXT: movlps %xmm0, (%eax) 1689; X86-SSE2-NEXT: movl %ebp, %esp 1690; X86-SSE2-NEXT: popl %ebp 1691; X86-SSE2-NEXT: retl 1692; 1693; X86-AVX-LABEL: fmul_64r: 1694; X86-AVX: # %bb.0: 1695; X86-AVX-NEXT: pushl %ebp 1696; X86-AVX-NEXT: movl %esp, %ebp 1697; X86-AVX-NEXT: andl $-8, %esp 1698; X86-AVX-NEXT: subl $8, %esp 1699; X86-AVX-NEXT: movl 8(%ebp), %eax 1700; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1701; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0 1702; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 1703; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1704; X86-AVX-NEXT: vmovlps %xmm0, (%eax) 1705; X86-AVX-NEXT: movl %ebp, %esp 1706; X86-AVX-NEXT: popl %ebp 1707; X86-AVX-NEXT: retl 1708; 1709; X64-SSE-LABEL: fmul_64r: 1710; X64-SSE: # %bb.0: 1711; X64-SSE-NEXT: mulsd (%rdi), %xmm0 1712; X64-SSE-NEXT: movsd %xmm0, (%rdi) 1713; X64-SSE-NEXT: retq 1714; 1715; X64-AVX-LABEL: fmul_64r: 1716; X64-AVX: # %bb.0: 1717; X64-AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 1718; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) 1719; X64-AVX-NEXT: retq 1720 %1 = load atomic i64, ptr %loc seq_cst, align 8 1721 %2 = bitcast i64 %1 to double 1722 %mul = fmul double %2, %val 1723 %3 = bitcast double %mul to i64 1724 store atomic i64 %3, ptr %loc release, align 8 1725 ret void 1726} 1727 1728; Floating-point mul to a global using an immediate. 
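; With SSE2/AVX the constant comes from the constant pool and the multiply folds the
; atomic load of glob32 directly; the x87-only and SSE1 configurations instead bounce
; the 32-bit value through a stack slot around the scalar multiply.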
1729define dso_local void @fmul_32g() nounwind { 1730; X86-NOSSE-LABEL: fmul_32g: 1731; X86-NOSSE: # %bb.0: 1732; X86-NOSSE-NEXT: subl $8, %esp 1733; X86-NOSSE-NEXT: movl glob32, %eax 1734; X86-NOSSE-NEXT: movl %eax, (%esp) 1735; X86-NOSSE-NEXT: flds (%esp) 1736; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 1737; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 1738; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1739; X86-NOSSE-NEXT: movl %eax, glob32 1740; X86-NOSSE-NEXT: addl $8, %esp 1741; X86-NOSSE-NEXT: retl 1742; 1743; X86-SSE1-LABEL: fmul_32g: 1744; X86-SSE1: # %bb.0: 1745; X86-SSE1-NEXT: subl $8, %esp 1746; X86-SSE1-NEXT: movl glob32, %eax 1747; X86-SSE1-NEXT: movl %eax, (%esp) 1748; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1749; X86-SSE1-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1750; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 1751; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 1752; X86-SSE1-NEXT: movl %eax, glob32 1753; X86-SSE1-NEXT: addl $8, %esp 1754; X86-SSE1-NEXT: retl 1755; 1756; X86-SSE2-LABEL: fmul_32g: 1757; X86-SSE2: # %bb.0: 1758; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1759; X86-SSE2-NEXT: mulss glob32, %xmm0 1760; X86-SSE2-NEXT: movss %xmm0, glob32 1761; X86-SSE2-NEXT: retl 1762; 1763; X86-AVX-LABEL: fmul_32g: 1764; X86-AVX: # %bb.0: 1765; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1766; X86-AVX-NEXT: vmulss glob32, %xmm0, %xmm0 1767; X86-AVX-NEXT: vmovss %xmm0, glob32 1768; X86-AVX-NEXT: retl 1769; 1770; X64-SSE-LABEL: fmul_32g: 1771; X64-SSE: # %bb.0: 1772; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1773; X64-SSE-NEXT: mulss glob32(%rip), %xmm0 1774; X64-SSE-NEXT: movss %xmm0, glob32(%rip) 1775; X64-SSE-NEXT: retq 1776; 1777; X64-AVX-LABEL: fmul_32g: 1778; X64-AVX: # %bb.0: 1779; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1780; X64-AVX-NEXT: vmulss glob32(%rip), %xmm0, %xmm0 1781; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip) 1782; X64-AVX-NEXT: retq 1783 %i = load atomic i32, ptr @glob32 monotonic, align 4 1784 %f = bitcast i32 %i to float 1785 %mul = fmul float %f, 0x400921FA00000000 1786 %s = bitcast float %mul to i32 1787 store atomic i32 %s, ptr @glob32 monotonic, align 4 1788 ret void 1789} 1790 1791define dso_local void @fmul_64g() nounwind { 1792; X86-NOSSE-LABEL: fmul_64g: 1793; X86-NOSSE: # %bb.0: 1794; X86-NOSSE-NEXT: pushl %ebp 1795; X86-NOSSE-NEXT: movl %esp, %ebp 1796; X86-NOSSE-NEXT: andl $-8, %esp 1797; X86-NOSSE-NEXT: subl $32, %esp 1798; X86-NOSSE-NEXT: fildll glob64 1799; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 1800; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1801; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 1802; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 1803; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 1804; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 1805; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 1806; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 1807; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1808; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 1809; X86-NOSSE-NEXT: movl %eax, (%esp) 1810; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 1811; X86-NOSSE-NEXT: fildll (%esp) 1812; X86-NOSSE-NEXT: fistpll glob64 1813; X86-NOSSE-NEXT: movl %ebp, %esp 1814; X86-NOSSE-NEXT: popl %ebp 1815; X86-NOSSE-NEXT: retl 1816; 1817; X86-SSE1-LABEL: fmul_64g: 1818; X86-SSE1: # %bb.0: 1819; X86-SSE1-NEXT: pushl %ebp 1820; X86-SSE1-NEXT: movl %esp, %ebp 1821; X86-SSE1-NEXT: andl $-8, %esp 1822; X86-SSE1-NEXT: subl $16, %esp 1823; 
X86-SSE1-NEXT: xorps %xmm0, %xmm0 1824; X86-SSE1-NEXT: xorps %xmm1, %xmm1 1825; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 1826; X86-SSE1-NEXT: movss %xmm1, (%esp) 1827; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 1828; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 1829; X86-SSE1-NEXT: fldl (%esp) 1830; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 1831; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 1832; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1833; X86-SSE1-NEXT: movlps %xmm0, glob64 1834; X86-SSE1-NEXT: movl %ebp, %esp 1835; X86-SSE1-NEXT: popl %ebp 1836; X86-SSE1-NEXT: retl 1837; 1838; X86-SSE2-LABEL: fmul_64g: 1839; X86-SSE2: # %bb.0: 1840; X86-SSE2-NEXT: pushl %ebp 1841; X86-SSE2-NEXT: movl %esp, %ebp 1842; X86-SSE2-NEXT: andl $-8, %esp 1843; X86-SSE2-NEXT: subl $8, %esp 1844; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1845; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1846; X86-SSE2-NEXT: movsd %xmm0, (%esp) 1847; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1848; X86-SSE2-NEXT: movlps %xmm0, glob64 1849; X86-SSE2-NEXT: movl %ebp, %esp 1850; X86-SSE2-NEXT: popl %ebp 1851; X86-SSE2-NEXT: retl 1852; 1853; X86-AVX-LABEL: fmul_64g: 1854; X86-AVX: # %bb.0: 1855; X86-AVX-NEXT: pushl %ebp 1856; X86-AVX-NEXT: movl %esp, %ebp 1857; X86-AVX-NEXT: andl $-8, %esp 1858; X86-AVX-NEXT: subl $8, %esp 1859; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1860; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1861; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 1862; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1863; X86-AVX-NEXT: vmovlps %xmm0, glob64 1864; X86-AVX-NEXT: movl %ebp, %esp 1865; X86-AVX-NEXT: popl %ebp 1866; X86-AVX-NEXT: retl 1867; 1868; X64-SSE-LABEL: fmul_64g: 1869; X64-SSE: # %bb.0: 1870; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] 1871; X64-SSE-NEXT: mulsd glob64(%rip), %xmm0 1872; X64-SSE-NEXT: movsd %xmm0, glob64(%rip) 1873; X64-SSE-NEXT: retq 1874; 1875; X64-AVX-LABEL: fmul_64g: 1876; X64-AVX: # %bb.0: 1877; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] 1878; X64-AVX-NEXT: vmulsd glob64(%rip), %xmm0, %xmm0 1879; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip) 1880; X64-AVX-NEXT: retq 1881 %i = load atomic i64, ptr @glob64 monotonic, align 8 1882 %f = bitcast i64 %i to double 1883 %mul = fmul double %f, 0x400921FA00000000 1884 %s = bitcast double %mul to i64 1885 store atomic i64 %s, ptr @glob64 monotonic, align 8 1886 ret void 1887} 1888 1889; Floating-point mul to a hard-coded immediate location using an immediate. 
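; The hard-coded location is the absolute address 0xDEADBEEF (-559038737 when used as a
; 32-bit displacement); x86-64 first materializes the address in %eax and goes through (%rax).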
1890define dso_local void @fmul_32imm() nounwind { 1891; X86-NOSSE-LABEL: fmul_32imm: 1892; X86-NOSSE: # %bb.0: 1893; X86-NOSSE-NEXT: subl $8, %esp 1894; X86-NOSSE-NEXT: movl -559038737, %eax 1895; X86-NOSSE-NEXT: movl %eax, (%esp) 1896; X86-NOSSE-NEXT: flds (%esp) 1897; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 1898; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 1899; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1900; X86-NOSSE-NEXT: movl %eax, -559038737 1901; X86-NOSSE-NEXT: addl $8, %esp 1902; X86-NOSSE-NEXT: retl 1903; 1904; X86-SSE1-LABEL: fmul_32imm: 1905; X86-SSE1: # %bb.0: 1906; X86-SSE1-NEXT: subl $8, %esp 1907; X86-SSE1-NEXT: movl -559038737, %eax 1908; X86-SSE1-NEXT: movl %eax, (%esp) 1909; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1910; X86-SSE1-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1911; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 1912; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 1913; X86-SSE1-NEXT: movl %eax, -559038737 1914; X86-SSE1-NEXT: addl $8, %esp 1915; X86-SSE1-NEXT: retl 1916; 1917; X86-SSE2-LABEL: fmul_32imm: 1918; X86-SSE2: # %bb.0: 1919; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1920; X86-SSE2-NEXT: mulss -559038737, %xmm0 1921; X86-SSE2-NEXT: movss %xmm0, -559038737 1922; X86-SSE2-NEXT: retl 1923; 1924; X86-AVX-LABEL: fmul_32imm: 1925; X86-AVX: # %bb.0: 1926; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1927; X86-AVX-NEXT: vmulss -559038737, %xmm0, %xmm0 1928; X86-AVX-NEXT: vmovss %xmm0, -559038737 1929; X86-AVX-NEXT: retl 1930; 1931; X64-SSE-LABEL: fmul_32imm: 1932; X64-SSE: # %bb.0: 1933; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 1934; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1935; X64-SSE-NEXT: mulss (%rax), %xmm0 1936; X64-SSE-NEXT: movss %xmm0, (%rax) 1937; X64-SSE-NEXT: retq 1938; 1939; X64-AVX-LABEL: fmul_32imm: 1940; X64-AVX: # %bb.0: 1941; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 1942; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 1943; X64-AVX-NEXT: vmulss (%rax), %xmm0, %xmm0 1944; X64-AVX-NEXT: vmovss %xmm0, (%rax) 1945; X64-AVX-NEXT: retq 1946 %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 1947 %f = bitcast i32 %i to float 1948 %mul = fmul float %f, 0x400921FA00000000 1949 %s = bitcast float %mul to i32 1950 store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 1951 ret void 1952} 1953 1954define dso_local void @fmul_64imm() nounwind { 1955; X86-NOSSE-LABEL: fmul_64imm: 1956; X86-NOSSE: # %bb.0: 1957; X86-NOSSE-NEXT: pushl %ebp 1958; X86-NOSSE-NEXT: movl %esp, %ebp 1959; X86-NOSSE-NEXT: andl $-8, %esp 1960; X86-NOSSE-NEXT: subl $32, %esp 1961; X86-NOSSE-NEXT: fildll -559038737 1962; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 1963; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1964; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 1965; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 1966; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 1967; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 1968; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 1969; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 1970; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 1971; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 1972; X86-NOSSE-NEXT: movl %eax, (%esp) 1973; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 1974; X86-NOSSE-NEXT: fildll (%esp) 1975; X86-NOSSE-NEXT: fistpll -559038737 1976; X86-NOSSE-NEXT: movl %ebp, %esp 1977; X86-NOSSE-NEXT: popl %ebp 1978; X86-NOSSE-NEXT: retl 1979; 1980; 
X86-SSE1-LABEL: fmul_64imm: 1981; X86-SSE1: # %bb.0: 1982; X86-SSE1-NEXT: pushl %ebp 1983; X86-SSE1-NEXT: movl %esp, %ebp 1984; X86-SSE1-NEXT: andl $-8, %esp 1985; X86-SSE1-NEXT: subl $16, %esp 1986; X86-SSE1-NEXT: xorps %xmm0, %xmm0 1987; X86-SSE1-NEXT: xorps %xmm1, %xmm1 1988; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 1989; X86-SSE1-NEXT: movss %xmm1, (%esp) 1990; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 1991; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 1992; X86-SSE1-NEXT: fldl (%esp) 1993; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 1994; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 1995; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1996; X86-SSE1-NEXT: movlps %xmm0, -559038737 1997; X86-SSE1-NEXT: movl %ebp, %esp 1998; X86-SSE1-NEXT: popl %ebp 1999; X86-SSE1-NEXT: retl 2000; 2001; X86-SSE2-LABEL: fmul_64imm: 2002; X86-SSE2: # %bb.0: 2003; X86-SSE2-NEXT: pushl %ebp 2004; X86-SSE2-NEXT: movl %esp, %ebp 2005; X86-SSE2-NEXT: andl $-8, %esp 2006; X86-SSE2-NEXT: subl $8, %esp 2007; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2008; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2009; X86-SSE2-NEXT: movsd %xmm0, (%esp) 2010; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2011; X86-SSE2-NEXT: movlps %xmm0, -559038737 2012; X86-SSE2-NEXT: movl %ebp, %esp 2013; X86-SSE2-NEXT: popl %ebp 2014; X86-SSE2-NEXT: retl 2015; 2016; X86-AVX-LABEL: fmul_64imm: 2017; X86-AVX: # %bb.0: 2018; X86-AVX-NEXT: pushl %ebp 2019; X86-AVX-NEXT: movl %esp, %ebp 2020; X86-AVX-NEXT: andl $-8, %esp 2021; X86-AVX-NEXT: subl $8, %esp 2022; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2023; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2024; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2025; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2026; X86-AVX-NEXT: vmovlps %xmm0, -559038737 2027; X86-AVX-NEXT: movl %ebp, %esp 2028; X86-AVX-NEXT: popl %ebp 2029; X86-AVX-NEXT: retl 2030; 2031; X64-SSE-LABEL: fmul_64imm: 2032; X64-SSE: # %bb.0: 2033; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 2034; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] 2035; X64-SSE-NEXT: mulsd (%rax), %xmm0 2036; X64-SSE-NEXT: movsd %xmm0, (%rax) 2037; X64-SSE-NEXT: retq 2038; 2039; X64-AVX-LABEL: fmul_64imm: 2040; X64-AVX: # %bb.0: 2041; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 2042; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] 2043; X64-AVX-NEXT: vmulsd (%rax), %xmm0, %xmm0 2044; X64-AVX-NEXT: vmovsd %xmm0, (%rax) 2045; X64-AVX-NEXT: retq 2046 %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 2047 %f = bitcast i64 %i to double 2048 %mul = fmul double %f, 0x400921FA00000000 2049 %s = bitcast double %mul to i64 2050 store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 2051 ret void 2052} 2053 2054; Floating-point mul to a stack location. 
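; The operand is an alloca, so the acquire load and release store never leave the frame;
; with SSE2/AVX on 32-bit the sequence only needs a one-word frame (pushl/popl %eax).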
2055define dso_local void @fmul_32stack() nounwind { 2056; X86-NOSSE-LABEL: fmul_32stack: 2057; X86-NOSSE: # %bb.0: 2058; X86-NOSSE-NEXT: subl $12, %esp 2059; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2060; X86-NOSSE-NEXT: movl %eax, (%esp) 2061; X86-NOSSE-NEXT: flds (%esp) 2062; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 2063; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 2064; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2065; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 2066; X86-NOSSE-NEXT: addl $12, %esp 2067; X86-NOSSE-NEXT: retl 2068; 2069; X86-SSE1-LABEL: fmul_32stack: 2070; X86-SSE1: # %bb.0: 2071; X86-SSE1-NEXT: subl $12, %esp 2072; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2073; X86-SSE1-NEXT: movl %eax, (%esp) 2074; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2075; X86-SSE1-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2076; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2077; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2078; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) 2079; X86-SSE1-NEXT: addl $12, %esp 2080; X86-SSE1-NEXT: retl 2081; 2082; X86-SSE2-LABEL: fmul_32stack: 2083; X86-SSE2: # %bb.0: 2084; X86-SSE2-NEXT: pushl %eax 2085; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 2086; X86-SSE2-NEXT: mulss (%esp), %xmm0 2087; X86-SSE2-NEXT: movss %xmm0, (%esp) 2088; X86-SSE2-NEXT: popl %eax 2089; X86-SSE2-NEXT: retl 2090; 2091; X86-AVX-LABEL: fmul_32stack: 2092; X86-AVX: # %bb.0: 2093; X86-AVX-NEXT: pushl %eax 2094; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 2095; X86-AVX-NEXT: vmulss (%esp), %xmm0, %xmm0 2096; X86-AVX-NEXT: vmovss %xmm0, (%esp) 2097; X86-AVX-NEXT: popl %eax 2098; X86-AVX-NEXT: retl 2099; 2100; X64-SSE-LABEL: fmul_32stack: 2101; X64-SSE: # %bb.0: 2102; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 2103; X64-SSE-NEXT: mulss -{{[0-9]+}}(%rsp), %xmm0 2104; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) 2105; X64-SSE-NEXT: retq 2106; 2107; X64-AVX-LABEL: fmul_32stack: 2108; X64-AVX: # %bb.0: 2109; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0] 2110; X64-AVX-NEXT: vmulss -{{[0-9]+}}(%rsp), %xmm0, %xmm0 2111; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) 2112; X64-AVX-NEXT: retq 2113 %ptr = alloca i32, align 4 2114 %load = load atomic i32, ptr %ptr acquire, align 4 2115 %bc0 = bitcast i32 %load to float 2116 %fmul = fmul float 0x400921FA00000000, %bc0 2117 %bc1 = bitcast float %fmul to i32 2118 store atomic i32 %bc1, ptr %ptr release, align 4 2119 ret void 2120} 2121 2122define dso_local void @fmul_64stack() nounwind { 2123; X86-NOSSE-LABEL: fmul_64stack: 2124; X86-NOSSE: # %bb.0: 2125; X86-NOSSE-NEXT: pushl %ebp 2126; X86-NOSSE-NEXT: movl %esp, %ebp 2127; X86-NOSSE-NEXT: andl $-8, %esp 2128; X86-NOSSE-NEXT: subl $40, %esp 2129; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) 2130; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2131; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2132; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2133; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2134; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 2135; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 2136; X86-NOSSE-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 2137; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 2138; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2139; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2140; X86-NOSSE-NEXT: movl %eax, (%esp) 2141; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2142; X86-NOSSE-NEXT: fildll (%esp) 2143; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2144; X86-NOSSE-NEXT: movl %ebp, 
%esp 2145; X86-NOSSE-NEXT: popl %ebp 2146; X86-NOSSE-NEXT: retl 2147; 2148; X86-SSE1-LABEL: fmul_64stack: 2149; X86-SSE1: # %bb.0: 2150; X86-SSE1-NEXT: pushl %ebp 2151; X86-SSE1-NEXT: movl %esp, %ebp 2152; X86-SSE1-NEXT: andl $-8, %esp 2153; X86-SSE1-NEXT: subl $24, %esp 2154; X86-SSE1-NEXT: xorps %xmm0, %xmm0 2155; X86-SSE1-NEXT: xorps %xmm1, %xmm1 2156; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 2157; X86-SSE1-NEXT: movss %xmm1, (%esp) 2158; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2159; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 2160; X86-SSE1-NEXT: fldl (%esp) 2161; X86-SSE1-NEXT: fmuls {{\.?LCPI[0-9]+_[0-9]+}} 2162; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 2163; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 2164; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 2165; X86-SSE1-NEXT: movl %ebp, %esp 2166; X86-SSE1-NEXT: popl %ebp 2167; X86-SSE1-NEXT: retl 2168; 2169; X86-SSE2-LABEL: fmul_64stack: 2170; X86-SSE2: # %bb.0: 2171; X86-SSE2-NEXT: pushl %ebp 2172; X86-SSE2-NEXT: movl %esp, %ebp 2173; X86-SSE2-NEXT: andl $-8, %esp 2174; X86-SSE2-NEXT: subl $16, %esp 2175; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2176; X86-SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2177; X86-SSE2-NEXT: movsd %xmm0, (%esp) 2178; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2179; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 2180; X86-SSE2-NEXT: movl %ebp, %esp 2181; X86-SSE2-NEXT: popl %ebp 2182; X86-SSE2-NEXT: retl 2183; 2184; X86-AVX-LABEL: fmul_64stack: 2185; X86-AVX: # %bb.0: 2186; X86-AVX-NEXT: pushl %ebp 2187; X86-AVX-NEXT: movl %esp, %ebp 2188; X86-AVX-NEXT: andl $-8, %esp 2189; X86-AVX-NEXT: subl $16, %esp 2190; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2191; X86-AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2192; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2193; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2194; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 2195; X86-AVX-NEXT: movl %ebp, %esp 2196; X86-AVX-NEXT: popl %ebp 2197; X86-AVX-NEXT: retl 2198; 2199; X64-SSE-LABEL: fmul_64stack: 2200; X64-SSE: # %bb.0: 2201; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] 2202; X64-SSE-NEXT: mulsd -{{[0-9]+}}(%rsp), %xmm0 2203; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 2204; X64-SSE-NEXT: retq 2205; 2206; X64-AVX-LABEL: fmul_64stack: 2207; X64-AVX: # %bb.0: 2208; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0] 2209; X64-AVX-NEXT: vmulsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 2210; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) 2211; X64-AVX-NEXT: retq 2212 %ptr = alloca i64, align 8 2213 %load = load atomic i64, ptr %ptr acquire, align 8 2214 %bc0 = bitcast i64 %load to double 2215 %fmul = fmul double 0x400921FA00000000, %bc0 2216 %bc1 = bitcast double %fmul to i64 2217 store atomic i64 %bc1, ptr %ptr release, align 8 2218 ret void 2219} 2220 2221define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind { 2222; X86-NOSSE-LABEL: fmul_array: 2223; X86-NOSSE: # %bb.0: # %bb 2224; X86-NOSSE-NEXT: pushl %ebp 2225; X86-NOSSE-NEXT: movl %esp, %ebp 2226; X86-NOSSE-NEXT: pushl %esi 2227; X86-NOSSE-NEXT: andl $-8, %esp 2228; X86-NOSSE-NEXT: subl $40, %esp 2229; X86-NOSSE-NEXT: movl 20(%ebp), %eax 2230; X86-NOSSE-NEXT: movl 8(%ebp), %ecx 2231; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) 2232; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2233; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 2234; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 2235; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) 2236; X86-NOSSE-NEXT: movl %edx, 
{{[0-9]+}}(%esp) 2237; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 2238; X86-NOSSE-NEXT: fmull 12(%ebp) 2239; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 2240; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 2241; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 2242; X86-NOSSE-NEXT: movl %edx, (%esp) 2243; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) 2244; X86-NOSSE-NEXT: fildll (%esp) 2245; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) 2246; X86-NOSSE-NEXT: leal -4(%ebp), %esp 2247; X86-NOSSE-NEXT: popl %esi 2248; X86-NOSSE-NEXT: popl %ebp 2249; X86-NOSSE-NEXT: retl 2250; 2251; X86-SSE1-LABEL: fmul_array: 2252; X86-SSE1: # %bb.0: # %bb 2253; X86-SSE1-NEXT: pushl %ebp 2254; X86-SSE1-NEXT: movl %esp, %ebp 2255; X86-SSE1-NEXT: andl $-8, %esp 2256; X86-SSE1-NEXT: subl $16, %esp 2257; X86-SSE1-NEXT: movl 20(%ebp), %eax 2258; X86-SSE1-NEXT: movl 8(%ebp), %ecx 2259; X86-SSE1-NEXT: xorps %xmm0, %xmm0 2260; X86-SSE1-NEXT: xorps %xmm1, %xmm1 2261; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 2262; X86-SSE1-NEXT: movss %xmm1, (%esp) 2263; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2264; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 2265; X86-SSE1-NEXT: fldl (%esp) 2266; X86-SSE1-NEXT: fmull 12(%ebp) 2267; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 2268; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 2269; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) 2270; X86-SSE1-NEXT: movl %ebp, %esp 2271; X86-SSE1-NEXT: popl %ebp 2272; X86-SSE1-NEXT: retl 2273; 2274; X86-SSE2-LABEL: fmul_array: 2275; X86-SSE2: # %bb.0: # %bb 2276; X86-SSE2-NEXT: pushl %ebp 2277; X86-SSE2-NEXT: movl %esp, %ebp 2278; X86-SSE2-NEXT: andl $-8, %esp 2279; X86-SSE2-NEXT: subl $8, %esp 2280; X86-SSE2-NEXT: movl 20(%ebp), %eax 2281; X86-SSE2-NEXT: movl 8(%ebp), %ecx 2282; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2283; X86-SSE2-NEXT: mulsd 12(%ebp), %xmm0 2284; X86-SSE2-NEXT: movsd %xmm0, (%esp) 2285; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2286; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) 2287; X86-SSE2-NEXT: movl %ebp, %esp 2288; X86-SSE2-NEXT: popl %ebp 2289; X86-SSE2-NEXT: retl 2290; 2291; X86-AVX-LABEL: fmul_array: 2292; X86-AVX: # %bb.0: # %bb 2293; X86-AVX-NEXT: pushl %ebp 2294; X86-AVX-NEXT: movl %esp, %ebp 2295; X86-AVX-NEXT: andl $-8, %esp 2296; X86-AVX-NEXT: subl $8, %esp 2297; X86-AVX-NEXT: movl 20(%ebp), %eax 2298; X86-AVX-NEXT: movl 8(%ebp), %ecx 2299; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2300; X86-AVX-NEXT: vmulsd 12(%ebp), %xmm0, %xmm0 2301; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2302; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2303; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) 2304; X86-AVX-NEXT: movl %ebp, %esp 2305; X86-AVX-NEXT: popl %ebp 2306; X86-AVX-NEXT: retl 2307; 2308; X64-SSE-LABEL: fmul_array: 2309; X64-SSE: # %bb.0: # %bb 2310; X64-SSE-NEXT: mulsd (%rdi,%rsi,8), %xmm0 2311; X64-SSE-NEXT: movsd %xmm0, (%rdi,%rsi,8) 2312; X64-SSE-NEXT: retq 2313; 2314; X64-AVX-LABEL: fmul_array: 2315; X64-AVX: # %bb.0: # %bb 2316; X64-AVX-NEXT: vmulsd (%rdi,%rsi,8), %xmm0, %xmm0 2317; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8) 2318; X64-AVX-NEXT: retq 2319bb: 2320 %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2 2321 %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8 2322 %tmp7 = bitcast i64 %tmp6 to double 2323 %tmp8 = fmul double %tmp7, %arg1 2324 %tmp9 = bitcast double %tmp8 to i64 2325 store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8 2326 ret void 2327} 2328 2329; ----- FDIV ----- 2330 2331define dso_local void @fdiv_32r(ptr %loc, float %val) nounwind { 2332; X86-NOSSE-LABEL: fdiv_32r: 
2333; X86-NOSSE: # %bb.0: 2334; X86-NOSSE-NEXT: subl $8, %esp 2335; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2336; X86-NOSSE-NEXT: movl (%eax), %ecx 2337; X86-NOSSE-NEXT: movl %ecx, (%esp) 2338; X86-NOSSE-NEXT: flds (%esp) 2339; X86-NOSSE-NEXT: fdivs {{[0-9]+}}(%esp) 2340; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 2341; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2342; X86-NOSSE-NEXT: movl %ecx, (%eax) 2343; X86-NOSSE-NEXT: addl $8, %esp 2344; X86-NOSSE-NEXT: retl 2345; 2346; X86-SSE1-LABEL: fdiv_32r: 2347; X86-SSE1: # %bb.0: 2348; X86-SSE1-NEXT: subl $8, %esp 2349; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2350; X86-SSE1-NEXT: movl (%eax), %ecx 2351; X86-SSE1-NEXT: movl %ecx, (%esp) 2352; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2353; X86-SSE1-NEXT: divss {{[0-9]+}}(%esp), %xmm0 2354; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2355; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx 2356; X86-SSE1-NEXT: movl %ecx, (%eax) 2357; X86-SSE1-NEXT: addl $8, %esp 2358; X86-SSE1-NEXT: retl 2359; 2360; X86-SSE2-LABEL: fdiv_32r: 2361; X86-SSE2: # %bb.0: 2362; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2363; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2364; X86-SSE2-NEXT: divss {{[0-9]+}}(%esp), %xmm0 2365; X86-SSE2-NEXT: movss %xmm0, (%eax) 2366; X86-SSE2-NEXT: retl 2367; 2368; X86-AVX-LABEL: fdiv_32r: 2369; X86-AVX: # %bb.0: 2370; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax 2371; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2372; X86-AVX-NEXT: vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0 2373; X86-AVX-NEXT: vmovss %xmm0, (%eax) 2374; X86-AVX-NEXT: retl 2375; 2376; X64-SSE-LABEL: fdiv_32r: 2377; X64-SSE: # %bb.0: 2378; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2379; X64-SSE-NEXT: divss %xmm0, %xmm1 2380; X64-SSE-NEXT: movss %xmm1, (%rdi) 2381; X64-SSE-NEXT: retq 2382; 2383; X64-AVX-LABEL: fdiv_32r: 2384; X64-AVX: # %bb.0: 2385; X64-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2386; X64-AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 2387; X64-AVX-NEXT: vmovss %xmm0, (%rdi) 2388; X64-AVX-NEXT: retq 2389 %1 = load atomic i32, ptr %loc seq_cst, align 4 2390 %2 = bitcast i32 %1 to float 2391 %div = fdiv float %2, %val 2392 %3 = bitcast float %div to i32 2393 store atomic i32 %3, ptr %loc release, align 4 2394 ret void 2395} 2396 2397define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind { 2398; X86-NOSSE-LABEL: fdiv_64r: 2399; X86-NOSSE: # %bb.0: 2400; X86-NOSSE-NEXT: pushl %ebp 2401; X86-NOSSE-NEXT: movl %esp, %ebp 2402; X86-NOSSE-NEXT: andl $-8, %esp 2403; X86-NOSSE-NEXT: subl $32, %esp 2404; X86-NOSSE-NEXT: movl 8(%ebp), %eax 2405; X86-NOSSE-NEXT: fildll (%eax) 2406; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2407; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2408; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 2409; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) 2410; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2411; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 2412; X86-NOSSE-NEXT: fdivl 12(%ebp) 2413; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 2414; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2415; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 2416; X86-NOSSE-NEXT: movl %ecx, (%esp) 2417; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) 2418; X86-NOSSE-NEXT: fildll (%esp) 2419; X86-NOSSE-NEXT: fistpll (%eax) 2420; X86-NOSSE-NEXT: movl %ebp, %esp 2421; X86-NOSSE-NEXT: popl %ebp 2422; X86-NOSSE-NEXT: retl 2423; 2424; X86-SSE1-LABEL: fdiv_64r: 2425; X86-SSE1: # %bb.0: 2426; X86-SSE1-NEXT: pushl %ebp 2427; X86-SSE1-NEXT: movl %esp, %ebp 2428; X86-SSE1-NEXT: andl $-8, %esp 
2429; X86-SSE1-NEXT: subl $16, %esp 2430; X86-SSE1-NEXT: movl 8(%ebp), %eax 2431; X86-SSE1-NEXT: xorps %xmm0, %xmm0 2432; X86-SSE1-NEXT: xorps %xmm1, %xmm1 2433; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 2434; X86-SSE1-NEXT: movss %xmm1, (%esp) 2435; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2436; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 2437; X86-SSE1-NEXT: fldl (%esp) 2438; X86-SSE1-NEXT: fdivl 12(%ebp) 2439; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 2440; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 2441; X86-SSE1-NEXT: movlps %xmm0, (%eax) 2442; X86-SSE1-NEXT: movl %ebp, %esp 2443; X86-SSE1-NEXT: popl %ebp 2444; X86-SSE1-NEXT: retl 2445; 2446; X86-SSE2-LABEL: fdiv_64r: 2447; X86-SSE2: # %bb.0: 2448; X86-SSE2-NEXT: pushl %ebp 2449; X86-SSE2-NEXT: movl %esp, %ebp 2450; X86-SSE2-NEXT: andl $-8, %esp 2451; X86-SSE2-NEXT: subl $8, %esp 2452; X86-SSE2-NEXT: movl 8(%ebp), %eax 2453; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2454; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0 2455; X86-SSE2-NEXT: movsd %xmm0, (%esp) 2456; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2457; X86-SSE2-NEXT: movlps %xmm0, (%eax) 2458; X86-SSE2-NEXT: movl %ebp, %esp 2459; X86-SSE2-NEXT: popl %ebp 2460; X86-SSE2-NEXT: retl 2461; 2462; X86-AVX-LABEL: fdiv_64r: 2463; X86-AVX: # %bb.0: 2464; X86-AVX-NEXT: pushl %ebp 2465; X86-AVX-NEXT: movl %esp, %ebp 2466; X86-AVX-NEXT: andl $-8, %esp 2467; X86-AVX-NEXT: subl $8, %esp 2468; X86-AVX-NEXT: movl 8(%ebp), %eax 2469; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2470; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0 2471; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2472; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2473; X86-AVX-NEXT: vmovlps %xmm0, (%eax) 2474; X86-AVX-NEXT: movl %ebp, %esp 2475; X86-AVX-NEXT: popl %ebp 2476; X86-AVX-NEXT: retl 2477; 2478; X64-SSE-LABEL: fdiv_64r: 2479; X64-SSE: # %bb.0: 2480; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2481; X64-SSE-NEXT: divsd %xmm0, %xmm1 2482; X64-SSE-NEXT: movsd %xmm1, (%rdi) 2483; X64-SSE-NEXT: retq 2484; 2485; X64-AVX-LABEL: fdiv_64r: 2486; X64-AVX: # %bb.0: 2487; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2488; X64-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 2489; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) 2490; X64-AVX-NEXT: retq 2491 %1 = load atomic i64, ptr %loc seq_cst, align 8 2492 %2 = bitcast i64 %1 to double 2493 %div = fdiv double %2, %val 2494 %3 = bitcast double %div to i64 2495 store atomic i64 %3, ptr %loc release, align 8 2496 ret void 2497} 2498 2499; Floating-point div to a global using an immediate. 
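; Division is not commutative, so unlike the fmul tests the atomically loaded value stays
; in the register operand and the constant-pool value is used as the memory divisor.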
2500define dso_local void @fdiv_32g() nounwind { 2501; X86-NOSSE-LABEL: fdiv_32g: 2502; X86-NOSSE: # %bb.0: 2503; X86-NOSSE-NEXT: subl $8, %esp 2504; X86-NOSSE-NEXT: movl glob32, %eax 2505; X86-NOSSE-NEXT: movl %eax, (%esp) 2506; X86-NOSSE-NEXT: flds (%esp) 2507; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} 2508; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 2509; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2510; X86-NOSSE-NEXT: movl %eax, glob32 2511; X86-NOSSE-NEXT: addl $8, %esp 2512; X86-NOSSE-NEXT: retl 2513; 2514; X86-SSE1-LABEL: fdiv_32g: 2515; X86-SSE1: # %bb.0: 2516; X86-SSE1-NEXT: subl $8, %esp 2517; X86-SSE1-NEXT: movl glob32, %eax 2518; X86-SSE1-NEXT: movl %eax, (%esp) 2519; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2520; X86-SSE1-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2521; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2522; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2523; X86-SSE1-NEXT: movl %eax, glob32 2524; X86-SSE1-NEXT: addl $8, %esp 2525; X86-SSE1-NEXT: retl 2526; 2527; X86-SSE2-LABEL: fdiv_32g: 2528; X86-SSE2: # %bb.0: 2529; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2530; X86-SSE2-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2531; X86-SSE2-NEXT: movss %xmm0, glob32 2532; X86-SSE2-NEXT: retl 2533; 2534; X86-AVX-LABEL: fdiv_32g: 2535; X86-AVX: # %bb.0: 2536; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2537; X86-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2538; X86-AVX-NEXT: vmovss %xmm0, glob32 2539; X86-AVX-NEXT: retl 2540; 2541; X64-SSE-LABEL: fdiv_32g: 2542; X64-SSE: # %bb.0: 2543; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2544; X64-SSE-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2545; X64-SSE-NEXT: movss %xmm0, glob32(%rip) 2546; X64-SSE-NEXT: retq 2547; 2548; X64-AVX-LABEL: fdiv_32g: 2549; X64-AVX: # %bb.0: 2550; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2551; X64-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2552; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip) 2553; X64-AVX-NEXT: retq 2554 %i = load atomic i32, ptr @glob32 monotonic, align 4 2555 %f = bitcast i32 %i to float 2556 %div = fdiv float %f, 0x400921FA00000000 2557 %s = bitcast float %div to i32 2558 store atomic i32 %s, ptr @glob32 monotonic, align 4 2559 ret void 2560} 2561 2562define dso_local void @fdiv_64g() nounwind { 2563; X86-NOSSE-LABEL: fdiv_64g: 2564; X86-NOSSE: # %bb.0: 2565; X86-NOSSE-NEXT: pushl %ebp 2566; X86-NOSSE-NEXT: movl %esp, %ebp 2567; X86-NOSSE-NEXT: andl $-8, %esp 2568; X86-NOSSE-NEXT: subl $32, %esp 2569; X86-NOSSE-NEXT: fildll glob64 2570; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2571; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2572; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2573; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2574; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 2575; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 2576; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} 2577; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 2578; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2579; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2580; X86-NOSSE-NEXT: movl %eax, (%esp) 2581; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2582; X86-NOSSE-NEXT: fildll (%esp) 2583; X86-NOSSE-NEXT: fistpll glob64 2584; X86-NOSSE-NEXT: movl %ebp, %esp 2585; X86-NOSSE-NEXT: popl %ebp 2586; X86-NOSSE-NEXT: retl 2587; 2588; X86-SSE1-LABEL: fdiv_64g: 2589; X86-SSE1: # %bb.0: 2590; X86-SSE1-NEXT: pushl %ebp 2591; X86-SSE1-NEXT: movl %esp, %ebp 2592; X86-SSE1-NEXT: andl $-8, %esp 2593; X86-SSE1-NEXT: subl $16, %esp 2594; 
X86-SSE1-NEXT: xorps %xmm0, %xmm0 2595; X86-SSE1-NEXT: xorps %xmm1, %xmm1 2596; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 2597; X86-SSE1-NEXT: movss %xmm1, (%esp) 2598; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2599; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 2600; X86-SSE1-NEXT: fldl (%esp) 2601; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} 2602; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 2603; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 2604; X86-SSE1-NEXT: movlps %xmm0, glob64 2605; X86-SSE1-NEXT: movl %ebp, %esp 2606; X86-SSE1-NEXT: popl %ebp 2607; X86-SSE1-NEXT: retl 2608; 2609; X86-SSE2-LABEL: fdiv_64g: 2610; X86-SSE2: # %bb.0: 2611; X86-SSE2-NEXT: pushl %ebp 2612; X86-SSE2-NEXT: movl %esp, %ebp 2613; X86-SSE2-NEXT: andl $-8, %esp 2614; X86-SSE2-NEXT: subl $8, %esp 2615; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2616; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2617; X86-SSE2-NEXT: movsd %xmm0, (%esp) 2618; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2619; X86-SSE2-NEXT: movlps %xmm0, glob64 2620; X86-SSE2-NEXT: movl %ebp, %esp 2621; X86-SSE2-NEXT: popl %ebp 2622; X86-SSE2-NEXT: retl 2623; 2624; X86-AVX-LABEL: fdiv_64g: 2625; X86-AVX: # %bb.0: 2626; X86-AVX-NEXT: pushl %ebp 2627; X86-AVX-NEXT: movl %esp, %ebp 2628; X86-AVX-NEXT: andl $-8, %esp 2629; X86-AVX-NEXT: subl $8, %esp 2630; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2631; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2632; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2633; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2634; X86-AVX-NEXT: vmovlps %xmm0, glob64 2635; X86-AVX-NEXT: movl %ebp, %esp 2636; X86-AVX-NEXT: popl %ebp 2637; X86-AVX-NEXT: retl 2638; 2639; X64-SSE-LABEL: fdiv_64g: 2640; X64-SSE: # %bb.0: 2641; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2642; X64-SSE-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2643; X64-SSE-NEXT: movsd %xmm0, glob64(%rip) 2644; X64-SSE-NEXT: retq 2645; 2646; X64-AVX-LABEL: fdiv_64g: 2647; X64-AVX: # %bb.0: 2648; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2649; X64-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2650; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip) 2651; X64-AVX-NEXT: retq 2652 %i = load atomic i64, ptr @glob64 monotonic, align 8 2653 %f = bitcast i64 %i to double 2654 %div = fdiv double %f, 0x400921FA00000000 2655 %s = bitcast double %div to i64 2656 store atomic i64 %s, ptr @glob64 monotonic, align 8 2657 ret void 2658} 2659 2660; Floating-point div to a hard-coded immediate location using an immediate. 
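; Same 0xDEADBEEF absolute address as in the fmul test above, now with a divide; both the
; load and the store are monotonic.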
2661define dso_local void @fdiv_32imm() nounwind { 2662; X86-NOSSE-LABEL: fdiv_32imm: 2663; X86-NOSSE: # %bb.0: 2664; X86-NOSSE-NEXT: subl $8, %esp 2665; X86-NOSSE-NEXT: movl -559038737, %eax 2666; X86-NOSSE-NEXT: movl %eax, (%esp) 2667; X86-NOSSE-NEXT: flds (%esp) 2668; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} 2669; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 2670; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2671; X86-NOSSE-NEXT: movl %eax, -559038737 2672; X86-NOSSE-NEXT: addl $8, %esp 2673; X86-NOSSE-NEXT: retl 2674; 2675; X86-SSE1-LABEL: fdiv_32imm: 2676; X86-SSE1: # %bb.0: 2677; X86-SSE1-NEXT: subl $8, %esp 2678; X86-SSE1-NEXT: movl -559038737, %eax 2679; X86-SSE1-NEXT: movl %eax, (%esp) 2680; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2681; X86-SSE1-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2682; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2683; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2684; X86-SSE1-NEXT: movl %eax, -559038737 2685; X86-SSE1-NEXT: addl $8, %esp 2686; X86-SSE1-NEXT: retl 2687; 2688; X86-SSE2-LABEL: fdiv_32imm: 2689; X86-SSE2: # %bb.0: 2690; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2691; X86-SSE2-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2692; X86-SSE2-NEXT: movss %xmm0, -559038737 2693; X86-SSE2-NEXT: retl 2694; 2695; X86-AVX-LABEL: fdiv_32imm: 2696; X86-AVX: # %bb.0: 2697; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2698; X86-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2699; X86-AVX-NEXT: vmovss %xmm0, -559038737 2700; X86-AVX-NEXT: retl 2701; 2702; X64-SSE-LABEL: fdiv_32imm: 2703; X64-SSE: # %bb.0: 2704; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 2705; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2706; X64-SSE-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2707; X64-SSE-NEXT: movss %xmm0, (%rax) 2708; X64-SSE-NEXT: retq 2709; 2710; X64-AVX-LABEL: fdiv_32imm: 2711; X64-AVX: # %bb.0: 2712; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 2713; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2714; X64-AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2715; X64-AVX-NEXT: vmovss %xmm0, (%rax) 2716; X64-AVX-NEXT: retq 2717 %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 2718 %f = bitcast i32 %i to float 2719 %div = fdiv float %f, 0x400921FA00000000 2720 %s = bitcast float %div to i32 2721 store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4 2722 ret void 2723} 2724 2725define dso_local void @fdiv_64imm() nounwind { 2726; X86-NOSSE-LABEL: fdiv_64imm: 2727; X86-NOSSE: # %bb.0: 2728; X86-NOSSE-NEXT: pushl %ebp 2729; X86-NOSSE-NEXT: movl %esp, %ebp 2730; X86-NOSSE-NEXT: andl $-8, %esp 2731; X86-NOSSE-NEXT: subl $32, %esp 2732; X86-NOSSE-NEXT: fildll -559038737 2733; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2734; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2735; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2736; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2737; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 2738; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 2739; X86-NOSSE-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} 2740; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 2741; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2742; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2743; X86-NOSSE-NEXT: movl %eax, (%esp) 2744; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2745; X86-NOSSE-NEXT: fildll (%esp) 2746; X86-NOSSE-NEXT: fistpll -559038737 2747; X86-NOSSE-NEXT: movl %ebp, %esp 2748; X86-NOSSE-NEXT: popl %ebp 2749; X86-NOSSE-NEXT: 
retl 2750; 2751; X86-SSE1-LABEL: fdiv_64imm: 2752; X86-SSE1: # %bb.0: 2753; X86-SSE1-NEXT: pushl %ebp 2754; X86-SSE1-NEXT: movl %esp, %ebp 2755; X86-SSE1-NEXT: andl $-8, %esp 2756; X86-SSE1-NEXT: subl $16, %esp 2757; X86-SSE1-NEXT: xorps %xmm0, %xmm0 2758; X86-SSE1-NEXT: xorps %xmm1, %xmm1 2759; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 2760; X86-SSE1-NEXT: movss %xmm1, (%esp) 2761; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2762; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 2763; X86-SSE1-NEXT: fldl (%esp) 2764; X86-SSE1-NEXT: fdivs {{\.?LCPI[0-9]+_[0-9]+}} 2765; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 2766; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 2767; X86-SSE1-NEXT: movlps %xmm0, -559038737 2768; X86-SSE1-NEXT: movl %ebp, %esp 2769; X86-SSE1-NEXT: popl %ebp 2770; X86-SSE1-NEXT: retl 2771; 2772; X86-SSE2-LABEL: fdiv_64imm: 2773; X86-SSE2: # %bb.0: 2774; X86-SSE2-NEXT: pushl %ebp 2775; X86-SSE2-NEXT: movl %esp, %ebp 2776; X86-SSE2-NEXT: andl $-8, %esp 2777; X86-SSE2-NEXT: subl $8, %esp 2778; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2779; X86-SSE2-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2780; X86-SSE2-NEXT: movsd %xmm0, (%esp) 2781; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2782; X86-SSE2-NEXT: movlps %xmm0, -559038737 2783; X86-SSE2-NEXT: movl %ebp, %esp 2784; X86-SSE2-NEXT: popl %ebp 2785; X86-SSE2-NEXT: retl 2786; 2787; X86-AVX-LABEL: fdiv_64imm: 2788; X86-AVX: # %bb.0: 2789; X86-AVX-NEXT: pushl %ebp 2790; X86-AVX-NEXT: movl %esp, %ebp 2791; X86-AVX-NEXT: andl $-8, %esp 2792; X86-AVX-NEXT: subl $8, %esp 2793; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2794; X86-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2795; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2796; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2797; X86-AVX-NEXT: vmovlps %xmm0, -559038737 2798; X86-AVX-NEXT: movl %ebp, %esp 2799; X86-AVX-NEXT: popl %ebp 2800; X86-AVX-NEXT: retl 2801; 2802; X64-SSE-LABEL: fdiv_64imm: 2803; X64-SSE: # %bb.0: 2804; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 2805; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2806; X64-SSE-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2807; X64-SSE-NEXT: movsd %xmm0, (%rax) 2808; X64-SSE-NEXT: retq 2809; 2810; X64-AVX-LABEL: fdiv_64imm: 2811; X64-AVX: # %bb.0: 2812; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 2813; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2814; X64-AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2815; X64-AVX-NEXT: vmovsd %xmm0, (%rax) 2816; X64-AVX-NEXT: retq 2817 %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 2818 %f = bitcast i64 %i to double 2819 %div = fdiv double %f, 0x400921FA00000000 2820 %s = bitcast double %div to i64 2821 store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8 2822 ret void 2823} 2824 2825; Floating-point div to a stack location. 
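; The dividend is the constant 1.0, so the x87 paths can use fld1 while the SSE paths load
; 1.0 from the constant pool and divide by the value reloaded from the alloca.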
2826define dso_local void @fdiv_32stack() nounwind { 2827; X86-NOSSE-LABEL: fdiv_32stack: 2828; X86-NOSSE: # %bb.0: 2829; X86-NOSSE-NEXT: subl $12, %esp 2830; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2831; X86-NOSSE-NEXT: movl %eax, (%esp) 2832; X86-NOSSE-NEXT: fld1 2833; X86-NOSSE-NEXT: fdivs (%esp) 2834; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 2835; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2836; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 2837; X86-NOSSE-NEXT: addl $12, %esp 2838; X86-NOSSE-NEXT: retl 2839; 2840; X86-SSE1-LABEL: fdiv_32stack: 2841; X86-SSE1: # %bb.0: 2842; X86-SSE1-NEXT: subl $12, %esp 2843; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2844; X86-SSE1-NEXT: movl %eax, (%esp) 2845; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 2846; X86-SSE1-NEXT: divss (%esp), %xmm0 2847; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2848; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 2849; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) 2850; X86-SSE1-NEXT: addl $12, %esp 2851; X86-SSE1-NEXT: retl 2852; 2853; X86-SSE2-LABEL: fdiv_32stack: 2854; X86-SSE2: # %bb.0: 2855; X86-SSE2-NEXT: pushl %eax 2856; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 2857; X86-SSE2-NEXT: divss (%esp), %xmm0 2858; X86-SSE2-NEXT: movss %xmm0, (%esp) 2859; X86-SSE2-NEXT: popl %eax 2860; X86-SSE2-NEXT: retl 2861; 2862; X86-AVX-LABEL: fdiv_32stack: 2863; X86-AVX: # %bb.0: 2864; X86-AVX-NEXT: pushl %eax 2865; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 2866; X86-AVX-NEXT: vdivss (%esp), %xmm0, %xmm0 2867; X86-AVX-NEXT: vmovss %xmm0, (%esp) 2868; X86-AVX-NEXT: popl %eax 2869; X86-AVX-NEXT: retl 2870; 2871; X64-SSE-LABEL: fdiv_32stack: 2872; X64-SSE: # %bb.0: 2873; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 2874; X64-SSE-NEXT: divss -{{[0-9]+}}(%rsp), %xmm0 2875; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) 2876; X64-SSE-NEXT: retq 2877; 2878; X64-AVX-LABEL: fdiv_32stack: 2879; X64-AVX: # %bb.0: 2880; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 2881; X64-AVX-NEXT: vdivss -{{[0-9]+}}(%rsp), %xmm0, %xmm0 2882; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) 2883; X64-AVX-NEXT: retq 2884 %ptr = alloca i32, align 4 2885 %load = load atomic i32, ptr %ptr acquire, align 4 2886 %bc0 = bitcast i32 %load to float 2887 %fdiv = fdiv float 1.000000e+00, %bc0 2888 %bc1 = bitcast float %fdiv to i32 2889 store atomic i32 %bc1, ptr %ptr release, align 4 2890 ret void 2891} 2892 2893define dso_local void @fdiv_64stack() nounwind { 2894; X86-NOSSE-LABEL: fdiv_64stack: 2895; X86-NOSSE: # %bb.0: 2896; X86-NOSSE-NEXT: pushl %ebp 2897; X86-NOSSE-NEXT: movl %esp, %ebp 2898; X86-NOSSE-NEXT: andl $-8, %esp 2899; X86-NOSSE-NEXT: subl $40, %esp 2900; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) 2901; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2902; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2903; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2904; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2905; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 2906; X86-NOSSE-NEXT: fld1 2907; X86-NOSSE-NEXT: fdivl {{[0-9]+}}(%esp) 2908; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 2909; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2910; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 2911; X86-NOSSE-NEXT: movl %eax, (%esp) 2912; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2913; X86-NOSSE-NEXT: fildll (%esp) 2914; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 2915; X86-NOSSE-NEXT: movl %ebp, %esp 2916; X86-NOSSE-NEXT: popl %ebp 2917; X86-NOSSE-NEXT: retl 2918; 2919; X86-SSE1-LABEL: 
fdiv_64stack: 2920; X86-SSE1: # %bb.0: 2921; X86-SSE1-NEXT: pushl %ebp 2922; X86-SSE1-NEXT: movl %esp, %ebp 2923; X86-SSE1-NEXT: andl $-8, %esp 2924; X86-SSE1-NEXT: subl $24, %esp 2925; X86-SSE1-NEXT: xorps %xmm0, %xmm0 2926; X86-SSE1-NEXT: xorps %xmm1, %xmm1 2927; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 2928; X86-SSE1-NEXT: movss %xmm1, (%esp) 2929; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2930; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 2931; X86-SSE1-NEXT: fld1 2932; X86-SSE1-NEXT: fdivl (%esp) 2933; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 2934; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 2935; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 2936; X86-SSE1-NEXT: movl %ebp, %esp 2937; X86-SSE1-NEXT: popl %ebp 2938; X86-SSE1-NEXT: retl 2939; 2940; X86-SSE2-LABEL: fdiv_64stack: 2941; X86-SSE2: # %bb.0: 2942; X86-SSE2-NEXT: pushl %ebp 2943; X86-SSE2-NEXT: movl %esp, %ebp 2944; X86-SSE2-NEXT: andl $-8, %esp 2945; X86-SSE2-NEXT: subl $16, %esp 2946; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2947; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] 2948; X86-SSE2-NEXT: divsd %xmm0, %xmm1 2949; X86-SSE2-NEXT: movsd %xmm1, (%esp) 2950; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2951; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 2952; X86-SSE2-NEXT: movl %ebp, %esp 2953; X86-SSE2-NEXT: popl %ebp 2954; X86-SSE2-NEXT: retl 2955; 2956; X86-AVX-LABEL: fdiv_64stack: 2957; X86-AVX: # %bb.0: 2958; X86-AVX-NEXT: pushl %ebp 2959; X86-AVX-NEXT: movl %esp, %ebp 2960; X86-AVX-NEXT: andl $-8, %esp 2961; X86-AVX-NEXT: subl $16, %esp 2962; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2963; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] 2964; X86-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 2965; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 2966; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2967; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 2968; X86-AVX-NEXT: movl %ebp, %esp 2969; X86-AVX-NEXT: popl %ebp 2970; X86-AVX-NEXT: retl 2971; 2972; X64-SSE-LABEL: fdiv_64stack: 2973; X64-SSE: # %bb.0: 2974; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 2975; X64-SSE-NEXT: divsd -{{[0-9]+}}(%rsp), %xmm0 2976; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 2977; X64-SSE-NEXT: retq 2978; 2979; X64-AVX-LABEL: fdiv_64stack: 2980; X64-AVX: # %bb.0: 2981; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 2982; X64-AVX-NEXT: vdivsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 2983; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) 2984; X64-AVX-NEXT: retq 2985 %ptr = alloca i64, align 8 2986 %load = load atomic i64, ptr %ptr acquire, align 8 2987 %bc0 = bitcast i64 %load to double 2988 %fdiv = fdiv double 1.000000e+00, %bc0 2989 %bc1 = bitcast double %fdiv to i64 2990 store atomic i64 %bc1, ptr %ptr release, align 8 2991 ret void 2992} 2993 2994define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind { 2995; X86-NOSSE-LABEL: fdiv_array: 2996; X86-NOSSE: # %bb.0: # %bb 2997; X86-NOSSE-NEXT: pushl %ebp 2998; X86-NOSSE-NEXT: movl %esp, %ebp 2999; X86-NOSSE-NEXT: pushl %esi 3000; X86-NOSSE-NEXT: andl $-8, %esp 3001; X86-NOSSE-NEXT: subl $40, %esp 3002; X86-NOSSE-NEXT: movl 20(%ebp), %eax 3003; X86-NOSSE-NEXT: movl 8(%ebp), %ecx 3004; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) 3005; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 3006; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 3007; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 3008; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) 3009; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) 3010; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 3011; 
X86-NOSSE-NEXT: fdivl 12(%ebp) 3012; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 3013; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 3014; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 3015; X86-NOSSE-NEXT: movl %edx, (%esp) 3016; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) 3017; X86-NOSSE-NEXT: fildll (%esp) 3018; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) 3019; X86-NOSSE-NEXT: leal -4(%ebp), %esp 3020; X86-NOSSE-NEXT: popl %esi 3021; X86-NOSSE-NEXT: popl %ebp 3022; X86-NOSSE-NEXT: retl 3023; 3024; X86-SSE1-LABEL: fdiv_array: 3025; X86-SSE1: # %bb.0: # %bb 3026; X86-SSE1-NEXT: pushl %ebp 3027; X86-SSE1-NEXT: movl %esp, %ebp 3028; X86-SSE1-NEXT: andl $-8, %esp 3029; X86-SSE1-NEXT: subl $16, %esp 3030; X86-SSE1-NEXT: movl 20(%ebp), %eax 3031; X86-SSE1-NEXT: movl 8(%ebp), %ecx 3032; X86-SSE1-NEXT: xorps %xmm0, %xmm0 3033; X86-SSE1-NEXT: xorps %xmm1, %xmm1 3034; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 3035; X86-SSE1-NEXT: movss %xmm1, (%esp) 3036; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 3037; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 3038; X86-SSE1-NEXT: fldl (%esp) 3039; X86-SSE1-NEXT: fdivl 12(%ebp) 3040; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 3041; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 3042; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) 3043; X86-SSE1-NEXT: movl %ebp, %esp 3044; X86-SSE1-NEXT: popl %ebp 3045; X86-SSE1-NEXT: retl 3046; 3047; X86-SSE2-LABEL: fdiv_array: 3048; X86-SSE2: # %bb.0: # %bb 3049; X86-SSE2-NEXT: pushl %ebp 3050; X86-SSE2-NEXT: movl %esp, %ebp 3051; X86-SSE2-NEXT: andl $-8, %esp 3052; X86-SSE2-NEXT: subl $8, %esp 3053; X86-SSE2-NEXT: movl 20(%ebp), %eax 3054; X86-SSE2-NEXT: movl 8(%ebp), %ecx 3055; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3056; X86-SSE2-NEXT: divsd 12(%ebp), %xmm0 3057; X86-SSE2-NEXT: movsd %xmm0, (%esp) 3058; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3059; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) 3060; X86-SSE2-NEXT: movl %ebp, %esp 3061; X86-SSE2-NEXT: popl %ebp 3062; X86-SSE2-NEXT: retl 3063; 3064; X86-AVX-LABEL: fdiv_array: 3065; X86-AVX: # %bb.0: # %bb 3066; X86-AVX-NEXT: pushl %ebp 3067; X86-AVX-NEXT: movl %esp, %ebp 3068; X86-AVX-NEXT: andl $-8, %esp 3069; X86-AVX-NEXT: subl $8, %esp 3070; X86-AVX-NEXT: movl 20(%ebp), %eax 3071; X86-AVX-NEXT: movl 8(%ebp), %ecx 3072; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3073; X86-AVX-NEXT: vdivsd 12(%ebp), %xmm0, %xmm0 3074; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 3075; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3076; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) 3077; X86-AVX-NEXT: movl %ebp, %esp 3078; X86-AVX-NEXT: popl %ebp 3079; X86-AVX-NEXT: retl 3080; 3081; X64-SSE-LABEL: fdiv_array: 3082; X64-SSE: # %bb.0: # %bb 3083; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 3084; X64-SSE-NEXT: divsd %xmm0, %xmm1 3085; X64-SSE-NEXT: movsd %xmm1, (%rdi,%rsi,8) 3086; X64-SSE-NEXT: retq 3087; 3088; X64-AVX-LABEL: fdiv_array: 3089; X64-AVX: # %bb.0: # %bb 3090; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 3091; X64-AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 3092; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8) 3093; X64-AVX-NEXT: retq 3094bb: 3095 %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2 3096 %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8 3097 %tmp7 = bitcast i64 %tmp6 to double 3098 %tmp8 = fdiv double %tmp7, %arg1 3099 %tmp9 = bitcast double %tmp8 to i64 3100 store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8 3101 ret void 3102} 3103