1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx,+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA32 3; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx,-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMACALL32 4; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA64 5; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMACALL64 6; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+avx512f,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=AVX512 7; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL 8; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=bdver2 -mattr=-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA32 9; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=bdver2 -mattr=-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefixes=FMACALL32,FMACALL32_BDVER2 10 11define float @test_f32(float %a, float %b, float %c) #0 { 12; FMA32-LABEL: test_f32: 13; FMA32: ## %bb.0: 14; FMA32-NEXT: pushl %eax ## encoding: [0x50] 15; FMA32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 16; FMA32-NEXT: ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 17; FMA32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 18; FMA32-NEXT: ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 19; FMA32-NEXT: vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10] 20; FMA32-NEXT: ## xmm1 = (xmm0 * xmm1) + mem 21; FMA32-NEXT: vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24] 22; FMA32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] 23; FMA32-NEXT: popl %eax ## encoding: [0x58] 24; FMA32-NEXT: retl ## encoding: [0xc3] 25; 26; FMACALL32-LABEL: test_f32: 27; FMACALL32: ## %bb.0: 28; FMACALL32-NEXT: jmp _fmaf ## TAILCALL 29; 
FMACALL32-NEXT: ## encoding: [0xeb,A] 30; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 31; 32; FMA64-LABEL: test_f32: 33; FMA64: ## %bb.0: 34; FMA64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2] 35; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 36; FMA64-NEXT: retq ## encoding: [0xc3] 37; 38; FMACALL64-LABEL: test_f32: 39; FMACALL64: ## %bb.0: 40; FMACALL64-NEXT: jmp _fmaf ## TAILCALL 41; FMACALL64-NEXT: ## encoding: [0xeb,A] 42; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 43; 44; AVX512-LABEL: test_f32: 45; AVX512: ## %bb.0: 46; AVX512-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] 47; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 48; AVX512-NEXT: retq ## encoding: [0xc3] 49; 50; AVX512VL-LABEL: test_f32: 51; AVX512VL: ## %bb.0: 52; AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] 53; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 54; AVX512VL-NEXT: retq ## encoding: [0xc3] 55 %call = call float @llvm.fma.f32(float %a, float %b, float %c) 56 ret float %call 57} 58 59define float @test_f32_reassoc(float %a, float %b, float %c) #0 { 60; FMA32-LABEL: test_f32_reassoc: 61; FMA32: ## %bb.0: 62; FMA32-NEXT: pushl %eax ## encoding: [0x50] 63; FMA32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 64; FMA32-NEXT: ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 65; FMA32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 66; FMA32-NEXT: ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 67; FMA32-NEXT: vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10] 68; FMA32-NEXT: ## xmm1 = (xmm0 * xmm1) + mem 69; FMA32-NEXT: vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24] 70; FMA32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] 71; FMA32-NEXT: popl %eax ## encoding: [0x58] 72; FMA32-NEXT: retl ## encoding: [0xc3] 73; 74; FMACALL32-LABEL: 
test_f32_reassoc: 75; FMACALL32: ## %bb.0: 76; FMACALL32-NEXT: pushl %eax ## encoding: [0x50] 77; FMACALL32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 78; FMACALL32-NEXT: ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 79; FMACALL32-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0x44,0x24,0x0c] 80; FMACALL32-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x44,0x24,0x10] 81; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 82; FMACALL32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] 83; FMACALL32-NEXT: popl %eax ## encoding: [0x58] 84; FMACALL32-NEXT: retl ## encoding: [0xc3] 85; 86; FMA64-LABEL: test_f32_reassoc: 87; FMA64: ## %bb.0: 88; FMA64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2] 89; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 90; FMA64-NEXT: retq ## encoding: [0xc3] 91; 92; FMACALL64-LABEL: test_f32_reassoc: 93; FMACALL64: ## %bb.0: 94; FMACALL64-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1] 95; FMACALL64-NEXT: addss %xmm2, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc2] 96; FMACALL64-NEXT: retq ## encoding: [0xc3] 97; 98; AVX512-LABEL: test_f32_reassoc: 99; AVX512: ## %bb.0: 100; AVX512-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] 101; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 102; AVX512-NEXT: retq ## encoding: [0xc3] 103; 104; AVX512VL-LABEL: test_f32_reassoc: 105; AVX512VL: ## %bb.0: 106; AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] 107; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 108; AVX512VL-NEXT: retq ## encoding: [0xc3] 109 %call = call reassoc float @llvm.fma.f32(float %a, float %b, float %c) 110 ret float %call 111} 112 113define double @test_f64(double %a, double %b, double %c) #0 { 114; FMA32-LABEL: test_f64: 115; FMA32: ## %bb.0: ## %entry 116; FMA32-NEXT: subl $12, %esp ## encoding: 
[0x83,0xec,0x0c] 117; FMA32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 118; FMA32-NEXT: ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x10] 119; FMA32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 120; FMA32-NEXT: ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x18] 121; FMA32-NEXT: vfmadd213sd {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0xf9,0xa9,0x4c,0x24,0x20] 122; FMA32-NEXT: ## xmm1 = (xmm0 * xmm1) + mem 123; FMA32-NEXT: vmovsd %xmm1, (%esp) ## encoding: [0xc5,0xfb,0x11,0x0c,0x24] 124; FMA32-NEXT: fldl (%esp) ## encoding: [0xdd,0x04,0x24] 125; FMA32-NEXT: addl $12, %esp ## encoding: [0x83,0xc4,0x0c] 126; FMA32-NEXT: retl ## encoding: [0xc3] 127; 128; FMACALL32-LABEL: test_f64: 129; FMACALL32: ## %bb.0: ## %entry 130; FMACALL32-NEXT: jmp _fma ## TAILCALL 131; FMACALL32-NEXT: ## encoding: [0xeb,A] 132; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-1, kind: FK_PCRel_1 133; 134; FMA64-LABEL: test_f64: 135; FMA64: ## %bb.0: ## %entry 136; FMA64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa9,0xc2] 137; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 138; FMA64-NEXT: retq ## encoding: [0xc3] 139; 140; FMACALL64-LABEL: test_f64: 141; FMACALL64: ## %bb.0: ## %entry 142; FMACALL64-NEXT: jmp _fma ## TAILCALL 143; FMACALL64-NEXT: ## encoding: [0xeb,A] 144; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-1, kind: FK_PCRel_1 145; 146; AVX512-LABEL: test_f64: 147; AVX512: ## %bb.0: ## %entry 148; AVX512-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2] 149; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 150; AVX512-NEXT: retq ## encoding: [0xc3] 151; 152; AVX512VL-LABEL: test_f64: 153; AVX512VL: ## %bb.0: ## %entry 154; AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2] 155; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 156; AVX512VL-NEXT: retq ## encoding: [0xc3] 157entry: 158 %call = call double @llvm.fma.f64(double %a, double %b, double %c) 159 
ret double %call 160} 161 162define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) #0 { 163; FMA32-LABEL: test_f80: 164; FMA32: ## %bb.0: ## %entry 165; FMA32-NEXT: subl $60, %esp ## encoding: [0x83,0xec,0x3c] 166; FMA32-NEXT: fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x40] 167; FMA32-NEXT: fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x50] 168; FMA32-NEXT: fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x60] 169; FMA32-NEXT: fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x20] 170; FMA32-NEXT: fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x10] 171; FMA32-NEXT: fstpt (%esp) ## encoding: [0xdb,0x3c,0x24] 172; FMA32-NEXT: calll _fmal ## encoding: [0xe8,A,A,A,A] 173; FMA32-NEXT: ## fixup A - offset: 1, value: _fmal-4, kind: FK_PCRel_4 174; FMA32-NEXT: addl $60, %esp ## encoding: [0x83,0xc4,0x3c] 175; FMA32-NEXT: retl ## encoding: [0xc3] 176; 177; FMACALL32-LABEL: test_f80: 178; FMACALL32: ## %bb.0: ## %entry 179; FMACALL32-NEXT: subl $60, %esp ## encoding: [0x83,0xec,0x3c] 180; FMACALL32-NEXT: fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x40] 181; FMACALL32-NEXT: fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x50] 182; FMACALL32-NEXT: fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x60] 183; FMACALL32-NEXT: fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x20] 184; FMACALL32-NEXT: fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x10] 185; FMACALL32-NEXT: fstpt (%esp) ## encoding: [0xdb,0x3c,0x24] 186; FMACALL32-NEXT: calll _fmal ## encoding: [0xe8,A,A,A,A] 187; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmal-4, kind: FK_PCRel_4 188; FMACALL32-NEXT: addl $60, %esp ## encoding: [0x83,0xc4,0x3c] 189; FMACALL32-NEXT: retl ## encoding: [0xc3] 190; 191; FMA64-LABEL: test_f80: 192; FMA64: ## %bb.0: ## %entry 193; FMA64-NEXT: subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38] 194; FMA64-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40] 195; FMA64-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: 
[0xdb,0x6c,0x24,0x50] 196; FMA64-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60] 197; FMA64-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20] 198; FMA64-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10] 199; FMA64-NEXT: fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24] 200; FMA64-NEXT: callq _fmal ## encoding: [0xe8,A,A,A,A] 201; FMA64-NEXT: ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel 202; FMA64-NEXT: addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38] 203; FMA64-NEXT: retq ## encoding: [0xc3] 204; 205; FMACALL64-LABEL: test_f80: 206; FMACALL64: ## %bb.0: ## %entry 207; FMACALL64-NEXT: subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38] 208; FMACALL64-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40] 209; FMACALL64-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50] 210; FMACALL64-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60] 211; FMACALL64-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20] 212; FMACALL64-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10] 213; FMACALL64-NEXT: fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24] 214; FMACALL64-NEXT: callq _fmal ## encoding: [0xe8,A,A,A,A] 215; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel 216; FMACALL64-NEXT: addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38] 217; FMACALL64-NEXT: retq ## encoding: [0xc3] 218; 219; AVX512-LABEL: test_f80: 220; AVX512: ## %bb.0: ## %entry 221; AVX512-NEXT: subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38] 222; AVX512-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40] 223; AVX512-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50] 224; AVX512-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60] 225; AVX512-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20] 226; AVX512-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10] 227; AVX512-NEXT: fstpt (%rsp) ## encoding: 
[0xdb,0x3c,0x24] 228; AVX512-NEXT: callq _fmal ## encoding: [0xe8,A,A,A,A] 229; AVX512-NEXT: ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel 230; AVX512-NEXT: addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38] 231; AVX512-NEXT: retq ## encoding: [0xc3] 232; 233; AVX512VL-LABEL: test_f80: 234; AVX512VL: ## %bb.0: ## %entry 235; AVX512VL-NEXT: subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38] 236; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40] 237; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50] 238; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60] 239; AVX512VL-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20] 240; AVX512VL-NEXT: fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10] 241; AVX512VL-NEXT: fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24] 242; AVX512VL-NEXT: callq _fmal ## encoding: [0xe8,A,A,A,A] 243; AVX512VL-NEXT: ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel 244; AVX512VL-NEXT: addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38] 245; AVX512VL-NEXT: retq ## encoding: [0xc3] 246entry: 247 %call = call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) 248 ret x86_fp80 %call 249} 250 251define float @test_f32_cst() #0 { 252; FMA32-LABEL: test_f32_cst: 253; FMA32: ## %bb.0: ## %entry 254; FMA32-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A] 255; FMA32-NEXT: ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 256; FMA32-NEXT: retl ## encoding: [0xc3] 257; 258; FMACALL32-LABEL: test_f32_cst: 259; FMACALL32: ## %bb.0: ## %entry 260; FMACALL32-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A] 261; FMACALL32-NEXT: ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 262; FMACALL32-NEXT: retl ## encoding: [0xc3] 263; 264; FMA64-LABEL: test_f32_cst: 265; FMA64: ## %bb.0: ## %entry 266; FMA64-NEXT: vmovss {{.*#+}} xmm0 = 
[1.2E+1,0.0E+0,0.0E+0,0.0E+0] 267; FMA64-NEXT: ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] 268; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte 269; FMA64-NEXT: retq ## encoding: [0xc3] 270; 271; FMACALL64-LABEL: test_f32_cst: 272; FMACALL64: ## %bb.0: ## %entry 273; FMACALL64-NEXT: movss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0] 274; FMACALL64-NEXT: ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A] 275; FMACALL64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte 276; FMACALL64-NEXT: retq ## encoding: [0xc3] 277; 278; AVX512-LABEL: test_f32_cst: 279; AVX512: ## %bb.0: ## %entry 280; AVX512-NEXT: vmovss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0] 281; AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] 282; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte 283; AVX512-NEXT: retq ## encoding: [0xc3] 284; 285; AVX512VL-LABEL: test_f32_cst: 286; AVX512VL: ## %bb.0: ## %entry 287; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0] 288; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] 289; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte 290; AVX512VL-NEXT: retq ## encoding: [0xc3] 291entry: 292 %call = call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0) 293 ret float %call 294} 295 296define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { 297; FMA32-LABEL: test_v4f32: 298; FMA32: ## %bb.0: ## %entry 299; FMA32-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2] 300; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 301; FMA32-NEXT: retl ## encoding: [0xc3] 302; 303; FMA64-LABEL: test_v4f32: 304; FMA64: ## %bb.0: ## %entry 305; FMA64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2] 306; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 307; 
FMA64-NEXT: retq ## encoding: [0xc3] 308; 309; FMACALL64-LABEL: test_v4f32: 310; FMACALL64: ## %bb.0: ## %entry 311; FMACALL64-NEXT: subq $88, %rsp ## encoding: [0x48,0x83,0xec,0x58] 312; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 313; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x30] 314; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 315; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x10] 316; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 317; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20] 318; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 319; FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 320; FMACALL64-NEXT: shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff] 321; FMACALL64-NEXT: ## xmm1 = xmm1[3,3,3,3] 322; FMACALL64-NEXT: shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff] 323; FMACALL64-NEXT: ## xmm2 = xmm2[3,3,3,3] 324; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 325; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 326; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 327; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 328; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 329; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20] 330; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 331; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 332; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 333; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 334; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 335; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 336; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 337; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x30] 338; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 339; FMACALL64-NEXT: ## xmm2 = 
xmm2[1,1] 340; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 341; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 342; FMACALL64-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload 343; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x04,0x24] 344; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 345; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 346; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 347; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 348; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20] 349; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 350; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 351; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 352; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x30] 353; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 354; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 355; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 356; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x40] 357; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 358; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20] 359; FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 360; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 361; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 362; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 363; FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 364; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 365; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 366; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x30] 367; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 368; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 369; FMACALL64-NEXT: callq _fmaf 
## encoding: [0xe8,A,A,A,A] 370; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 371; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 372; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40] 373; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] 374; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 375; FMACALL64-NEXT: unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload 376; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x0c,0x24] 377; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] 378; FMACALL64-NEXT: movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1] 379; FMACALL64-NEXT: addq $88, %rsp ## encoding: [0x48,0x83,0xc4,0x58] 380; FMACALL64-NEXT: retq ## encoding: [0xc3] 381; 382; AVX512-LABEL: test_v4f32: 383; AVX512: ## %bb.0: ## %entry 384; AVX512-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2] 385; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 386; AVX512-NEXT: retq ## encoding: [0xc3] 387; 388; AVX512VL-LABEL: test_v4f32: 389; AVX512VL: ## %bb.0: ## %entry 390; AVX512VL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2] 391; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 392; AVX512VL-NEXT: retq ## encoding: [0xc3] 393; 394; FMACALL32_BDVER2-LABEL: test_v4f32: 395; FMACALL32_BDVER2: ## %bb.0: ## %entry 396; FMACALL32_BDVER2-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] 397; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 398; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40] 399; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 400; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30] 401; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 402; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20] 403; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) 
## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x08,0x02] 404; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x4c,0x24,0x04,0x02] 405; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] 406; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 407; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 408; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 409; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x60] 410; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 411; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] 412; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 413; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30] 414; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 415; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20] 416; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 417; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] 418; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] 419; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 420; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 421; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 422; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x54] 423; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 424; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] 425; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] 426; 
FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 427; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] 428; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 429; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 430; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] 431; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 432; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 433; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 434; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 435; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] 436; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 437; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30] 438; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 439; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20] 440; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 441; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03] 442; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03] 443; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c] 444; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 445; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x54] 446; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18] 447; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 448; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x60] 449; FMACALL32_BDVER2-NEXT: fstps 
{{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14] 450; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 451; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 452; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10] 453; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 454; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x1c] 455; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x18,0x10] 456; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] 457; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x14,0x20] 458; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] 459; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x10,0x30] 460; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] 461; FMACALL32_BDVER2-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] 462; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] 463entry: 464 %call = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) 465 ret <4 x float> %call 466} 467 468define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #0 { 469; FMA32-LABEL: test_v8f32: 470; FMA32: ## %bb.0: ## %entry 471; FMA32-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2] 472; FMA32-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 473; FMA32-NEXT: retl ## encoding: [0xc3] 474; 475; FMA64-LABEL: test_v8f32: 476; FMA64: ## %bb.0: ## %entry 477; FMA64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2] 478; FMA64-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 479; FMA64-NEXT: retq ## encoding: [0xc3] 480; 481; FMACALL64-LABEL: test_v8f32: 482; FMACALL64: ## %bb.0: ## %entry 483; FMACALL64-NEXT: subq $136, %rsp ## encoding: 
[0x48,0x81,0xec,0x88,0x00,0x00,0x00] 484; FMACALL64-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 485; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x6c,0x24,0x50] 486; FMACALL64-NEXT: movaps %xmm4, (%rsp) ## 16-byte Spill 487; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x24,0x24] 488; FMACALL64-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 489; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x5c,0x24,0x40] 490; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 491; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x60] 492; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 493; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x30] 494; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 495; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10] 496; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 497; FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 498; FMACALL64-NEXT: movaps %xmm2, %xmm1 ## encoding: [0x0f,0x28,0xca] 499; FMACALL64-NEXT: shufps $255, %xmm2, %xmm1 ## encoding: [0x0f,0xc6,0xca,0xff] 500; FMACALL64-NEXT: ## xmm1 = xmm1[3,3],xmm2[3,3] 501; FMACALL64-NEXT: movaps %xmm4, %xmm2 ## encoding: [0x0f,0x28,0xd4] 502; FMACALL64-NEXT: shufps $255, %xmm4, %xmm2 ## encoding: [0x0f,0xc6,0xd4,0xff] 503; FMACALL64-NEXT: ## xmm2 = xmm2[3,3],xmm4[3,3] 504; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 505; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 506; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 507; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20] 508; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 509; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x10] 510; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 511; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 512; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 
513; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60] 514; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 515; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 516; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload 517; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24] 518; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 519; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 520; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 521; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 522; FMACALL64-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload 523; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x44,0x24,0x20] 524; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 525; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 526; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x70] 527; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 528; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x10] 529; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 530; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60] 531; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload 532; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24] 533; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 534; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 535; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 536; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20] 537; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 538; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x10] 539; FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 540; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 541; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 542; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60] 543; 
FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 544; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 545; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload 546; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24] 547; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 548; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 549; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 550; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 551; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 552; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 553; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] 554; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 555; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload 556; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x70] 557; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] 558; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 559; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x20] 560; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 561; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 562; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 563; FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 564; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 565; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40] 566; FMACALL64-NEXT: shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff] 567; FMACALL64-NEXT: ## xmm1 = xmm1[3,3,3,3] 568; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 569; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x50] 570; FMACALL64-NEXT: shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff] 571; FMACALL64-NEXT: ## xmm2 = xmm2[3,3,3,3] 572; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 573; 
FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 574; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 575; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 576; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 577; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 578; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 579; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 580; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 581; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40] 582; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 583; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 584; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 585; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x50] 586; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 587; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 588; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 589; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 590; FMACALL64-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload 591; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x04,0x24] 592; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 593; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 594; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 595; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 596; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 597; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 598; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40] 599; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 600; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x50] 601; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 602; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 603; 
FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 604; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10] 605; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 606; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 607; FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 608; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 609; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 610; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40] 611; FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 612; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 613; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 614; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x50] 615; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 616; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 617; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 618; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 619; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 620; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 621; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] 622; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 623; FMACALL64-NEXT: unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload 624; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x0c,0x24] 625; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] 626; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 627; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20] 628; FMACALL64-NEXT: addq $136, %rsp ## encoding: [0x48,0x81,0xc4,0x88,0x00,0x00,0x00] 629; FMACALL64-NEXT: retq ## encoding: [0xc3] 630; 631; AVX512-LABEL: test_v8f32: 632; AVX512: ## %bb.0: ## %entry 633; AVX512-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2] 634; AVX512-NEXT: ## ymm0 = (ymm1 * 
ymm0) + ymm2 635; AVX512-NEXT: retq ## encoding: [0xc3] 636; 637; AVX512VL-LABEL: test_v8f32: 638; AVX512VL: ## %bb.0: ## %entry 639; AVX512VL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2] 640; AVX512VL-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 641; AVX512VL-NEXT: retq ## encoding: [0xc3] 642; 643; FMACALL32_BDVER2-LABEL: test_v8f32: 644; FMACALL32_BDVER2: ## %bb.0: ## %entry 645; FMACALL32_BDVER2-NEXT: subl $284, %esp ## encoding: [0x81,0xec,0x1c,0x01,0x00,0x00] 646; FMACALL32_BDVER2-NEXT: ## imm = 0x11C 647; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01] 648; FMACALL32_BDVER2-NEXT: vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 649; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xf0,0x00,0x00,0x00] 650; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01] 651; FMACALL32_BDVER2-NEXT: vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 652; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xd0,0x00,0x00,0x00] 653; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] 654; FMACALL32_BDVER2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 655; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0xb0,0x00,0x00,0x00] 656; FMACALL32_BDVER2-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 657; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x50] 658; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x5c,0x24,0x08,0x02] 659; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 660; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40] 661; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] 662; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 663; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30] 664; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] 665; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 666; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 667; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 668; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 669; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xa4,0x00,0x00,0x00] 670; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 671; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] 672; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 673; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x40] 674; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 675; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x30] 676; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 677; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] 678; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] 679; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 680; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 681; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 682; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x98,0x00,0x00,0x00] 683; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 684; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] 685; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: 
[0xc5,0xfa,0x11,0x44,0x24,0x08] 686; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 687; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] 688; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 689; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 690; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] 691; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 692; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 693; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 694; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 695; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x8c,0x00,0x00,0x00] 696; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 697; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00] 698; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 699; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 700; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00] 701; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] 702; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 703; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00] 704; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] 705; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 706; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 707; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 708; FMACALL32_BDVER2-NEXT: fstpt 
{{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 709; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00] 710; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 711; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00] 712; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] 713; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 714; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00] 715; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] 716; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 717; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00] 718; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] 719; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 720; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 721; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 722; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 723; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x74] 724; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 725; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00] 726; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 727; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 728; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00] 729; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] 730; FMACALL32_BDVER2-NEXT: vmovups 
{{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 731; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00] 732; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] 733; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 734; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 735; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 736; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 737; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x68] 738; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 739; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00] 740; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] 741; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 742; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00] 743; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 744; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 745; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00] 746; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 747; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 748; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 749; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 750; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 751; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] 752; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 753; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x40] 754; FMACALL32_BDVER2-NEXT: 
vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 755; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x30] 756; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 757; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03] 758; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03] 759; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c] 760; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 761; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x68] 762; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28] 763; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 764; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x74] 765; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24] 766; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 767; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00] 768; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20] 769; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 770; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x8c,0x00,0x00,0x00] 771; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c] 772; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 773; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x98,0x00,0x00,0x00] 774; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18] 775; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 776; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xa4,0x00,0x00,0x00] 777; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: 
[0xd9,0x5c,0x24,0x14] 778; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 779; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 780; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10] 781; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 782; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x2c] 783; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x28,0x10] 784; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] 785; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 786; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x1c] 787; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x18,0x10] 788; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] 789; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x24,0x20] 790; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] 791; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x14,0x20] 792; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] 793; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x20,0x30] 794; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] 795; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x10,0x30] 796; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] 797; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 798; FMACALL32_BDVER2-NEXT: addl $284, %esp ## encoding: [0x81,0xc4,0x1c,0x01,0x00,0x00] 799; FMACALL32_BDVER2-NEXT: ## imm = 0x11C 800; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] 801entry: 
802 %call = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) 803 ret <8 x float> %call 804} 805 806define <16 x float> @test_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) #0 { 807; FMA32-LABEL: test_v16f32: 808; FMA32: ## %bb.0: ## %entry 809; FMA32-NEXT: pushl %ebp ## encoding: [0x55] 810; FMA32-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] 811; FMA32-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] 812; FMA32-NEXT: subl $32, %esp ## encoding: [0x83,0xec,0x20] 813; FMA32-NEXT: vfmadd213ps 8(%ebp), %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0x45,0x08] 814; FMA32-NEXT: ## ymm0 = (ymm2 * ymm0) + mem 815; FMA32-NEXT: vfmadd213ps 40(%ebp), %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0x65,0xa8,0x4d,0x28] 816; FMA32-NEXT: ## ymm1 = (ymm3 * ymm1) + mem 817; FMA32-NEXT: movl %ebp, %esp ## encoding: [0x89,0xec] 818; FMA32-NEXT: popl %ebp ## encoding: [0x5d] 819; FMA32-NEXT: retl ## encoding: [0xc3] 820; 821; FMA64-LABEL: test_v16f32: 822; FMA64: ## %bb.0: ## %entry 823; FMA64-NEXT: vfmadd213ps %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0xc4] 824; FMA64-NEXT: ## ymm0 = (ymm2 * ymm0) + ymm4 825; FMA64-NEXT: vfmadd213ps %ymm5, %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0x65,0xa8,0xcd] 826; FMA64-NEXT: ## ymm1 = (ymm3 * ymm1) + ymm5 827; FMA64-NEXT: retq ## encoding: [0xc3] 828; 829; FMACALL64-LABEL: test_v16f32: 830; FMACALL64: ## %bb.0: ## %entry 831; FMACALL64-NEXT: subq $168, %rsp ## encoding: [0x48,0x81,0xec,0xa8,0x00,0x00,0x00] 832; FMACALL64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 833; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0xbc,0x24,0x80,0x00,0x00,0x00] 834; FMACALL64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 835; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x74,0x24,0x20] 836; FMACALL64-NEXT: movaps %xmm5, (%rsp) ## 16-byte Spill 837; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x2c,0x24] 838; FMACALL64-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 839; FMACALL64-NEXT: 
## encoding: [0x0f,0x29,0x64,0x24,0x10] 840; FMACALL64-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 841; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x5c,0x24,0x70] 842; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 843; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x40] 844; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 845; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x30] 846; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 847; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x50] 848; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] 849; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 850; FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 851; FMACALL64-NEXT: movaps %xmm4, %xmm1 ## encoding: [0x0f,0x28,0xcc] 852; FMACALL64-NEXT: shufps $255, %xmm4, %xmm1 ## encoding: [0x0f,0xc6,0xcc,0xff] 853; FMACALL64-NEXT: ## xmm1 = xmm1[3,3],xmm4[3,3] 854; FMACALL64-NEXT: shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff] 855; FMACALL64-NEXT: ## xmm2 = xmm2[3,3,3,3] 856; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 857; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 858; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 859; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x60] 860; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 861; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50] 862; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 863; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 864; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 865; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 866; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 867; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 868; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), 
%xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] 869; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 870; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 871; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 872; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 873; FMACALL64-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload 874; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x44,0x24,0x60] 875; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 876; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 877; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x84,0x24,0x90,0x00,0x00,0x00] 878; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 879; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50] 880; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 881; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 882; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] 883; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 884; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 885; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 886; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x60] 887; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 888; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50] 889; FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 890; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 891; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 892; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 893; FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 894; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 895; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: 
[0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] 896; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 897; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 898; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 899; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 900; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 901; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60] 902; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] 903; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 904; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload 905; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x8c,0x24,0x90,0x00,0x00,0x00] 906; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] 907; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 908; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x60] 909; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 910; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 911; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 912; FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 913; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 914; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 915; FMACALL64-NEXT: shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff] 916; FMACALL64-NEXT: ## xmm1 = xmm1[3,3,3,3] 917; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 918; FMACALL64-NEXT: shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff] 919; FMACALL64-NEXT: ## xmm2 = xmm2[3,3,3,3] 920; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 921; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 922; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 923; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10] 924; 
FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 925; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 926; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 927; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 928; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 929; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 930; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 931; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 932; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 933; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 934; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 935; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 936; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 937; FMACALL64-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload 938; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x44,0x24,0x10] 939; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 940; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 941; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x50] 942; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 943; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 944; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 945; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 946; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 947; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 948; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 949; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 950; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10] 951; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 952; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 953; 
FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 954; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 955; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 956; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 957; FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 958; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 959; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 960; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 961; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 962; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 963; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 964; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 965; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 966; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] 967; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 968; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload 969; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x50] 970; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] 971; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 972; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x10] 973; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 974; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40] 975; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 976; FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 977; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 978; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 979; FMACALL64-NEXT: shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff] 980; FMACALL64-NEXT: ## xmm1 = xmm1[3,3,3,3] 981; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00] 
982; FMACALL64-NEXT: shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff] 983; FMACALL64-NEXT: ## xmm2 = xmm2[3,3,3,3] 984; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 985; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 986; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 987; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 988; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 989; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40] 990; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 991; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 992; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 993; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 994; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 995; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 996; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00] 997; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 998; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 999; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1000; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1001; FMACALL64-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload 1002; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x04,0x24] 1003; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1004; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1005; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x30] 1006; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1007; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40] 1008; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1009; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 1010; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00] 1011; 
FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1012; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1013; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 1014; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 1015; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1016; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40] 1017; FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 1018; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 1019; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1020; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 1021; FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 1022; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 1023; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00] 1024; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 1025; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 1026; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1027; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1028; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 1029; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 1030; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] 1031; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1032; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload 1033; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x30] 1034; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] 1035; FMACALL64-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill 1036; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x0c,0x24] 1037; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1038; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x70] 1039; FMACALL64-NEXT: shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff] 1040; 
FMACALL64-NEXT: ## xmm0 = xmm0[3,3,3,3] 1041; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1042; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00] 1043; FMACALL64-NEXT: shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff] 1044; FMACALL64-NEXT: ## xmm1 = xmm1[3,3,3,3] 1045; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00] 1046; FMACALL64-NEXT: shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff] 1047; FMACALL64-NEXT: ## xmm2 = xmm2[3,3,3,3] 1048; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1049; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1050; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1051; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20] 1052; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1053; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x70] 1054; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1055; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1056; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1057; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00] 1058; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1059; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1060; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00] 1061; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1062; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1063; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1064; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1065; FMACALL64-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload 1066; FMACALL64-NEXT: ## encoding: [0x0f,0x14,0x44,0x24,0x20] 1067; FMACALL64-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1068; FMACALL64-NEXT: 
movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1069; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20] 1070; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1071; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x70] 1072; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1073; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00] 1074; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00] 1075; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1076; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1077; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1078; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x40] 1079; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1080; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x70] 1081; FMACALL64-NEXT: shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55] 1082; FMACALL64-NEXT: ## xmm0 = xmm0[1,1,1,1] 1083; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1084; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00] 1085; FMACALL64-NEXT: shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55] 1086; FMACALL64-NEXT: ## xmm1 = xmm1[1,1,1,1] 1087; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00] 1088; FMACALL64-NEXT: shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55] 1089; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,1,1] 1090; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] 1091; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel 1092; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload 1093; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x5c,0x24,0x40] 1094; FMACALL64-NEXT: unpcklps %xmm0, %xmm3 ## encoding: [0x0f,0x14,0xd8] 1095; FMACALL64-NEXT: 
## xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 1096; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Folded Reload 1097; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x5c,0x24,0x20] 1098; FMACALL64-NEXT: ## xmm3 = xmm3[0],mem[0] 1099; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1100; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x60] 1101; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1102; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 1103; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload 1104; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24] 1105; FMACALL64-NEXT: addq $168, %rsp ## encoding: [0x48,0x81,0xc4,0xa8,0x00,0x00,0x00] 1106; FMACALL64-NEXT: retq ## encoding: [0xc3] 1107; 1108; AVX512-LABEL: test_v16f32: 1109; AVX512: ## %bb.0: ## %entry 1110; AVX512-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2] 1111; AVX512-NEXT: ## zmm0 = (zmm1 * zmm0) + zmm2 1112; AVX512-NEXT: retq ## encoding: [0xc3] 1113; 1114; AVX512VL-LABEL: test_v16f32: 1115; AVX512VL: ## %bb.0: ## %entry 1116; AVX512VL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2] 1117; AVX512VL-NEXT: ## zmm0 = (zmm1 * zmm0) + zmm2 1118; AVX512VL-NEXT: retq ## encoding: [0xc3] 1119; 1120; FMACALL32_BDVER2-LABEL: test_v16f32: 1121; FMACALL32_BDVER2: ## %bb.0: ## %entry 1122; FMACALL32_BDVER2-NEXT: pushl %ebp ## encoding: [0x55] 1123; FMACALL32_BDVER2-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] 1124; FMACALL32_BDVER2-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] 1125; FMACALL32_BDVER2-NEXT: subl $448, %esp ## encoding: [0x81,0xec,0xc0,0x01,0x00,0x00] 1126; FMACALL32_BDVER2-NEXT: ## imm = 0x1C0 1127; FMACALL32_BDVER2-NEXT: vmovaps 56(%ebp), %xmm4 ## encoding: [0xc5,0xf8,0x28,0x65,0x38] 1128; FMACALL32_BDVER2-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1129; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xfc,0x29,0x94,0x24,0x60,0x01,0x00,0x00] 1130; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01] 1131; FMACALL32_BDVER2-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1132; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x4c,0x24,0x60] 1133; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc9,0x01] 1134; FMACALL32_BDVER2-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1135; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0x80,0x00,0x00,0x00] 1136; FMACALL32_BDVER2-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1137; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0x80,0x01,0x00,0x00] 1138; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1139; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0xc0,0x00,0x00,0x00] 1140; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] 1141; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1142; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x8c,0x24,0xb0,0x00,0x00,0x00] 1143; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] 1144; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm4, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x64,0x24,0x08,0x02] 1145; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1146; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1147; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1148; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1149; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x54,0x01,0x00,0x00] 1150; FMACALL32_BDVER2-NEXT: vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38] 1151; FMACALL32_BDVER2-NEXT: vmovaps 
{{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 1152; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 1153; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 1154; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xb0,0x00,0x00,0x00] 1155; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 1156; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] 1157; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] 1158; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1159; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1160; FMACALL32_BDVER2-NEXT: vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38] 1161; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1162; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x48,0x01,0x00,0x00] 1163; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] 1164; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1165; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] 1166; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 1167; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1168; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00] 1169; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 1170; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1171; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1172; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28] 1173; 
FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1174; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x3c,0x01,0x00,0x00] 1175; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 1176; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1177; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] 1178; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] 1179; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1180; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] 1181; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] 1182; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1183; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1184; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1185; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28] 1186; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1187; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x30,0x01,0x00,0x00] 1188; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] 1189; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1190; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] 1191; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] 1192; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1193; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] 1194; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: 
[0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] 1195; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1196; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1197; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1198; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28] 1199; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1200; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x24,0x01,0x00,0x00] 1201; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 1202; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1203; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] 1204; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] 1205; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1206; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] 1207; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] 1208; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1209; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1210; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1211; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28] 1212; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1213; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x18,0x01,0x00,0x00] 1214; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] 1215; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1216; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] 1217; FMACALL32_BDVER2-NEXT: 
vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 1218; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1219; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] 1220; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 1221; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1222; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1223; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1224; FMACALL32_BDVER2-NEXT: vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18] 1225; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1226; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x0c,0x01,0x00,0x00] 1227; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 1228; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1229; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] 1230; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 1231; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1232; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x80,0x00,0x00,0x00] 1233; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] 1234; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1235; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] 1236; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 1237; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1238; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60] 1239; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, 
(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] 1240; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1241; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1242; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1243; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1244; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x00,0x01,0x00,0x00] 1245; FMACALL32_BDVER2-NEXT: vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18] 1246; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 1247; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0x80,0x00,0x00,0x00] 1248; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 1249; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x60] 1250; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] 1251; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] 1252; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] 1253; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1254; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1255; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1256; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xf4,0x00,0x00,0x00] 1257; FMACALL32_BDVER2-NEXT: vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18] 1258; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 1259; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0x80,0x00,0x00,0x00] 1260; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 1261; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x60] 1262; 
FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 1263; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] 1264; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] 1265; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1266; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1267; FMACALL32_BDVER2-NEXT: vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18] 1268; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1269; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xe8,0x00,0x00,0x00] 1270; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] 1271; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1272; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] 1273; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 1274; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1275; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] 1276; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] 1277; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1278; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1279; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08] 1280; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1281; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00] 1282; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 1283; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 
## 32-byte Reload 1284; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] 1285; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] 1286; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1287; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] 1288; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] 1289; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1290; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1291; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1292; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08] 1293; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1294; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x60] 1295; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] 1296; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1297; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] 1298; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] 1299; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1300; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] 1301; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] 1302; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1303; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1304; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1305; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %xmm0 ## encoding: 
[0xc5,0xf8,0x28,0x45,0x08] 1306; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1307; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xdc,0x00,0x00,0x00] 1308; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] 1309; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1310; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] 1311; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] 1312; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1313; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] 1314; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] 1315; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1316; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1317; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1318; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08] 1319; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1320; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xd0,0x00,0x00,0x00] 1321; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] 1322; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1323; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] 1324; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] 1325; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1326; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] 1327; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## 
encoding: [0xc5,0xfa,0x11,0x04,0x24] 1328; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1329; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1330; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1331; FMACALL32_BDVER2-NEXT: vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38] 1332; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload 1333; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 1334; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload 1335; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xb0,0x00,0x00,0x00] 1336; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] 1337; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03] 1338; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03] 1339; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c] 1340; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1341; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xd0,0x00,0x00,0x00] 1342; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38] 1343; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1344; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xdc,0x00,0x00,0x00] 1345; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34] 1346; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1347; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x60] 1348; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30] 1349; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1350; FMACALL32_BDVER2-NEXT: ## 
encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00] 1351; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c] 1352; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1353; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xe8,0x00,0x00,0x00] 1354; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28] 1355; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1356; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xf4,0x00,0x00,0x00] 1357; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24] 1358; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1359; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x00,0x01,0x00,0x00] 1360; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20] 1361; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1362; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x0c,0x01,0x00,0x00] 1363; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x5c] 1364; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1365; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x18,0x01,0x00,0x00] 1366; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x58] 1367; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1368; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x24,0x01,0x00,0x00] 1369; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x54] 1370; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1371; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x30,0x01,0x00,0x00] 1372; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x50] 1373; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1374; FMACALL32_BDVER2-NEXT: ## 
encoding: [0xdb,0xac,0x24,0x3c,0x01,0x00,0x00] 1375; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x4c] 1376; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1377; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x48,0x01,0x00,0x00] 1378; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x48] 1379; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1380; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x54,0x01,0x00,0x00] 1381; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x44] 1382; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] 1383; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 1384; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x40] 1385; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1386; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x3c] 1387; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x38,0x10] 1388; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] 1389; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1390; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x2c] 1391; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x28,0x10] 1392; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] 1393; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1394; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x54,0x24,0x4c] 1395; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x48,0x10] 1396; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0],mem[0],xmm2[2,3] 1397; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: 
[0xc4,0xe3,0x79,0x21,0x44,0x24,0x34,0x20] 1398; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] 1399; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x24,0x20] 1400; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] 1401; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x44,0x20] 1402; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0,1],mem[0],xmm2[3] 1403; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x30,0x30] 1404; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] 1405; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x20,0x30] 1406; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] 1407; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x40,0x30] 1408; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0,1,2],mem[0] 1409; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 1410; FMACALL32_BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1411; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x5c] 1412; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x58,0x10] 1413; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] 1414; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x54,0x20] 1415; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] 1416; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x50,0x30] 1417; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] 1418; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: 
[0xc4,0xe3,0x75,0x18,0xca,0x01] 1419; FMACALL32_BDVER2-NEXT: movl %ebp, %esp ## encoding: [0x89,0xec] 1420; FMACALL32_BDVER2-NEXT: popl %ebp ## encoding: [0x5d] 1421; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] 1422entry: 1423 %call = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) 1424 ret <16 x float> %call 1425} 1426 1427define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { 1428; FMA32-LABEL: test_v2f64: 1429; FMA32: ## %bb.0: 1430; FMA32-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1431; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1432; FMA32-NEXT: retl ## encoding: [0xc3] 1433; 1434; FMA64-LABEL: test_v2f64: 1435; FMA64: ## %bb.0: 1436; FMA64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1437; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1438; FMA64-NEXT: retq ## encoding: [0xc3] 1439; 1440; FMACALL64-LABEL: test_v2f64: 1441; FMACALL64: ## %bb.0: 1442; FMACALL64-NEXT: subq $72, %rsp ## encoding: [0x48,0x83,0xec,0x48] 1443; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1444; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x20] 1445; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1446; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x10] 1447; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 1448; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 1449; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1450; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1451; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1452; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x30] 1453; FMACALL64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload 1454; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x04,0x24] 1455; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1456; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 
1457; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1458; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 1459; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1460; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1461; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 1462; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x20] 1463; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1464; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1465; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1466; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1467; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1468; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x30] 1469; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] 1470; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] 1471; FMACALL64-NEXT: movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1] 1472; FMACALL64-NEXT: addq $72, %rsp ## encoding: [0x48,0x83,0xc4,0x48] 1473; FMACALL64-NEXT: retq ## encoding: [0xc3] 1474; 1475; AVX512-LABEL: test_v2f64: 1476; AVX512: ## %bb.0: 1477; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1478; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1479; AVX512-NEXT: retq ## encoding: [0xc3] 1480; 1481; AVX512VL-LABEL: test_v2f64: 1482; AVX512VL: ## %bb.0: 1483; AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1484; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1485; AVX512VL-NEXT: retq ## encoding: [0xc3] 1486; 1487; FMACALL32_BDVER2-LABEL: test_v2f64: 1488; FMACALL32_BDVER2: ## %bb.0: 1489; FMACALL32_BDVER2-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] 1490; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1491; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] 1492; FMACALL32_BDVER2-NEXT: 
vmovlhps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc1] 1493; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm1[0] 1494; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1495; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x30] 1496; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1497; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40] 1498; FMACALL32_BDVER2-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x54,0x24,0x10] 1499; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1500; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1501; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1502; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1503; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] 1504; FMACALL32_BDVER2-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10] 1505; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1506; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] 1507; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 1508; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x58] 1509; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] 1510; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1511; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] 1512; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1513; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1514; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] 1515; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1516; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xfb,0x10,0x44,0x24,0x28] 1517; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x20] 1518; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0,1] 1519; FMACALL32_BDVER2-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] 1520; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] 1521 %call = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) 1522 ret <2 x double> %call 1523} 1524 1525define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { 1526; FMA32-LABEL: test_v2f64_reassoc: 1527; FMA32: ## %bb.0: 1528; FMA32-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1529; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1530; FMA32-NEXT: retl ## encoding: [0xc3] 1531; 1532; FMACALL32-LABEL: test_v2f64_reassoc: 1533; FMACALL32: ## %bb.0: 1534; FMACALL32-NEXT: vmulpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x59,0xc1] 1535; FMACALL32-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc2] 1536; FMACALL32-NEXT: retl ## encoding: [0xc3] 1537; 1538; FMA64-LABEL: test_v2f64_reassoc: 1539; FMA64: ## %bb.0: 1540; FMA64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1541; FMA64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1542; FMA64-NEXT: retq ## encoding: [0xc3] 1543; 1544; FMACALL64-LABEL: test_v2f64_reassoc: 1545; FMACALL64: ## %bb.0: 1546; FMACALL64-NEXT: mulpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x59,0xc1] 1547; FMACALL64-NEXT: addpd %xmm2, %xmm0 ## encoding: [0x66,0x0f,0x58,0xc2] 1548; FMACALL64-NEXT: retq ## encoding: [0xc3] 1549; 1550; AVX512-LABEL: test_v2f64_reassoc: 1551; AVX512: ## %bb.0: 1552; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1553; AVX512-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1554; AVX512-NEXT: retq ## encoding: [0xc3] 1555; 1556; AVX512VL-LABEL: test_v2f64_reassoc: 1557; AVX512VL: ## %bb.0: 1558; AVX512VL-NEXT: vfmadd213pd %xmm2, 
%xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] 1559; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 1560; AVX512VL-NEXT: retq ## encoding: [0xc3] 1561 %call = call reassoc <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) 1562 ret <2 x double> %call 1563} 1564 1565define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { 1566; FMA32-LABEL: test_v4f64: 1567; FMA32: ## %bb.0: ## %entry 1568; FMA32-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] 1569; FMA32-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 1570; FMA32-NEXT: retl ## encoding: [0xc3] 1571; 1572; FMA64-LABEL: test_v4f64: 1573; FMA64: ## %bb.0: ## %entry 1574; FMA64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] 1575; FMA64-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 1576; FMA64-NEXT: retq ## encoding: [0xc3] 1577; 1578; FMACALL64-LABEL: test_v4f64: 1579; FMACALL64: ## %bb.0: ## %entry 1580; FMACALL64-NEXT: subq $120, %rsp ## encoding: [0x48,0x83,0xec,0x78] 1581; FMACALL64-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1582; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x6c,0x24,0x40] 1583; FMACALL64-NEXT: movaps %xmm4, (%rsp) ## 16-byte Spill 1584; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x24,0x24] 1585; FMACALL64-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1586; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x5c,0x24,0x30] 1587; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1588; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x60] 1589; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1590; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x20] 1591; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1592; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x50] 1593; FMACALL64-NEXT: movaps %xmm2, %xmm1 ## encoding: [0x0f,0x28,0xca] 1594; FMACALL64-NEXT: movaps %xmm4, %xmm2 ## 
encoding: [0x0f,0x28,0xd4] 1595; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1596; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1597; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1598; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10] 1599; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1600; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50] 1601; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1602; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1603; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1604; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x60] 1605; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1606; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1607; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload 1608; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24] 1609; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1610; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1611; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1612; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1613; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1614; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 1615; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] 1616; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] 1617; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1618; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x10] 1619; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1620; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20] 1621; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1622; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x30] 1623; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 1624; 
FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x40] 1625; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1626; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1627; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 1628; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 1629; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1630; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x20] 1631; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1632; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1633; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1634; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x30] 1635; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1636; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1637; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload 1638; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x40] 1639; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1640; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1641; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1642; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1643; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 1644; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 1645; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] 1646; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] 1647; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1648; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x10] 1649; FMACALL64-NEXT: addq $120, %rsp ## encoding: [0x48,0x83,0xc4,0x78] 1650; FMACALL64-NEXT: retq ## encoding: [0xc3] 1651; 1652; AVX512-LABEL: test_v4f64: 1653; AVX512: ## %bb.0: ## %entry 1654; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] 1655; AVX512-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 1656; AVX512-NEXT: retq ## encoding: 
[0xc3] 1657; 1658; AVX512VL-LABEL: test_v4f64: 1659; AVX512VL: ## %bb.0: ## %entry 1660; AVX512VL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] 1661; AVX512VL-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 1662; AVX512VL-NEXT: retq ## encoding: [0xc3] 1663; 1664; FMACALL32_BDVER2-LABEL: test_v4f64: 1665; FMACALL32_BDVER2: ## %bb.0: ## %entry 1666; FMACALL32_BDVER2-NEXT: subl $236, %esp ## encoding: [0x81,0xec,0xec,0x00,0x00,0x00] 1667; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01] 1668; FMACALL32_BDVER2-NEXT: vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1669; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xc0,0x00,0x00,0x00] 1670; FMACALL32_BDVER2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1671; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0x80,0x00,0x00,0x00] 1672; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01] 1673; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 1674; FMACALL32_BDVER2-NEXT: vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1675; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xa0,0x00,0x00,0x00] 1676; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1677; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60] 1678; FMACALL32_BDVER2-NEXT: vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2] 1679; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm2[0] 1680; FMACALL32_BDVER2-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1681; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x70] 1682; FMACALL32_BDVER2-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x5c,0x24,0x10] 1683; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1684; 
FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x50] 1685; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1686; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1687; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1688; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1689; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1690; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x44] 1691; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1692; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] 1693; FMACALL32_BDVER2-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10] 1694; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1695; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00] 1696; FMACALL32_BDVER2-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 1697; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xa0,0x00,0x00,0x00] 1698; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],mem[1] 1699; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1700; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1701; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1702; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1703; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1704; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x38] 1705; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1706; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] 1707; FMACALL32_BDVER2-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] 1708; 
FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1709; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00] 1710; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 1711; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xa0,0x00,0x00,0x00] 1712; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] 1713; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1714; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1715; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1716; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1717; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1718; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x70] 1719; FMACALL32_BDVER2-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10] 1720; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 1721; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] 1722; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 1723; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x68] 1724; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] 1725; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1726; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x30] 1727; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1728; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x38] 1729; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] 1730; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 1731; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x44] 1732; FMACALL32_BDVER2-NEXT: fstpl 
{{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] 1733; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1734; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1735; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x18] 1736; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1737; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x30] 1738; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1739; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x20] 1740; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x28] 1741; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0,1] 1742; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x18] 1743; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0,1] 1744; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 1745; FMACALL32_BDVER2-NEXT: addl $236, %esp ## encoding: [0x81,0xc4,0xec,0x00,0x00,0x00] 1746; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] 1747entry: 1748 %call = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) 1749 ret <4 x double> %call 1750} 1751 1752define <8 x double> @test_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 { 1753; FMA32-LABEL: test_v8f64: 1754; FMA32: ## %bb.0: ## %entry 1755; FMA32-NEXT: pushl %ebp ## encoding: [0x55] 1756; FMA32-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] 1757; FMA32-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] 1758; FMA32-NEXT: subl $32, %esp ## encoding: [0x83,0xec,0x20] 1759; FMA32-NEXT: vfmadd213pd 8(%ebp), %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0xed,0xa8,0x45,0x08] 1760; FMA32-NEXT: ## ymm0 = (ymm2 * ymm0) + mem 1761; FMA32-NEXT: vfmadd213pd 40(%ebp), %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0xe5,0xa8,0x4d,0x28] 1762; FMA32-NEXT: ## ymm1 = (ymm3 * ymm1) + mem 1763; 
FMA32-NEXT: movl %ebp, %esp ## encoding: [0x89,0xec] 1764; FMA32-NEXT: popl %ebp ## encoding: [0x5d] 1765; FMA32-NEXT: retl ## encoding: [0xc3] 1766; 1767; FMA64-LABEL: test_v8f64: 1768; FMA64: ## %bb.0: ## %entry 1769; FMA64-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0xed,0xa8,0xc4] 1770; FMA64-NEXT: ## ymm0 = (ymm2 * ymm0) + ymm4 1771; FMA64-NEXT: vfmadd213pd %ymm5, %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0xe5,0xa8,0xcd] 1772; FMA64-NEXT: ## ymm1 = (ymm3 * ymm1) + ymm5 1773; FMA64-NEXT: retq ## encoding: [0xc3] 1774; 1775; FMACALL64-LABEL: test_v8f64: 1776; FMACALL64: ## %bb.0: ## %entry 1777; FMACALL64-NEXT: subq $152, %rsp ## encoding: [0x48,0x81,0xec,0x98,0x00,0x00,0x00] 1778; FMACALL64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1779; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x7c,0x24,0x70] 1780; FMACALL64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1781; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x74,0x24,0x20] 1782; FMACALL64-NEXT: movaps %xmm5, (%rsp) ## 16-byte Spill 1783; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x2c,0x24] 1784; FMACALL64-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1785; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x64,0x24,0x10] 1786; FMACALL64-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1787; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x5c,0x24,0x60] 1788; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1789; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x50] 1790; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1791; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x40] 1792; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1793; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x84,0x24,0x80,0x00,0x00,0x00] 1794; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00] 1795; FMACALL64-NEXT: movaps %xmm4, %xmm1 ## encoding: [0x0f,0x28,0xcc] 1796; 
FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1797; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1798; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1799; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x30] 1800; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1801; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x84,0x24,0x80,0x00,0x00,0x00] 1802; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1803; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1804; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1805; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 1806; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1807; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1808; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00] 1809; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1810; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1811; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1812; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1813; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1814; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x30] 1815; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] 1816; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] 1817; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1818; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x30] 1819; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1820; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40] 1821; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 1822; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 1823; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] 1824; FMACALL64-NEXT: callq _fma ## 
encoding: [0xe8,A,A,A,A] 1825; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1826; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1827; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x10] 1828; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1829; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x40] 1830; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1831; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1832; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 1833; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 1834; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1835; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1836; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] 1837; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1838; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1839; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1840; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1841; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1842; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 1843; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] 1844; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] 1845; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1846; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x10] 1847; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1848; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50] 1849; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1850; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 1851; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 1852; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1853; FMACALL64-NEXT: ## 
fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1854; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill 1855; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] 1856; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1857; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x50] 1858; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1859; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1860; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1861; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x20] 1862; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1863; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1864; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00] 1865; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1866; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1867; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1868; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1869; FMACALL64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload 1870; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x0c,0x24] 1871; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] 1872; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] 1873; FMACALL64-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill 1874; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x0c,0x24] 1875; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1876; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x60] 1877; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1878; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x70] 1879; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00] 1880; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1881; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1882; FMACALL64-NEXT: movaps 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill 1883; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x44,0x24,0x20] 1884; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1885; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x60] 1886; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] 1887; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] 1888; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1889; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x70] 1890; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] 1891; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] 1892; FMACALL64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00] 1893; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] 1894; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] 1895; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] 1896; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel 1897; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload 1898; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x5c,0x24,0x20] 1899; FMACALL64-NEXT: movlhps %xmm0, %xmm3 ## encoding: [0x0f,0x16,0xd8] 1900; FMACALL64-NEXT: ## xmm3 = xmm3[0],xmm0[0] 1901; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload 1902; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x30] 1903; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload 1904; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] 1905; FMACALL64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload 1906; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x14,0x24] 1907; FMACALL64-NEXT: addq $152, %rsp ## encoding: [0x48,0x81,0xc4,0x98,0x00,0x00,0x00] 1908; FMACALL64-NEXT: retq ## encoding: [0xc3] 1909; 1910; AVX512-LABEL: test_v8f64: 1911; AVX512: ## %bb.0: ## %entry 1912; AVX512-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2] 1913; AVX512-NEXT: ## zmm0 = (zmm1 
* zmm0) + zmm2 1914; AVX512-NEXT: retq ## encoding: [0xc3] 1915; 1916; AVX512VL-LABEL: test_v8f64: 1917; AVX512VL: ## %bb.0: ## %entry 1918; AVX512VL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2] 1919; AVX512VL-NEXT: ## zmm0 = (zmm1 * zmm0) + zmm2 1920; AVX512VL-NEXT: retq ## encoding: [0xc3] 1921; 1922; FMACALL32_BDVER2-LABEL: test_v8f64: 1923; FMACALL32_BDVER2: ## %bb.0: ## %entry 1924; FMACALL32_BDVER2-NEXT: pushl %ebp ## encoding: [0x55] 1925; FMACALL32_BDVER2-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] 1926; FMACALL32_BDVER2-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] 1927; FMACALL32_BDVER2-NEXT: subl $352, %esp ## encoding: [0x81,0xec,0x60,0x01,0x00,0x00] 1928; FMACALL32_BDVER2-NEXT: ## imm = 0x160 1929; FMACALL32_BDVER2-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1930; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0xe0,0x00,0x00,0x00] 1931; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1932; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x38] 1933; FMACALL32_BDVER2-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1934; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x00,0x01,0x00,0x00] 1935; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01] 1936; FMACALL32_BDVER2-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1937; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0xc0,0x00,0x00,0x00] 1938; FMACALL32_BDVER2-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill 1939; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0xa0,0x00,0x00,0x00] 1940; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1941; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0x30,0x01,0x00,0x00] 1942; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 1943; 
FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc8,0x01] 1944; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1945; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x40,0x01,0x00,0x00] 1946; FMACALL32_BDVER2-NEXT: vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2] 1947; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm2[0] 1948; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1949; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1950; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1951; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1952; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1953; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x30] 1954; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1955; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x94,0x00,0x00,0x00] 1956; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 1957; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1958; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] 1959; FMACALL32_BDVER2-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 1960; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xc0,0x00,0x00,0x00] 1961; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],mem[1] 1962; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1963; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1964; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1965; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1966; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1967; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xfb,0x10,0x45,0x28] 1968; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1969; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x88,0x00,0x00,0x00] 1970; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 1971; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1972; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] 1973; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 1974; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xc0,0x00,0x00,0x00] 1975; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] 1976; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1977; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1978; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 1979; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 1980; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1981; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x20] 1982; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 1983; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xc0,0x00,0x00,0x00] 1984; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 1985; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1986; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x00,0x01,0x00,0x00] 1987; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] 1988; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 1989; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] 1990; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1991; 
FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x20] 1992; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 1993; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill 1994; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x30] 1995; FMACALL32_BDVER2-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x15,0xc1] 1996; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],xmm1[1] 1997; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 1998; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1999; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 2000; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 2001; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2002; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x18] 2003; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 2004; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xa0,0x00,0x00,0x00] 2005; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 2006; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 2007; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] 2008; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 2009; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x44,0x24,0x20] 2010; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] 2011; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 2012; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 2013; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 2014; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2015; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x10] 2016; FMACALL32_BDVER2-NEXT: fstpt 
{{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 2017; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x30] 2018; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 2019; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 2020; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] 2021; FMACALL32_BDVER2-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 2022; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0x00,0x01,0x00,0x00] 2023; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],mem[1] 2024; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 2025; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 2026; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 2027; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 2028; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2029; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x08] 2030; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill 2031; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x20] 2032; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 2033; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload 2034; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] 2035; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 2036; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x00,0x01,0x00,0x00] 2037; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] 2038; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 2039; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 2040; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 2041; 
FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 2042; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2043; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x45,0x40] 2044; FMACALL32_BDVER2-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10] 2045; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload 2046; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x30,0x01,0x00,0x00] 2047; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload 2048; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x84,0x24,0x48,0x01,0x00,0x00] 2049; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] 2050; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] 2051; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x60] 2052; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 2053; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x20] 2054; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x58] 2055; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 2056; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x30] 2057; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x50] 2058; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 2059; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xa0,0x00,0x00,0x00] 2060; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x48] 2061; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 2062; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xc0,0x00,0x00,0x00] 2063; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x9c,0x24,0x80,0x00,0x00,0x00] 2064; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 2065; 
FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x88,0x00,0x00,0x00] 2066; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x78] 2067; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload 2068; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x94,0x00,0x00,0x00] 2069; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x70] 2070; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] 2071; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 2072; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x68] 2073; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2074; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x60] 2075; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2076; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x50] 2077; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x58] 2078; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0,1] 2079; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x48] 2080; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0,1] 2081; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero 2082; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x54,0x24,0x70] 2083; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x16,0x54,0x24,0x68] 2084; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0,1],mem[0,1] 2085; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 2086; FMACALL32_BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2087; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfb,0x10,0x8c,0x24,0x80,0x00,0x00,0x00] 2088; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x78] 2089; FMACALL32_BDVER2-NEXT: ## xmm1 = 
xmm1[0,1],mem[0,1] 2090; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01] 2091; FMACALL32_BDVER2-NEXT: movl %ebp, %esp ## encoding: [0x89,0xec] 2092; FMACALL32_BDVER2-NEXT: popl %ebp ## encoding: [0x5d] 2093; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] 2094entry: 2095 %call = call <8 x double> @llvm.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c) 2096 ret <8 x double> %call 2097}

; Scalar constant folding: all three llvm.fma.f32 operands are immediates
; (50 * 20 + 20 = 1020), so each configuration is expected to emit only a
; constant-pool load followed by a return, with no FMA instruction or libcall.
define float @constant_fold_f32() {
; FMA32-LABEL: constant_fold_f32:
; FMA32: ## %bb.0:
; FMA32-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
; FMA32-NEXT: ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; FMA32-NEXT: retl ## encoding: [0xc3]
;
; FMACALL32-LABEL: constant_fold_f32:
; FMACALL32: ## %bb.0:
; FMACALL32-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
; FMACALL32-NEXT: ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; FMACALL32-NEXT: retl ## encoding: [0xc3]
;
; FMA64-LABEL: constant_fold_f32:
; FMA64: ## %bb.0:
; FMA64-NEXT: vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
; FMA64-NEXT: ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; FMA64-NEXT: retq ## encoding: [0xc3]
;
; FMACALL64-LABEL: constant_fold_f32:
; FMACALL64: ## %bb.0:
; FMACALL64-NEXT: movss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
; FMACALL64-NEXT: ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A]
; FMACALL64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; FMACALL64-NEXT: retq ## encoding: [0xc3]
;
; AVX512-LABEL: constant_fold_f32:
; AVX512: ## %bb.0:
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
; AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; AVX512VL-LABEL: constant_fold_f32:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512VL-NEXT: retq ## encoding: [0xc3]
  %r = call float @llvm.fma.f32(float 5.000000e+01, float 2.000000e+01, float 2.000000e+01)
  ret float %r
}

; Vector constant folding, <4 x float>: per-lane a * b + c with
;   a = <0, 10, 20, 30>, b = <40, 50, 60, 70>, c = <0, -10, -20, -30>
; gives <0, 490, 1180, 2070>, which every configuration is expected to load
; as a single constant vector.
define <4 x float> @constant_fold_v4f32() {
; FMA32-LABEL: constant_fold_v4f32:
; FMA32: ## %bb.0:
; FMA32-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
; FMA32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; FMA32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; FMA32-NEXT: retl ## encoding: [0xc3]
;
; FMACALL32-LABEL: constant_fold_v4f32:
; FMACALL32: ## %bb.0:
; FMACALL32-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; FMACALL32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; FMACALL32-NEXT: retl ## encoding: [0xc3]
;
; FMA64-LABEL: constant_fold_v4f32:
; FMA64: ## %bb.0:
; FMA64-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
; FMA64-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; FMA64-NEXT: retq ## encoding: [0xc3]
;
; FMACALL64-LABEL: constant_fold_v4f32:
; FMACALL64: ## %bb.0:
; FMACALL64-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; FMACALL64-NEXT: ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; FMACALL64-NEXT: retq ## encoding: [0xc3]
;
; AVX512-LABEL: constant_fold_v4f32:
; AVX512: ## %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
; AVX512-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; AVX512VL-LABEL: constant_fold_v4f32:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512VL-NEXT: retq ## encoding: [0xc3]
  %r = call <4 x float> @llvm.fma.v4f32(<4 x float> <float 0.000000e+01, float 1.000000e+01, float 2.000000e+01, float 3.000000e+01>, <4 x float> <float 4.000000e+01, float 5.000000e+01, float 6.000000e+01, float 7.000000e+01>, <4 x float> <float 0.000000e+01, float -1.000000e+01, float -2.000000e+01, float -3.000000e+01>)
  ret <4 x float> %r
}

; Vector constant folding, <2 x double>: per-lane a * b + c with
;   a = <10, 20>, b = <40, 70>, c = <10, 0>
; gives <410, 1400>, which every configuration is expected to load as a
; single constant vector.
define <2 x double> @constant_fold_v2f64() {
; FMA32-LABEL: constant_fold_v2f64:
; FMA32: ## %bb.0:
; FMA32-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
; FMA32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; FMA32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; FMA32-NEXT: retl ## encoding: [0xc3]
;
; FMACALL32-LABEL: constant_fold_v2f64:
; FMACALL32: ## %bb.0:
; FMACALL32-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; FMACALL32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; FMACALL32-NEXT: retl ## encoding: [0xc3]
;
; FMA64-LABEL: constant_fold_v2f64:
; FMA64: ## %bb.0:
; FMA64-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
; FMA64-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; FMA64-NEXT: retq ## encoding: [0xc3]
;
; FMACALL64-LABEL: constant_fold_v2f64:
; FMACALL64: ## %bb.0:
; FMACALL64-NEXT: movaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; FMACALL64-NEXT: ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; FMACALL64-NEXT: retq ## encoding: [0xc3]
;
; AVX512-LABEL: constant_fold_v2f64:
; AVX512: ## %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
; AVX512-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; AVX512VL-LABEL: constant_fold_v2f64:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; AVX512VL-NEXT: retq ## encoding: [0xc3]
  %r = call <2 x double> @llvm.fma.v2f64(<2 x double> <double 1.000000e+01, double 2.000000e+01>, <2 x double> <double 4.000000e+01, double 7.000000e+01>, <2 x double> <double 1.000000e+01, double 0.000000e+01>)
  ret <2 x double> %r
}

; Scalar llvm.fma intrinsic declarations.
declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)
declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)

; Single-precision vector llvm.fma intrinsic declarations (128/256/512-bit).
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)

; Double-precision vector llvm.fma intrinsic declarations (128/256/512-bit).
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)

; Attribute group referenced as #0 by test functions in this file.
attributes #0 = { nounwind }