; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512FP16
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512FP16
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VLDQ

;
; 128-bit Vectors
;

define <2 x double> @fabs_v2f64(<2 x double> %p) nounwind {
; X86-SSE-LABEL: fabs_v2f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1OR2-LABEL: fabs_v2f64:
; X86-AVX1OR2: # %bb.0:
; X86-AVX1OR2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1OR2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v2f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v2f64:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v2f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v2f64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1OR2-LABEL: fabs_v2f64:
; X64-AVX1OR2: # %bb.0:
; X64-AVX1OR2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v2f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v2f64:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v2f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

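; Note: the sign masks in these checks print as "NaN" in the constant
; comments because clearing just the sign bit (e.g. 0x7FFFFFFFFFFFFFFF for
; f64) leaves all exponent and mantissa bits set, and that bit pattern is a
; NaN.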
define <4 x float> @fabs_v4f32(<4 x float> %p) nounwind {
; X86-SSE-LABEL: fabs_v4f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fabs_v4f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fabs_v4f32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v4f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v4f32:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v4f32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fabs_v4f32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fabs_v4f32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v4f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v4f32:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

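; The half tests load their operand from a pointer, presumably so the checks
; exercise the load+mask lowering without depending on how each subtarget
; passes half vectors in registers.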
define <8 x half> @fabs_v8f16(ptr %p) nounwind {
; X86-SSE-LABEL: fabs_v8f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fabs_v8f16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fabs_v8f16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT: vpand (%eax), %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512-LABEL: fabs_v8f16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512-NEXT: vpand (%eax), %xmm0, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: fabs_v8f16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fabs_v8f16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fabs_v8f16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: fabs_v8f16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512-NEXT: vpand (%rdi), %xmm0, %xmm0
; X64-AVX512-NEXT: retq
  %v = load <8 x half>, ptr %p, align 16
  %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
  ret <8 x half> %nnv
}
declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)

;
; 256-bit Vectors
;

define <4 x double> @fabs_v4f64(<4 x double> %p) nounwind {
; X86-SSE-LABEL: fabs_v4f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [NaN,NaN]
; X86-SSE-NEXT: andps %xmm2, %xmm0
; X86-SSE-NEXT: andps %xmm2, %xmm1
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fabs_v4f64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fabs_v4f64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v4f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v4f64:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v4f64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [NaN,NaN]
; X64-SSE-NEXT: andps %xmm2, %xmm0
; X64-SSE-NEXT: andps %xmm2, %xmm1
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fabs_v4f64:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fabs_v4f64:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v4f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v4f64:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

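; Without AVX, the 256-bit fabs is split into two 128-bit ANDs that share a
; single mask register, as in the X86-SSE/X64-SSE checks above and below.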
define <8 x float> @fabs_v8f32(<8 x float> %p) nounwind {
; X86-SSE-LABEL: fabs_v8f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; X86-SSE-NEXT: andps %xmm2, %xmm0
; X86-SSE-NEXT: andps %xmm2, %xmm1
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fabs_v8f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fabs_v8f32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v8f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v8f32:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v8f32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; X64-SSE-NEXT: andps %xmm2, %xmm0
; X64-SSE-NEXT: andps %xmm2, %xmm1
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fabs_v8f32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fabs_v8f32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v8f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v8f32:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

define <16 x half> @fabs_v16f16(ptr %p) nounwind {
; X86-SSE-LABEL: fabs_v16f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: andps %xmm1, %xmm0
; X86-SSE-NEXT: andps 16(%eax), %xmm1
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fabs_v16f16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovaps (%eax), %ymm0
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fabs_v16f16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT: vpand (%eax), %ymm0, %ymm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512-LABEL: fabs_v16f16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512-NEXT: vpand (%eax), %ymm0, %ymm0
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: fabs_v16f16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: andps %xmm1, %xmm0
; X64-SSE-NEXT: andps 16(%rdi), %xmm1
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fabs_v16f16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %ymm0
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fabs_v16f16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT: vpand (%rdi), %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: fabs_v16f16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512-NEXT: vpand (%rdi), %ymm0, %ymm0
; X64-AVX512-NEXT: retq
  %v = load <16 x half>, ptr %p, align 32
  %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
  ret <16 x half> %nnv
}
declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)

;
; 512-bit Vectors
;

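; In the X86-SSE checks below, the fourth 128-bit piece of the 512-bit
; argument arrives on the stack, hence the frame-pointer setup and the AND
; folded against the load from 8(%ebp).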
define <8 x double> @fabs_v8f64(<8 x double> %p) nounwind {
; X86-SSE-LABEL: fabs_v8f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [NaN,NaN]
; X86-SSE-NEXT: andps %xmm3, %xmm0
; X86-SSE-NEXT: andps %xmm3, %xmm1
; X86-SSE-NEXT: andps %xmm3, %xmm2
; X86-SSE-NEXT: andps 8(%ebp), %xmm3
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X86-AVX1OR2-LABEL: fabs_v8f64:
; X86-AVX1OR2: # %bb.0:
; X86-AVX1OR2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX1OR2-NEXT: vandps %ymm2, %ymm0, %ymm0
; X86-AVX1OR2-NEXT: vandps %ymm2, %ymm1, %ymm1
; X86-AVX1OR2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v8f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v8f64:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v8f64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = [NaN,NaN]
; X64-SSE-NEXT: andps %xmm4, %xmm0
; X64-SSE-NEXT: andps %xmm4, %xmm1
; X64-SSE-NEXT: andps %xmm4, %xmm2
; X64-SSE-NEXT: andps %xmm4, %xmm3
; X64-SSE-NEXT: retq
;
; X64-AVX1OR2-LABEL: fabs_v8f64:
; X64-AVX1OR2: # %bb.0:
; X64-AVX1OR2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX1OR2-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-AVX1OR2-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX1OR2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v8f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v8f64:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p) nounwind {
; X86-SSE-LABEL: fabs_v16f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
; X86-SSE-NEXT: andps %xmm3, %xmm0
; X86-SSE-NEXT: andps %xmm3, %xmm1
; X86-SSE-NEXT: andps %xmm3, %xmm2
; X86-SSE-NEXT: andps 8(%ebp), %xmm3
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X86-AVX1OR2-LABEL: fabs_v16f32:
; X86-AVX1OR2: # %bb.0:
; X86-AVX1OR2-NEXT: vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX1OR2-NEXT: vandps %ymm2, %ymm0, %ymm0
; X86-AVX1OR2-NEXT: vandps %ymm2, %ymm1, %ymm1
; X86-AVX1OR2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v16f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v16f32:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v16f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v16f32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
; X64-SSE-NEXT: andps %xmm4, %xmm0
; X64-SSE-NEXT: andps %xmm4, %xmm1
; X64-SSE-NEXT: andps %xmm4, %xmm2
; X64-SSE-NEXT: andps %xmm4, %xmm3
; X64-SSE-NEXT: retq
;
; X64-AVX1OR2-LABEL: fabs_v16f32:
; X64-AVX1OR2: # %bb.0:
; X64-AVX1OR2-NEXT: vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX1OR2-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-AVX1OR2-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX1OR2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v16f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v16f32:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v16f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

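; Only the FP16 run broadcasts the v32f16 mask directly into a zmm register:
; a 512-bit vpbroadcastw requires AVX512BW (implied by AVX512FP16), so the
; plain VL/VLDQ runs broadcast to a ymm and widen it with vinserti64x4.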
define <32 x half> @fabs_v32f16(ptr %p) nounwind {
; X86-SSE-LABEL: fabs_v32f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: andps %xmm3, %xmm0
; X86-SSE-NEXT: movaps 16(%eax), %xmm1
; X86-SSE-NEXT: andps %xmm3, %xmm1
; X86-SSE-NEXT: movaps 32(%eax), %xmm2
; X86-SSE-NEXT: andps %xmm3, %xmm2
; X86-SSE-NEXT: andps 48(%eax), %xmm3
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fabs_v32f16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX1-NEXT: vandps (%eax), %ymm1, %ymm0
; X86-AVX1-NEXT: vandps 32(%eax), %ymm1, %ymm1
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fabs_v32f16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT: vpand (%eax), %ymm1, %ymm0
; X86-AVX2-NEXT: vpand 32(%eax), %ymm1, %ymm1
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v32f16:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VL-NEXT: vpandq (%eax), %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fabs_v32f16:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512FP16-NEXT: vpandq (%eax), %zmm0, %zmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v32f16:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: vpandq (%eax), %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fabs_v32f16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: andps %xmm3, %xmm0
; X64-SSE-NEXT: movaps 16(%rdi), %xmm1
; X64-SSE-NEXT: andps %xmm3, %xmm1
; X64-SSE-NEXT: movaps 32(%rdi), %xmm2
; X64-SSE-NEXT: andps %xmm3, %xmm2
; X64-SSE-NEXT: andps 48(%rdi), %xmm3
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fabs_v32f16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX1-NEXT: vandps (%rdi), %ymm1, %ymm0
; X64-AVX1-NEXT: vandps 32(%rdi), %ymm1, %ymm1
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fabs_v32f16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT: vpand (%rdi), %ymm1, %ymm0
; X64-AVX2-NEXT: vpand 32(%rdi), %ymm1, %ymm1
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v32f16:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512VL-NEXT: vpandq (%rdi), %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fabs_v32f16:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512FP16-NEXT: vpandq (%rdi), %zmm0, %zmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v32f16:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: vpandq (%rdi), %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %v = load <32 x half>, ptr %p, align 64
  %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
  ret <32 x half> %nnv
}
declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;   movabsq (constant pool load of mask for sign bits)
;   vmovq   (move from integer register to vector/fp register)
;   vandps  (mask off sign bits)
;   vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;   mov     (put constant value in return register)

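; For fabs_v2f32_1 below, the whole fold is plain bit arithmetic:
;   0xFFFFFFFF00000000 & 0x7FFFFFFF7FFFFFFF = 0x7FFFFFFF00000000
; i.e. the movabsq immediate 9223372032559808512 in the X64 checks.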
define i64 @fabs_v2f32_1() nounwind {
; X86-LABEL: fabs_v2f32_1:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: retl
;
; X64-LABEL: fabs_v2f32_1:
; X64: # %bb.0:
; X64-NEXT: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT: retq
  %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
}

define i64 @fabs_v2f32_2() nounwind {
; X86-LABEL: fabs_v2f32_2:
; X86: # %bb.0:
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: fabs_v2f32_2:
; X64: # %bb.0:
; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: retq
  %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)

; PR70947 - remove duplicate xmm/ymm constant loads
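; The AVX checks below should materialize the sign mask once (vbroadcastsd)
; and reuse the same register for both the ymm AND and the trailing xmm AND.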
define void @PR70947(ptr %src, ptr %dst) nounwind {
; X86-SSE-LABEL: PR70947:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movups (%ecx), %xmm0
; X86-SSE-NEXT: movups 32(%ecx), %xmm1
; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [NaN,NaN]
; X86-SSE-NEXT: andps %xmm2, %xmm0
; X86-SSE-NEXT: andps %xmm2, %xmm1
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: movups %xmm1, 16(%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: PR70947:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vandps (%ecx), %ymm0, %ymm1
; X86-AVX-NEXT: vandps 32(%ecx), %xmm0, %xmm0
; X86-AVX-NEXT: vmovups %ymm1, (%eax)
; X86-AVX-NEXT: vmovups %xmm0, 16(%eax)
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: PR70947:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0
; X64-SSE-NEXT: movups 32(%rdi), %xmm1
; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [NaN,NaN]
; X64-SSE-NEXT: andps %xmm2, %xmm0
; X64-SSE-NEXT: andps %xmm2, %xmm1
; X64-SSE-NEXT: movups %xmm0, (%rsi)
; X64-SSE-NEXT: movups %xmm1, 16(%rsi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: PR70947:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vandps (%rdi), %ymm0, %ymm1
; X64-AVX-NEXT: vandps 32(%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: vmovups %ymm1, (%rsi)
; X64-AVX-NEXT: vmovups %xmm0, 16(%rsi)
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %src4 = getelementptr inbounds double, ptr %src, i64 4
  %dst4 = getelementptr inbounds i32, ptr %dst, i64 4
  %ld0 = load <4 x double>, ptr %src, align 8
  %ld4 = load <2 x double>, ptr %src4, align 8
  %fabs0 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %ld0)
  %fabs4 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %ld4)
  store <4 x double> %fabs0, ptr %dst, align 4
  store <2 x double> %fabs4, ptr %dst4, align 4
  ret void
}