; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-range -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE
; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-range -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE

define <4 x float> @rangeps_128(<4 x float> %a0, <4 x float> %a1) {
; ENABLE-LABEL: rangeps_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangeps $88, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT: vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT: vrangeps $88, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT: vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

define <4 x float> @rangeps_mem_128(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangeps $88, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_mem_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT: vrangeps $88, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT: vaddps %xmm1, %xmm0, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x float>, ptr %p1, align 64
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}

define <4 x float> @rangeps_broadcast_128(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangeps $88, (%rdi){1to4}, %xmm1, %xmm0
; ENABLE-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_broadcast_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT: vrangeps $88, (%rdi){1to4}, %xmm1, %xmm0
; DISABLE-NEXT: vaddps %xmm1, %xmm0, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <4 x float> undef, float %v1, i64 0
  %a1 = shufflevector <4 x float> %t0, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}

define <4 x float> @rangeps_maskz_128(<4 x float> %a0, <4 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: kmovb (%rdi), %k1
; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangeps $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT: vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_maskz_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: kmovb (%rdi), %k1
; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT: vrangeps $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT: vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 %mask)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) nounwind readnone

define <8 x float> @rangeps_256(<8 x float> %a0, <8 x float> %a1) {
; ENABLE-LABEL: rangeps_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangeps $88, %ymm2, %ymm0, %ymm1
; ENABLE-NEXT: vaddps %ymm2, %ymm0, %ymm0
; ENABLE-NEXT: vaddps %ymm0, %ymm1, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT: vrangeps $88, %ymm2, %ymm0, %ymm1
; DISABLE-NEXT: vaddps %ymm2, %ymm0, %ymm0
; DISABLE-NEXT: vaddps %ymm0, %ymm1, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %3 = fadd <8 x float> %a0, %a1
  %res = fadd <8 x float> %2, %3
  ret <8 x float> %res
}

define <8 x float> @rangeps_mem_256(<8 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangeps $88, (%rdi), %ymm1, %ymm0
; ENABLE-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_mem_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT: vrangeps $88, (%rdi), %ymm1, %ymm0
; DISABLE-NEXT: vaddps %ymm1, %ymm0, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <8 x float>, ptr %p1, align 64
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %res = fadd <8 x float> %2, %a0
  ret <8 x float> %res
}

define <8 x float> @rangeps_broadcast_256(<8 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangeps $88, (%rdi){1to8}, %ymm1, %ymm0
; ENABLE-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_broadcast_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT: vrangeps $88, (%rdi){1to8}, %ymm1, %ymm0
; DISABLE-NEXT: vaddps %ymm1, %ymm0, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <8 x float> undef, float %v1, i64 0
  %a1 = shufflevector <8 x float> %t0, <8 x float> undef, <8 x i32> zeroinitializer
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %res = fadd <8 x float> %2, %a0
  ret <8 x float> %res
}

define <8 x float> @rangeps_maskz_256(<8 x float> %a0, <8 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: kmovb (%rdi), %k1
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangeps $44, %ymm2, %ymm0, %ymm1 {%k1} {z}
; ENABLE-NEXT: vaddps %ymm2, %ymm0, %ymm0
; ENABLE-NEXT: vaddps %ymm0, %ymm1, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_maskz_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: kmovb (%rdi), %k1
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT: vrangeps $44, %ymm2, %ymm0, %ymm1 {%k1} {z}
; DISABLE-NEXT: vaddps %ymm2, %ymm0, %ymm0
; DISABLE-NEXT: vaddps %ymm0, %ymm1, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 44, <8 x float> undef, i8 %mask)
  %3 = fadd <8 x float> %a0, %a1
  %res = fadd <8 x float> %2, %3
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) nounwind readnone

define <16 x float> @rangeps_512(<16 x float> %a0, <16 x float> %a1) {
; ENABLE-LABEL: rangeps_512:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangeps $88, %zmm2, %zmm0, %zmm1
; ENABLE-NEXT: vaddps %zmm2, %zmm0, %zmm0
; ENABLE-NEXT: vaddps %zmm0, %zmm1, %zmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_512:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT: vrangeps $88, %zmm2, %zmm0, %zmm1
; DISABLE-NEXT: vaddps %zmm2, %zmm0, %zmm0
; DISABLE-NEXT: vaddps %zmm0, %zmm1, %zmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %3 = fadd <16 x float> %a0, %a1
  %res = fadd <16 x float> %2, %3
  ret <16 x float> %res
}

define <16 x float> @rangeps_mem_512(<16 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_512:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT: vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangeps $88, (%rdi), %zmm1, %zmm0
; ENABLE-NEXT: vaddps %zmm1, %zmm0, %zmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_mem_512:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT: vrangeps $88, (%rdi), %zmm1, %zmm0
; DISABLE-NEXT: vaddps %zmm1, %zmm0, %zmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <16 x float>, ptr %p1, align 64
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %res = fadd <16 x float> %2, %a0
  ret <16 x float> %res
}

define <16 x float> @rangeps_broadcast_512(<16 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_512:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT: vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangeps $88, (%rdi){1to16}, %zmm1, %zmm0
; ENABLE-NEXT: vaddps %zmm1, %zmm0, %zmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_broadcast_512:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT: vrangeps $88, (%rdi){1to16}, %zmm1, %zmm0
; DISABLE-NEXT: vaddps %zmm1, %zmm0, %zmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <16 x float> undef, float %v1, i64 0
  %a1 = shufflevector <16 x float> %t0, <16 x float> undef, <16 x i32> zeroinitializer
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %res = fadd <16 x float> %2, %a0
  ret <16 x float> %res
}

define <16 x float> @rangeps_maskz_512(<16 x float> %a0, <16 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_512:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: kmovw (%rdi), %k1
; ENABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangeps $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; ENABLE-NEXT: vaddps %zmm2, %zmm0, %zmm0
; ENABLE-NEXT: vaddps %zmm0, %zmm1, %zmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangeps_maskz_512:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: kmovw (%rdi), %k1
; DISABLE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT: vrangeps $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; DISABLE-NEXT: vaddps %zmm2, %zmm0, %zmm0
; DISABLE-NEXT: vaddps %zmm0, %zmm1, %zmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i16, ptr %pmask
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 %mask, i32 4)
  %3 = fadd <16 x float> %a0, %a1
  %res = fadd <16 x float> %2, %3
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32) nounwind readnone


define <2 x double> @rangepd_128(<2 x double> %a0, <2 x double> %a1) {
; ENABLE-LABEL: rangepd_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangepd $88, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT: vrangepd $88, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

define <2 x double> @rangepd_mem_128(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangepd $88, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_mem_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT: vrangepd $88, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <2 x double>, ptr %p1, align 64
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}

define <2 x double> @rangepd_broadcast_128(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangepd $88, (%rdi){1to2}, %xmm1, %xmm0
; ENABLE-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_broadcast_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT: vrangepd $88, (%rdi){1to2}, %xmm1, %xmm0
; DISABLE-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <2 x double> undef, double %v1, i64 0
  %a1 = shufflevector <2 x double> %t0, <2 x double> undef, <2 x i32> zeroinitializer
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}

define <2 x double> @rangepd_maskz_128(<2 x double> %a0, <2 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangepd_maskz_128:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: kmovb (%rdi), %k1
; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangepd $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_maskz_128:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: kmovb (%rdi), %k1
; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT: vrangepd $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 %mask)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) nounwind readnone

define <4 x double> @rangepd_256(<4 x double> %a0, <4 x double> %a1) {
; ENABLE-LABEL: rangepd_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangepd $88, %ymm2, %ymm0, %ymm1
; ENABLE-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; ENABLE-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT: vrangepd $88, %ymm2, %ymm0, %ymm1
; DISABLE-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; DISABLE-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %3 = fadd <4 x double> %a0, %a1
  %res = fadd <4 x double> %2, %3
  ret <4 x double> %res
}

define <4 x double> @rangepd_mem_256(<4 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangepd $88, (%rdi), %ymm1, %ymm0
; ENABLE-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_mem_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT: vrangepd $88, (%rdi), %ymm1, %ymm0
; DISABLE-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x double>, ptr %p1, align 64
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %res = fadd <4 x double> %2, %a0
  ret <4 x double> %res
}

define <4 x double> @rangepd_broadcast_256(<4 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT: vrangepd $88, (%rdi){1to4}, %ymm1, %ymm0
; ENABLE-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_broadcast_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT: vrangepd $88, (%rdi){1to4}, %ymm1, %ymm0
; DISABLE-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <4 x double> undef, double %v1, i64 0
  %a1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %res = fadd <4 x double> %2, %a0
  ret <4 x double> %res
}

define <4 x double> @rangepd_maskz_256(<4 x double> %a0, <4 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangepd_maskz_256:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: kmovb (%rdi), %k1
; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT: vrangepd $88, %ymm2, %ymm0, %ymm1 {%k1} {z}
; ENABLE-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; ENABLE-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; ENABLE-NEXT: retq
;
; DISABLE-LABEL: rangepd_maskz_256:
; DISABLE: # %bb.0:
; DISABLE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT: #APP
; DISABLE-NEXT: nop
; DISABLE-NEXT: #NO_APP
; DISABLE-NEXT: kmovb (%rdi), %k1
; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT: vrangepd $88, %ymm2, %ymm0, %ymm1 {%k1} {z}
; DISABLE-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; DISABLE-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; DISABLE-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 %mask)
  %3 = fadd <4 x double> %a0, %a1
  %res = fadd <4 x double> %2, %3
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) nounwind readnone

define <8 x double> @rangepd_512(<8 x double> %a0, <8 x double> %a1) {
; ENABLE-LABEL: rangepd_512:
; ENABLE: # %bb.0:
; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT: #APP
; ENABLE-NEXT: nop
; ENABLE-NEXT: #NO_APP
; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT: vpxor
%xmm1, %xmm1, %xmm1 670; ENABLE-NEXT: vrangepd $88, %zmm2, %zmm0, %zmm1 671; ENABLE-NEXT: vaddpd %zmm2, %zmm0, %zmm0 672; ENABLE-NEXT: vaddpd %zmm0, %zmm1, %zmm0 673; ENABLE-NEXT: retq 674; 675; DISABLE-LABEL: rangepd_512: 676; DISABLE: # %bb.0: 677; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 678; DISABLE-NEXT: #APP 679; DISABLE-NEXT: nop 680; DISABLE-NEXT: #NO_APP 681; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload 682; DISABLE-NEXT: vrangepd $88, %zmm2, %zmm0, %zmm1 683; DISABLE-NEXT: vaddpd %zmm2, %zmm0, %zmm0 684; DISABLE-NEXT: vaddpd %zmm0, %zmm1, %zmm0 685; DISABLE-NEXT: retq 686 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 687 %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4) 688 %3 = fadd <8 x double> %a0, %a1 689 %res = fadd <8 x double> %2, %3 690 ret <8 x double> %res 691} 692 693define <8 x double> @rangepd_mem_512(<8 x double> %a0, ptr %p1) { 694; ENABLE-LABEL: rangepd_mem_512: 695; ENABLE: # %bb.0: 696; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 697; ENABLE-NEXT: #APP 698; ENABLE-NEXT: nop 699; ENABLE-NEXT: #NO_APP 700; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 701; ENABLE-NEXT: vpxor %xmm0, %xmm0, %xmm0 702; ENABLE-NEXT: vrangepd $88, (%rdi), %zmm1, %zmm0 703; ENABLE-NEXT: vaddpd %zmm1, %zmm0, %zmm0 704; ENABLE-NEXT: retq 705; 706; DISABLE-LABEL: rangepd_mem_512: 707; DISABLE: # %bb.0: 708; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 709; DISABLE-NEXT: #APP 710; DISABLE-NEXT: nop 711; DISABLE-NEXT: #NO_APP 712; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), 
%zmm1 # 64-byte Reload 713; DISABLE-NEXT: vrangepd $88, (%rdi), %zmm1, %zmm0 714; DISABLE-NEXT: vaddpd %zmm1, %zmm0, %zmm0 715; DISABLE-NEXT: retq 716 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 717 %a1 = load <8 x double>, ptr %p1, align 64 718 %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4) 719 %res = fadd <8 x double> %2, %a0 720 ret <8 x double> %res 721} 722 723define <8 x double> @rangepd_broadcast_512(<8 x double> %a0, ptr %p1) { 724; ENABLE-LABEL: rangepd_broadcast_512: 725; ENABLE: # %bb.0: 726; ENABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 727; ENABLE-NEXT: #APP 728; ENABLE-NEXT: nop 729; ENABLE-NEXT: #NO_APP 730; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 731; ENABLE-NEXT: vpxor %xmm0, %xmm0, %xmm0 732; ENABLE-NEXT: vrangepd $88, (%rdi){1to8}, %zmm1, %zmm0 733; ENABLE-NEXT: vaddpd %zmm1, %zmm0, %zmm0 734; ENABLE-NEXT: retq 735; 736; DISABLE-LABEL: rangepd_broadcast_512: 737; DISABLE: # %bb.0: 738; DISABLE-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 739; DISABLE-NEXT: #APP 740; DISABLE-NEXT: nop 741; DISABLE-NEXT: #NO_APP 742; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload 743; DISABLE-NEXT: vrangepd $88, (%rdi){1to8}, %zmm1, %zmm0 744; DISABLE-NEXT: vaddpd %zmm1, %zmm0, %zmm0 745; DISABLE-NEXT: retq 746 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 747 %v1 = load double, ptr %p1, align 4 748 %t0 = insertelement <8 x double> undef, double %v1, i64 0 749 %a1 = shufflevector <8 x double> %t0, <8 x double> undef, <8 x i32> zeroinitializer 750 %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4) 751 %res = fadd <8 x double> %2, %a0 752 ret <8 x double> %res 753} 754 755define <8 x double> @rangepd_maskz_512(<8 x double> %a0, <8 x double> %a1, ptr %pmask) { 756; ENABLE-LABEL: rangepd_maskz_512: 757; ENABLE: # %bb.0: 758; ENABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 759; ENABLE-NEXT: #APP 760; ENABLE-NEXT: nop 761; ENABLE-NEXT: #NO_APP 762; ENABLE-NEXT: kmovb (%rdi), %k1 763; ENABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload 764; ENABLE-NEXT: vpxor %xmm1, %xmm1, %xmm1 765; ENABLE-NEXT: vrangepd $88, %zmm2, %zmm0, %zmm1 {%k1} {z} 766; ENABLE-NEXT: vaddpd %zmm2, %zmm0, %zmm0 767; ENABLE-NEXT: vaddpd %zmm0, %zmm1, %zmm0 768; ENABLE-NEXT: retq 769; 770; DISABLE-LABEL: rangepd_maskz_512: 771; DISABLE: # %bb.0: 772; DISABLE-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill 773; DISABLE-NEXT: #APP 774; DISABLE-NEXT: nop 775; DISABLE-NEXT: #NO_APP 776; DISABLE-NEXT: kmovb (%rdi), %k1 777; DISABLE-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload 778; DISABLE-NEXT: vrangepd $88, %zmm2, %zmm0, %zmm1 {%k1} {z} 779; DISABLE-NEXT: vaddpd %zmm2, %zmm0, %zmm0 780; DISABLE-NEXT: vaddpd %zmm0, %zmm1, %zmm0 781; DISABLE-NEXT: retq 782 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 783 %mask = load i8, ptr %pmask 784 %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 %mask, i32 4) 785 %3 = fadd <8 x double> %a0, %a1 786 %res = fadd <8 x double> %2, %3 787 ret <8 x double> %res 788} 789 790declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32) nounwind readnone 791 792define <4 x float> @rangess(<4 x float> %a0, <4 x float> %a1) { 793; ENABLE-LABEL: rangess: 794; ENABLE: # %bb.0: 795; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 796; ENABLE-NEXT: #APP 797; ENABLE-NEXT: nop 798; ENABLE-NEXT: #NO_APP 799; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 800; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 801; ENABLE-NEXT: vrangess $4, %xmm2, %xmm0, %xmm1 802; ENABLE-NEXT: vaddps %xmm0, %xmm2, %xmm0 803; ENABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0 804; ENABLE-NEXT: retq 805; 806; DISABLE-LABEL: rangess: 807; DISABLE: # %bb.0: 808; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 809; DISABLE-NEXT: #APP 810; DISABLE-NEXT: nop 811; DISABLE-NEXT: #NO_APP 812; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 813; DISABLE-NEXT: vrangess $4, %xmm2, %xmm0, %xmm1 814; DISABLE-NEXT: vaddps %xmm0, %xmm2, %xmm0 815; DISABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0 816; DISABLE-NEXT: retq 817 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 818 %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4, i32 4) 819 %3 = fadd <4 x float> %a1, %a0 820 %res = fadd <4 x float> %2, %3 821 ret <4 x float> %res 822} 823 824define <4 x float> @rangess_mem(<4 x float> %a0, ptr %p1) { 825; ENABLE-LABEL: rangess_mem: 826; ENABLE: # %bb.0: 827; ENABLE-NEXT: #APP 828; ENABLE-NEXT: nop 829; ENABLE-NEXT: #NO_APP 830; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 831; ENABLE-NEXT: vrangess $4, (%rdi), %xmm0, %xmm1 832; ENABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0 833; ENABLE-NEXT: retq 834; 835; DISABLE-LABEL: rangess_mem: 836; DISABLE: # %bb.0: 837; DISABLE-NEXT: #APP 838; DISABLE-NEXT: nop 839; DISABLE-NEXT: #NO_APP 840; DISABLE-NEXT: vrangess $4, (%rdi), %xmm0, %xmm1 841; DISABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0 842; DISABLE-NEXT: retq 843 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 844 %a1 = load <4 x float>, ptr %p1, align 64 845 %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4, i32 4) 846 %res = fadd <4 x float> %2, %a0 847 ret <4 x float> %res 848} 849 850define <4 x float> @rangess_maskz(<4 x float> %a0, <4 x float> %a1, ptr %pmask) { 851; ENABLE-LABEL: rangess_maskz: 852; ENABLE: # %bb.0: 853; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 854; ENABLE-NEXT: #APP 855; ENABLE-NEXT: nop 856; ENABLE-NEXT: #NO_APP 857; 
ENABLE-NEXT: kmovb (%rdi), %k1 858; ENABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 859; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 860; ENABLE-NEXT: vrangess $4, %xmm2, %xmm0, %xmm1 {%k1} {z} 861; ENABLE-NEXT: vaddps %xmm2, %xmm0, %xmm0 862; ENABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0 863; ENABLE-NEXT: retq 864; 865; DISABLE-LABEL: rangess_maskz: 866; DISABLE: # %bb.0: 867; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 868; DISABLE-NEXT: #APP 869; DISABLE-NEXT: nop 870; DISABLE-NEXT: #NO_APP 871; DISABLE-NEXT: kmovb (%rdi), %k1 872; DISABLE-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 873; DISABLE-NEXT: vrangess $4, %xmm2, %xmm0, %xmm1 {%k1} {z} 874; DISABLE-NEXT: vaddps %xmm2, %xmm0, %xmm0 875; DISABLE-NEXT: vaddps %xmm0, %xmm1, %xmm0 876; DISABLE-NEXT: retq 877 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 878 %mask = load i8, ptr %pmask 879 %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 %mask, i32 4, i32 4) 880 %3 = fadd <4 x float> %a0, %a1 881 %res = fadd <4 x float> %2, %3 882 ret <4 x float> %res 883} 884 885declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32) 886 887define <2 x double> @rangesd(<2 x double> %a0, <2 x double> %a1) { 888; ENABLE-LABEL: rangesd: 889; ENABLE: # %bb.0: 890; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 891; ENABLE-NEXT: #APP 892; ENABLE-NEXT: nop 893; ENABLE-NEXT: #NO_APP 894; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 895; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 896; ENABLE-NEXT: vrangesd $4, %xmm2, %xmm0, %xmm1 897; ENABLE-NEXT: vaddpd 
%xmm2, %xmm0, %xmm0 898; ENABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 899; ENABLE-NEXT: retq 900; 901; DISABLE-LABEL: rangesd: 902; DISABLE: # %bb.0: 903; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 904; DISABLE-NEXT: #APP 905; DISABLE-NEXT: nop 906; DISABLE-NEXT: #NO_APP 907; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 908; DISABLE-NEXT: vrangesd $4, %xmm2, %xmm0, %xmm1 909; DISABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0 910; DISABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 911; DISABLE-NEXT: retq 912 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 913 %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 -1, i32 4, i32 4) 914 %3 = fadd <2 x double> %a0, %a1 915 %res = fadd <2 x double> %2, %3 916 ret <2 x double> %res 917} 918 919define <2 x double> @rangesd_mem(<2 x double> %a0, ptr %p1) { 920; ENABLE-LABEL: rangesd_mem: 921; ENABLE: # %bb.0: 922; ENABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 923; ENABLE-NEXT: #APP 924; ENABLE-NEXT: nop 925; ENABLE-NEXT: #NO_APP 926; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 927; ENABLE-NEXT: vxorps %xmm0, %xmm0, %xmm0 928; ENABLE-NEXT: vrangesd $4, (%rdi), %xmm1, %xmm0 929; ENABLE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 930; ENABLE-NEXT: retq 931; 932; DISABLE-LABEL: rangesd_mem: 933; DISABLE: # %bb.0: 934; DISABLE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 935; DISABLE-NEXT: #APP 936; DISABLE-NEXT: nop 937; DISABLE-NEXT: #NO_APP 938; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 939; DISABLE-NEXT: vrangesd $4, (%rdi), %xmm1, %xmm0 940; DISABLE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 
941; DISABLE-NEXT: retq 942 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 943 %a1 = load <2 x double>, ptr %p1, align 64 944 %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 -1, i32 4, i32 4) 945 %res = fadd <2 x double> %2, %a0 946 ret <2 x double> %res 947} 948 949define <2 x double> @rangesd_maskz(<2 x double> %a0, <2 x double> %a1, ptr %pmask) { 950; ENABLE-LABEL: rangesd_maskz: 951; ENABLE: # %bb.0: 952; ENABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 953; ENABLE-NEXT: #APP 954; ENABLE-NEXT: nop 955; ENABLE-NEXT: #NO_APP 956; ENABLE-NEXT: kmovb (%rdi), %k1 957; ENABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 958; ENABLE-NEXT: vxorps %xmm1, %xmm1, %xmm1 959; ENABLE-NEXT: vrangesd $4, %xmm2, %xmm0, %xmm1 {%k1} {z} 960; ENABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0 961; ENABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 962; ENABLE-NEXT: retq 963; 964; DISABLE-LABEL: rangesd_maskz: 965; DISABLE: # %bb.0: 966; DISABLE-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 967; DISABLE-NEXT: #APP 968; DISABLE-NEXT: nop 969; DISABLE-NEXT: #NO_APP 970; DISABLE-NEXT: kmovb (%rdi), %k1 971; DISABLE-NEXT: vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload 972; DISABLE-NEXT: vrangesd $4, %xmm2, %xmm0, %xmm1 {%k1} {z} 973; DISABLE-NEXT: vaddpd %xmm2, %xmm0, %xmm0 974; DISABLE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 975; DISABLE-NEXT: retq 976 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() 977 %mask = load i8, ptr %pmask 978 %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 %mask, i32 4, i32 4) 979 %3 = fadd <2 x double> %a0, %a1 980 %res = fadd <2 x double> %2, %3 981 ret <2 x double> %res 982} 983 984declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32) 985