; xref: /llvm-project/llvm/test/CodeGen/X86/range-false-deps.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-range -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE
; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-range -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE

; Register-register vrangeps writing a dead %xmm1: with +false-deps-range
; (ENABLE) llc inserts a dependency-breaking `vxorps %xmm1,%xmm1,%xmm1`
; before the vrangeps; with -false-deps-range (DISABLE) it does not.
define <4 x float> @rangeps_128(<4 x float> %a0, <4 x float> %a1) {
; ENABLE-LABEL: rangeps_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}
36
; vrangeps with a folded memory operand: ENABLE still zeroes the
; destination register before the instruction; DISABLE does not.
define <4 x float> @rangeps_mem_128(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_mem_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x float>, ptr %p1, align 64
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}
66
; vrangeps with a broadcast memory operand ({1to4}): ENABLE inserts the
; dependency-breaking vxorps on the destination; DISABLE does not.
define <4 x float> @rangeps_broadcast_128(<4 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi){1to4}, %xmm1, %xmm0
; ENABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_broadcast_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi){1to4}, %xmm1, %xmm0
; DISABLE-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <4 x float> undef, float %v1, i64 0
  %a1 = shufflevector <4 x float> %t0, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 -1)
  %res = fadd <4 x float> %2, %a0
  ret <4 x float> %res
}
98
; Zero-masked ({%k1} {z}) vrangeps: the masked destination still carries a
; false dependency, so ENABLE zeroes %xmm1 first; DISABLE does not.
define <4 x float> @rangeps_maskz_128(<4 x float> %a0, <4 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_maskz_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangeps $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a0, <4 x float> %a1, i32 88, <4 x float> undef, i8 %mask)
  %3 = fadd <4 x float> %a0, %a1
  %res = fadd <4 x float> %2, %3
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) nounwind readnone
135
; 256-bit register-register vrangeps: ENABLE breaks the false dependency
; on %ymm1 with a vxorps of the xmm alias; DISABLE does not.
define <8 x float> @rangeps_256(<8 x float> %a0, <8 x float> %a1) {
; ENABLE-LABEL: rangeps_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %ymm2, %ymm0, %ymm1
; ENABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $88, %ymm2, %ymm0, %ymm1
; DISABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %3 = fadd <8 x float> %a0, %a1
  %res = fadd <8 x float> %2, %3
  ret <8 x float> %res
}
167
; 256-bit vrangeps with a folded memory operand: ENABLE zeroes the
; destination first; DISABLE does not.
define <8 x float> @rangeps_mem_256(<8 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi), %ymm1, %ymm0
; ENABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_mem_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi), %ymm1, %ymm0
; DISABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <8 x float>, ptr %p1, align 64
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %res = fadd <8 x float> %2, %a0
  ret <8 x float> %res
}
197
; 256-bit vrangeps with a broadcast operand ({1to8}): ENABLE inserts the
; dependency-breaking vxorps; DISABLE does not.
define <8 x float> @rangeps_broadcast_256(<8 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi){1to8}, %ymm1, %ymm0
; ENABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_broadcast_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi){1to8}, %ymm1, %ymm0
; DISABLE-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <8 x float> undef, float %v1, i64 0
  %a1 = shufflevector <8 x float> %t0, <8 x float> undef, <8 x i32> zeroinitializer
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 88, <8 x float> undef, i8 -1)
  %res = fadd <8 x float> %2, %a0
  ret <8 x float> %res
}
229
; Zero-masked 256-bit vrangeps (imm $44 here, matching the i32 44 below):
; ENABLE zeroes the masked destination first; DISABLE does not.
define <8 x float> @rangeps_maskz_256(<8 x float> %a0, <8 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $44, %ymm2, %ymm0, %ymm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_maskz_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangeps $44, %ymm2, %ymm0, %ymm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a0, <8 x float> %a1, i32 44, <8 x float> undef, i8 %mask)
  %3 = fadd <8 x float> %a0, %a1
  %res = fadd <8 x float> %2, %3
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) nounwind readnone
266
; 512-bit register-register vrangeps: ENABLE breaks the false dependency
; on %zmm1 (via vpxor of the xmm alias); DISABLE does not.
define <16 x float> @rangeps_512(<16 x float> %a0, <16 x float> %a1) {
; ENABLE-LABEL: rangeps_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1
; ENABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; ENABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1
; DISABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; DISABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %3 = fadd <16 x float> %a0, %a1
  %res = fadd <16 x float> %2, %3
  ret <16 x float> %res
}
298
; 512-bit vrangeps with a folded memory operand: ENABLE zeroes the
; destination first; DISABLE does not.
define <16 x float> @rangeps_mem_512(<16 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_mem_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi), %zmm1, %zmm0
; ENABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_mem_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi), %zmm1, %zmm0
; DISABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <16 x float>, ptr %p1, align 64
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %res = fadd <16 x float> %2, %a0
  ret <16 x float> %res
}
328
; 512-bit vrangeps with a broadcast operand ({1to16}): ENABLE inserts the
; dependency-breaking vpxor; DISABLE does not.
define <16 x float> @rangeps_broadcast_512(<16 x float> %a0, ptr %p1) {
; ENABLE-LABEL: rangeps_broadcast_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangeps $88, (%rdi){1to16}, %zmm1, %zmm0
; ENABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_broadcast_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, (%rdi){1to16}, %zmm1, %zmm0
; DISABLE-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load float, ptr %p1, align 4
  %t0 = insertelement <16 x float> undef, float %v1, i64 0
  %a1 = shufflevector <16 x float> %t0, <16 x float> undef, <16 x i32> zeroinitializer
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 -1, i32 4)
  %res = fadd <16 x float> %2, %a0
  ret <16 x float> %res
}
360
; Zero-masked 512-bit vrangeps (i16 mask, kmovw): ENABLE zeroes the masked
; destination first; DISABLE does not.
define <16 x float> @rangeps_maskz_512(<16 x float> %a0, <16 x float> %a1, ptr %pmask) {
; ENABLE-LABEL: rangeps_maskz_512:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovw (%rdi), %k1
; ENABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; ENABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; ENABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangeps_maskz_512:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovw (%rdi), %k1
; DISABLE-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
; DISABLE-NEXT:    vrangeps $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
; DISABLE-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; DISABLE-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i16, ptr %pmask
  %2 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %a0, <16 x float> %a1, i32 88, <16 x float> undef, i16 %mask, i32 4)
  %3 = fadd <16 x float> %a0, %a1
  %res = fadd <16 x float> %2, %3
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32) nounwind readnone
397
398
; Double-precision variant: register-register vrangepd; ENABLE breaks the
; false dependency on %xmm1 with vxorps; DISABLE does not.
define <2 x double> @rangepd_128(<2 x double> %a0, <2 x double> %a1) {
; ENABLE-LABEL: rangepd_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1
; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1
; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}
430
; vrangepd with a folded memory operand: ENABLE zeroes the destination
; first; DISABLE does not.
define <2 x double> @rangepd_mem_128(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi), %xmm1, %xmm0
; ENABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_mem_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi), %xmm1, %xmm0
; DISABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <2 x double>, ptr %p1, align 64
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}
460
; vrangepd with a broadcast operand ({1to2}): ENABLE inserts the
; dependency-breaking vxorps; DISABLE does not.
define <2 x double> @rangepd_broadcast_128(<2 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi){1to2}, %xmm1, %xmm0
; ENABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_broadcast_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi){1to2}, %xmm1, %xmm0
; DISABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <2 x double> undef, double %v1, i64 0
  %a1 = shufflevector <2 x double> %t0, <2 x double> undef, <2 x i32> zeroinitializer
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 -1)
  %res = fadd <2 x double> %2, %a0
  ret <2 x double> %res
}
492
; Zero-masked vrangepd: ENABLE zeroes the masked destination first;
; DISABLE does not.
define <2 x double> @rangepd_maskz_128(<2 x double> %a0, <2 x double> %a1, ptr %pmask) {
; ENABLE-LABEL: rangepd_maskz_128:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    kmovb (%rdi), %k1
; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_maskz_128:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    kmovb (%rdi), %k1
; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; DISABLE-NEXT:    vrangepd $88, %xmm2, %xmm0, %xmm1 {%k1} {z}
; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %mask = load i8, ptr %pmask
  %2 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a0, <2 x double> %a1, i32 88, <2 x double> undef, i8 %mask)
  %3 = fadd <2 x double> %a0, %a1
  %res = fadd <2 x double> %2, %3
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) nounwind readnone
529
; 256-bit register-register vrangepd: ENABLE breaks the false dependency
; on %ymm1 with vxorps; DISABLE does not.
define <4 x double> @rangepd_256(<4 x double> %a0, <4 x double> %a1) {
; ENABLE-LABEL: rangepd_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ENABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1
; ENABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; ENABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1
; DISABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; DISABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %3 = fadd <4 x double> %a0, %a1
  %res = fadd <4 x double> %2, %3
  ret <4 x double> %res
}
561
; 256-bit vrangepd with a folded memory operand: ENABLE zeroes the
; destination first; DISABLE does not.
define <4 x double> @rangepd_mem_256(<4 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_mem_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi), %ymm1, %ymm0
; ENABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_mem_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi), %ymm1, %ymm0
; DISABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %a1 = load <4 x double>, ptr %p1, align 64
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %res = fadd <4 x double> %2, %a0
  ret <4 x double> %res
}
591
; 256-bit vrangepd with a broadcast operand ({1to4}): ENABLE inserts the
; dependency-breaking vxorps; DISABLE does not.
define <4 x double> @rangepd_broadcast_256(<4 x double> %a0, ptr %p1) {
; ENABLE-LABEL: rangepd_broadcast_256:
; ENABLE:       # %bb.0:
; ENABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; ENABLE-NEXT:    #APP
; ENABLE-NEXT:    nop
; ENABLE-NEXT:    #NO_APP
; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ENABLE-NEXT:    vrangepd $88, (%rdi){1to4}, %ymm1, %ymm0
; ENABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ENABLE-NEXT:    retq
;
; DISABLE-LABEL: rangepd_broadcast_256:
; DISABLE:       # %bb.0:
; DISABLE-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; DISABLE-NEXT:    #APP
; DISABLE-NEXT:    nop
; DISABLE-NEXT:    #NO_APP
; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; DISABLE-NEXT:    vrangepd $88, (%rdi){1to4}, %ymm1, %ymm0
; DISABLE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; DISABLE-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %v1 = load double, ptr %p1, align 4
  %t0 = insertelement <4 x double> undef, double %v1, i64 0
  %a1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 -1)
  %res = fadd <4 x double> %2, %a0
  ret <4 x double> %res
}
623
; 256-bit vrangepd with zero-masking ({%k1} {z}) and an undef pass-through.
; Even though the masked-off lanes are zeroed, ENABLE still expects
; "vxorps %xmm1, %xmm1, %xmm1" before vrangepd to break the false dependency
; on the destination; DISABLE expects no xor.
624define <4 x double> @rangepd_maskz_256(<4 x double> %a0, <4 x double> %a1, ptr %pmask) {
625; ENABLE-LABEL: rangepd_maskz_256:
626; ENABLE:       # %bb.0:
627; ENABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
628; ENABLE-NEXT:    #APP
629; ENABLE-NEXT:    nop
630; ENABLE-NEXT:    #NO_APP
631; ENABLE-NEXT:    kmovb (%rdi), %k1
632; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
633; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
634; ENABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1 {%k1} {z}
635; ENABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
636; ENABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
637; ENABLE-NEXT:    retq
638;
639; DISABLE-LABEL: rangepd_maskz_256:
640; DISABLE:       # %bb.0:
641; DISABLE-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
642; DISABLE-NEXT:    #APP
643; DISABLE-NEXT:    nop
644; DISABLE-NEXT:    #NO_APP
645; DISABLE-NEXT:    kmovb (%rdi), %k1
646; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
647; DISABLE-NEXT:    vrangepd $88, %ymm2, %ymm0, %ymm1 {%k1} {z}
648; DISABLE-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
649; DISABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
650; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
651  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the lane mask from memory so the intrinsic uses a real %k register.
652  %mask = load i8, ptr %pmask
653  %2 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a0, <4 x double> %a1, i32 88, <4 x double> undef, i8 %mask)
654  %3 = fadd <4 x double> %a0, %a1
655  %res = fadd <4 x double> %2, %3
656  ret <4 x double> %res
657}
658
659declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) nounwind readnone
660
; 512-bit vrangepd with register operands and an undef pass-through. ENABLE
; expects the dependency-breaking "vpxor %xmm1, %xmm1, %xmm1" before vrangepd;
; DISABLE expects the vrangepd to write %zmm1 with no preceding xor.
661define <8 x double> @rangepd_512(<8 x double> %a0, <8 x double> %a1) {
662; ENABLE-LABEL: rangepd_512:
663; ENABLE:       # %bb.0:
664; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
665; ENABLE-NEXT:    #APP
666; ENABLE-NEXT:    nop
667; ENABLE-NEXT:    #NO_APP
668; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
669; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
670; ENABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1
671; ENABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
672; ENABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
673; ENABLE-NEXT:    retq
674;
675; DISABLE-LABEL: rangepd_512:
676; DISABLE:       # %bb.0:
677; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
678; DISABLE-NEXT:    #APP
679; DISABLE-NEXT:    nop
680; DISABLE-NEXT:    #NO_APP
681; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
682; DISABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1
683; DISABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
684; DISABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
685; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
686  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
687  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4)
688  %3 = fadd <8 x double> %a0, %a1
689  %res = fadd <8 x double> %2, %3
690  ret <8 x double> %res
691}
692
; 512-bit vrangepd with a full memory operand ((%rdi)) and an undef
; pass-through. ENABLE expects "vpxor %xmm0, %xmm0, %xmm0" before the
; load-folded vrangepd; DISABLE expects no xor.
693define <8 x double> @rangepd_mem_512(<8 x double> %a0, ptr %p1) {
694; ENABLE-LABEL: rangepd_mem_512:
695; ENABLE:       # %bb.0:
696; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
697; ENABLE-NEXT:    #APP
698; ENABLE-NEXT:    nop
699; ENABLE-NEXT:    #NO_APP
700; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
701; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
702; ENABLE-NEXT:    vrangepd $88, (%rdi), %zmm1, %zmm0
703; ENABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
704; ENABLE-NEXT:    retq
705;
706; DISABLE-LABEL: rangepd_mem_512:
707; DISABLE:       # %bb.0:
708; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
709; DISABLE-NEXT:    #APP
710; DISABLE-NEXT:    nop
711; DISABLE-NEXT:    #NO_APP
712; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
713; DISABLE-NEXT:    vrangepd $88, (%rdi), %zmm1, %zmm0
714; DISABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
715; DISABLE-NEXT:    retq
  ; Clobber xmm1-xmm31 so only xmm0 survives across the asm.
716  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the whole vector so the source folds into the vrangepd memory operand.
717  %a1 = load <8 x double>, ptr %p1, align 64
718  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4)
719  %res = fadd <8 x double> %2, %a0
720  ret <8 x double> %res
721}
722
; 512-bit vrangepd with a broadcast load operand ((%rdi){1to8}) and an undef
; pass-through. ENABLE expects "vpxor %xmm0, %xmm0, %xmm0" before vrangepd;
; DISABLE expects no xor.
723define <8 x double> @rangepd_broadcast_512(<8 x double> %a0, ptr %p1) {
724; ENABLE-LABEL: rangepd_broadcast_512:
725; ENABLE:       # %bb.0:
726; ENABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
727; ENABLE-NEXT:    #APP
728; ENABLE-NEXT:    nop
729; ENABLE-NEXT:    #NO_APP
730; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
731; ENABLE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
732; ENABLE-NEXT:    vrangepd $88, (%rdi){1to8}, %zmm1, %zmm0
733; ENABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
734; ENABLE-NEXT:    retq
735;
736; DISABLE-LABEL: rangepd_broadcast_512:
737; DISABLE:       # %bb.0:
738; DISABLE-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
739; DISABLE-NEXT:    #APP
740; DISABLE-NEXT:    nop
741; DISABLE-NEXT:    #NO_APP
742; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
743; DISABLE-NEXT:    vrangepd $88, (%rdi){1to8}, %zmm1, %zmm0
744; DISABLE-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
745; DISABLE-NEXT:    retq
  ; Clobber xmm1-xmm31 so only xmm0 survives across the asm.
746  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Build a splat of the scalar load so the vrangepd source folds to {1to8}.
747  %v1 = load double, ptr %p1, align 4
748  %t0 = insertelement <8 x double> undef, double %v1, i64 0
749  %a1 = shufflevector <8 x double> %t0, <8 x double> undef, <8 x i32> zeroinitializer
750  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 -1, i32 4)
751  %res = fadd <8 x double> %2, %a0
752  ret <8 x double> %res
753}
754
; 512-bit vrangepd with zero-masking ({%k1} {z}) and an undef pass-through.
; ENABLE still expects "vpxor %xmm1, %xmm1, %xmm1" before the masked vrangepd;
; DISABLE expects no xor.
755define <8 x double> @rangepd_maskz_512(<8 x double> %a0, <8 x double> %a1, ptr %pmask) {
756; ENABLE-LABEL: rangepd_maskz_512:
757; ENABLE:       # %bb.0:
758; ENABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
759; ENABLE-NEXT:    #APP
760; ENABLE-NEXT:    nop
761; ENABLE-NEXT:    #NO_APP
762; ENABLE-NEXT:    kmovb (%rdi), %k1
763; ENABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
764; ENABLE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
765; ENABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
766; ENABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
767; ENABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
768; ENABLE-NEXT:    retq
769;
770; DISABLE-LABEL: rangepd_maskz_512:
771; DISABLE:       # %bb.0:
772; DISABLE-NEXT:    vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
773; DISABLE-NEXT:    #APP
774; DISABLE-NEXT:    nop
775; DISABLE-NEXT:    #NO_APP
776; DISABLE-NEXT:    kmovb (%rdi), %k1
777; DISABLE-NEXT:    vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
778; DISABLE-NEXT:    vrangepd $88, %zmm2, %zmm0, %zmm1 {%k1} {z}
779; DISABLE-NEXT:    vaddpd %zmm2, %zmm0, %zmm0
780; DISABLE-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
781; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
782  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the lane mask from memory so the intrinsic uses a real %k register.
783  %mask = load i8, ptr %pmask
784  %2 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %a0, <8 x double> %a1, i32 88, <8 x double> undef, i8 %mask, i32 4)
785  %3 = fadd <8 x double> %a0, %a1
786  %res = fadd <8 x double> %2, %3
787  ret <8 x double> %res
788}
789
790declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32) nounwind readnone
791
; Scalar vrangess with register operands and an undef pass-through. ENABLE
; expects "vxorps %xmm1, %xmm1, %xmm1" before vrangess to break the false
; dependency on the destination; DISABLE expects no xor.
792define <4 x float> @rangess(<4 x float> %a0, <4 x float> %a1) {
793; ENABLE-LABEL: rangess:
794; ENABLE:       # %bb.0:
795; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
796; ENABLE-NEXT:    #APP
797; ENABLE-NEXT:    nop
798; ENABLE-NEXT:    #NO_APP
799; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
800; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
801; ENABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1
802; ENABLE-NEXT:    vaddps %xmm0, %xmm2, %xmm0
803; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
804; ENABLE-NEXT:    retq
805;
806; DISABLE-LABEL: rangess:
807; DISABLE:       # %bb.0:
808; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
809; DISABLE-NEXT:    #APP
810; DISABLE-NEXT:    nop
811; DISABLE-NEXT:    #NO_APP
812; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
813; DISABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1
814; DISABLE-NEXT:    vaddps %xmm0, %xmm2, %xmm0
815; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
816; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
817  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
818  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4, i32 4)
819  %3 = fadd <4 x float> %a1, %a0
820  %res = fadd <4 x float> %2, %3
821  ret <4 x float> %res
822}
823
; Scalar vrangess with a load-folded source ((%rdi)) and an undef
; pass-through. No spill/reload is needed here (only %xmm0 is live across the
; asm); ENABLE expects "vxorps %xmm1, %xmm1, %xmm1" before vrangess, DISABLE
; expects none.
824define <4 x float> @rangess_mem(<4 x float> %a0, ptr %p1) {
825; ENABLE-LABEL: rangess_mem:
826; ENABLE:       # %bb.0:
827; ENABLE-NEXT:    #APP
828; ENABLE-NEXT:    nop
829; ENABLE-NEXT:    #NO_APP
830; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
831; ENABLE-NEXT:    vrangess $4, (%rdi), %xmm0, %xmm1
832; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
833; ENABLE-NEXT:    retq
834;
835; DISABLE-LABEL: rangess_mem:
836; DISABLE:       # %bb.0:
837; DISABLE-NEXT:    #APP
838; DISABLE-NEXT:    nop
839; DISABLE-NEXT:    #NO_APP
840; DISABLE-NEXT:    vrangess $4, (%rdi), %xmm0, %xmm1
841; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
842; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
843  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the whole vector so the source folds into the vrangess memory operand.
844  %a1 = load <4 x float>, ptr %p1, align 64
845  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4, i32 4)
846  %res = fadd <4 x float> %2, %a0
847  ret <4 x float> %res
848}
849
; Scalar vrangess with zero-masking ({%k1} {z}) and an undef pass-through.
; ENABLE still expects "vxorps %xmm1, %xmm1, %xmm1" before the masked
; vrangess; DISABLE expects no xor.
850define <4 x float> @rangess_maskz(<4 x float> %a0, <4 x float> %a1, ptr %pmask) {
851; ENABLE-LABEL: rangess_maskz:
852; ENABLE:       # %bb.0:
853; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
854; ENABLE-NEXT:    #APP
855; ENABLE-NEXT:    nop
856; ENABLE-NEXT:    #NO_APP
857; ENABLE-NEXT:    kmovb (%rdi), %k1
858; ENABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
859; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
860; ENABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
861; ENABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
862; ENABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
863; ENABLE-NEXT:    retq
864;
865; DISABLE-LABEL: rangess_maskz:
866; DISABLE:       # %bb.0:
867; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
868; DISABLE-NEXT:    #APP
869; DISABLE-NEXT:    nop
870; DISABLE-NEXT:    #NO_APP
871; DISABLE-NEXT:    kmovb (%rdi), %k1
872; DISABLE-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
873; DISABLE-NEXT:    vrangess $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
874; DISABLE-NEXT:    vaddps %xmm2, %xmm0, %xmm0
875; DISABLE-NEXT:    vaddps %xmm0, %xmm1, %xmm0
876; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
877  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the lane mask from memory so the intrinsic uses a real %k register.
878  %mask = load i8, ptr %pmask
879  %2 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 %mask, i32 4, i32 4)
880  %3 = fadd <4 x float> %a0, %a1
881  %res = fadd <4 x float> %2, %3
882  ret <4 x float> %res
883}
884
885declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
886
; Scalar vrangesd with register operands and an undef pass-through. ENABLE
; expects "vxorps %xmm1, %xmm1, %xmm1" before vrangesd to break the false
; dependency on the destination; DISABLE expects no xor.
887define <2 x double> @rangesd(<2 x double> %a0, <2 x double> %a1) {
888; ENABLE-LABEL: rangesd:
889; ENABLE:       # %bb.0:
890; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
891; ENABLE-NEXT:    #APP
892; ENABLE-NEXT:    nop
893; ENABLE-NEXT:    #NO_APP
894; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
895; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
896; ENABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1
897; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
898; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
899; ENABLE-NEXT:    retq
900;
901; DISABLE-LABEL: rangesd:
902; DISABLE:       # %bb.0:
903; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
904; DISABLE-NEXT:    #APP
905; DISABLE-NEXT:    nop
906; DISABLE-NEXT:    #NO_APP
907; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
908; DISABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1
909; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
910; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
911; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
912  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
913  %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 -1, i32 4, i32 4)
914  %3 = fadd <2 x double> %a0, %a1
915  %res = fadd <2 x double> %2, %3
916  ret <2 x double> %res
917}
918
; Scalar vrangesd with a load-folded source ((%rdi)) and an undef
; pass-through. ENABLE expects "vxorps %xmm0, %xmm0, %xmm0" before the
; load-folded vrangesd; DISABLE expects no xor.
919define <2 x double> @rangesd_mem(<2 x double> %a0, ptr %p1) {
920; ENABLE-LABEL: rangesd_mem:
921; ENABLE:       # %bb.0:
922; ENABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
923; ENABLE-NEXT:    #APP
924; ENABLE-NEXT:    nop
925; ENABLE-NEXT:    #NO_APP
926; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
927; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
928; ENABLE-NEXT:    vrangesd $4, (%rdi), %xmm1, %xmm0
929; ENABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
930; ENABLE-NEXT:    retq
931;
932; DISABLE-LABEL: rangesd_mem:
933; DISABLE:       # %bb.0:
934; DISABLE-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
935; DISABLE-NEXT:    #APP
936; DISABLE-NEXT:    nop
937; DISABLE-NEXT:    #NO_APP
938; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
939; DISABLE-NEXT:    vrangesd $4, (%rdi), %xmm1, %xmm0
940; DISABLE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
941; DISABLE-NEXT:    retq
  ; Clobber xmm1-xmm31 so only xmm0 survives across the asm.
942  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the whole vector so the source folds into the vrangesd memory operand.
943  %a1 = load <2 x double>, ptr %p1, align 64
944  %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 -1, i32 4, i32 4)
945  %res = fadd <2 x double> %2, %a0
946  ret <2 x double> %res
947}
948
; Scalar vrangesd with zero-masking ({%k1} {z}) and an undef pass-through.
; ENABLE still expects "vxorps %xmm1, %xmm1, %xmm1" before the masked
; vrangesd; DISABLE expects no xor.
949define <2 x double> @rangesd_maskz(<2 x double> %a0, <2 x double> %a1, ptr %pmask) {
950; ENABLE-LABEL: rangesd_maskz:
951; ENABLE:       # %bb.0:
952; ENABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
953; ENABLE-NEXT:    #APP
954; ENABLE-NEXT:    nop
955; ENABLE-NEXT:    #NO_APP
956; ENABLE-NEXT:    kmovb (%rdi), %k1
957; ENABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
958; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
959; ENABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
960; ENABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
961; ENABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
962; ENABLE-NEXT:    retq
963;
964; DISABLE-LABEL: rangesd_maskz:
965; DISABLE:       # %bb.0:
966; DISABLE-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
967; DISABLE-NEXT:    #APP
968; DISABLE-NEXT:    nop
969; DISABLE-NEXT:    #NO_APP
970; DISABLE-NEXT:    kmovb (%rdi), %k1
971; DISABLE-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
972; DISABLE-NEXT:    vrangesd $4, %xmm2, %xmm0, %xmm1 {%k1} {z}
973; DISABLE-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
974; DISABLE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
975; DISABLE-NEXT:    retq
  ; Clobber xmm2-xmm31 so only xmm0/xmm1 survive across the asm.
976  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  ; Load the lane mask from memory so the intrinsic uses a real %k register.
977  %mask = load i8, ptr %pmask
978  %2 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> undef, i8 %mask, i32 4, i32 4)
979  %3 = fadd <2 x double> %a0, %a1
980  %res = fadd <2 x double> %2, %3
981  ret <2 x double> %res
982}
983
984declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
985