xref: /llvm-project/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3; RUN:		-mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE
10
; Test: urem of a <4 x i16> vector by four different non-power-of-two
; constants (95, 124, 98, 1003).
; In every checked configuration each lane is moved to a GPR, zero-extended
; from 16 bits (clrlwi ..., 16) and reduced without a divide instruction:
;   q = mulhwu(x, magic)   ; high 32 bits of x * magic
;   r = x - q * divisor    ; mulli + sub
; For the divisor 95 the magic constant is built by "lis 689; ori 55879",
; i.e. 45210183 = ceil(2^32 / 95). The other divisors use their own lis/ori
; pairs. The four remainders are merged back into v2 before the return.
11define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
12; P9LE-LABEL: fold_urem_vec_1:
13; P9LE:       # %bb.0:
14; P9LE-NEXT:    li r3, 0
15; P9LE-NEXT:    lis r4, 689
16; P9LE-NEXT:    vextuhrx r3, r3, v2
17; P9LE-NEXT:    ori r4, r4, 55879
18; P9LE-NEXT:    clrlwi r3, r3, 16
19; P9LE-NEXT:    mulhwu r4, r3, r4
20; P9LE-NEXT:    mulli r4, r4, 95
21; P9LE-NEXT:    sub r3, r3, r4
22; P9LE-NEXT:    lis r4, 528
23; P9LE-NEXT:    mtvsrd v3, r3
24; P9LE-NEXT:    li r3, 2
25; P9LE-NEXT:    ori r4, r4, 33826
26; P9LE-NEXT:    vextuhrx r3, r3, v2
27; P9LE-NEXT:    clrlwi r3, r3, 16
28; P9LE-NEXT:    mulhwu r4, r3, r4
29; P9LE-NEXT:    mulli r4, r4, 124
30; P9LE-NEXT:    sub r3, r3, r4
31; P9LE-NEXT:    lis r4, 668
32; P9LE-NEXT:    mtvsrd v4, r3
33; P9LE-NEXT:    li r3, 4
34; P9LE-NEXT:    ori r4, r4, 48149
35; P9LE-NEXT:    vextuhrx r3, r3, v2
36; P9LE-NEXT:    vmrghh v3, v4, v3
37; P9LE-NEXT:    clrlwi r3, r3, 16
38; P9LE-NEXT:    mulhwu r4, r3, r4
39; P9LE-NEXT:    mulli r4, r4, 98
40; P9LE-NEXT:    sub r3, r3, r4
41; P9LE-NEXT:    lis r4, 65
42; P9LE-NEXT:    mtvsrd v4, r3
43; P9LE-NEXT:    li r3, 6
44; P9LE-NEXT:    ori r4, r4, 22281
45; P9LE-NEXT:    vextuhrx r3, r3, v2
46; P9LE-NEXT:    clrlwi r3, r3, 16
47; P9LE-NEXT:    mulhwu r4, r3, r4
48; P9LE-NEXT:    mulli r4, r4, 1003
49; P9LE-NEXT:    sub r3, r3, r4
50; P9LE-NEXT:    mtvsrd v2, r3
51; P9LE-NEXT:    vmrghh v2, v2, v4
52; P9LE-NEXT:    xxmrglw v2, v2, v3
53; P9LE-NEXT:    blr
54;
55; P9BE-LABEL: fold_urem_vec_1:
56; P9BE:       # %bb.0:
57; P9BE-NEXT:    li r3, 6
58; P9BE-NEXT:    lis r4, 65
59; P9BE-NEXT:    vextuhlx r3, r3, v2
60; P9BE-NEXT:    ori r4, r4, 22281
61; P9BE-NEXT:    clrlwi r3, r3, 16
62; P9BE-NEXT:    mulhwu r4, r3, r4
63; P9BE-NEXT:    mulli r4, r4, 1003
64; P9BE-NEXT:    sub r3, r3, r4
65; P9BE-NEXT:    lis r4, 668
66; P9BE-NEXT:    mtfprwz f0, r3
67; P9BE-NEXT:    li r3, 4
68; P9BE-NEXT:    ori r4, r4, 48149
69; P9BE-NEXT:    vextuhlx r3, r3, v2
70; P9BE-NEXT:    clrlwi r3, r3, 16
71; P9BE-NEXT:    mulhwu r4, r3, r4
72; P9BE-NEXT:    mulli r4, r4, 98
73; P9BE-NEXT:    sub r3, r3, r4
74; P9BE-NEXT:    lis r4, 528
75; P9BE-NEXT:    mtfprwz f1, r3
76; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
77; P9BE-NEXT:    ori r4, r4, 33826
78; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
79; P9BE-NEXT:    lxv vs2, 0(r3)
80; P9BE-NEXT:    li r3, 2
81; P9BE-NEXT:    vextuhlx r3, r3, v2
82; P9BE-NEXT:    clrlwi r3, r3, 16
83; P9BE-NEXT:    xxperm vs0, vs1, vs2
84; P9BE-NEXT:    mulhwu r4, r3, r4
85; P9BE-NEXT:    mulli r4, r4, 124
86; P9BE-NEXT:    sub r3, r3, r4
87; P9BE-NEXT:    lis r4, 689
88; P9BE-NEXT:    mtfprwz f1, r3
89; P9BE-NEXT:    li r3, 0
90; P9BE-NEXT:    ori r4, r4, 55879
91; P9BE-NEXT:    vextuhlx r3, r3, v2
92; P9BE-NEXT:    clrlwi r3, r3, 16
93; P9BE-NEXT:    mulhwu r4, r3, r4
94; P9BE-NEXT:    mulli r4, r4, 95
95; P9BE-NEXT:    sub r3, r3, r4
96; P9BE-NEXT:    mtfprwz f3, r3
97; P9BE-NEXT:    xxperm vs1, vs3, vs2
98; P9BE-NEXT:    xxmrghw v2, vs1, vs0
99; P9BE-NEXT:    blr
100;
101; P8LE-LABEL: fold_urem_vec_1:
102; P8LE:       # %bb.0:
103; P8LE-NEXT:    xxswapd vs0, v2
104; P8LE-NEXT:    lis r4, 689
105; P8LE-NEXT:    lis r5, 528
106; P8LE-NEXT:    lis r6, 668
107; P8LE-NEXT:    mffprd r3, f0
108; P8LE-NEXT:    ori r4, r4, 55879
109; P8LE-NEXT:    ori r5, r5, 33826
110; P8LE-NEXT:    ori r6, r6, 48149
111; P8LE-NEXT:    clrldi r7, r3, 48
112; P8LE-NEXT:    clrlwi r7, r7, 16
113; P8LE-NEXT:    mulhwu r4, r7, r4
114; P8LE-NEXT:    mulli r4, r4, 95
115; P8LE-NEXT:    sub r4, r7, r4
116; P8LE-NEXT:    rldicl r7, r3, 48, 48
117; P8LE-NEXT:    clrlwi r7, r7, 16
118; P8LE-NEXT:    mtvsrd v2, r4
119; P8LE-NEXT:    lis r4, 65
120; P8LE-NEXT:    mulhwu r5, r7, r5
121; P8LE-NEXT:    ori r4, r4, 22281
122; P8LE-NEXT:    mulli r5, r5, 124
123; P8LE-NEXT:    sub r5, r7, r5
124; P8LE-NEXT:    rldicl r7, r3, 32, 48
125; P8LE-NEXT:    rldicl r3, r3, 16, 48
126; P8LE-NEXT:    clrlwi r7, r7, 16
127; P8LE-NEXT:    clrlwi r3, r3, 16
128; P8LE-NEXT:    mtvsrd v3, r5
129; P8LE-NEXT:    mulhwu r6, r7, r6
130; P8LE-NEXT:    mulhwu r4, r3, r4
131; P8LE-NEXT:    mulli r6, r6, 98
132; P8LE-NEXT:    mulli r4, r4, 1003
133; P8LE-NEXT:    sub r6, r7, r6
134; P8LE-NEXT:    sub r3, r3, r4
135; P8LE-NEXT:    mtvsrd v4, r3
136; P8LE-NEXT:    vmrghh v2, v3, v2
137; P8LE-NEXT:    mtvsrd v3, r6
138; P8LE-NEXT:    vmrghh v3, v4, v3
139; P8LE-NEXT:    xxmrglw v2, v3, v2
140; P8LE-NEXT:    blr
141;
142; P8BE-LABEL: fold_urem_vec_1:
143; P8BE:       # %bb.0:
144; P8BE-NEXT:    mfvsrd r3, v2
145; P8BE-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
146; P8BE-NEXT:    lis r4, 65
147; P8BE-NEXT:    lis r5, 668
148; P8BE-NEXT:    lis r7, 528
149; P8BE-NEXT:    addi r6, r6, .LCPI0_0@toc@l
150; P8BE-NEXT:    ori r4, r4, 22281
151; P8BE-NEXT:    ori r5, r5, 48149
152; P8BE-NEXT:    ori r7, r7, 33826
153; P8BE-NEXT:    lxvw4x v2, 0, r6
154; P8BE-NEXT:    clrldi r6, r3, 48
155; P8BE-NEXT:    clrlwi r6, r6, 16
156; P8BE-NEXT:    mulhwu r4, r6, r4
157; P8BE-NEXT:    mulli r4, r4, 1003
158; P8BE-NEXT:    sub r4, r6, r4
159; P8BE-NEXT:    rldicl r6, r3, 48, 48
160; P8BE-NEXT:    clrlwi r6, r6, 16
161; P8BE-NEXT:    mtvsrwz v3, r4
162; P8BE-NEXT:    lis r4, 689
163; P8BE-NEXT:    mulhwu r5, r6, r5
164; P8BE-NEXT:    ori r4, r4, 55879
165; P8BE-NEXT:    mulli r5, r5, 98
166; P8BE-NEXT:    sub r5, r6, r5
167; P8BE-NEXT:    rldicl r6, r3, 32, 48
168; P8BE-NEXT:    rldicl r3, r3, 16, 48
169; P8BE-NEXT:    clrlwi r6, r6, 16
170; P8BE-NEXT:    clrlwi r3, r3, 16
171; P8BE-NEXT:    mtvsrwz v4, r5
172; P8BE-NEXT:    mulhwu r7, r6, r7
173; P8BE-NEXT:    mulhwu r4, r3, r4
174; P8BE-NEXT:    mulli r7, r7, 124
175; P8BE-NEXT:    mulli r4, r4, 95
176; P8BE-NEXT:    sub r6, r6, r7
177; P8BE-NEXT:    sub r3, r3, r4
178; P8BE-NEXT:    mtvsrwz v5, r3
179; P8BE-NEXT:    vperm v3, v4, v3, v2
180; P8BE-NEXT:    mtvsrwz v4, r6
181; P8BE-NEXT:    vperm v2, v5, v4, v2
182; P8BE-NEXT:    xxmrghw v2, v2, v3
183; P8BE-NEXT:    blr
; IR under test: one vector urem by a constant vector with distinct lanes.
184  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
185  ret <4 x i16> %1
186}
187
; Test: urem of a <4 x i16> vector by a splat constant (95 in every lane).
; The expected code uses the same per-lane mulhwu / mulli / sub sequence as a
; non-splat divisor, but the magic constant (lis 689; ori 55879) is built once
; in r4 and reused by all four multiply-high instructions.
188define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
189; P9LE-LABEL: fold_urem_vec_2:
190; P9LE:       # %bb.0:
191; P9LE-NEXT:    li r3, 0
192; P9LE-NEXT:    lis r4, 689
193; P9LE-NEXT:    vextuhrx r3, r3, v2
194; P9LE-NEXT:    ori r4, r4, 55879
195; P9LE-NEXT:    clrlwi r3, r3, 16
196; P9LE-NEXT:    mulhwu r5, r3, r4
197; P9LE-NEXT:    mulli r5, r5, 95
198; P9LE-NEXT:    sub r3, r3, r5
199; P9LE-NEXT:    mtvsrd v3, r3
200; P9LE-NEXT:    li r3, 2
201; P9LE-NEXT:    vextuhrx r3, r3, v2
202; P9LE-NEXT:    clrlwi r3, r3, 16
203; P9LE-NEXT:    mulhwu r5, r3, r4
204; P9LE-NEXT:    mulli r5, r5, 95
205; P9LE-NEXT:    sub r3, r3, r5
206; P9LE-NEXT:    mtvsrd v4, r3
207; P9LE-NEXT:    li r3, 4
208; P9LE-NEXT:    vextuhrx r3, r3, v2
209; P9LE-NEXT:    vmrghh v3, v4, v3
210; P9LE-NEXT:    clrlwi r3, r3, 16
211; P9LE-NEXT:    mulhwu r5, r3, r4
212; P9LE-NEXT:    mulli r5, r5, 95
213; P9LE-NEXT:    sub r3, r3, r5
214; P9LE-NEXT:    mtvsrd v4, r3
215; P9LE-NEXT:    li r3, 6
216; P9LE-NEXT:    vextuhrx r3, r3, v2
217; P9LE-NEXT:    clrlwi r3, r3, 16
218; P9LE-NEXT:    mulhwu r4, r3, r4
219; P9LE-NEXT:    mulli r4, r4, 95
220; P9LE-NEXT:    sub r3, r3, r4
221; P9LE-NEXT:    mtvsrd v2, r3
222; P9LE-NEXT:    vmrghh v2, v2, v4
223; P9LE-NEXT:    xxmrglw v2, v2, v3
224; P9LE-NEXT:    blr
225;
226; P9BE-LABEL: fold_urem_vec_2:
227; P9BE:       # %bb.0:
228; P9BE-NEXT:    li r3, 6
229; P9BE-NEXT:    lis r4, 689
230; P9BE-NEXT:    vextuhlx r3, r3, v2
231; P9BE-NEXT:    ori r4, r4, 55879
232; P9BE-NEXT:    clrlwi r3, r3, 16
233; P9BE-NEXT:    mulhwu r5, r3, r4
234; P9BE-NEXT:    mulli r5, r5, 95
235; P9BE-NEXT:    sub r3, r3, r5
236; P9BE-NEXT:    mtfprwz f0, r3
237; P9BE-NEXT:    li r3, 4
238; P9BE-NEXT:    vextuhlx r3, r3, v2
239; P9BE-NEXT:    clrlwi r3, r3, 16
240; P9BE-NEXT:    mulhwu r5, r3, r4
241; P9BE-NEXT:    mulli r5, r5, 95
242; P9BE-NEXT:    sub r3, r3, r5
243; P9BE-NEXT:    mtfprwz f1, r3
244; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
245; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
246; P9BE-NEXT:    lxv vs2, 0(r3)
247; P9BE-NEXT:    li r3, 2
248; P9BE-NEXT:    vextuhlx r3, r3, v2
249; P9BE-NEXT:    clrlwi r3, r3, 16
250; P9BE-NEXT:    xxperm vs0, vs1, vs2
251; P9BE-NEXT:    mulhwu r5, r3, r4
252; P9BE-NEXT:    mulli r5, r5, 95
253; P9BE-NEXT:    sub r3, r3, r5
254; P9BE-NEXT:    mtfprwz f1, r3
255; P9BE-NEXT:    li r3, 0
256; P9BE-NEXT:    vextuhlx r3, r3, v2
257; P9BE-NEXT:    clrlwi r3, r3, 16
258; P9BE-NEXT:    mulhwu r4, r3, r4
259; P9BE-NEXT:    mulli r4, r4, 95
260; P9BE-NEXT:    sub r3, r3, r4
261; P9BE-NEXT:    mtfprwz f3, r3
262; P9BE-NEXT:    xxperm vs1, vs3, vs2
263; P9BE-NEXT:    xxmrghw v2, vs1, vs0
264; P9BE-NEXT:    blr
265;
266; P8LE-LABEL: fold_urem_vec_2:
267; P8LE:       # %bb.0:
268; P8LE-NEXT:    xxswapd vs0, v2
269; P8LE-NEXT:    lis r4, 689
270; P8LE-NEXT:    mffprd r3, f0
271; P8LE-NEXT:    ori r4, r4, 55879
272; P8LE-NEXT:    clrldi r5, r3, 48
273; P8LE-NEXT:    rldicl r6, r3, 48, 48
274; P8LE-NEXT:    rldicl r7, r3, 32, 48
275; P8LE-NEXT:    rldicl r3, r3, 16, 48
276; P8LE-NEXT:    clrlwi r5, r5, 16
277; P8LE-NEXT:    clrlwi r6, r6, 16
278; P8LE-NEXT:    clrlwi r7, r7, 16
279; P8LE-NEXT:    clrlwi r3, r3, 16
280; P8LE-NEXT:    mulhwu r8, r5, r4
281; P8LE-NEXT:    mulli r8, r8, 95
282; P8LE-NEXT:    sub r5, r5, r8
283; P8LE-NEXT:    mulhwu r8, r6, r4
284; P8LE-NEXT:    mtvsrd v2, r5
285; P8LE-NEXT:    mulli r8, r8, 95
286; P8LE-NEXT:    sub r6, r6, r8
287; P8LE-NEXT:    mulhwu r8, r7, r4
288; P8LE-NEXT:    mulhwu r4, r3, r4
289; P8LE-NEXT:    mtvsrd v3, r6
290; P8LE-NEXT:    mulli r8, r8, 95
291; P8LE-NEXT:    mulli r4, r4, 95
292; P8LE-NEXT:    sub r7, r7, r8
293; P8LE-NEXT:    sub r3, r3, r4
294; P8LE-NEXT:    mtvsrd v4, r7
295; P8LE-NEXT:    vmrghh v2, v3, v2
296; P8LE-NEXT:    mtvsrd v3, r3
297; P8LE-NEXT:    vmrghh v3, v3, v4
298; P8LE-NEXT:    xxmrglw v2, v3, v2
299; P8LE-NEXT:    blr
300;
301; P8BE-LABEL: fold_urem_vec_2:
302; P8BE:       # %bb.0:
303; P8BE-NEXT:    mfvsrd r3, v2
304; P8BE-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
305; P8BE-NEXT:    lis r4, 689
306; P8BE-NEXT:    addi r5, r5, .LCPI1_0@toc@l
307; P8BE-NEXT:    ori r4, r4, 55879
308; P8BE-NEXT:    lxvw4x v2, 0, r5
309; P8BE-NEXT:    clrldi r5, r3, 48
310; P8BE-NEXT:    rldicl r6, r3, 48, 48
311; P8BE-NEXT:    rldicl r7, r3, 32, 48
312; P8BE-NEXT:    rldicl r3, r3, 16, 48
313; P8BE-NEXT:    clrlwi r5, r5, 16
314; P8BE-NEXT:    clrlwi r6, r6, 16
315; P8BE-NEXT:    clrlwi r7, r7, 16
316; P8BE-NEXT:    clrlwi r3, r3, 16
317; P8BE-NEXT:    mulhwu r8, r5, r4
318; P8BE-NEXT:    mulli r8, r8, 95
319; P8BE-NEXT:    sub r5, r5, r8
320; P8BE-NEXT:    mulhwu r8, r6, r4
321; P8BE-NEXT:    mtvsrwz v3, r5
322; P8BE-NEXT:    mulli r8, r8, 95
323; P8BE-NEXT:    sub r6, r6, r8
324; P8BE-NEXT:    mulhwu r8, r7, r4
325; P8BE-NEXT:    mulhwu r4, r3, r4
326; P8BE-NEXT:    mtvsrwz v4, r6
327; P8BE-NEXT:    mulli r8, r8, 95
328; P8BE-NEXT:    mulli r4, r4, 95
329; P8BE-NEXT:    sub r7, r7, r8
330; P8BE-NEXT:    sub r3, r3, r4
331; P8BE-NEXT:    mtvsrwz v5, r7
332; P8BE-NEXT:    vperm v3, v4, v3, v2
333; P8BE-NEXT:    mtvsrwz v4, r3
334; P8BE-NEXT:    vperm v2, v4, v5, v2
335; P8BE-NEXT:    xxmrghw v2, v2, v3
336; P8BE-NEXT:    blr
; IR under test: one vector urem whose divisor is the same constant per lane.
337  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
338  ret <4 x i16> %1
339}
340
341
342; Don't fold if we can combine urem with udiv.
; Test: the same vector is used in both a urem and a udiv by the splat
; constant 95, and the two results are added.
; The expected code computes the quotient once per lane (four mulhwu
; instructions per configuration). Each quotient is used twice: multiplied
; back by 95 and subtracted to form the remainder, and moved into a vector
; register to build the udiv result. The final vadduhm adds the remainder
; vector and the quotient vector.
343define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
344; P9LE-LABEL: combine_urem_udiv:
345; P9LE:       # %bb.0:
346; P9LE-NEXT:    li r3, 0
347; P9LE-NEXT:    lis r4, 689
348; P9LE-NEXT:    vextuhrx r3, r3, v2
349; P9LE-NEXT:    ori r4, r4, 55879
350; P9LE-NEXT:    clrlwi r3, r3, 16
351; P9LE-NEXT:    mulhwu r5, r3, r4
352; P9LE-NEXT:    mulli r6, r5, 95
353; P9LE-NEXT:    sub r3, r3, r6
354; P9LE-NEXT:    mtvsrd v3, r3
355; P9LE-NEXT:    li r3, 2
356; P9LE-NEXT:    vextuhrx r3, r3, v2
357; P9LE-NEXT:    clrlwi r6, r3, 16
358; P9LE-NEXT:    mulhwu r6, r6, r4
359; P9LE-NEXT:    mulli r7, r6, 95
360; P9LE-NEXT:    sub r3, r3, r7
361; P9LE-NEXT:    mtvsrd v4, r3
362; P9LE-NEXT:    li r3, 4
363; P9LE-NEXT:    vextuhrx r3, r3, v2
364; P9LE-NEXT:    vmrghh v3, v4, v3
365; P9LE-NEXT:    clrlwi r7, r3, 16
366; P9LE-NEXT:    mulhwu r7, r7, r4
367; P9LE-NEXT:    mulli r8, r7, 95
368; P9LE-NEXT:    sub r3, r3, r8
369; P9LE-NEXT:    mtvsrd v4, r3
370; P9LE-NEXT:    li r3, 6
371; P9LE-NEXT:    vextuhrx r3, r3, v2
372; P9LE-NEXT:    clrlwi r8, r3, 16
373; P9LE-NEXT:    mulhwu r4, r8, r4
374; P9LE-NEXT:    mulli r8, r4, 95
375; P9LE-NEXT:    mtvsrd v5, r4
376; P9LE-NEXT:    sub r3, r3, r8
377; P9LE-NEXT:    mtvsrd v2, r3
378; P9LE-NEXT:    vmrghh v2, v2, v4
379; P9LE-NEXT:    mtvsrd v4, r6
380; P9LE-NEXT:    xxmrglw v2, v2, v3
381; P9LE-NEXT:    mtvsrd v3, r5
382; P9LE-NEXT:    vmrghh v3, v4, v3
383; P9LE-NEXT:    mtvsrd v4, r7
384; P9LE-NEXT:    vmrghh v4, v5, v4
385; P9LE-NEXT:    xxmrglw v3, v4, v3
386; P9LE-NEXT:    vadduhm v2, v2, v3
387; P9LE-NEXT:    blr
388;
389; P9BE-LABEL: combine_urem_udiv:
390; P9BE:       # %bb.0:
391; P9BE-NEXT:    li r3, 6
392; P9BE-NEXT:    lis r5, 689
393; P9BE-NEXT:    vextuhlx r3, r3, v2
394; P9BE-NEXT:    ori r5, r5, 55879
395; P9BE-NEXT:    clrlwi r4, r3, 16
396; P9BE-NEXT:    mulhwu r4, r4, r5
397; P9BE-NEXT:    mulli r6, r4, 95
398; P9BE-NEXT:    sub r3, r3, r6
399; P9BE-NEXT:    mtfprwz f0, r3
400; P9BE-NEXT:    li r3, 4
401; P9BE-NEXT:    vextuhlx r3, r3, v2
402; P9BE-NEXT:    clrlwi r6, r3, 16
403; P9BE-NEXT:    mulhwu r6, r6, r5
404; P9BE-NEXT:    mulli r7, r6, 95
405; P9BE-NEXT:    sub r3, r3, r7
406; P9BE-NEXT:    mtfprwz f1, r3
407; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
408; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
409; P9BE-NEXT:    lxv vs2, 0(r3)
410; P9BE-NEXT:    li r3, 2
411; P9BE-NEXT:    vextuhlx r3, r3, v2
412; P9BE-NEXT:    clrlwi r7, r3, 16
413; P9BE-NEXT:    xxperm vs0, vs1, vs2
414; P9BE-NEXT:    mulhwu r7, r7, r5
415; P9BE-NEXT:    mulli r8, r7, 95
416; P9BE-NEXT:    sub r3, r3, r8
417; P9BE-NEXT:    mtfprwz f1, r3
418; P9BE-NEXT:    li r3, 0
419; P9BE-NEXT:    vextuhlx r3, r3, v2
420; P9BE-NEXT:    clrlwi r3, r3, 16
421; P9BE-NEXT:    mulhwu r5, r3, r5
422; P9BE-NEXT:    mulli r8, r5, 95
423; P9BE-NEXT:    sub r3, r3, r8
424; P9BE-NEXT:    mtfprwz f3, r3
425; P9BE-NEXT:    xxperm vs1, vs3, vs2
426; P9BE-NEXT:    mtfprwz f3, r5
427; P9BE-NEXT:    xxmrghw v2, vs1, vs0
428; P9BE-NEXT:    mtfprwz f0, r4
429; P9BE-NEXT:    mtfprwz f1, r6
430; P9BE-NEXT:    xxperm vs0, vs1, vs2
431; P9BE-NEXT:    mtfprwz f1, r7
432; P9BE-NEXT:    xxperm vs1, vs3, vs2
433; P9BE-NEXT:    xxmrghw v3, vs1, vs0
434; P9BE-NEXT:    vadduhm v2, v2, v3
435; P9BE-NEXT:    blr
436;
437; P8LE-LABEL: combine_urem_udiv:
438; P8LE:       # %bb.0:
439; P8LE-NEXT:    xxswapd vs0, v2
440; P8LE-NEXT:    lis r4, 689
441; P8LE-NEXT:    mffprd r3, f0
442; P8LE-NEXT:    ori r4, r4, 55879
443; P8LE-NEXT:    clrldi r5, r3, 48
444; P8LE-NEXT:    rldicl r6, r3, 48, 48
445; P8LE-NEXT:    rldicl r7, r3, 32, 48
446; P8LE-NEXT:    rldicl r3, r3, 16, 48
447; P8LE-NEXT:    clrlwi r5, r5, 16
448; P8LE-NEXT:    clrlwi r8, r6, 16
449; P8LE-NEXT:    clrlwi r9, r7, 16
450; P8LE-NEXT:    clrlwi r10, r3, 16
451; P8LE-NEXT:    mulhwu r11, r5, r4
452; P8LE-NEXT:    mulhwu r8, r8, r4
453; P8LE-NEXT:    mulhwu r9, r9, r4
454; P8LE-NEXT:    mulhwu r4, r10, r4
455; P8LE-NEXT:    mulli r10, r11, 95
456; P8LE-NEXT:    mtvsrd v2, r11
457; P8LE-NEXT:    mtvsrd v3, r8
458; P8LE-NEXT:    sub r5, r5, r10
459; P8LE-NEXT:    mulli r10, r8, 95
460; P8LE-NEXT:    mtvsrd v4, r5
461; P8LE-NEXT:    sub r6, r6, r10
462; P8LE-NEXT:    mulli r10, r9, 95
463; P8LE-NEXT:    mtvsrd v5, r6
464; P8LE-NEXT:    sub r7, r7, r10
465; P8LE-NEXT:    mulli r10, r4, 95
466; P8LE-NEXT:    mtvsrd v0, r7
467; P8LE-NEXT:    sub r3, r3, r10
468; P8LE-NEXT:    vmrghh v2, v3, v2
469; P8LE-NEXT:    mtvsrd v3, r9
470; P8LE-NEXT:    vmrghh v4, v5, v4
471; P8LE-NEXT:    mtvsrd v5, r3
472; P8LE-NEXT:    vmrghh v5, v5, v0
473; P8LE-NEXT:    mtvsrd v0, r4
474; P8LE-NEXT:    xxmrglw v4, v5, v4
475; P8LE-NEXT:    vmrghh v3, v0, v3
476; P8LE-NEXT:    xxmrglw v2, v3, v2
477; P8LE-NEXT:    vadduhm v2, v4, v2
478; P8LE-NEXT:    blr
479;
480; P8BE-LABEL: combine_urem_udiv:
481; P8BE:       # %bb.0:
482; P8BE-NEXT:    mfvsrd r3, v2
483; P8BE-NEXT:    lis r4, 689
484; P8BE-NEXT:    ori r4, r4, 55879
485; P8BE-NEXT:    clrldi r5, r3, 48
486; P8BE-NEXT:    rldicl r6, r3, 48, 48
487; P8BE-NEXT:    rldicl r7, r3, 32, 48
488; P8BE-NEXT:    rldicl r3, r3, 16, 48
489; P8BE-NEXT:    clrlwi r8, r5, 16
490; P8BE-NEXT:    clrlwi r9, r6, 16
491; P8BE-NEXT:    clrlwi r10, r7, 16
492; P8BE-NEXT:    clrlwi r3, r3, 16
493; P8BE-NEXT:    mulhwu r8, r8, r4
494; P8BE-NEXT:    mulhwu r9, r9, r4
495; P8BE-NEXT:    mulhwu r10, r10, r4
496; P8BE-NEXT:    mulhwu r4, r3, r4
497; P8BE-NEXT:    mulli r11, r8, 95
498; P8BE-NEXT:    mtvsrwz v3, r8
499; P8BE-NEXT:    mtvsrwz v4, r9
500; P8BE-NEXT:    sub r5, r5, r11
501; P8BE-NEXT:    mulli r11, r9, 95
502; P8BE-NEXT:    mtvsrwz v5, r5
503; P8BE-NEXT:    sub r6, r6, r11
504; P8BE-NEXT:    mulli r11, r10, 95
505; P8BE-NEXT:    mtvsrwz v0, r6
506; P8BE-NEXT:    sub r7, r7, r11
507; P8BE-NEXT:    mulli r11, r4, 95
508; P8BE-NEXT:    mtvsrwz v1, r7
509; P8BE-NEXT:    sub r3, r3, r11
510; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
511; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
512; P8BE-NEXT:    lxvw4x v2, 0, r11
513; P8BE-NEXT:    vperm v5, v0, v5, v2
514; P8BE-NEXT:    mtvsrwz v0, r3
515; P8BE-NEXT:    vperm v3, v4, v3, v2
516; P8BE-NEXT:    mtvsrwz v4, r10
517; P8BE-NEXT:    vperm v0, v0, v1, v2
518; P8BE-NEXT:    mtvsrwz v1, r4
519; P8BE-NEXT:    vperm v2, v1, v4, v2
520; P8BE-NEXT:    xxmrghw v4, v0, v5
521; P8BE-NEXT:    xxmrghw v2, v2, v3
522; P8BE-NEXT:    vadduhm v2, v4, v2
523; P8BE-NEXT:    blr
; IR under test: remainder and quotient of the same operands, then their sum.
524  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
525  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
526  %3 = add <4 x i16> %1, %2
527  ret <4 x i16> %3
528}
529
530; Don't fold for divisors that are a power of two.
; Test: urem of a <4 x i16> vector by <64, 32, 8, 95>.
; The three power-of-two lanes are expected to become plain bit masks:
;   clrlwi ..., 26  keeps the low 6 bits (x mod 64)
;   clrlwi ..., 27  keeps the low 5 bits (x mod 32)
;   clrlwi ..., 29  keeps the low 3 bits (x mod 8)
; Only the lane with divisor 95 uses the mulhwu / mulli / sub sequence, so
; each configuration contains exactly one mulhwu.
531define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
532; P9LE-LABEL: dont_fold_urem_power_of_two:
533; P9LE:       # %bb.0:
534; P9LE-NEXT:    li r3, 0
535; P9LE-NEXT:    lis r4, 689
536; P9LE-NEXT:    vextuhrx r3, r3, v2
537; P9LE-NEXT:    ori r4, r4, 55879
538; P9LE-NEXT:    clrlwi r3, r3, 26
539; P9LE-NEXT:    mtvsrd v3, r3
540; P9LE-NEXT:    li r3, 2
541; P9LE-NEXT:    vextuhrx r3, r3, v2
542; P9LE-NEXT:    clrlwi r3, r3, 27
543; P9LE-NEXT:    mtvsrd v4, r3
544; P9LE-NEXT:    li r3, 6
545; P9LE-NEXT:    vextuhrx r3, r3, v2
546; P9LE-NEXT:    vmrghh v3, v4, v3
547; P9LE-NEXT:    clrlwi r3, r3, 16
548; P9LE-NEXT:    mulhwu r4, r3, r4
549; P9LE-NEXT:    mulli r4, r4, 95
550; P9LE-NEXT:    sub r3, r3, r4
551; P9LE-NEXT:    mtvsrd v4, r3
552; P9LE-NEXT:    li r3, 4
553; P9LE-NEXT:    vextuhrx r3, r3, v2
554; P9LE-NEXT:    clrlwi r3, r3, 29
555; P9LE-NEXT:    mtvsrd v2, r3
556; P9LE-NEXT:    vmrghh v2, v4, v2
557; P9LE-NEXT:    xxmrglw v2, v2, v3
558; P9LE-NEXT:    blr
559;
560; P9BE-LABEL: dont_fold_urem_power_of_two:
561; P9BE:       # %bb.0:
562; P9BE-NEXT:    li r3, 2
563; P9BE-NEXT:    lis r4, 689
564; P9BE-NEXT:    vextuhlx r3, r3, v2
565; P9BE-NEXT:    ori r4, r4, 55879
566; P9BE-NEXT:    clrlwi r3, r3, 27
567; P9BE-NEXT:    mtfprwz f0, r3
568; P9BE-NEXT:    li r3, 0
569; P9BE-NEXT:    vextuhlx r3, r3, v2
570; P9BE-NEXT:    clrlwi r3, r3, 26
571; P9BE-NEXT:    mtfprwz f1, r3
572; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
573; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
574; P9BE-NEXT:    lxv vs2, 0(r3)
575; P9BE-NEXT:    li r3, 6
576; P9BE-NEXT:    vextuhlx r3, r3, v2
577; P9BE-NEXT:    clrlwi r3, r3, 16
578; P9BE-NEXT:    xxperm vs0, vs1, vs2
579; P9BE-NEXT:    mulhwu r4, r3, r4
580; P9BE-NEXT:    mulli r4, r4, 95
581; P9BE-NEXT:    sub r3, r3, r4
582; P9BE-NEXT:    mtfprwz f1, r3
583; P9BE-NEXT:    li r3, 4
584; P9BE-NEXT:    vextuhlx r3, r3, v2
585; P9BE-NEXT:    clrlwi r3, r3, 29
586; P9BE-NEXT:    mtfprwz f3, r3
587; P9BE-NEXT:    xxperm vs1, vs3, vs2
588; P9BE-NEXT:    xxmrghw v2, vs0, vs1
589; P9BE-NEXT:    blr
590;
591; P8LE-LABEL: dont_fold_urem_power_of_two:
592; P8LE:       # %bb.0:
593; P8LE-NEXT:    xxswapd vs0, v2
594; P8LE-NEXT:    mffprd r3, f0
595; P8LE-NEXT:    clrldi r4, r3, 48
596; P8LE-NEXT:    clrlwi r4, r4, 26
597; P8LE-NEXT:    mtvsrd v2, r4
598; P8LE-NEXT:    rldicl r4, r3, 48, 48
599; P8LE-NEXT:    clrlwi r4, r4, 27
600; P8LE-NEXT:    mtvsrd v3, r4
601; P8LE-NEXT:    rldicl r4, r3, 32, 48
602; P8LE-NEXT:    rldicl r3, r3, 16, 48
603; P8LE-NEXT:    clrlwi r4, r4, 29
604; P8LE-NEXT:    clrlwi r3, r3, 16
605; P8LE-NEXT:    vmrghh v2, v3, v2
606; P8LE-NEXT:    mtvsrd v3, r4
607; P8LE-NEXT:    lis r4, 689
608; P8LE-NEXT:    ori r4, r4, 55879
609; P8LE-NEXT:    mulhwu r4, r3, r4
610; P8LE-NEXT:    mulli r4, r4, 95
611; P8LE-NEXT:    sub r3, r3, r4
612; P8LE-NEXT:    mtvsrd v4, r3
613; P8LE-NEXT:    vmrghh v3, v4, v3
614; P8LE-NEXT:    xxmrglw v2, v3, v2
615; P8LE-NEXT:    blr
616;
617; P8BE-LABEL: dont_fold_urem_power_of_two:
618; P8BE:       # %bb.0:
619; P8BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
620; P8BE-NEXT:    lis r5, 689
621; P8BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
622; P8BE-NEXT:    ori r5, r5, 55879
623; P8BE-NEXT:    lxvw4x v3, 0, r3
624; P8BE-NEXT:    mfvsrd r3, v2
625; P8BE-NEXT:    rldicl r4, r3, 32, 48
626; P8BE-NEXT:    clrlwi r4, r4, 27
627; P8BE-NEXT:    mtvsrwz v2, r4
628; P8BE-NEXT:    rldicl r4, r3, 16, 48
629; P8BE-NEXT:    clrlwi r4, r4, 26
630; P8BE-NEXT:    mtvsrwz v4, r4
631; P8BE-NEXT:    clrldi r4, r3, 48
632; P8BE-NEXT:    rldicl r3, r3, 48, 48
633; P8BE-NEXT:    clrlwi r4, r4, 16
634; P8BE-NEXT:    clrlwi r3, r3, 29
635; P8BE-NEXT:    mulhwu r5, r4, r5
636; P8BE-NEXT:    mtvsrwz v5, r3
637; P8BE-NEXT:    mulli r5, r5, 95
638; P8BE-NEXT:    sub r4, r4, r5
639; P8BE-NEXT:    vperm v2, v4, v2, v3
640; P8BE-NEXT:    mtvsrwz v4, r4
641; P8BE-NEXT:    vperm v3, v5, v4, v3
642; P8BE-NEXT:    xxmrghw v2, v2, v3
643; P8BE-NEXT:    blr
; IR under test: three power-of-two divisors plus one that is not.
644  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
645  ret <4 x i16> %1
646}
647
648; Don't fold if the divisor is one.
; Test: urem of a <4 x i16> vector by <1, 654, 23, 5423>.
; The lane whose divisor is 1 needs no arithmetic: the expected code loads
; the constant 0 (li r3, 0) and inserts it into the result vector. The other
; three lanes each use the mulhwu / mulli / sub sequence with their own magic
; constant, so each configuration contains exactly three mulhwu instructions.
649define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
650; P9LE-LABEL: dont_fold_urem_one:
651; P9LE:       # %bb.0:
652; P9LE-NEXT:    li r3, 4
653; P9LE-NEXT:    lis r4, 2849
654; P9LE-NEXT:    vextuhrx r3, r3, v2
655; P9LE-NEXT:    ori r4, r4, 25645
656; P9LE-NEXT:    clrlwi r3, r3, 16
657; P9LE-NEXT:    mulhwu r4, r3, r4
658; P9LE-NEXT:    mulli r4, r4, 23
659; P9LE-NEXT:    sub r3, r3, r4
660; P9LE-NEXT:    lis r4, 12
661; P9LE-NEXT:    mtvsrd v3, r3
662; P9LE-NEXT:    li r3, 6
663; P9LE-NEXT:    ori r4, r4, 5560
664; P9LE-NEXT:    vextuhrx r3, r3, v2
665; P9LE-NEXT:    clrlwi r3, r3, 16
666; P9LE-NEXT:    mulhwu r4, r3, r4
667; P9LE-NEXT:    mulli r4, r4, 5423
668; P9LE-NEXT:    sub r3, r3, r4
669; P9LE-NEXT:    lis r4, 100
670; P9LE-NEXT:    mtvsrd v4, r3
671; P9LE-NEXT:    li r3, 2
672; P9LE-NEXT:    ori r4, r4, 13629
673; P9LE-NEXT:    vextuhrx r3, r3, v2
674; P9LE-NEXT:    vmrghh v3, v4, v3
675; P9LE-NEXT:    clrlwi r3, r3, 16
676; P9LE-NEXT:    mulhwu r4, r3, r4
677; P9LE-NEXT:    mulli r4, r4, 654
678; P9LE-NEXT:    sub r3, r3, r4
679; P9LE-NEXT:    mtvsrd v2, r3
680; P9LE-NEXT:    li r3, 0
681; P9LE-NEXT:    mtvsrd v4, r3
682; P9LE-NEXT:    vmrghh v2, v2, v4
683; P9LE-NEXT:    xxmrglw v2, v3, v2
684; P9LE-NEXT:    blr
685;
686; P9BE-LABEL: dont_fold_urem_one:
687; P9BE:       # %bb.0:
688; P9BE-NEXT:    li r3, 6
689; P9BE-NEXT:    lis r4, 12
690; P9BE-NEXT:    vextuhlx r3, r3, v2
691; P9BE-NEXT:    ori r4, r4, 5560
692; P9BE-NEXT:    clrlwi r3, r3, 16
693; P9BE-NEXT:    mulhwu r4, r3, r4
694; P9BE-NEXT:    mulli r4, r4, 5423
695; P9BE-NEXT:    sub r3, r3, r4
696; P9BE-NEXT:    lis r4, 2849
697; P9BE-NEXT:    mtfprwz f0, r3
698; P9BE-NEXT:    li r3, 4
699; P9BE-NEXT:    ori r4, r4, 25645
700; P9BE-NEXT:    vextuhlx r3, r3, v2
701; P9BE-NEXT:    clrlwi r3, r3, 16
702; P9BE-NEXT:    mulhwu r4, r3, r4
703; P9BE-NEXT:    mulli r4, r4, 23
704; P9BE-NEXT:    sub r3, r3, r4
705; P9BE-NEXT:    lis r4, 100
706; P9BE-NEXT:    mtfprwz f1, r3
707; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
708; P9BE-NEXT:    ori r4, r4, 13629
709; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
710; P9BE-NEXT:    lxv vs2, 0(r3)
711; P9BE-NEXT:    li r3, 2
712; P9BE-NEXT:    vextuhlx r3, r3, v2
713; P9BE-NEXT:    clrlwi r3, r3, 16
714; P9BE-NEXT:    xxperm vs0, vs1, vs2
715; P9BE-NEXT:    mulhwu r4, r3, r4
716; P9BE-NEXT:    mulli r4, r4, 654
717; P9BE-NEXT:    sub r3, r3, r4
718; P9BE-NEXT:    mtfprwz f1, r3
719; P9BE-NEXT:    li r3, 0
720; P9BE-NEXT:    mtfprwz f3, r3
721; P9BE-NEXT:    xxperm vs1, vs3, vs2
722; P9BE-NEXT:    xxmrghw v2, vs1, vs0
723; P9BE-NEXT:    blr
724;
725; P8LE-LABEL: dont_fold_urem_one:
726; P8LE:       # %bb.0:
727; P8LE-NEXT:    xxswapd vs0, v2
728; P8LE-NEXT:    li r4, 0
729; P8LE-NEXT:    lis r5, 100
730; P8LE-NEXT:    lis r6, 2849
731; P8LE-NEXT:    mffprd r3, f0
732; P8LE-NEXT:    mtvsrd v2, r4
733; P8LE-NEXT:    ori r4, r5, 13629
734; P8LE-NEXT:    ori r5, r6, 25645
735; P8LE-NEXT:    rldicl r6, r3, 48, 48
736; P8LE-NEXT:    clrlwi r6, r6, 16
737; P8LE-NEXT:    mulhwu r4, r6, r4
738; P8LE-NEXT:    mulli r4, r4, 654
739; P8LE-NEXT:    sub r4, r6, r4
740; P8LE-NEXT:    rldicl r6, r3, 32, 48
741; P8LE-NEXT:    rldicl r3, r3, 16, 48
742; P8LE-NEXT:    mtvsrd v3, r4
743; P8LE-NEXT:    lis r4, 12
744; P8LE-NEXT:    clrlwi r6, r6, 16
745; P8LE-NEXT:    clrlwi r3, r3, 16
746; P8LE-NEXT:    ori r4, r4, 5560
747; P8LE-NEXT:    mulhwu r5, r6, r5
748; P8LE-NEXT:    mulhwu r4, r3, r4
749; P8LE-NEXT:    mulli r5, r5, 23
750; P8LE-NEXT:    mulli r4, r4, 5423
751; P8LE-NEXT:    sub r5, r6, r5
752; P8LE-NEXT:    sub r3, r3, r4
753; P8LE-NEXT:    mtvsrd v4, r3
754; P8LE-NEXT:    vmrghh v2, v3, v2
755; P8LE-NEXT:    mtvsrd v3, r5
756; P8LE-NEXT:    vmrghh v3, v4, v3
757; P8LE-NEXT:    xxmrglw v2, v3, v2
758; P8LE-NEXT:    blr
759;
760; P8BE-LABEL: dont_fold_urem_one:
761; P8BE:       # %bb.0:
762; P8BE-NEXT:    mfvsrd r3, v2
763; P8BE-NEXT:    addis r6, r2, .LCPI4_0@toc@ha
764; P8BE-NEXT:    lis r4, 12
765; P8BE-NEXT:    lis r5, 2849
766; P8BE-NEXT:    addi r6, r6, .LCPI4_0@toc@l
767; P8BE-NEXT:    ori r4, r4, 5560
768; P8BE-NEXT:    ori r5, r5, 25645
769; P8BE-NEXT:    lxvw4x v2, 0, r6
770; P8BE-NEXT:    clrldi r6, r3, 48
771; P8BE-NEXT:    clrlwi r6, r6, 16
772; P8BE-NEXT:    mulhwu r4, r6, r4
773; P8BE-NEXT:    mulli r4, r4, 5423
774; P8BE-NEXT:    sub r4, r6, r4
775; P8BE-NEXT:    rldicl r6, r3, 48, 48
776; P8BE-NEXT:    rldicl r3, r3, 32, 48
777; P8BE-NEXT:    clrlwi r6, r6, 16
778; P8BE-NEXT:    clrlwi r3, r3, 16
779; P8BE-NEXT:    mtvsrwz v3, r4
780; P8BE-NEXT:    mulhwu r5, r6, r5
781; P8BE-NEXT:    mulli r5, r5, 23
782; P8BE-NEXT:    sub r5, r6, r5
783; P8BE-NEXT:    lis r6, 100
784; P8BE-NEXT:    ori r6, r6, 13629
785; P8BE-NEXT:    mtvsrwz v4, r5
786; P8BE-NEXT:    mulhwu r6, r3, r6
787; P8BE-NEXT:    mulli r6, r6, 654
788; P8BE-NEXT:    sub r3, r3, r6
789; P8BE-NEXT:    vperm v3, v4, v3, v2
790; P8BE-NEXT:    mtvsrwz v4, r3
791; P8BE-NEXT:    li r3, 0
792; P8BE-NEXT:    mtvsrwz v5, r3
793; P8BE-NEXT:    vperm v2, v5, v4, v2
794; P8BE-NEXT:    xxmrghw v2, v2, v3
795; P8BE-NEXT:    blr
; IR under test: the first lane's divisor is 1, the rest are ordinary constants.
796  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
797  ret <4 x i16> %1
798}
799
800; Don't fold if the divisor is 2^16.
; Test: urem of a <4 x i16> vector by <1, 65536, 23, 5423>.
; The only expected output, shared by all four configurations, is a bare
; blr: no remainder computation is emitted for any lane.
; NOTE(review): 65536 is 2^16 and is not representable in i16, while the
; function name says "smax" (32767 for i16). Presumably the literal wraps to
; 0, the urem by zero is treated as undefined and the whole body folds away,
; which would explain the empty function -- confirm, and confirm which
; divisor was actually intended before regenerating the assertions.
801define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
802; CHECK-LABEL: dont_fold_urem_i16_smax:
803; CHECK:       # %bb.0:
804; CHECK-NEXT:    blr
805  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
806  ret <4 x i16> %1
807}
808
809; Don't fold i64 urem.
; Test: urem of a <4 x i64> vector by <1, 654, 23, 5423>.
; The argument and the result occupy two vector registers (v2 and v3).
; Each 64-bit lane with a divisor other than 1 is reduced in GPRs with a
; 64-bit multiply-high (mulhdu) by a magic constant assembled from
; lis/ori/rldic/oris/ori, followed by shifts, mulli by the divisor and sub;
; each configuration contains exactly three mulhdu instructions.
;   - divisor 654: the input is shifted right by one (rldicl ..., 63, 1)
;     before the multiply-high.
;   - divisor 23: the multiply-high is followed by a sub / shift / add
;     fix-up before the final shift.
; The lane with divisor 1 is the constant 0.
810define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
811; P9LE-LABEL: dont_fold_urem_i64:
812; P9LE:       # %bb.0:
813; P9LE-NEXT:    lis r4, 1602
814; P9LE-NEXT:    mfvsrld r3, v3
815; P9LE-NEXT:    ori r4, r4, 51289
816; P9LE-NEXT:    rldic r4, r4, 36, 1
817; P9LE-NEXT:    oris r4, r4, 45590
818; P9LE-NEXT:    ori r4, r4, 17097
819; P9LE-NEXT:    mulhdu r4, r3, r4
820; P9LE-NEXT:    sub r5, r3, r4
821; P9LE-NEXT:    rldicl r5, r5, 63, 1
822; P9LE-NEXT:    add r4, r5, r4
823; P9LE-NEXT:    lis r5, -16037
824; P9LE-NEXT:    rldicl r4, r4, 60, 4
825; P9LE-NEXT:    ori r5, r5, 28749
826; P9LE-NEXT:    mulli r4, r4, 23
827; P9LE-NEXT:    rldic r5, r5, 32, 0
828; P9LE-NEXT:    oris r5, r5, 52170
829; P9LE-NEXT:    ori r5, r5, 12109
830; P9LE-NEXT:    sub r3, r3, r4
831; P9LE-NEXT:    mfvsrd r4, v3
832; P9LE-NEXT:    mulhdu r5, r4, r5
833; P9LE-NEXT:    rldicl r5, r5, 52, 12
834; P9LE-NEXT:    mulli r5, r5, 5423
835; P9LE-NEXT:    sub r4, r4, r5
836; P9LE-NEXT:    lis r5, 3206
837; P9LE-NEXT:    ori r5, r5, 42889
838; P9LE-NEXT:    mtvsrdd v3, r4, r3
839; P9LE-NEXT:    mfvsrd r3, v2
840; P9LE-NEXT:    rldic r5, r5, 35, 1
841; P9LE-NEXT:    rldicl r4, r3, 63, 1
842; P9LE-NEXT:    oris r5, r5, 1603
843; P9LE-NEXT:    ori r5, r5, 21445
844; P9LE-NEXT:    mulhdu r4, r4, r5
845; P9LE-NEXT:    rldicl r4, r4, 57, 7
846; P9LE-NEXT:    mulli r4, r4, 654
847; P9LE-NEXT:    sub r3, r3, r4
848; P9LE-NEXT:    li r4, 0
849; P9LE-NEXT:    mtvsrdd v2, r3, r4
850; P9LE-NEXT:    blr
851;
852; P9BE-LABEL: dont_fold_urem_i64:
853; P9BE:       # %bb.0:
854; P9BE-NEXT:    lis r4, 1602
855; P9BE-NEXT:    mfvsrd r3, v3
856; P9BE-NEXT:    ori r4, r4, 51289
857; P9BE-NEXT:    rldic r4, r4, 36, 1
858; P9BE-NEXT:    oris r4, r4, 45590
859; P9BE-NEXT:    ori r4, r4, 17097
860; P9BE-NEXT:    mulhdu r4, r3, r4
861; P9BE-NEXT:    sub r5, r3, r4
862; P9BE-NEXT:    rldicl r5, r5, 63, 1
863; P9BE-NEXT:    add r4, r5, r4
864; P9BE-NEXT:    lis r5, -16037
865; P9BE-NEXT:    rldicl r4, r4, 60, 4
866; P9BE-NEXT:    ori r5, r5, 28749
867; P9BE-NEXT:    mulli r4, r4, 23
868; P9BE-NEXT:    rldic r5, r5, 32, 0
869; P9BE-NEXT:    oris r5, r5, 52170
870; P9BE-NEXT:    ori r5, r5, 12109
871; P9BE-NEXT:    sub r3, r3, r4
872; P9BE-NEXT:    mfvsrld r4, v3
873; P9BE-NEXT:    mulhdu r5, r4, r5
874; P9BE-NEXT:    rldicl r5, r5, 52, 12
875; P9BE-NEXT:    mulli r5, r5, 5423
876; P9BE-NEXT:    sub r4, r4, r5
877; P9BE-NEXT:    lis r5, 3206
878; P9BE-NEXT:    ori r5, r5, 42889
879; P9BE-NEXT:    mtvsrdd v3, r3, r4
880; P9BE-NEXT:    mfvsrld r3, v2
881; P9BE-NEXT:    rldic r5, r5, 35, 1
882; P9BE-NEXT:    rldicl r4, r3, 63, 1
883; P9BE-NEXT:    oris r5, r5, 1603
884; P9BE-NEXT:    ori r5, r5, 21445
885; P9BE-NEXT:    mulhdu r4, r4, r5
886; P9BE-NEXT:    rldicl r4, r4, 57, 7
887; P9BE-NEXT:    mulli r4, r4, 654
888; P9BE-NEXT:    sub r3, r3, r4
889; P9BE-NEXT:    mtvsrdd v2, 0, r3
890; P9BE-NEXT:    blr
891;
892; P8LE-LABEL: dont_fold_urem_i64:
893; P8LE:       # %bb.0:
894; P8LE-NEXT:    lis r3, 1602
895; P8LE-NEXT:    xxswapd vs0, v3
896; P8LE-NEXT:    lis r5, 3206
897; P8LE-NEXT:    mfvsrd r6, v2
898; P8LE-NEXT:    mfvsrd r8, v3
899; P8LE-NEXT:    ori r3, r3, 51289
900; P8LE-NEXT:    ori r5, r5, 42889
901; P8LE-NEXT:    rldic r4, r3, 36, 1
902; P8LE-NEXT:    mffprd r3, f0
903; P8LE-NEXT:    rldic r5, r5, 35, 1
904; P8LE-NEXT:    rldicl r7, r6, 63, 1
905; P8LE-NEXT:    oris r4, r4, 45590
906; P8LE-NEXT:    oris r5, r5, 1603
907; P8LE-NEXT:    ori r4, r4, 17097
908; P8LE-NEXT:    ori r5, r5, 21445
909; P8LE-NEXT:    mulhdu r4, r3, r4
910; P8LE-NEXT:    mulhdu r5, r7, r5
911; P8LE-NEXT:    sub r7, r3, r4
912; P8LE-NEXT:    rldicl r5, r5, 57, 7
913; P8LE-NEXT:    rldicl r7, r7, 63, 1
914; P8LE-NEXT:    mulli r5, r5, 654
915; P8LE-NEXT:    add r4, r7, r4
916; P8LE-NEXT:    lis r7, -16037
917; P8LE-NEXT:    ori r7, r7, 28749
918; P8LE-NEXT:    rldicl r4, r4, 60, 4
919; P8LE-NEXT:    sub r5, r6, r5
920; P8LE-NEXT:    rldic r7, r7, 32, 0
921; P8LE-NEXT:    mulli r4, r4, 23
922; P8LE-NEXT:    oris r7, r7, 52170
923; P8LE-NEXT:    ori r7, r7, 12109
924; P8LE-NEXT:    sub r3, r3, r4
925; P8LE-NEXT:    mulhdu r7, r8, r7
926; P8LE-NEXT:    mtfprd f1, r3
927; P8LE-NEXT:    li r3, 0
928; P8LE-NEXT:    rldicl r7, r7, 52, 12
929; P8LE-NEXT:    mulli r7, r7, 5423
930; P8LE-NEXT:    sub r7, r8, r7
931; P8LE-NEXT:    mtfprd f0, r7
932; P8LE-NEXT:    xxmrghd v3, vs0, vs1
933; P8LE-NEXT:    mtfprd f0, r5
934; P8LE-NEXT:    mtfprd f1, r3
935; P8LE-NEXT:    xxmrghd v2, vs0, vs1
936; P8LE-NEXT:    blr
937;
938; P8BE-LABEL: dont_fold_urem_i64:
939; P8BE:       # %bb.0:
940; P8BE-NEXT:    lis r3, 1602
941; P8BE-NEXT:    mfvsrd r4, v3
942; P8BE-NEXT:    lis r5, 3206
943; P8BE-NEXT:    xxswapd vs0, v2
944; P8BE-NEXT:    xxswapd vs1, v3
945; P8BE-NEXT:    ori r3, r3, 51289
946; P8BE-NEXT:    ori r5, r5, 42889
947; P8BE-NEXT:    mffprd r6, f0
948; P8BE-NEXT:    mffprd r8, f1
949; P8BE-NEXT:    rldic r3, r3, 36, 1
950; P8BE-NEXT:    rldic r5, r5, 35, 1
951; P8BE-NEXT:    oris r3, r3, 45590
952; P8BE-NEXT:    oris r5, r5, 1603
953; P8BE-NEXT:    rldicl r7, r6, 63, 1
954; P8BE-NEXT:    ori r3, r3, 17097
955; P8BE-NEXT:    ori r5, r5, 21445
956; P8BE-NEXT:    mulhdu r3, r4, r3
957; P8BE-NEXT:    mulhdu r5, r7, r5
958; P8BE-NEXT:    sub r7, r4, r3
959; P8BE-NEXT:    rldicl r5, r5, 57, 7
960; P8BE-NEXT:    rldicl r7, r7, 63, 1
961; P8BE-NEXT:    mulli r5, r5, 654
962; P8BE-NEXT:    add r3, r7, r3
963; P8BE-NEXT:    lis r7, -16037
964; P8BE-NEXT:    ori r7, r7, 28749
965; P8BE-NEXT:    rldicl r3, r3, 60, 4
966; P8BE-NEXT:    sub r5, r6, r5
967; P8BE-NEXT:    rldic r7, r7, 32, 0
968; P8BE-NEXT:    mulli r3, r3, 23
969; P8BE-NEXT:    oris r7, r7, 52170
970; P8BE-NEXT:    ori r7, r7, 12109
971; P8BE-NEXT:    sub r3, r4, r3
972; P8BE-NEXT:    mulhdu r7, r8, r7
973; P8BE-NEXT:    mtfprd f1, r3
974; P8BE-NEXT:    li r3, 0
975; P8BE-NEXT:    rldicl r7, r7, 52, 12
976; P8BE-NEXT:    mulli r7, r7, 5423
977; P8BE-NEXT:    sub r7, r8, r7
978; P8BE-NEXT:    mtfprd f0, r7
979; P8BE-NEXT:    xxmrghd v3, vs1, vs0
980; P8BE-NEXT:    mtfprd f0, r5
981; P8BE-NEXT:    mtfprd f1, r3
982; P8BE-NEXT:    xxmrghd v2, vs1, vs0
983; P8BE-NEXT:    blr
; IR under test: the same divisors as the i16 "divisor is one" case, on i64 lanes.
984  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
985  ret <4 x i64> %1
986}
987