;; VSX patterns.
;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])
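;; (A code iterator expands a single pattern into one pattern per code, so a
;; pattern written once over CMP_TEST is instantiated four times, once each
;; for eq, lt, gt and unordered.)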

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
                                  (TF   "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF        "FLOAT128_VECTOR_P (KFmode)")
                             (TF        "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF        "FLOAT128_VECTOR_P (KFmode)")
                             (TF        "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR  [(V8HI  "h")
                             (V4SI  "w")
                             (V4SF  "w")
                             (V2DF  "d")
                             (V2DI  "d")
                             (V1TI  "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
                        (V2DF  "vd2")
                        (V2DI  "vd2")
                        (DF    "d")
                        (TF    "vd2")
                        (KF    "vd2")
                        (V1TI  "vd2")
                        (TI    "vd2")])
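;; (For example, <VSm> selects "vw4" for V4SI and "vd2" for V2DF, so a
;; pattern spelled with lx<VSm>x/stx<VSm>x assembles as lxvw4x/stxvw4x or
;; lxvd2x/stxvd2x respectively.)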

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs   [(V16QI "sp")
                         (V8HI  "sp")
                         (V4SI  "sp")
                         (V4SF  "sp")
                         (V2DF  "dp")
                         (V2DI  "dp")
                         (DF    "dp")
                         (SF    "sp")
                         (TF    "dp")
                         (KF    "dp")
                         (V1TI  "dp")
                         (TI    "dp")])

;; Map the register class used
(define_mode_attr VSr   [(V16QI "v")
                         (V8HI  "v")
                         (V4SI  "v")
                         (V4SF  "wf")
                         (V2DI  "wd")
                         (V2DF  "wd")
                         (DI    "wi")
                         (DF    "ws")
                         (SF    "ww")
                         (TF    "wp")
                         (KF    "wq")
                         (V1TI  "v")
                         (TI    "wt")])

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2  [(V2DF  "wd")
                         (V4SF  "wf")
                         (DF    "ws")
                         (SF    "ww")
                         (DI    "wi")
                         (KF    "wq")
                         (TF    "wp")])

(define_mode_attr VSr3  [(V2DF  "wa")
                         (V4SF  "wa")
                         (DF    "ws")
                         (SF    "ww")
                         (DI    "wi")
                         (KF    "wq")
                         (TF    "wp")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4  [(SF    "ws")
                         (DF    "f")
                         (V2DF  "wd")
                         (V4SF  "v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5  [(SF    "ws")
                         (DF    "f")
                         (V2DF  "v")
                         (V4SF  "wd")])

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa   [(V16QI "wa")
                         (V8HI  "wa")
                         (V4SI  "wa")
                         (V4SF  "wa")
                         (V2DI  "wa")
                         (V2DF  "wa")
                         (DI    "wi")
                         (DF    "ws")
                         (SF    "ww")
                         (V1TI  "wa")
                         (TI    "wt")
                         (TF    "wp")
                         (KF    "wq")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r   [(V16QI "??r")
                         (V8HI  "??r")
                         (V4SI  "??r")
                         (V4SF  "??r")
                         (V2DI  "??r")
                         (V2DF  "??r")
                         (V1TI  "??r")
                         (KF    "??r")
                         (TF    "??r")
                         (TI    "r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW    [(V16QI "W")
                         (V8HI  "W")
                         (V4SI  "W")
                         (V4SF  "W")
                         (V2DI  "W")
                         (V2DF  "W")
                         (V1TI  "W")
                         (KF    "W")
                         (TF    "W")
                         (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF  "v4si")
                       (V2DF  "v2di")
                       (DF    "di")])

(define_mode_attr VSI [(V4SF  "V4SI")
                       (V2DF  "V2DI")
                       (DF    "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv   [(V16QI "v")
                         (V8HI  "v")
                         (V4SI  "v")
                         (V4SF  "v")
                         (V2DI  "v")
                         (V2DF  "v")
                         (V1TI  "v")
                         (DF    "s")
                         (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul    [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div    [(V2DF "vecdiv")
                                 (V4SF "vecfdiv")
                                 (DF   "ddiv")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
                                 (V4SF "ssqrt")
                                 (DF   "dsqrt")])

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF      "V4SF")
                               (V4SF    "V2DF")
                               (V2DF    "V4SF")])

(define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
                                (V4SF   "xvcvspdp")
                                (V2DF   "xvcvdpsp")])

(define_mode_attr VS_spdp_type [(DF     "fp")
                                (V4SF   "vecdouble")
                                (V2DF   "vecdouble")])
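;; (For example, instantiating a pattern over VSX_SPDP with V4SF picks the
;; result mode <VS_spdp_res> = V2DF and emits <VS_spdp_insn> = xvcvspdp.)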

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI      "TI")
                             (V2DF      "DF")
                             (V2DI      "DI")
                             (V4SF      "SF")
                             (V4SI      "SI")
                             (V8HI      "HI")
                             (V16QI     "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI      "V8SI")
                             (V4SF      "V8SF")
                             (V2DI      "V4DI")
                             (V2DF      "V4DF")
                             (V1TI      "V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF        "wk")
                           (V2DI        "wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF       "ws")
                            (V2DI       "wi")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])
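;; (For example, for V8HI these three attributes give element width "h",
;; element-number predicate const_0_to_7_operand for the eight elements, and
;; register constraint "v".)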

;; Mode iterator for binary floating point types other than double, to
;; optimize converting to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
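;; (Illustrative: a V8HI splat of the value sitting in the 64-bit slot can
;; use vsplt<VSX_SPLAT_SUFFIX> with element <VSX_SPLAT_COUNT>, i.e.
;; "vsplth %0,%1,3"; the V16QI variant uses "vspltb %0,%1,7".)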

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_CVSXWSP
   UNSPEC_VSX_CVUXWSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCDPSP
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
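;; (When the lvx path above is not taken, the swapping load typically ends up
;; as "lxvd2x %x0,%y1" followed by "xxpermdi %x0,%x0,%x0,2"; compare the asm
;; string of vsx_ld_elemrev_v1ti further down.)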

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")
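;; (After this split the store becomes, e.g. for V2DF on LE, something like
;;     xxpermdi %x1,%x1,%x1,2
;;     stxvd2x %x1,%y0
;;     xxpermdi %x1,%x1,%x1,2
;; so the still-live source register keeps its original contents.)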

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
   [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch register to register transfers for TImode if TImode
;; landed in VSX registers on a little endian system.  The vector types and
;; IEEE 128-bit floating point are handled by the more generic swap
;; elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
   [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])
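;; (For example, splatting the value -1 masks it to 255 via the & 0xff above,
;; emitting "xxspltib %x0,255" to set all sixteen bytes to 0xff.)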

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
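;; (Illustrative, with register numbers chosen arbitrarily:
;;     xxspltib 32,0        ; vs32 = 0, no input dependence
;;     xxlxor 32,32,32      ; also vs32 = 0, but reads vs32 first
;; the immediate form has no input operand to wait on.)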

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,       <??r>,     wo,        v,
                ?<VSa>,    v,         <??r>,     wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
                wQ,        Y,         r,         r,         wE,        jwM,
                ?jwM,      W,         <nW>,      v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
                store,     load,      store,     *,         vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,         8,         4,         8,
                8,         8,         8,         8,         4,         4,
                4,         20,        8,         4,         4")])

;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
                wo,        v,         ?<VSa>,    v,         <??r>,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
                wE,        jwM,       ?jwM,      W,         <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,      store,    *,
                vecsimple, vecsimple, vecsimple, *,         *,
                vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,         16,        16,        16,
                4,         4,         4,         20,        16,
                4,         4")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])
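;; (The vec_select above makes register element 0 come from memory element 1
;; and vice versa, i.e. the element-reversed order that lxvd2x produces on a
;; little endian system.)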

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
   return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register.  */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register.  */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register.  */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})
1526
1527(define_insn "*vsx_st_elemrev_v2di_internal"
1528  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1529        (vec_select:V2DI
1530          (match_operand:V2DI 1 "vsx_register_operand" "wa")
1531          (parallel [(const_int 1) (const_int 0)])))]
1532  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1533  "stxvd2x %x1,%y0"
1534  [(set_attr "type" "vecstore")])
1535
1536(define_insn "*vsx_st_elemrev_v8hi_internal"
1537  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1538        (vec_select:V8HI
1539          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1540          (parallel [(const_int 7) (const_int 6)
1541                     (const_int 5) (const_int 4)
1542                     (const_int 3) (const_int 2)
1543                     (const_int 1) (const_int 0)])))]
1544  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1545  "stxvh8x %x1,%y0"
1546  [(set_attr "type" "vecstore")])
1547
1548(define_expand "vsx_st_elemrev_v16qi"
1549  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1550        (vec_select:V16QI
1551          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1552          (parallel [(const_int 15) (const_int 14)
1553                     (const_int 13) (const_int 12)
1554                     (const_int 11) (const_int 10)
1555                     (const_int  9) (const_int  8)
1556                     (const_int  7) (const_int  6)
1557                     (const_int  5) (const_int  4)
1558                     (const_int  3) (const_int  2)
1559                     (const_int  1) (const_int  0)])))]
1560  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1561{
1562  if (!TARGET_P9_VECTOR)
1563    {
1564      rtx mem_subreg, subreg, perm[16], pcv;
1565      rtx tmp = gen_reg_rtx (V16QImode);
1566      /* 3 is leftmost element in register */
1567      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1568      int i;
1569
1570      for (i = 0; i < 16; ++i)
1571      	perm[i] = GEN_INT (reorder[i]);
1572
1573      pcv = force_reg (V16QImode,
1574                       gen_rtx_CONST_VECTOR (V16QImode,
1575                                             gen_rtvec_v (16, perm)));
1576      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1577                                                 operands[1], pcv));
1578      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1579      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1580      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1581      DONE;
1582    }
1583})
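;; (Likewise for V16QI above: reversing all sixteen bytes decomposes into a
;; byte rearrangement within each word, done by the vperm, followed by the
;; word reversal done by the element-reversing V4SI store.)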
1584
1585(define_insn "*vsx_st_elemrev_v16qi_internal"
1586  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1587        (vec_select:V16QI
1588          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1589          (parallel [(const_int 15) (const_int 14)
1590                     (const_int 13) (const_int 12)
1591                     (const_int 11) (const_int 10)
1592                     (const_int  9) (const_int  8)
1593                     (const_int  7) (const_int  6)
1594                     (const_int  5) (const_int  4)
1595                     (const_int  3) (const_int  2)
1596                     (const_int  1) (const_int  0)])))]
1597  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1598  "stxvb16x %x1,%y0"
1599  [(set_attr "type" "vecstore")])
1600
1601
1602;; VSX vector floating point arithmetic instructions.  The VSX scalar
1603;; instructions are now combined with the insn for the traditional floating
1604;; point unit.
1605(define_insn "*vsx_add<mode>3"
1606  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1607        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1608		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1609  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1610  "xvadd<VSs> %x0,%x1,%x2"
1611  [(set_attr "type" "<VStype_simple>")])
1612
1613(define_insn "*vsx_sub<mode>3"
1614  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1615        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1616		     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1617  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1618  "xvsub<VSs> %x0,%x1,%x2"
1619  [(set_attr "type" "<VStype_simple>")])
1620
1621(define_insn "*vsx_mul<mode>3"
1622  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1623        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1624		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1625  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1626  "xvmul<VSs> %x0,%x1,%x2"
1627  [(set_attr "type" "<VStype_simple>")])
1628
1629;; Emulate vector with scalar for vec_mul in V2DImode
1630(define_insn_and_split "vsx_mul_v2di"
1631  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1632        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1633                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1634                     UNSPEC_VSX_MULSD))]
1635  "VECTOR_MEM_VSX_P (V2DImode)"
1636  "#"
1637  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1638  [(const_int 0)]
1639{
1640  rtx op0 = operands[0];
1641  rtx op1 = operands[1];
1642  rtx op2 = operands[2];
1643  rtx op3 = gen_reg_rtx (DImode);
1644  rtx op4 = gen_reg_rtx (DImode);
1645  rtx op5 = gen_reg_rtx (DImode);
1646  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1647  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1648  if (TARGET_POWERPC64)
1649    emit_insn (gen_muldi3 (op5, op3, op4));
1650  else
1651    {
1652      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1653      emit_move_insn (op5, ret);
1654    }
1655  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1656  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1657  if (TARGET_POWERPC64)
1658    emit_insn (gen_muldi3 (op3, op3, op4));
1659  else
1660    {
1661      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1662      emit_move_insn (op3, ret);
1663    }
1664  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1665  DONE;
1666}
1667  [(set_attr "type" "mul")])
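;; In effect the split above computes, one doubleword at a time (sketch):
;;   dst[0] = src1[0] * src2[0];
;;   dst[1] = src1[1] * src2[1];
;; using mulld on 64-bit targets and the generic DImode multiply expansion
;; (expand_mult) on 32-bit targets.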
1668
1669(define_insn "*vsx_div<mode>3"
1670  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1671        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1672		   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1673  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1674  "xvdiv<VSs> %x0,%x1,%x2"
1675  [(set_attr "type" "<VStype_div>")])
1676
1677;; Emulate vector with scalar for vec_div in V2DImode
1678(define_insn_and_split "vsx_div_v2di"
1679  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1680        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1681                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1682                     UNSPEC_VSX_DIVSD))]
1683  "VECTOR_MEM_VSX_P (V2DImode)"
1684  "#"
1685  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1686  [(const_int 0)]
1687{
1688  rtx op0 = operands[0];
1689  rtx op1 = operands[1];
1690  rtx op2 = operands[2];
1691  rtx op3 = gen_reg_rtx (DImode);
1692  rtx op4 = gen_reg_rtx (DImode);
1693  rtx op5 = gen_reg_rtx (DImode);
1694  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1695  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1696  if (TARGET_POWERPC64)
1697    emit_insn (gen_divdi3 (op5, op3, op4));
1698  else
1699    {
1700      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1701      rtx target = emit_library_call_value (libfunc,
1702					    op5, LCT_NORMAL, DImode,
1703					    op3, DImode,
1704					    op4, DImode);
1705      emit_move_insn (op5, target);
1706    }
1707  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1708  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1709  if (TARGET_POWERPC64)
1710    emit_insn (gen_divdi3 (op3, op3, op4));
1711  else
1712    {
1713      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1714      rtx target = emit_library_call_value (libfunc,
1715					    op3, LCT_NORMAL, DImode,
1716					    op3, DImode,
1717					    op4, DImode);
1718      emit_move_insn (op3, target);
1719    }
1720  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1721  DONE;
1722}
1723  [(set_attr "type" "div")])
1724
1725(define_insn_and_split "vsx_udiv_v2di"
1726  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1727        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1728                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1729                     UNSPEC_VSX_DIVUD))]
1730  "VECTOR_MEM_VSX_P (V2DImode)"
1731  "#"
1732  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1733  [(const_int 0)]
1734{
1735  rtx op0 = operands[0];
1736  rtx op1 = operands[1];
1737  rtx op2 = operands[2];
1738  rtx op3 = gen_reg_rtx (DImode);
1739  rtx op4 = gen_reg_rtx (DImode);
1740  rtx op5 = gen_reg_rtx (DImode);
1741  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1742  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1743  if (TARGET_POWERPC64)
1744    emit_insn (gen_udivdi3 (op5, op3, op4));
1745  else
1746    {
1747      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1748      rtx target = emit_library_call_value (libfunc,
1749					    op5, LCT_NORMAL, DImode,
1750					    op3, DImode,
1751					    op4, DImode);
1752      emit_move_insn (op5, target);
1753    }
1754  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1755  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1756  if (TARGET_POWERPC64)
1757    emit_insn (gen_udivdi3 (op3, op3, op4));
1758  else
1759    {
1760      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1761      rtx target = emit_library_call_value (libfunc,
1762					    op3, LCT_NORMAL, DImode,
1763					    op3, DImode,
1764					    op4, DImode);
1765      emit_move_insn (op3, target);
1766    }
1767  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1768  DONE;
1769}
1770  [(set_attr "type" "div")])
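;; Both V2DI division patterns above follow the vsx_mul_v2di scheme: extract
;; the two doublewords, divide them as scalars (divd/divdu on 64-bit targets,
;; calls to the libgcc routines __divdi3/__udivdi3 otherwise), and
;; concatenate the two quotients.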
1771
1772;; *tdiv* instruction returning the FG flag
1773(define_expand "vsx_tdiv<mode>3_fg"
1774  [(set (match_dup 3)
1775	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1776		      (match_operand:VSX_B 2 "vsx_register_operand")]
1777		     UNSPEC_VSX_TDIV))
1778   (set (match_operand:SI 0 "gpc_reg_operand")
1779	(gt:SI (match_dup 3)
1780	       (const_int 0)))]
1781  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1782{
1783  operands[3] = gen_reg_rtx (CCFPmode);
1784})
1785
1786;; *tdiv* instruction returning the FE flag
1787(define_expand "vsx_tdiv<mode>3_fe"
1788  [(set (match_dup 3)
1789	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1790		      (match_operand:VSX_B 2 "vsx_register_operand")]
1791		     UNSPEC_VSX_TDIV))
1792   (set (match_operand:SI 0 "gpc_reg_operand")
1793	(eq:SI (match_dup 3)
1794	       (const_int 0)))]
1795  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1796{
1797  operands[3] = gen_reg_rtx (CCFPmode);
1798})
1799
1800(define_insn "*vsx_tdiv<mode>3_internal"
1801  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1802	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1803		      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1804		   UNSPEC_VSX_TDIV))]
1805  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1806  "x<VSv>tdiv<VSs> %0,%x1,%x2"
1807  [(set_attr "type" "<VStype_simple>")])
1808
1809(define_insn "vsx_fre<mode>2"
1810  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1811	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1812		      UNSPEC_FRES))]
1813  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1814  "xvre<VSs> %x0,%x1"
1815  [(set_attr "type" "<VStype_simple>")])
1816
1817(define_insn "*vsx_neg<mode>2"
1818  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1819        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1820  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1821  "xvneg<VSs> %x0,%x1"
1822  [(set_attr "type" "<VStype_simple>")])
1823
1824(define_insn "*vsx_abs<mode>2"
1825  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1826        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1827  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1828  "xvabs<VSs> %x0,%x1"
1829  [(set_attr "type" "<VStype_simple>")])
1830
1831(define_insn "vsx_nabs<mode>2"
1832  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1833        (neg:VSX_F
1834	 (abs:VSX_F
1835	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1836  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1837  "xvnabs<VSs> %x0,%x1"
1838  [(set_attr "type" "<VStype_simple>")])
1839
1840(define_insn "vsx_smax<mode>3"
1841  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1842        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1843		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1844  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1845  "xvmax<VSs> %x0,%x1,%x2"
1846  [(set_attr "type" "<VStype_simple>")])
1847
1848(define_insn "*vsx_smin<mode>3"
1849  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1850        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1851		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1852  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1853  "xvmin<VSs> %x0,%x1,%x2"
1854  [(set_attr "type" "<VStype_simple>")])
1855
1856(define_insn "*vsx_sqrt<mode>2"
1857  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1858        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1859  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1860  "xvsqrt<VSs> %x0,%x1"
1861  [(set_attr "type" "<VStype_sqrt>")])
1862
1863(define_insn "*vsx_rsqrte<mode>2"
1864  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1865	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1866		      UNSPEC_RSQRT))]
1867  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1868  "xvrsqrte<VSs> %x0,%x1"
1869  [(set_attr "type" "<VStype_simple>")])
1870
1871;; *tsqrt* returning the fg flag
1872(define_expand "vsx_tsqrt<mode>2_fg"
1873  [(set (match_dup 2)
1874	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1875		     UNSPEC_VSX_TSQRT))
1876   (set (match_operand:SI 0 "gpc_reg_operand")
1877	(gt:SI (match_dup 2)
1878	       (const_int 0)))]
1879  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1880{
1881  operands[2] = gen_reg_rtx (CCFPmode);
1882})
1883
1884;; *tsqrt* returning the fe flag
1885(define_expand "vsx_tsqrt<mode>2_fe"
1886  [(set (match_dup 2)
1887	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1888		     UNSPEC_VSX_TSQRT))
1889   (set (match_operand:SI 0 "gpc_reg_operand")
1890	(eq:SI (match_dup 2)
1891	       (const_int 0)))]
1892  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1893{
1894  operands[2] = gen_reg_rtx (CCFPmode);
1895})
1896
1897(define_insn "*vsx_tsqrt<mode>2_internal"
1898  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1899	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1900		     UNSPEC_VSX_TSQRT))]
1901  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1902  "x<VSv>tsqrt<VSs> %0,%x1"
1903  [(set_attr "type" "<VStype_simple>")])
1904
1905;; Fused vector multiply/add instructions.  Support the classical Altivec
1906;; versions of fma, which allow the target to be a separate register from the
1907;; three inputs.  Under VSX, the target must overlap either the addend or one
1908;; of the multiplicands.
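;; For reference, the ISA semantics assumed here: the "a" forms, e.g.
;; xvmaddadp xt,xa,xb, compute xt = xa*xb + xt, tying the target to the
;; addend, while the "m" forms, e.g. xvmaddmdp xt,xa,xb, compute
;; xt = xa*xt + xb, tying the target to a multiplicand.  The constraint
;; alternatives below select whichever form matches the operand that shares
;; the output register.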
1909
1910(define_insn "*vsx_fmav4sf4"
1911  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1912	(fma:V4SF
1913	  (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1914	  (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1915	  (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1916  "VECTOR_UNIT_VSX_P (V4SFmode)"
1917  "@
1918   xvmaddasp %x0,%x1,%x2
1919   xvmaddmsp %x0,%x1,%x3
1920   xvmaddasp %x0,%x1,%x2
1921   xvmaddmsp %x0,%x1,%x3
1922   vmaddfp %0,%1,%2,%3"
1923  [(set_attr "type" "vecfloat")])
1924
1925(define_insn "*vsx_fmav2df4"
1926  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1927	(fma:V2DF
1928	  (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1929	  (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1930	  (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1931  "VECTOR_UNIT_VSX_P (V2DFmode)"
1932  "@
1933   xvmaddadp %x0,%x1,%x2
1934   xvmaddmdp %x0,%x1,%x3
1935   xvmaddadp %x0,%x1,%x2
1936   xvmaddmdp %x0,%x1,%x3"
1937  [(set_attr "type" "vecdouble")])
1938
1939(define_insn "*vsx_fms<mode>4"
1940  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1941	(fma:VSX_F
1942	  (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1943	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1944	  (neg:VSX_F
1945	    (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1946  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1947  "@
1948   xvmsuba<VSs> %x0,%x1,%x2
1949   xvmsubm<VSs> %x0,%x1,%x3
1950   xvmsuba<VSs> %x0,%x1,%x2
1951   xvmsubm<VSs> %x0,%x1,%x3"
1952  [(set_attr "type" "<VStype_mul>")])
1953
1954(define_insn "*vsx_nfma<mode>4"
1955  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1956	(neg:VSX_F
1957	 (fma:VSX_F
1958	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1959	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1960	  (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1961  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1962  "@
1963   xvnmadda<VSs> %x0,%x1,%x2
1964   xvnmaddm<VSs> %x0,%x1,%x3
1965   xvnmadda<VSs> %x0,%x1,%x2
1966   xvnmaddm<VSs> %x0,%x1,%x3"
1967  [(set_attr "type" "<VStype_mul>")])
1968
1969(define_insn "*vsx_nfmsv4sf4"
1970  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1971	(neg:V4SF
1972	 (fma:V4SF
1973	   (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1974	   (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1975	   (neg:V4SF
1976	     (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1977  "VECTOR_UNIT_VSX_P (V4SFmode)"
1978  "@
1979   xvnmsubasp %x0,%x1,%x2
1980   xvnmsubmsp %x0,%x1,%x3
1981   xvnmsubasp %x0,%x1,%x2
1982   xvnmsubmsp %x0,%x1,%x3
1983   vnmsubfp %0,%1,%2,%3"
1984  [(set_attr "type" "vecfloat")])
1985
1986(define_insn "*vsx_nfmsv2df4"
1987  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1988	(neg:V2DF
1989	 (fma:V2DF
1990	   (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1991	   (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1992	   (neg:V2DF
1993	     (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1994  "VECTOR_UNIT_VSX_P (V2DFmode)"
1995  "@
1996   xvnmsubadp %x0,%x1,%x2
1997   xvnmsubmdp %x0,%x1,%x3
1998   xvnmsubadp %x0,%x1,%x2
1999   xvnmsubmdp %x0,%x1,%x3"
2000  [(set_attr "type" "vecdouble")])
2001
2002;; Vector conditional expressions (no scalar version for these instructions)
2003(define_insn "vsx_eq<mode>"
2004  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2005	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2006		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2007  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2008  "xvcmpeq<VSs> %x0,%x1,%x2"
2009  [(set_attr "type" "<VStype_simple>")])
2010
2011(define_insn "vsx_gt<mode>"
2012  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2013	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2014		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2015  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2016  "xvcmpgt<VSs> %x0,%x1,%x2"
2017  [(set_attr "type" "<VStype_simple>")])
2018
2019(define_insn "*vsx_ge<mode>"
2020  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2021	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2022		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2023  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2024  "xvcmpge<VSs> %x0,%x1,%x2"
2025  [(set_attr "type" "<VStype_simple>")])
2026
2027;; Compare vectors producing a vector result and a predicate, setting CR6 to
2028;; indicate a combined status
2029(define_insn "*vsx_eq_<mode>_p"
2030  [(set (reg:CC CR6_REGNO)
2031	(unspec:CC
2032	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2033		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2034	 UNSPEC_PREDICATE))
2035   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2036	(eq:VSX_F (match_dup 1)
2037		  (match_dup 2)))]
2038  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2039  "xvcmpeq<VSs>. %x0,%x1,%x2"
2040  [(set_attr "type" "<VStype_simple>")])
2041
2042(define_insn "*vsx_gt_<mode>_p"
2043  [(set (reg:CC CR6_REGNO)
2044	(unspec:CC
2045	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2046		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2047	 UNSPEC_PREDICATE))
2048   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2049	(gt:VSX_F (match_dup 1)
2050		  (match_dup 2)))]
2051  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2052  "xvcmpgt<VSs>. %x0,%x1,%x2"
2053  [(set_attr "type" "<VStype_simple>")])
2054
2055(define_insn "*vsx_ge_<mode>_p"
2056  [(set (reg:CC CR6_REGNO)
2057	(unspec:CC
2058	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2059		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2060	 UNSPEC_PREDICATE))
2061   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2062	(ge:VSX_F (match_dup 1)
2063		  (match_dup 2)))]
2064  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2065  "xvcmpge<VSs>. %x0,%x1,%x2"
2066  [(set_attr "type" "<VStype_simple>")])
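;; As on AltiVec, the record-form compares above also set CR6: bit 0 of the
;; field is set when the comparison is true for all elements and bit 2 when
;; it is true for none, which is what the vec_all_* and vec_any_* builtins
;; test.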
2067
2068;; Vector select
2069(define_insn "*vsx_xxsel<mode>"
2070  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2071	(if_then_else:VSX_L
2072	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2073		(match_operand:VSX_L 4 "zero_constant" ""))
2074	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2075	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2076  "VECTOR_MEM_VSX_P (<MODE>mode)"
2077  "xxsel %x0,%x3,%x2,%x1"
2078  [(set_attr "type" "vecmove")])
2079
2080(define_insn "*vsx_xxsel<mode>_uns"
2081  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2082	(if_then_else:VSX_L
2083	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2084		   (match_operand:VSX_L 4 "zero_constant" ""))
2085	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2086	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2087  "VECTOR_MEM_VSX_P (<MODE>mode)"
2088  "xxsel %x0,%x3,%x2,%x1"
2089  [(set_attr "type" "vecmove")])
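;; Note on the operand order (xxsel semantics assumed here): xxsel
;; xt,xa,xb,xc computes xt = (xa & ~xc) | (xb & xc), picking bits from xb
;; where the mask xc is set.  Operand 1 is the mask and operand 2 the value
;; wanted where the mask is set, so the templates place the "else" value
;; %x3 in the xa slot.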
2090
2091;; Copy sign
2092(define_insn "vsx_copysign<mode>3"
2093  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2094	(unspec:VSX_F
2095	 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2096	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2097	 UNSPEC_COPYSIGN))]
2098  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2099  "xvcpsgn<VSs> %x0,%x2,%x1"
2100  [(set_attr "type" "<VStype_simple>")])
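;; Note (assumed ISA semantics): xvcpsgndp xt,xa,xb takes the sign bits from
;; xa and the magnitudes from xb, which is why the sign source %x2 comes
;; first in the template above.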
2101
2102;; For the conversions, limit the register class for the integer value to be
2103;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2104;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2105;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2106;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2107;; in allowing virtual registers.
2108(define_insn "vsx_float<VSi><mode>2"
2109  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2110	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2111  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2112  "xvcvsx<VSc><VSs> %x0,%x1"
2113  [(set_attr "type" "<VStype_simple>")])
2114
2115(define_insn "vsx_floatuns<VSi><mode>2"
2116  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2117	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2118  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2119  "xvcvux<VSc><VSs> %x0,%x1"
2120  [(set_attr "type" "<VStype_simple>")])
2121
2122(define_insn "vsx_fix_trunc<mode><VSi>2"
2123  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2124	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2125  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2126  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2127  [(set_attr "type" "<VStype_simple>")])
2128
2129(define_insn "vsx_fixuns_trunc<mode><VSi>2"
2130  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2131	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2132  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2133  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2134  [(set_attr "type" "<VStype_simple>")])
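;; For example, with <MODE> = V2DF the two trunc patterns above assemble to
;; xvcvdpsxds and xvcvdpuxds, and with V4SF to xvcvspsxws and xvcvspuxws.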
2135
2136;; Math rounding functions
2137(define_insn "vsx_x<VSv>r<VSs>i"
2138  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2139	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2140		      UNSPEC_VSX_ROUND_I))]
2141  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2142  "x<VSv>r<VSs>i %x0,%x1"
2143  [(set_attr "type" "<VStype_simple>")])
2144
2145(define_insn "vsx_x<VSv>r<VSs>ic"
2146  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2147	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2148		      UNSPEC_VSX_ROUND_IC))]
2149  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2150  "x<VSv>r<VSs>ic %x0,%x1"
2151  [(set_attr "type" "<VStype_simple>")])
2152
2153(define_insn "vsx_btrunc<mode>2"
2154  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2155	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2156  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2157  "xvr<VSs>iz %x0,%x1"
2158  [(set_attr "type" "<VStype_simple>")])
2159
2160(define_insn "*vsx_b2trunc<mode>2"
2161  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2162	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2163		      UNSPEC_FRIZ))]
2164  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2165  "x<VSv>r<VSs>iz %x0,%x1"
2166  [(set_attr "type" "<VStype_simple>")])
2167
2168(define_insn "vsx_floor<mode>2"
2169  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2170	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2171		      UNSPEC_FRIM))]
2172  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2173  "xvr<VSs>im %x0,%x1"
2174  [(set_attr "type" "<VStype_simple>")])
2175
2176(define_insn "vsx_ceil<mode>2"
2177  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2178	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2179		      UNSPEC_FRIP))]
2180  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2181  "xvr<VSs>ip %x0,%x1"
2182  [(set_attr "type" "<VStype_simple>")])
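;; A quick map of the rounding forms (assuming the usual ISA mnemonics):
;; x<VSv>r<VSs>i rounds to nearest away from zero, x<VSv>r<VSs>ic rounds
;; according to the current FPSCR[RN] mode, and the iz/im/ip forms above
;; implement trunc, floor and ceil respectively.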
2183
2184
2185;; VSX convert to/from double vector
2186
2187;; Convert between single and double precision
2188;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2189;; scalar single precision instructions internally use the double format.
2190;; Prefer the altivec registers, since we likely will need to do a vperm
2191(define_insn "vsx_<VS_spdp_insn>"
2192  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2193	(unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2194			      UNSPEC_VSX_CVSPDP))]
2195  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2196  "<VS_spdp_insn> %x0,%x1"
2197  [(set_attr "type" "<VS_spdp_type>")])
2198
2199;; xscvspdp, represent the scalar SF type as V4SF
2200(define_insn "vsx_xscvspdp"
2201  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2202	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2203		   UNSPEC_VSX_CVSPDP))]
2204  "VECTOR_UNIT_VSX_P (V4SFmode)"
2205  "xscvspdp %x0,%x1"
2206  [(set_attr "type" "fp")])
2207
2208;; Same as vsx_xscvspdp, but use SF as the type
2209(define_insn "vsx_xscvspdp_scalar2"
2210  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2211	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2212		   UNSPEC_VSX_CVSPDP))]
2213  "VECTOR_UNIT_VSX_P (V4SFmode)"
2214  "xscvspdp %x0,%x1"
2215  [(set_attr "type" "fp")])
2216
2217;; Generate xvcvhpsp instruction
2218(define_insn "vsx_xvcvhpsp"
2219  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2220	(unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2221		     UNSPEC_VSX_CVHPSP))]
2222  "TARGET_P9_VECTOR"
2223  "xvcvhpsp %x0,%x1"
2224  [(set_attr "type" "vecfloat")])
2225
2226;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2227;; format of scalars is actually DF.
2228(define_insn "vsx_xscvdpsp_scalar"
2229  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2230	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2231		     UNSPEC_VSX_CVSPDP))]
2232  "VECTOR_UNIT_VSX_P (V4SFmode)"
2233  "xscvdpsp %x0,%x1"
2234  [(set_attr "type" "fp")])
2235
2236;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2237(define_insn "vsx_xscvdpspn"
2238  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2239	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2240		     UNSPEC_VSX_CVDPSPN))]
2241  "TARGET_XSCVDPSPN"
2242  "xscvdpspn %x0,%x1"
2243  [(set_attr "type" "fp")])
2244
2245(define_insn "vsx_xscvspdpn"
2246  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2247	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2248		   UNSPEC_VSX_CVSPDPN))]
2249  "TARGET_XSCVSPDPN"
2250  "xscvspdpn %x0,%x1"
2251  [(set_attr "type" "fp")])
2252
2253(define_insn "vsx_xscvdpspn_scalar"
2254  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2255	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2256		     UNSPEC_VSX_CVDPSPN))]
2257  "TARGET_XSCVDPSPN"
2258  "xscvdpspn %x0,%x1"
2259  [(set_attr "type" "fp")])
2260
2261;; Used by direct move to move a SFmode value from GPR to VSX register
2262(define_insn "vsx_xscvspdpn_directmove"
2263  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2264	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2265		   UNSPEC_VSX_CVSPDPN))]
2266  "TARGET_XSCVSPDPN"
2267  "xscvspdpn %x0,%x1"
2268  [(set_attr "type" "fp")])
2269
2270;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2271
2272(define_expand "vsx_xvcvsxddp_scale"
2273  [(match_operand:V2DF 0 "vsx_register_operand")
2274   (match_operand:V2DI 1 "vsx_register_operand")
2275   (match_operand:QI 2 "immediate_operand")]
2276  "VECTOR_UNIT_VSX_P (V2DFmode)"
2277{
2278  rtx op0 = operands[0];
2279  rtx op1 = operands[1];
2280  int scale = INTVAL (operands[2]);
2281  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2282  if (scale != 0)
2283    rs6000_scale_v2df (op0, op0, -scale);
2284  DONE;
2285})
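;; A note on the scale expanders (sketch, assuming rs6000_scale_v2df
;; (x, y, n) computes x = y * 2**n): vec_ctf (v, n) is (double) v * 2**-n,
;; so the expander converts first and then scales by -n; the unsigned
;; variant below differs only in the convert instruction.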
2286
2287(define_insn "vsx_xvcvsxddp"
2288  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2289        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2290                     UNSPEC_VSX_XVCVSXDDP))]
2291  "VECTOR_UNIT_VSX_P (V2DFmode)"
2292  "xvcvsxddp %x0,%x1"
2293  [(set_attr "type" "vecdouble")])
2294
2295(define_expand "vsx_xvcvuxddp_scale"
2296  [(match_operand:V2DF 0 "vsx_register_operand")
2297   (match_operand:V2DI 1 "vsx_register_operand")
2298   (match_operand:QI 2 "immediate_operand")]
2299  "VECTOR_UNIT_VSX_P (V2DFmode)"
2300{
2301  rtx op0 = operands[0];
2302  rtx op1 = operands[1];
2303  int scale = INTVAL (operands[2]);
2304  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2305  if (scale != 0)
2306    rs6000_scale_v2df (op0, op0, -scale);
2307  DONE;
2308})
2309
2310(define_insn "vsx_xvcvuxddp"
2311  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2312        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2313                     UNSPEC_VSX_XVCVUXDDP))]
2314  "VECTOR_UNIT_VSX_P (V2DFmode)"
2315  "xvcvuxddp %x0,%x1"
2316  [(set_attr "type" "vecdouble")])
2317
2318(define_expand "vsx_xvcvdpsxds_scale"
2319  [(match_operand:V2DI 0 "vsx_register_operand")
2320   (match_operand:V2DF 1 "vsx_register_operand")
2321   (match_operand:QI 2 "immediate_operand")]
2322  "VECTOR_UNIT_VSX_P (V2DFmode)"
2323{
2324  rtx op0 = operands[0];
2325  rtx op1 = operands[1];
2326  rtx tmp;
2327  int scale = INTVAL (operands[2]);
2328  if (scale == 0)
2329    tmp = op1;
2330  else
2331    {
2332      tmp  = gen_reg_rtx (V2DFmode);
2333      rs6000_scale_v2df (tmp, op1, scale);
2334    }
2335  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2336  DONE;
2337})
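;; Conversely (sketch), vec_cts (v, n) is (long long) (v * 2**n), so here
;; the input is scaled by n before the convert; vsx_xvcvdpuxds_scale below
;; follows the same pattern for the unsigned case.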
2338
2339;; convert vector of 64-bit floating point numbers to vector of
2340;; 64-bit signed integer
2341(define_insn "vsx_xvcvdpsxds"
2342  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2343        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2344                     UNSPEC_VSX_XVCVDPSXDS))]
2345  "VECTOR_UNIT_VSX_P (V2DFmode)"
2346  "xvcvdpsxds %x0,%x1"
2347  [(set_attr "type" "vecdouble")])
2348
2349;; convert vector of 32-bit floating point numbers to vector of
2350;; 32-bit signed integer
2351(define_insn "vsx_xvcvspsxws"
2352  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2353	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2354		     UNSPEC_VSX_XVCVSPSXWS))]
2355  "VECTOR_UNIT_VSX_P (V4SFmode)"
2356  "xvcvspsxws %x0,%x1"
2357  [(set_attr "type" "vecfloat")])
2358
2359;; convert vector of 64-bit floating point numbers to vector of
2360;; 64-bit unsigned integer
2361(define_expand "vsx_xvcvdpuxds_scale"
2362  [(match_operand:V2DI 0 "vsx_register_operand")
2363   (match_operand:V2DF 1 "vsx_register_operand")
2364   (match_operand:QI 2 "immediate_operand")]
2365  "VECTOR_UNIT_VSX_P (V2DFmode)"
2366{
2367  rtx op0 = operands[0];
2368  rtx op1 = operands[1];
2369  rtx tmp;
2370  int scale = INTVAL (operands[2]);
2371  if (scale == 0)
2372    tmp = op1;
2373  else
2374    {
2375      tmp = gen_reg_rtx (V2DFmode);
2376      rs6000_scale_v2df (tmp, op1, scale);
2377    }
2378  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2379  DONE;
2380})
2381
2382;; convert vector of 32-bit floating point numbers to vector of
2383;; 32-bit unsigned integer
2384(define_insn "vsx_xvcvspuxws"
2385  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2386	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2387		     UNSPEC_VSX_XVCVSPSXWS))]
2388  "VECTOR_UNIT_VSX_P (V4SFmode)"
2389  "xvcvspuxws %x0,%x1"
2390  [(set_attr "type" "vecfloat")])
2391
2392(define_insn "vsx_xvcvdpuxds"
2393  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2394        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2395                     UNSPEC_VSX_XVCVDPUXDS))]
2396  "VECTOR_UNIT_VSX_P (V2DFmode)"
2397  "xvcvdpuxds %x0,%x1"
2398  [(set_attr "type" "vecdouble")])
2399
2400;; Convert from 64-bit to 32-bit types
2401;; Note, favor the Altivec registers since the usual use of these instructions
2402;; is in vector converts and we need to use the Altivec vperm instruction.
2403
2404(define_insn "vsx_xvcvdpsxws"
2405  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2406	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2407		     UNSPEC_VSX_CVDPSXWS))]
2408  "VECTOR_UNIT_VSX_P (V2DFmode)"
2409  "xvcvdpsxws %x0,%x1"
2410  [(set_attr "type" "vecdouble")])
2411
2412(define_insn "vsx_xvcvdpuxws"
2413  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2414	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2415		     UNSPEC_VSX_CVDPUXWS))]
2416  "VECTOR_UNIT_VSX_P (V2DFmode)"
2417  "xvcvdpuxws %x0,%x1"
2418  [(set_attr "type" "vecdouble")])
2419
2420(define_insn "vsx_xvcvsxdsp"
2421  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2422	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2423		     UNSPEC_VSX_CVSXDSP))]
2424  "VECTOR_UNIT_VSX_P (V2DFmode)"
2425  "xvcvsxdsp %x0,%x1"
2426  [(set_attr "type" "vecfloat")])
2427
2428(define_insn "vsx_xvcvuxdsp"
2429  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2430	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2431		     UNSPEC_VSX_CVUXDSP))]
2432  "VECTOR_UNIT_VSX_P (V2DFmode)"
2433  "xvcvuxdsp %x0,%x1"
2434  [(set_attr "type" "vecdouble")])
2435
2436(define_insn "vsx_xvcdpsp"
2437  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2438	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2439		     UNSPEC_VSX_XVCDPSP))]
2440  "VECTOR_UNIT_VSX_P (V2DFmode)"
2441  "xvcvdpsp %x0,%x1"
2442  [(set_attr "type" "vecdouble")])
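;; (Note: the pattern name vsx_xvcdpsp, with its missing "v", is historical;
;; the instruction emitted is xvcvdpsp.)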
2443
2444;; Convert from 32-bit to 64-bit types
2445;; Provide both vector and scalar targets
2446(define_insn "vsx_xvcvsxwdp"
2447  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2448	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2449		     UNSPEC_VSX_CVSXWDP))]
2450  "VECTOR_UNIT_VSX_P (V2DFmode)"
2451  "xvcvsxwdp %x0,%x1"
2452  [(set_attr "type" "vecdouble")])
2453
2454(define_insn "vsx_xvcvsxwdp_df"
2455  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2456	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2457		   UNSPEC_VSX_CVSXWDP))]
2458  "TARGET_VSX"
2459  "xvcvsxwdp %x0,%x1"
2460  [(set_attr "type" "vecdouble")])
2461
2462(define_insn "vsx_xvcvuxwdp"
2463  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2464	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2465		     UNSPEC_VSX_CVUXWDP))]
2466  "VECTOR_UNIT_VSX_P (V2DFmode)"
2467  "xvcvuxwdp %x0,%x1"
2468  [(set_attr "type" "vecdouble")])
2469
2470(define_insn "vsx_xvcvuxwdp_df"
2471  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2472	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2473		   UNSPEC_VSX_CVUXWDP))]
2474  "TARGET_VSX"
2475  "xvcvuxwdp %x0,%x1"
2476  [(set_attr "type" "vecdouble")])
2477
2478(define_insn "vsx_xvcvspsxds"
2479  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2480	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2481		     UNSPEC_VSX_CVSPSXDS))]
2482  "VECTOR_UNIT_VSX_P (V2DFmode)"
2483  "xvcvspsxds %x0,%x1"
2484  [(set_attr "type" "vecdouble")])
2485
2486(define_insn "vsx_xvcvspuxds"
2487  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2488	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2489		     UNSPEC_VSX_CVSPUXDS))]
2490  "VECTOR_UNIT_VSX_P (V2DFmode)"
2491  "xvcvspuxds %x0,%x1"
2492  [(set_attr "type" "vecdouble")])
2493
2494(define_insn "vsx_xvcvsxwsp"
2495  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2496	(unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2497		     UNSPEC_VSX_CVSXWSP))]
2498  "VECTOR_UNIT_VSX_P (V4SFmode)"
2499  "xvcvsxwsp %x0,%x1"
2500  [(set_attr "type" "vecfloat")])
2501
2502(define_insn "vsx_xvcvuxwsp"
2503  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2504	(unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2505		     UNSPEC_VSX_CVUXWSP))]
2506  "VECTOR_UNIT_VSX_P (V4SFmode)"
2507  "xvcvuxwsp %x0,%x1"
2508  [(set_attr "type" "vecfloat")])
2509
2510;; Generate float2_v2df
2511;; convert two vectors of doubles to a vector of floats
2512(define_expand "float2_v2df"
2513  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2514   (use (match_operand:V2DF 1 "register_operand" "wa"))
2515   (use (match_operand:V2DF 2 "register_operand" "wa"))]
2516 "VECTOR_UNIT_VSX_P (V4SFmode)"
2517{
2518  rtx rtx_src1, rtx_src2, rtx_dst;
2519
2520  rtx_dst = operands[0];
2521  rtx_src1 = operands[1];
2522  rtx_src2 = operands[2];
2523
2524  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2525  DONE;
2526})
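;; rs6000_generate_float2_double_code emits the actual sequence; the result
;; is, in effect (sketch), { (float) src1[0], (float) src1[1],
;; (float) src2[0], (float) src2[1] }.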
2527
2528;; Generate float2
2529;; convert two long long signed ints to float
2530(define_expand "float2_v2di"
2531  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2532   (use (match_operand:V2DI 1 "register_operand" "wa"))
2533   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2534 "VECTOR_UNIT_VSX_P (V4SFmode)"
2535{
2536  rtx rtx_src1, rtx_src2, rtx_dst;
2537
2538  rtx_dst = operands[0];
2539  rtx_src1 = operands[1];
2540  rtx_src2 = operands[2];
2541
2542  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2543  DONE;
2544})
2545
2546;; Generate uns_float2
2547;; convert two long long unsigned ints to float
2548(define_expand "uns_float2_v2di"
2549  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2550   (use (match_operand:V2DI 1 "register_operand" "wa"))
2551   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2552 "VECTOR_UNIT_VSX_P (V4SFmode)"
2553{
2554  rtx rtx_src1, rtx_src2, rtx_dst;
2555
2556  rtx_dst = operands[0];
2557  rtx_src1 = operands[1];
2558  rtx_src2 = operands[2];
2559
2560  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2561  DONE;
2562})
2563
2564;; Generate floate
2565;; convert double or long long signed to float
2566;; (Only even words are valid, BE numbering)
2567(define_expand "floate<mode>"
2568  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2569   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2570  "VECTOR_UNIT_VSX_P (V4SFmode)"
2571{
2572  if (BYTES_BIG_ENDIAN)
2573    {
2574      /* Shift left one word to put the even words in the correct location.  */
2575      rtx rtx_tmp;
2576      rtx rtx_val = GEN_INT (4);
2577
2578      rtx_tmp = gen_reg_rtx (V4SFmode);
2579      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2580      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2581		 rtx_tmp, rtx_tmp, rtx_val));
2582    }
2583  else
2584    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2585
2586  DONE;
2587})
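;; That is (sketch): the two converted values end up in the even words of
;; the V4SF result in big-endian numbering, with the odd words undefined;
;; the vsldoi by 4 bytes only repositions what xvcv<VF_sxddp>sp produces on
;; big-endian targets.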
2588
2589;; Generate uns_floate
2590;; convert long long unsigned to float
2591;; (Only even words are valid, BE numbering)
2592(define_expand "unsfloatev2di"
2593  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2594   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2595  "VECTOR_UNIT_VSX_P (V4SFmode)"
2596{
2597  if (BYTES_BIG_ENDIAN)
2598    {
2599      /* Shift left one word to put the even words in the correct location.  */
2600      rtx rtx_tmp;
2601      rtx rtx_val = GEN_INT (4);
2602
2603      rtx_tmp = gen_reg_rtx (V4SFmode);
2604      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2605      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2606		 rtx_tmp, rtx_tmp, rtx_val));
2607    }
2608  else
2609    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2610
2611  DONE;
2612})
2613
2614;; Generate floato
2615;; convert double or long long signed to float
2616;; (Only odd words are valid, BE numbering)
2617(define_expand "floato<mode>"
2618  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2619   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2620  "VECTOR_UNIT_VSX_P (V4SFmode)"
2621{
2622  if (BYTES_BIG_ENDIAN)
2623    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2624  else
2625    {
2626      /* Shift left one word to put the odd words in the correct location.  */
2627      rtx rtx_tmp;
2628      rtx rtx_val = GEN_INT (4);
2629
2630      rtx_tmp = gen_reg_rtx (V4SFmode);
2631      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2632      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2633		 rtx_tmp, rtx_tmp, rtx_val));
2634    }
2635  DONE;
2636})
2637
2638;; Generate uns_floato
2639;; convert long long unsigned to float
2640;; (Only odd words are valid, BE numbering)
2641(define_expand "unsfloatov2di"
2642 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2643  (use (match_operand:V2DI 1 "register_operand" "wa"))]
2644 "VECTOR_UNIT_VSX_P (V4SFmode)"
2645{
2646  if (BYTES_BIG_ENDIAN)
2647    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2648  else
2649    {
2650      /* Shift left one word to put the odd words in the correct location.  */
2651      rtx rtx_tmp;
2652      rtx rtx_val = GEN_INT (4);
2653
2654      rtx_tmp = gen_reg_rtx (V4SFmode);
2655      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2656      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2657		 rtx_tmp, rtx_tmp, rtx_val));
2658    }
2659  DONE;
2660})
2661
2662;; Generate vsigned2
2663;; convert two double float vectors to a vector of single precision ints
2664(define_expand "vsigned2_v2df"
2665  [(match_operand:V4SI 0 "register_operand" "=wa")
2666   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2667		 (match_operand:V2DF 2 "register_operand" "wa")]
2668  UNSPEC_VSX_VSIGNED2)]
2669  "TARGET_VSX"
2670{
2671  rtx rtx_src1, rtx_src2, rtx_dst;
2672  bool signed_convert = true;
2673
2674  rtx_dst = operands[0];
2675  rtx_src1 = operands[1];
2676  rtx_src2 = operands[2];
2677
2678  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2679  DONE;
2680})
2681
2682;; Generate vsignedo_v2df
2683;; signed double float to int convert odd word
2684(define_expand "vsignedo_v2df"
2685  [(set (match_operand:V4SI 0 "register_operand" "=wa")
2686	(match_operand:V2DF 1 "register_operand" "wa"))]
2687  "TARGET_VSX"
2688{
2689  if (BYTES_BIG_ENDIAN)
2690    {
2691      rtx rtx_tmp;
2692      rtx rtx_val = GEN_INT (12);
2693      rtx_tmp = gen_reg_rtx (V4SImode);
2694
2695      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2696
2697      /* Big endian word numbering for words in operand is 0 1 2 3.
2698	 Take (operand[1] operand[1]) and shift left one word
2699	 0 1 2 3    0 1 2 3  =>  1 2 3 0
2700	 Words 1 and 3 are now where they need to be for the result.  */
2701
2702      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2703		 rtx_tmp, rtx_val));
2704    }
2705  else
2706    /* Little endian word numbering for operand is 3 2 1 0.
2707       Result words 3 and 1 are where they need to be.  */
2708    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2709
2710  DONE;
2711}
2712  [(set_attr "type" "veccomplex")])
2713
2714;; Generate vsignede_v2df
2715;; signed double float to int convert even word
2716(define_expand "vsignede_v2df"
2717  [(set (match_operand:V4SI 0 "register_operand" "=v")
2718	(match_operand:V2DF 1 "register_operand" "v"))]
2719  "TARGET_VSX"
2720{
2721  if (BYTES_BIG_ENDIAN)
2722    /* Big endian word numbering for words in operand is 0 1 2 3.
2723       Result words 0 and 2 are already where they need to be.  */
2724    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2725
2726  else
2727    {
2728      rtx rtx_tmp;
2729      rtx rtx_val = GEN_INT (12);
2730      rtx_tmp = gen_reg_rtx (V4SImode);
2731
2732      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2733
2734      /* Little endian word numbering for operand is 3 2 1 0.
2735	 take (operand[1] operand[1]) and shift left three words
2736	 Take (operand[1] operand[1]) and shift left three words
2737	 Words 0 and 2 are now where they need to be for the result.  */
2738      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2739		 rtx_tmp, rtx_val));
2740    }
2741  DONE;
2742}
2743  [(set_attr "type" "veccomplex")])
2744
2745;; Generate vunsigned2
2746;; convert two double float vectors to a vector of single precision
2747;; unsigned ints
2748(define_expand "vunsigned2_v2df"
2749[(match_operand:V4SI 0 "register_operand" "=v")
2750 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2751	       (match_operand:V2DF 2 "register_operand" "v")]
2752	      UNSPEC_VSX_VSIGNED2)]
2753 "TARGET_VSX"
2754{
2755  rtx rtx_src1, rtx_src2, rtx_dst;
2756  bool signed_convert = false;
2757
2758  rtx_dst = operands[0];
2759  rtx_src1 = operands[1];
2760  rtx_src2 = operands[2];
2761
2762  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2763  DONE;
2764})
2765
2766;; Generate vunsignedo_v2df
2767;; unsigned double float to int convert odd word
2768(define_expand "vunsignedo_v2df"
2769  [(set (match_operand:V4SI 0 "register_operand" "=v")
2770	(match_operand:V2DF 1 "register_operand" "v"))]
2771  "TARGET_VSX"
2772{
2773  if (BYTES_BIG_ENDIAN)
2774    {
2775      rtx rtx_tmp;
2776      rtx rtx_val = GEN_INT (12);
2777      rtx_tmp = gen_reg_rtx (V4SImode);
2778
2779      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2780
2781      /* Big endian word numbering for words in operand is 0 1 2 3.
2782	 Take (operand[1] operand[1]) and shift left one word
2783	 0 1 2 3    0 1 2 3  =>  1 2 3 0
2784	 Words 1 and 3 are now where they need to be for the result.  */
2785
2786      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2787		 rtx_tmp, rtx_val));
2788    }
2789  else
2790    /* Little endian word numbering for operand is 3 2 1 0.
2791       Result words 3 and 1 are where they need to be.  */
2792    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2793
2794  DONE;
2795}
2796  [(set_attr "type" "veccomplex")])
2797
2798;; Generate vunsignede_v2df
2799;; unsigned double float to int convert even word
2800(define_expand "vunsignede_v2df"
2801  [(set (match_operand:V4SI 0 "register_operand" "=v")
2802	(match_operand:V2DF 1 "register_operand" "v"))]
2803  "TARGET_VSX"
2804{
2805  if (BYTES_BIG_ENDIAN)
2806    /* Big endian word numbering for words in operand is 0 1 2 3.
2807       Result words 0 and 2 are already where they need to be.  */
2808    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2809
2810  else
2811    {
2812      rtx rtx_tmp;
2813      rtx rtx_val = GEN_INT (12);
2814      rtx_tmp = gen_reg_rtx (V4SImode);
2815
2816      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2817
2818      /* Little endian word numbering for operand is 3 2 1 0.
2819	 Take (operand[1] operand[1]) and shift left three words
2820	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2821	 Words 0 and 2 are now where they need to be for the result.  */
2822      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2823		 rtx_tmp, rtx_val));
2824    }
2825  DONE;
2826}
2827  [(set_attr "type" "veccomplex")])
2828
2829;; Only optimize (float (fix x)) -> frz if we are in fast-math mode,
2830;; since the xvrdpiz instruction does not truncate the value if the floating
2831;; point value is < LONG_MIN or > LONG_MAX.
2832(define_insn "*vsx_float_fix_v2df2"
2833  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2834	(float:V2DF
2835	 (fix:V2DI
2836	  (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2837  "TARGET_HARD_FLOAT
2838   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2839   && !flag_trapping_math && TARGET_FRIZ"
2840  "xvrdpiz %x0,%x1"
2841  [(set_attr "type" "vecdouble")])
2842
2843
2844;; Permute operations
2845
2846;; Build a V2DF/V2DI vector from two scalars
2847(define_insn "vsx_concat_<mode>"
2848  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2849	(vec_concat:VSX_D
2850	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2851	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2852  "VECTOR_MEM_VSX_P (<MODE>mode)"
2853{
2854  if (which_alternative == 0)
2855    return (BYTES_BIG_ENDIAN
2856	    ? "xxpermdi %x0,%x1,%x2,0"
2857	    : "xxpermdi %x0,%x2,%x1,0");
2858
2859  else if (which_alternative == 1)
2860    return (BYTES_BIG_ENDIAN
2861	    ? "mtvsrdd %x0,%1,%2"
2862	    : "mtvsrdd %x0,%2,%1");
2863
2864  else
2865    gcc_unreachable ();
2866}
2867  [(set_attr "type" "vecperm")])
2868
2869;; Combiner patterns to allow creating XXPERMDI's to access either double
2870;; word element in a vector register.
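;; As a reference for the immediates computed below (assumed encoding):
;; xxpermdi xt,xa,xb,dm sets xt.dw[0] = xa.dw[dm>>1] and
;; xt.dw[1] = xb.dw[dm&1], so dm = 0 pairs the two high doublewords and
;; dm = 3 the two low ones; the little-endian branches recompute dm because
;; LE element numbers refer to the opposite register halves.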
2871(define_insn "*vsx_concat_<mode>_1"
2872  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2873	(vec_concat:VSX_D
2874	 (vec_select:<VS_scalar>
2875	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2876	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2877	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2878  "VECTOR_MEM_VSX_P (<MODE>mode)"
2879{
2880  HOST_WIDE_INT dword = INTVAL (operands[2]);
2881  if (BYTES_BIG_ENDIAN)
2882    {
2883      operands[4] = GEN_INT (2 * dword);
2884      return "xxpermdi %x0,%x1,%x3,%4";
2885    }
2886  else
2887    {
2888      operands[4] = GEN_INT (!dword);
2889      return "xxpermdi %x0,%x3,%x1,%4";
2890    }
2891}
2892  [(set_attr "type" "vecperm")])
2893
2894(define_insn "*vsx_concat_<mode>_2"
2895  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2896	(vec_concat:VSX_D
2897	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2898	 (vec_select:<VS_scalar>
2899	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2900	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2901  "VECTOR_MEM_VSX_P (<MODE>mode)"
2902{
2903  HOST_WIDE_INT dword = INTVAL (operands[3]);
2904  if (BYTES_BIG_ENDIAN)
2905    {
2906      operands[4] = GEN_INT (dword);
2907      return "xxpermdi %x0,%x1,%x2,%4";
2908    }
2909  else
2910    {
2911      operands[4] = GEN_INT (2 * !dword);
2912      return "xxpermdi %x0,%x2,%x1,%4";
2913    }
2914}
2915  [(set_attr "type" "vecperm")])
2916
2917(define_insn "*vsx_concat_<mode>_3"
2918  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2919	(vec_concat:VSX_D
2920	 (vec_select:<VS_scalar>
2921	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2922	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2923	 (vec_select:<VS_scalar>
2924	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2925	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2926  "VECTOR_MEM_VSX_P (<MODE>mode)"
2927{
2928  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2929  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2930  if (BYTES_BIG_ENDIAN)
2931    {
2932      operands[5] = GEN_INT ((2 * dword1) + dword2);
2933      return "xxpermdi %x0,%x1,%x3,%5";
2934    }
2935  else
2936    {
2937      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2938      return "xxpermdi %x0,%x3,%x1,%5";
2939    }
2940}
2941  [(set_attr "type" "vecperm")])
2942
2943;; Special purpose concat using xxpermdi to glue two single precision values
2944;; together, relying on the fact that internally scalar floats are represented
2945;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
2946(define_insn "vsx_concat_v2sf"
2947  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2948	(unspec:V2DF
2949	 [(match_operand:SF 1 "vsx_register_operand" "ww")
2950	  (match_operand:SF 2 "vsx_register_operand" "ww")]
2951	 UNSPEC_VSX_CONCAT))]
2952  "VECTOR_MEM_VSX_P (V2DFmode)"
2953{
2954  if (BYTES_BIG_ENDIAN)
2955    return "xxpermdi %x0,%x1,%x2,0";
2956  else
2957    return "xxpermdi %x0,%x2,%x1,0";
2958}
2959  [(set_attr "type" "vecperm")])
2960
2961;; Concatenate 4 SImode elements into a V4SImode reg.
2962(define_expand "vsx_init_v4si"
2963  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2964   (use (match_operand:SI 1 "gpc_reg_operand"))
2965   (use (match_operand:SI 2 "gpc_reg_operand"))
2966   (use (match_operand:SI 3 "gpc_reg_operand"))
2967   (use (match_operand:SI 4 "gpc_reg_operand"))]
2968   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2969{
2970  rtx a = gen_reg_rtx (DImode);
2971  rtx b = gen_reg_rtx (DImode);
2972  rtx c = gen_reg_rtx (DImode);
2973  rtx d = gen_reg_rtx (DImode);
2974  emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2975  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2976  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2977  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2978  if (!BYTES_BIG_ENDIAN)
2979    {
2980      std::swap (a, b);
2981      std::swap (c, d);
2982    }
2983
2984  rtx aa = gen_reg_rtx (DImode);
2985  rtx ab = gen_reg_rtx (DImode);
2986  rtx cc = gen_reg_rtx (DImode);
2987  rtx cd = gen_reg_rtx (DImode);
2988  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2989  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2990  emit_insn (gen_iordi3 (ab, aa, b));
2991  emit_insn (gen_iordi3 (cd, cc, d));
2992
2993  rtx abcd = gen_reg_rtx (V2DImode);
2994  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2995  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2996  DONE;
2997})
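;; (Sketch of the scheme above: the four SImode values are zero-extended,
;; packed pairwise into two DImode GPRs with a shift and an or, and then
;; joined by vsx_concat_v2di, typically a single mtvsrdd, instead of four
;; separate direct moves.)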
2998
2999;; xxpermdi for little endian loads and stores.  We need several of
3000;; these since the form of the PARALLEL differs by mode.
3001(define_insn "*vsx_xxpermdi2_le_<mode>"
3002  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3003        (vec_select:VSX_D
3004          (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3005          (parallel [(const_int 1) (const_int 0)])))]
3006  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3007  "xxpermdi %x0,%x1,%x1,2"
3008  [(set_attr "type" "vecperm")])
3009
3010(define_insn "*vsx_xxpermdi4_le_<mode>"
3011  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3012        (vec_select:VSX_W
3013          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3014          (parallel [(const_int 2) (const_int 3)
3015                     (const_int 0) (const_int 1)])))]
3016  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3017  "xxpermdi %x0,%x1,%x1,2"
3018  [(set_attr "type" "vecperm")])
3019
3020(define_insn "*vsx_xxpermdi8_le_V8HI"
3021  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3022        (vec_select:V8HI
3023          (match_operand:V8HI 1 "vsx_register_operand" "wa")
3024          (parallel [(const_int 4) (const_int 5)
3025                     (const_int 6) (const_int 7)
3026                     (const_int 0) (const_int 1)
3027                     (const_int 2) (const_int 3)])))]
3028  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
3029  "xxpermdi %x0,%x1,%x1,2"
3030  [(set_attr "type" "vecperm")])
3031
3032(define_insn "*vsx_xxpermdi16_le_V16QI"
3033  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3034        (vec_select:V16QI
3035          (match_operand:V16QI 1 "vsx_register_operand" "wa")
3036          (parallel [(const_int 8) (const_int 9)
3037                     (const_int 10) (const_int 11)
3038                     (const_int 12) (const_int 13)
3039                     (const_int 14) (const_int 15)
3040                     (const_int 0) (const_int 1)
3041                     (const_int 2) (const_int 3)
3042                     (const_int 4) (const_int 5)
3043                     (const_int 6) (const_int 7)])))]
3044  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3045  "xxpermdi %x0,%x1,%x1,2"
3046  [(set_attr "type" "vecperm")])
3047
3048;; lxvd2x for little endian loads.  We need several of
3049;; these since the form of the PARALLEL differs by mode.
3050(define_insn "*vsx_lxvd2x2_le_<mode>"
3051  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3052        (vec_select:VSX_D
3053          (match_operand:VSX_D 1 "memory_operand" "Z")
3054          (parallel [(const_int 1) (const_int 0)])))]
3055  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3056  "lxvd2x %x0,%y1"
3057  [(set_attr "type" "vecload")])
3058
3059(define_insn "*vsx_lxvd2x4_le_<mode>"
3060  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3061        (vec_select:VSX_W
3062          (match_operand:VSX_W 1 "memory_operand" "Z")
3063          (parallel [(const_int 2) (const_int 3)
3064                     (const_int 0) (const_int 1)])))]
3065  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3066  "lxvd2x %x0,%y1"
3067  [(set_attr "type" "vecload")])
3068
3069(define_insn "*vsx_lxvd2x8_le_V8HI"
3070  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3071        (vec_select:V8HI
3072          (match_operand:V8HI 1 "memory_operand" "Z")
3073          (parallel [(const_int 4) (const_int 5)
3074                     (const_int 6) (const_int 7)
3075                     (const_int 0) (const_int 1)
3076                     (const_int 2) (const_int 3)])))]
3077  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3078  "lxvd2x %x0,%y1"
3079  [(set_attr "type" "vecload")])
3080
3081(define_insn "*vsx_lxvd2x16_le_V16QI"
3082  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3083        (vec_select:V16QI
3084          (match_operand:V16QI 1 "memory_operand" "Z")
3085          (parallel [(const_int 8) (const_int 9)
3086                     (const_int 10) (const_int 11)
3087                     (const_int 12) (const_int 13)
3088                     (const_int 14) (const_int 15)
3089                     (const_int 0) (const_int 1)
3090                     (const_int 2) (const_int 3)
3091                     (const_int 4) (const_int 5)
3092                     (const_int 6) (const_int 7)])))]
3093  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3094  "lxvd2x %x0,%y1"
3095  [(set_attr "type" "vecload")])
3096
3097;; stxvd2x for little endian stores.  We need several of
3098;; these since the form of the PARALLEL differs by mode.
3099(define_insn "*vsx_stxvd2x2_le_<mode>"
3100  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3101        (vec_select:VSX_D
3102          (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3103          (parallel [(const_int 1) (const_int 0)])))]
3104  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3105  "stxvd2x %x1,%y0"
3106  [(set_attr "type" "vecstore")])
3107
3108(define_insn "*vsx_stxvd2x4_le_<mode>"
3109  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3110        (vec_select:VSX_W
3111          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3112          (parallel [(const_int 2) (const_int 3)
3113                     (const_int 0) (const_int 1)])))]
3114  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3115  "stxvd2x %x1,%y0"
3116  [(set_attr "type" "vecstore")])
3117
3118(define_insn "*vsx_stxvd2x8_le_V8HI"
3119  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3120        (vec_select:V8HI
3121          (match_operand:V8HI 1 "vsx_register_operand" "wa")
3122          (parallel [(const_int 4) (const_int 5)
3123                     (const_int 6) (const_int 7)
3124                     (const_int 0) (const_int 1)
3125                     (const_int 2) (const_int 3)])))]
3126  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3127  "stxvd2x %x1,%y0"
3128  [(set_attr "type" "vecstore")])
3129
3130(define_insn "*vsx_stxvd2x16_le_V16QI"
3131  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3132        (vec_select:V16QI
3133          (match_operand:V16QI 1 "vsx_register_operand" "wa")
3134          (parallel [(const_int 8) (const_int 9)
3135                     (const_int 10) (const_int 11)
3136                     (const_int 12) (const_int 13)
3137                     (const_int 14) (const_int 15)
3138                     (const_int 0) (const_int 1)
3139                     (const_int 2) (const_int 3)
3140                     (const_int 4) (const_int 5)
3141                     (const_int 6) (const_int 7)])))]
3142  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3143  "stxvd2x %x1,%y0"
3144  [(set_attr "type" "vecstore")])
3145
3146;; Convert a TImode value into V1TImode
3147(define_expand "vsx_set_v1ti"
3148  [(match_operand:V1TI 0 "nonimmediate_operand")
3149   (match_operand:V1TI 1 "nonimmediate_operand")
3150   (match_operand:TI 2 "input_operand")
3151   (match_operand:QI 3 "u5bit_cint_operand")]
3152  "VECTOR_MEM_VSX_P (V1TImode)"
3153{
3154  if (operands[3] != const0_rtx)
3155    gcc_unreachable ();
3156
3157  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3158  DONE;
3159})
3160
3161;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3162(define_expand "vsx_set_<mode>"
3163  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3164   (use (match_operand:VSX_D 1 "vsx_register_operand"))
3165   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3166   (use (match_operand:QI 3 "const_0_to_1_operand"))]
3167  "VECTOR_MEM_VSX_P (<MODE>mode)"
3168{
3169  rtx dest = operands[0];
3170  rtx vec_reg = operands[1];
3171  rtx value = operands[2];
3172  rtx ele = operands[3];
3173  rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3174
3175  if (ele == const0_rtx)
3176    {
3177      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3178      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3179      DONE;
3180    }
3181  else if (ele == const1_rtx)
3182    {
3183      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3184      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3185      DONE;
3186    }
3187  else
3188    gcc_unreachable ();
3189})
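
;; E.g., setting element 0 becomes
;;   tmp = vec_extract (vec, 1);  dest = vec_concat (value, tmp);
;; and setting element 1 concatenates in the opposite order.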
3190
3191;; Extract a DF/DI element from V2DF/V2DI.
3192;; Optimize cases where we can do a simple or direct move,
3193;; or see if we can avoid doing the move at all.
3194
3195;; There are some unresolved problems with reload that show up if an Altivec
3196;; register is picked.  Limit the scalar value to FPRs for now.
3197
3198(define_insn "vsx_extract_<mode>"
3199  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
3201	(vec_select:<VS_scalar>
3202	 (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
3204	 (parallel
3205	  [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
3206  "VECTOR_MEM_VSX_P (<MODE>mode)"
3207{
3208  int element = INTVAL (operands[2]);
3209  int op0_regno = REGNO (operands[0]);
3210  int op1_regno = REGNO (operands[1]);
3211  int fldDM;
3212
3213  gcc_assert (IN_RANGE (element, 0, 1));
3214  gcc_assert (VSX_REGNO_P (op1_regno));
3215
3216  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3217    {
3218      if (op0_regno == op1_regno)
3219	return ASM_COMMENT_START " vec_extract to same register";
3220
3221      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3222	       && TARGET_POWERPC64)
3223	return "mfvsrd %0,%x1";
3224
3225      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3226	return "fmr %0,%1";
3227
3228      else if (VSX_REGNO_P (op0_regno))
3229	return "xxlor %x0,%x1,%x1";
3230
3231      else
3232	gcc_unreachable ();
3233    }
3234
3235  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3236	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3237    return "mfvsrld %0,%x1";
3238
3239  else if (VSX_REGNO_P (op0_regno))
3240    {
3241      fldDM = element << 1;
3242      if (!BYTES_BIG_ENDIAN)
3243	fldDM = 3 - fldDM;
3244      operands[3] = GEN_INT (fldDM);
3245      return "xxpermdi %x0,%x1,%x1,%3";
3246    }
3247
3248  else
3249    gcc_unreachable ();
3250}
3251  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
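
;; Worked example for the xxpermdi arm above: extracting element 1 computes
;; fldDM = 1 << 1 = 2 on big endian and 3 - 2 = 1 on little endian, giving
;;   BE: xxpermdi %x0,%x1,%x1,2        LE: xxpermdi %x0,%x1,%x1,1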
3252
3253;; Optimize extracting a single scalar element from memory.
3254(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3255  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3256	(vec_select:<VSX_D:VS_scalar>
3257	 (match_operand:VSX_D 1 "memory_operand" "m,m")
3258	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3259   (clobber (match_scratch:P 3 "=&b,&b"))]
3260  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3261  "#"
3262  "&& reload_completed"
3263  [(set (match_dup 0) (match_dup 4))]
3264{
3265  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3266					   operands[3], <VSX_D:VS_scalar>mode);
3267}
3268  [(set_attr "type" "fpload,load")
3269   (set_attr "length" "8")])
3270
3271;; Optimize storing to memory a single scalar element that is already in
3272;; the right location.
3273(define_insn "*vsx_extract_<mode>_store"
3274  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3275	(vec_select:<VS_scalar>
3276	 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3277	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3278  "VECTOR_MEM_VSX_P (<MODE>mode)"
3279  "@
3280   stfd%U0%X0 %1,%0
3281   stxsdx %x1,%y0
3282   stxsd %1,%0"
3283  [(set_attr "type" "fpstore")])
3284
3285;; Variable V2DI/V2DF extract shift
3286(define_insn "vsx_vslo_<mode>"
3287  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3288	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3289			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3290			    UNSPEC_VSX_VSLO))]
3291  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3292  "vslo %0,%1,%2"
3293  [(set_attr "type" "vecperm")])
3294
3295;; Variable V2DI/V2DF extract
3296(define_insn_and_split "vsx_extract_<mode>_var"
3297  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3298	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3299			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3300			    UNSPEC_VSX_EXTRACT))
3301   (clobber (match_scratch:DI 3 "=r,&b,&b"))
3302   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3303  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3304  "#"
3305  "&& reload_completed"
3306  [(const_int 0)]
3307{
3308  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3309				operands[3], operands[4]);
3310  DONE;
3311})
3312
3313;; Extract an SF element from V4SF
3314(define_insn_and_split "vsx_extract_v4sf"
3315  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3316	(vec_select:SF
3317	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3318	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3319   (clobber (match_scratch:V4SF 3 "=0"))]
3320  "VECTOR_UNIT_VSX_P (V4SFmode)"
3321  "#"
3322  "&& 1"
3323  [(const_int 0)]
3324{
3325  rtx op0 = operands[0];
3326  rtx op1 = operands[1];
3327  rtx op2 = operands[2];
3328  rtx op3 = operands[3];
3329  rtx tmp;
3330  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3331
3332  if (ele == 0)
3333    tmp = op1;
3334  else
3335    {
3336      if (GET_CODE (op3) == SCRATCH)
3337	op3 = gen_reg_rtx (V4SFmode);
3338      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3339      tmp = op3;
3340    }
3341  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3342  DONE;
3343}
3344  [(set_attr "length" "8")
3345   (set_attr "type" "fp")])
3346
3347(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3348  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3349	(vec_select:SF
3350	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3351	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3352   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3353  "VECTOR_MEM_VSX_P (V4SFmode)"
3354  "#"
3355  "&& reload_completed"
3356  [(set (match_dup 0) (match_dup 4))]
3357{
3358  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3359					   operands[3], SFmode);
3360}
3361  [(set_attr "type" "fpload,fpload,fpload,load")
3362   (set_attr "length" "8")])
3363
3364;; Variable V4SF extract
3365(define_insn_and_split "vsx_extract_v4sf_var"
3366  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3367	(unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3368		    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3369		   UNSPEC_VSX_EXTRACT))
3370   (clobber (match_scratch:DI 3 "=r,&b,&b"))
3371   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3372  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3373  "#"
3374  "&& reload_completed"
3375  [(const_int 0)]
3376{
3377  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3378				operands[3], operands[4]);
3379  DONE;
3380})
3381
3382;; Expand the builtin form of xxpermdi to canonical RTL.
3383(define_expand "vsx_xxpermdi_<mode>"
3384  [(match_operand:VSX_L 0 "vsx_register_operand")
3385   (match_operand:VSX_L 1 "vsx_register_operand")
3386   (match_operand:VSX_L 2 "vsx_register_operand")
3387   (match_operand:QI 3 "u5bit_cint_operand")]
3388  "VECTOR_MEM_VSX_P (<MODE>mode)"
3389{
3390  rtx target = operands[0];
3391  rtx op0 = operands[1];
3392  rtx op1 = operands[2];
3393  int mask = INTVAL (operands[3]);
3394  rtx perm0 = GEN_INT ((mask >> 1) & 1);
3395  rtx perm1 = GEN_INT ((mask & 1) + 2);
3396  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3397
3398  if (<MODE>mode == V2DFmode)
3399    gen = gen_vsx_xxpermdi2_v2df_1;
3400  else
3401    {
3402      gen = gen_vsx_xxpermdi2_v2di_1;
3403      if (<MODE>mode != V2DImode)
3404	{
3405	  target = gen_lowpart (V2DImode, target);
3406	  op0 = gen_lowpart (V2DImode, op0);
3407	  op1 = gen_lowpart (V2DImode, op1);
3408	}
3409    }
3410  emit_insn (gen (target, op0, op1, perm0, perm1));
3411  DONE;
3412})
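
;; E.g., a builtin mask of 3 splits into perm0 = (3 >> 1) & 1 = 1 and
;; perm1 = (3 & 1) + 2 = 3, so the vec_select picks doubleword 1 of each
;; input out of the vec_concat.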
3413
3414;; Special version of xxpermdi that retains big-endian semantics.
3415(define_expand "vsx_xxpermdi_<mode>_be"
3416  [(match_operand:VSX_L 0 "vsx_register_operand")
3417   (match_operand:VSX_L 1 "vsx_register_operand")
3418   (match_operand:VSX_L 2 "vsx_register_operand")
3419   (match_operand:QI 3 "u5bit_cint_operand")]
3420  "VECTOR_MEM_VSX_P (<MODE>mode)"
3421{
3422  rtx target = operands[0];
3423  rtx op0 = operands[1];
3424  rtx op1 = operands[2];
3425  int mask = INTVAL (operands[3]);
3426  rtx perm0 = GEN_INT ((mask >> 1) & 1);
3427  rtx perm1 = GEN_INT ((mask & 1) + 2);
3428  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3429
3430  if (<MODE>mode == V2DFmode)
3431    gen = gen_vsx_xxpermdi2_v2df_1;
3432  else
3433    {
3434      gen = gen_vsx_xxpermdi2_v2di_1;
3435      if (<MODE>mode != V2DImode)
3436	{
3437	  target = gen_lowpart (V2DImode, target);
3438	  op0 = gen_lowpart (V2DImode, op0);
3439	  op1 = gen_lowpart (V2DImode, op1);
3440	}
3441    }
3442  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3443     transformation we don't want; it is necessary for
3444     rs6000_expand_vec_perm_const_1 but not for this use.  So we
3445     prepare for that by reversing the transformation here.  */
3446  if (BYTES_BIG_ENDIAN)
3447    emit_insn (gen (target, op0, op1, perm0, perm1));
3448  else
3449    {
3450      rtx p0 = GEN_INT (3 - INTVAL (perm1));
3451      rtx p1 = GEN_INT (3 - INTVAL (perm0));
3452      emit_insn (gen (target, op1, op0, p0, p1));
3453    }
3454  DONE;
3455})
3456
3457(define_insn "vsx_xxpermdi2_<mode>_1"
3458  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3459	(vec_select:VSX_D
3460	  (vec_concat:<VS_double>
3461	    (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3462	    (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3463	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
3464		     (match_operand 4 "const_2_to_3_operand" "")])))]
3465  "VECTOR_MEM_VSX_P (<MODE>mode)"
3466{
3467  int op3, op4, mask;
3468
3469  /* For little endian, swap operands and invert/swap selectors
3470     to get the correct xxpermdi.  The operand swap sets up the
3471     inputs as a little endian array.  The selectors are swapped
3472     because they are defined to use big endian ordering.  The
3473     selectors are inverted to get the correct doublewords for
3474     little endian ordering.  */
3475  if (BYTES_BIG_ENDIAN)
3476    {
3477      op3 = INTVAL (operands[3]);
3478      op4 = INTVAL (operands[4]);
3479    }
3480  else
3481    {
3482      op3 = 3 - INTVAL (operands[4]);
3483      op4 = 3 - INTVAL (operands[3]);
3484    }
3485
3486  mask = (op3 << 1) | (op4 - 2);
3487  operands[3] = GEN_INT (mask);
3488
3489  if (BYTES_BIG_ENDIAN)
3490    return "xxpermdi %x0,%x1,%x2,%3";
3491  else
3492    return "xxpermdi %x0,%x2,%x1,%3";
3493}
3494  [(set_attr "type" "vecperm")])
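
;; Worked example of the endian handling above: with selectors {0, 2},
;; big endian computes mask = (0 << 1) | (2 - 2) = 0 and emits
;;   xxpermdi %x0,%x1,%x2,0
;; while little endian computes op3 = 3 - 2 = 1 and op4 = 3 - 0 = 3, so
;; mask = (1 << 1) | (3 - 2) = 3 and the inputs are swapped:
;;   xxpermdi %x0,%x2,%x1,3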
3495
3496;; Extraction of a single element from a small integer vector.  Until ISA 3.0,
3497;; none of the small types were allowed in a vector register, so we had to
3498;; extract to DImode and then either do a direct move or a store.
3499(define_expand  "vsx_extract_<mode>"
3500  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3501		   (vec_select:<VS_scalar>
3502		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3503		    (parallel [(match_operand:QI 2 "const_int_operand")])))
3504	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3505  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3506{
3507  /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3508  if (TARGET_P9_VECTOR)
3509    {
3510      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3511					    operands[2]));
3512      DONE;
3513    }
3514})
3515
3516(define_insn "vsx_extract_<mode>_p9"
3517  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3518	(vec_select:<VS_scalar>
3519	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3520	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3521   (clobber (match_scratch:SI 3 "=r,X"))]
3522  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3523{
3524  if (which_alternative == 0)
3525    return "#";
3526
3527  else
3528    {
3529      HOST_WIDE_INT elt = INTVAL (operands[2]);
3530      HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3531			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3532			       : elt);
3533
3534      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3535      HOST_WIDE_INT offset = unit_size * elt_adj;
3536
3537      operands[2] = GEN_INT (offset);
3538      if (unit_size == 4)
3539	return "xxextractuw %x0,%x1,%2";
3540      else
3541	return "vextractu<wd> %0,%1,%2";
3542    }
3543}
3544  [(set_attr "type" "vecsimple")])
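
;; Worked offset example for the second alternative: extracting V8HI
;; element 2 on little endian gives elt_adj = 8 - 1 - 2 = 5 and a byte
;; offset of 2 * 5 = 10, so the insn emitted is vextractuh %0,%1,10
;; (xxextractuw covers the 4-byte V4SI case).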
3545
3546(define_split
3547  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3548	(vec_select:<VS_scalar>
3549	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3550	 (parallel [(match_operand:QI 2 "const_int_operand")])))
3551   (clobber (match_operand:SI 3 "int_reg_operand"))]
3552  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3553  [(const_int 0)]
3554{
3555  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3556  rtx op1 = operands[1];
3557  rtx op2 = operands[2];
3558  rtx op3 = operands[3];
3559  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3560
3561  emit_move_insn (op3, GEN_INT (offset));
3562  if (BYTES_BIG_ENDIAN)
3563    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3564  else
3565    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3566  DONE;
3567})
3568
3569;; Optimize zero extracts to eliminate the AND after the extract.
3570(define_insn_and_split "*vsx_extract_<mode>_di_p9"
3571  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3572	(zero_extend:DI
3573	 (vec_select:<VS_scalar>
3574	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3575	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3576   (clobber (match_scratch:SI 3 "=r,X"))]
3577  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3578  "#"
3579  "&& reload_completed"
3580  [(parallel [(set (match_dup 4)
3581		   (vec_select:<VS_scalar>
3582		    (match_dup 1)
3583		    (parallel [(match_dup 2)])))
3584	      (clobber (match_dup 3))])]
3585{
3586  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3587})
3588
3589;; Optimize stores to use the ISA 3.0 scalar store instructions
3590(define_insn_and_split "*vsx_extract_<mode>_store_p9"
3591  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3592	(vec_select:<VS_scalar>
3593	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3594	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3595   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3596   (clobber (match_scratch:SI 4 "=X,&r"))]
3597  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3598  "#"
3599  "&& reload_completed"
3600  [(parallel [(set (match_dup 3)
3601		   (vec_select:<VS_scalar>
3602		    (match_dup 1)
3603		    (parallel [(match_dup 2)])))
3604	      (clobber (match_dup 4))])
3605   (set (match_dup 0)
3606	(match_dup 3))])
3607
3608(define_insn_and_split  "*vsx_extract_si"
3609  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3610	(vec_select:SI
3611	 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3612	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3613   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3614  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3615  "#"
3616  "&& reload_completed"
3617  [(const_int 0)]
3618{
3619  rtx dest = operands[0];
3620  rtx src = operands[1];
3621  rtx element = operands[2];
3622  rtx vec_tmp = operands[3];
3623  int value;
3624
3625  if (!BYTES_BIG_ENDIAN)
3626    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3627
3628  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3629     instruction.  */
3630  value = INTVAL (element);
3631  if (value != 1)
3632    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3633  else
3634    vec_tmp = src;
3635
3636  if (MEM_P (operands[0]))
3637    {
3638      if (can_create_pseudo_p ())
3639	dest = rs6000_force_indexed_or_indirect_mem (dest);
3640
3641      if (TARGET_P8_VECTOR)
3642	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3643      else
3644	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3645    }
3646
3647  else if (TARGET_P8_VECTOR)
3648    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3649  else
3650    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3651		    gen_rtx_REG (DImode, REGNO (vec_tmp)));
3652
3653  DONE;
3654}
3655  [(set_attr "type" "mftgpr,vecperm,fpstore")
3656   (set_attr "length" "8")])
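
;; Note on the "value != 1" test above: the 64-bit move (or stfiwx) at the
;; end transfers doubleword 0 of the vector register, whose low word is
;; word 1 in big-endian numbering, so that is the only element position
;; that needs no preparatory splat.  The V16QI/V8HI pattern below applies
;; the same reasoning with byte 7 and halfword 3.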
3657
3658(define_insn_and_split  "*vsx_extract_<mode>_p8"
3659  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3660	(vec_select:<VS_scalar>
3661	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3662	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3663   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3664  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3665   && !TARGET_P9_VECTOR"
3666  "#"
3667  "&& reload_completed"
3668  [(const_int 0)]
3669{
3670  rtx dest = operands[0];
3671  rtx src = operands[1];
3672  rtx element = operands[2];
3673  rtx vec_tmp = operands[3];
3674  int value;
3675
3676  if (!BYTES_BIG_ENDIAN)
3677    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3678
3679  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3680     instruction.  */
3681  value = INTVAL (element);
3682  if (<MODE>mode == V16QImode)
3683    {
3684      if (value != 7)
3685	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3686      else
3687	vec_tmp = src;
3688    }
3689  else if (<MODE>mode == V8HImode)
3690    {
3691      if (value != 3)
3692	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3693      else
3694	vec_tmp = src;
3695    }
3696  else
3697    gcc_unreachable ();
3698
3699  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3700		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
3701  DONE;
3702}
3703  [(set_attr "type" "mftgpr")])
3704
3705;; Optimize extracting a single scalar element from memory.
3706(define_insn_and_split "*vsx_extract_<mode>_load"
3707  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3708	(vec_select:<VS_scalar>
3709	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3710	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3711   (clobber (match_scratch:DI 3 "=&b"))]
3712  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3713  "#"
3714  "&& reload_completed"
3715  [(set (match_dup 0) (match_dup 4))]
3716{
3717  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3718					   operands[3], <VS_scalar>mode);
3719}
3720  [(set_attr "type" "load")
3721   (set_attr "length" "8")])
3722
3723;; Variable V16QI/V8HI/V4SI extract
3724(define_insn_and_split "vsx_extract_<mode>_var"
3725  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3726	(unspec:<VS_scalar>
3727	 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3728	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3729	 UNSPEC_VSX_EXTRACT))
3730   (clobber (match_scratch:DI 3 "=r,r,&b"))
3731   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3732  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3733  "#"
3734  "&& reload_completed"
3735  [(const_int 0)]
3736{
3737  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3738				operands[3], operands[4]);
3739  DONE;
3740})
3741
3742(define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var"
3743  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3744	(zero_extend:<VS_scalar>
3745	 (unspec:<VSX_EXTRACT_I:VS_scalar>
3746	  [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3747	   (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3748	  UNSPEC_VSX_EXTRACT)))
3749   (clobber (match_scratch:DI 3 "=r,r,&b"))
3750   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3751  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3752  "#"
3753  "&& reload_completed"
3754  [(const_int 0)]
3755{
3756  machine_mode smode = <VS_scalar>mode;
3757  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3758				operands[1], operands[2],
3759				operands[3], operands[4]);
3760  DONE;
3761})
3762
3763;; VSX_EXTRACT optimizations
3764;; Optimize double d = (double) vec_extract (vi, <n>)
3765;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
3766(define_insn_and_split "*vsx_extract_si_<uns>float_df"
3767  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3768	(any_float:DF
3769	 (vec_select:SI
3770	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
3771	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3772   (clobber (match_scratch:V4SI 3 "=v"))]
3773  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3774  "#"
3775  "&& 1"
3776  [(const_int 0)]
3777{
3778  rtx dest = operands[0];
3779  rtx src = operands[1];
3780  rtx element = operands[2];
3781  rtx v4si_tmp = operands[3];
3782  int value;
3783
3784  if (!BYTES_BIG_ENDIAN)
3785    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3786
3787  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3788     instruction.  */
3789  value = INTVAL (element);
3790  if (value != 0)
3791    {
3792      if (GET_CODE (v4si_tmp) == SCRATCH)
3793	v4si_tmp = gen_reg_rtx (V4SImode);
3794      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3795    }
3796  else
3797    v4si_tmp = src;
3798
3799  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3800  DONE;
3801})
3802
3803;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3804;; where <type> is a floating point type supported by the hardware other
3805;; than double.  First convert the value to double, and then to the desired
3806;; type.
3807(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3808  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3809	(any_float:VSX_EXTRACT_FL
3810	 (vec_select:SI
3811	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
3812	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3813   (clobber (match_scratch:V4SI 3 "=v"))
3814   (clobber (match_scratch:DF 4 "=ws"))]
3815  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3816  "#"
3817  "&& 1"
3818  [(const_int 0)]
3819{
3820  rtx dest = operands[0];
3821  rtx src = operands[1];
3822  rtx element = operands[2];
3823  rtx v4si_tmp = operands[3];
3824  rtx df_tmp = operands[4];
3825  int value;
3826
3827  if (!BYTES_BIG_ENDIAN)
3828    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3829
3830  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3831     instruction.  */
3832  value = INTVAL (element);
3833  if (value != 0)
3834    {
3835      if (GET_CODE (v4si_tmp) == SCRATCH)
3836	v4si_tmp = gen_reg_rtx (V4SImode);
3837      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3838    }
3839  else
3840    v4si_tmp = src;
3841
3842  if (GET_CODE (df_tmp) == SCRATCH)
3843    df_tmp = gen_reg_rtx (DFmode);
3844
3845  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3846
3847  if (<MODE>mode == SFmode)
3848    emit_insn (gen_truncdfsf2 (dest, df_tmp));
3849  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3850    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3851  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3852	   && TARGET_FLOAT128_HW)
3853    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3854  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3855    emit_insn (gen_extenddfif2 (dest, df_tmp));
3856  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3857    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3858  else
3859    gcc_unreachable ();
3860
3861  DONE;
3862})
3863
3864;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3865;; where <ftype> is SFmode or DFmode (and KFmode/TFmode if those types are IEEE
3866;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3867;; vector short or vector unsigned short.
3868(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3869  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3870	(float:FL_CONV
3871	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3872	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3873	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3874   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3875  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3876   && TARGET_P9_VECTOR"
3877  "#"
3878  "&& reload_completed"
3879  [(parallel [(set (match_dup 3)
3880		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
3881		    (match_dup 1)
3882		    (parallel [(match_dup 2)])))
3883	      (clobber (scratch:SI))])
3884   (set (match_dup 4)
3885	(sign_extend:DI (match_dup 3)))
3886   (set (match_dup 0)
3887	(float:<FL_CONV:MODE> (match_dup 4)))]
3888{
3889  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3890})
3891
3892(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3893  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3894	(unsigned_float:FL_CONV
3895	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3896	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3897	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3898   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3899  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3900   && TARGET_P9_VECTOR"
3901  "#"
3902  "&& reload_completed"
3903  [(parallel [(set (match_dup 3)
3904		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
3905		    (match_dup 1)
3906		    (parallel [(match_dup 2)])))
3907	      (clobber (scratch:SI))])
3908   (set (match_dup 0)
3909	(float:<FL_CONV:MODE> (match_dup 4)))]
3910{
3911  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3912})
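
;; Unlike the signed variant above, no separate extend step is emitted
;; here: the ISA 3.0 extract instructions (xxextractuw, vextractu{b,h})
;; already leave the element zero-extended in the target register, so the
;; DImode view in operands[4] can feed the conversion directly.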
3913
3914;; V4SI/V8HI/V16QI set operation on ISA 3.0
3915(define_insn "vsx_set_<mode>_p9"
3916  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3917	(unspec:VSX_EXTRACT_I
3918	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3919	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3920	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3921	 UNSPEC_VSX_SET))]
3922  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3923{
3924  int ele = INTVAL (operands[3]);
3925  int nunits = GET_MODE_NUNITS (<MODE>mode);
3926
3927  if (!BYTES_BIG_ENDIAN)
3928    ele = nunits - 1 - ele;
3929
3930  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3931  if (<MODE>mode == V4SImode)
3932    return "xxinsertw %x0,%x2,%3";
3933  else
3934    return "vinsert<wd> %0,%2,%3";
3935}
3936  [(set_attr "type" "vecperm")])
3937
3938(define_insn_and_split "vsx_set_v4sf_p9"
3939  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3940	(unspec:V4SF
3941	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3942	  (match_operand:SF 2 "gpc_reg_operand" "ww")
3943	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
3944	 UNSPEC_VSX_SET))
3945   (clobber (match_scratch:SI 4 "=&wJwK"))]
3946  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3947  "#"
3948  "&& reload_completed"
3949  [(set (match_dup 5)
3950	(unspec:V4SF [(match_dup 2)]
3951		     UNSPEC_VSX_CVDPSPN))
3952   (parallel [(set (match_dup 4)
3953		   (vec_select:SI (match_dup 6)
3954				  (parallel [(match_dup 7)])))
3955	      (clobber (scratch:SI))])
3956   (set (match_dup 8)
3957	(unspec:V4SI [(match_dup 8)
3958		      (match_dup 4)
3959		      (match_dup 3)]
3960		     UNSPEC_VSX_SET))]
3961{
3962  unsigned int tmp_regno = reg_or_subregno (operands[4]);
3963
3964  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3965  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3966  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
3967  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3968}
3969  [(set_attr "type" "vecperm")
3970   (set_attr "length" "12")])
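
;; The split above yields a three-instruction sequence (hence length 12):
;; xscvdpspn to get the SF value into single-precision form in word 0 of
;; the register (vector element 0 on big endian, element 3 on little
;; endian), a word extract into the SImode temporary, and xxinsertw to
;; place that word in the destination lane.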
3971
3972;; Special case setting a V4SF element to 0.0f
3973(define_insn_and_split "*vsx_set_v4sf_p9_zero"
3974  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3975	(unspec:V4SF
3976	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3977	  (match_operand:SF 2 "zero_fp_constant" "j")
3978	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
3979	 UNSPEC_VSX_SET))
3980   (clobber (match_scratch:SI 4 "=&wJwK"))]
3981  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3982  "#"
3983  "&& reload_completed"
3984  [(set (match_dup 4)
3985	(const_int 0))
3986   (set (match_dup 5)
3987	(unspec:V4SI [(match_dup 5)
3988		      (match_dup 4)
3989		      (match_dup 3)]
3990		     UNSPEC_VSX_SET))]
3991{
3992  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3993}
3994  [(set_attr "type" "vecperm")
3995   (set_attr "length" "8")])
3996
3997;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3998;; that is in the default scalar position (1 for big endian, 2 for little
3999;; endian).  We just need to do an xxinsertw since the element is in the
4000;; correct location.
4001
4002(define_insn "*vsx_insert_extract_v4sf_p9"
4003  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4004	(unspec:V4SF
4005	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4006	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4007			 (parallel
4008			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4009	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
4010	 UNSPEC_VSX_SET))]
4011  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4012   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4013{
4014  int ele = INTVAL (operands[4]);
4015
4016  if (!BYTES_BIG_ENDIAN)
4017    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4018
4019  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4020  return "xxinsertw %x0,%x2,%4";
4021}
4022  [(set_attr "type" "vecperm")])
4023
4024;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4025;; that is in the default scalar position (1 for big endian, 2 for little
4026;; endian).  Do the insert/extract as V4SI operations to avoid the conversion.
4027
4028(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4029  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4030	(unspec:V4SF
4031	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4032	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4033			 (parallel
4034			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4035	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
4036	 UNSPEC_VSX_SET))
4037   (clobber (match_scratch:SI 5 "=&wJwK"))]
4038  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4039   && TARGET_P9_VECTOR && TARGET_POWERPC64
4040   && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4041  "#"
4042  "&& 1"
4043  [(parallel [(set (match_dup 5)
4044		   (vec_select:SI (match_dup 6)
4045				  (parallel [(match_dup 3)])))
4046	      (clobber (scratch:SI))])
4047   (set (match_dup 7)
4048	(unspec:V4SI [(match_dup 8)
4049		      (match_dup 5)
4050		      (match_dup 4)]
4051		     UNSPEC_VSX_SET))]
4052{
4053  if (GET_CODE (operands[5]) == SCRATCH)
4054    operands[5] = gen_reg_rtx (SImode);
4055
4056  operands[6] = gen_lowpart (V4SImode, operands[2]);
4057  operands[7] = gen_lowpart (V4SImode, operands[0]);
4058  operands[8] = gen_lowpart (V4SImode, operands[1]);
4059}
4060  [(set_attr "type" "vecperm")])
4061
4062;; Expanders for builtins
4063(define_expand "vsx_mergel_<mode>"
4064  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4065   (use (match_operand:VSX_D 1 "vsx_register_operand"))
4066   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4067  "VECTOR_MEM_VSX_P (<MODE>mode)"
4068{
4069  rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4070  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4071  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4072  emit_insn (gen_rtx_SET (operands[0], x));
4073  DONE;
4074})
4075
4076(define_expand "vsx_mergeh_<mode>"
4077  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4078   (use (match_operand:VSX_D 1 "vsx_register_operand"))
4079   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4080  "VECTOR_MEM_VSX_P (<MODE>mode)"
4081{
4082  rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4083  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4084  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4085  emit_insn (gen_rtx_SET (operands[0], x));
4086  DONE;
4087})
4088
4089;; V2DF/V2DI splat
4090;; We separate the register splat insn from the memory splat insn to force the
4091;; register allocator to generate the indexed form of the SPLAT when it is
4092;; given an offsettable memory reference.  Otherwise, if the register and
4093;; memory insns were combined into a single insn, the register allocator would
4094;; load the value into a register, and then do a doubleword permute.
4095(define_expand "vsx_splat_<mode>"
4096  [(set (match_operand:VSX_D 0 "vsx_register_operand")
4097	(vec_duplicate:VSX_D
4098	 (match_operand:<VS_scalar> 1 "input_operand")))]
4099  "VECTOR_MEM_VSX_P (<MODE>mode)"
4100{
4101  rtx op1 = operands[1];
4102  if (MEM_P (op1))
4103    operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4104  else if (!REG_P (op1))
4105    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4106})
4107
4108(define_insn "vsx_splat_<mode>_reg"
4109  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4110	(vec_duplicate:VSX_D
4111	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4112  "VECTOR_MEM_VSX_P (<MODE>mode)"
4113  "@
4114   xxpermdi %x0,%x1,%x1,0
4115   mtvsrdd %x0,%1,%1"
4116  [(set_attr "type" "vecperm")])
4117
4118(define_insn "vsx_splat_<VSX_D:mode>_mem"
4119  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4120	(vec_duplicate:VSX_D
4121	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4122  "VECTOR_MEM_VSX_P (<MODE>mode)"
4123  "lxvdsx %x0,%y1"
4124  [(set_attr "type" "vecload")])
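
;; E.g., splatting a double that lives in memory should come out as the
;; single indexed-form instruction
;;   lxvdsx %x0,%y1
;; rather than a load followed by xxpermdi %x0,%x1,%x1,0, which is roughly
;; what a combined register/memory pattern would invite the register
;; allocator to produce.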
4125
4126;; V4SI splat support
4127(define_insn "vsx_splat_v4si"
4128  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4129	(vec_duplicate:V4SI
4130	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4131  "TARGET_P9_VECTOR"
4132  "@
4133   mtvsrws %x0,%1
4134   lxvwsx %x0,%y1"
4135  [(set_attr "type" "vecperm,vecload")])
4136
4137;; SImode is not currently allowed in vector registers.  This pattern
4138;; allows us to use direct move to get the value into a vector register
4139;; so that we can use XXSPLTW.
4140(define_insn "vsx_splat_v4si_di"
4141  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4142	(vec_duplicate:V4SI
4143	 (truncate:SI
4144	  (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4145  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4146  "@
4147   xxspltw %x0,%x1,1
4148   mtvsrws %x0,%1"
4149  [(set_attr "type" "vecperm")])
4150
4151;; V4SF splat (ISA 3.0)
4152(define_insn_and_split "vsx_splat_v4sf"
4153  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4154	(vec_duplicate:V4SF
4155	 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4156  "TARGET_P9_VECTOR"
4157  "@
4158   lxvwsx %x0,%y1
4159   #
4160   mtvsrws %x0,%1"
4161  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4162  [(set (match_dup 0)
4163	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4164   (set (match_dup 0)
4165	(unspec:V4SF [(match_dup 0)
4166		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4167  ""
4168  [(set_attr "type" "vecload,vecperm,mftgpr")
4169   (set_attr "length" "4,8,4")])
4170
4171;; V4SF/V4SI splat from a vector element
4172(define_insn "vsx_xxspltw_<mode>"
4173  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4174	(vec_duplicate:VSX_W
4175	 (vec_select:<VS_scalar>
4176	  (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4177	  (parallel
4178	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4179  "VECTOR_MEM_VSX_P (<MODE>mode)"
4180{
4181  if (!BYTES_BIG_ENDIAN)
4182    operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4183
4184  return "xxspltw %x0,%x1,%2";
4185}
4186  [(set_attr "type" "vecperm")])
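
;; E.g., splatting element 2 on little endian emits xxspltw %x0,%x1,1,
;; since the hardware numbers the words from the big-endian end
;; (3 - 2 = 1).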
4187
4188(define_insn "vsx_xxspltw_<mode>_direct"
4189  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4190        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4191                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
4192                      UNSPEC_VSX_XXSPLTW))]
4193  "VECTOR_MEM_VSX_P (<MODE>mode)"
4194  "xxspltw %x0,%x1,%2"
4195  [(set_attr "type" "vecperm")])
4196
4197;; V16QI/V8HI splat support on ISA 2.07
4198(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4199  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4200	(vec_duplicate:VSX_SPLAT_I
4201	 (truncate:<VS_scalar>
4202	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
4203  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4204  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4205  [(set_attr "type" "vecperm")])
4206
4207;; V2DF/V2DI splat for use by vec_splat builtin
4208(define_insn "vsx_xxspltd_<mode>"
4209  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4210        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4211	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
4212                      UNSPEC_VSX_XXSPLTD))]
4213  "VECTOR_MEM_VSX_P (<MODE>mode)"
4214{
4215  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4216      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4217    return "xxpermdi %x0,%x1,%x1,0";
4218  else
4219    return "xxpermdi %x0,%x1,%x1,3";
4220}
4221  [(set_attr "type" "vecperm")])
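
;; Element 0 on big endian and element 1 on little endian both name
;; register doubleword 0, so those two cases splat with DM = 0, giving
;; { dw0, dw0 }; the remaining cases splat doubleword 1 with DM = 3.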
4222
4223;; V4SF/V4SI interleave
4224(define_insn "vsx_xxmrghw_<mode>"
4225  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4226        (vec_select:VSX_W
4227	  (vec_concat:<VS_double>
4228	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4229	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4230	  (parallel [(const_int 0) (const_int 4)
4231		     (const_int 1) (const_int 5)])))]
4232  "VECTOR_MEM_VSX_P (<MODE>mode)"
4233{
4234  if (BYTES_BIG_ENDIAN)
4235    return "xxmrghw %x0,%x1,%x2";
4236  else
4237    return "xxmrglw %x0,%x2,%x1";
4238}
4239  [(set_attr "type" "vecperm")])
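
;; Lane view (big-endian element numbering): the selectors {0, 4, 1, 5}
;; produce result = { op1[0], op2[0], op1[1], op2[1] }.  On little endian
;; the same lanes are obtained with xxmrglw and the inputs swapped.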
4240
4241(define_insn "vsx_xxmrglw_<mode>"
4242  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4243	(vec_select:VSX_W
4244	  (vec_concat:<VS_double>
4245	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4246	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4247	  (parallel [(const_int 2) (const_int 6)
4248		     (const_int 3) (const_int 7)])))]
4249  "VECTOR_MEM_VSX_P (<MODE>mode)"
4250{
4251  if (BYTES_BIG_ENDIAN)
4252    return "xxmrglw %x0,%x1,%x2";
4253  else
4254    return "xxmrghw %x0,%x2,%x1";
4255}
4256  [(set_attr "type" "vecperm")])
4257
4258;; Shift left double by word immediate
4259(define_insn "vsx_xxsldwi_<mode>"
4260  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4261	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4262		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4263		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
4264		      UNSPEC_VSX_SLDWI))]
4265  "VECTOR_MEM_VSX_P (<MODE>mode)"
4266  "xxsldwi %x0,%x1,%x2,%3"
4267  [(set_attr "type" "vecperm")])
4268
4269
4270;; Vector reduction insns and splitters
4271
4272(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4273  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4274	(VEC_reduc:V2DF
4275	 (vec_concat:V2DF
4276	  (vec_select:DF
4277	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4278	   (parallel [(const_int 1)]))
4279	  (vec_select:DF
4280	   (match_dup 1)
4281	   (parallel [(const_int 0)])))
4282	 (match_dup 1)))
4283   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4284  "VECTOR_UNIT_VSX_P (V2DFmode)"
4285  "#"
4286  ""
4287  [(const_int 0)]
4288{
4289  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4290	     ? gen_reg_rtx (V2DFmode)
4291	     : operands[2];
4292  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4293  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4294  DONE;
4295}
4296  [(set_attr "length" "8")
4297   (set_attr "type" "veccomplex")])
4298
4299(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4300  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4301	(VEC_reduc:V4SF
4302	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4303	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4304   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4305   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4306  "VECTOR_UNIT_VSX_P (V4SFmode)"
4307  "#"
4308  ""
4309  [(const_int 0)]
4310{
4311  rtx op0 = operands[0];
4312  rtx op1 = operands[1];
4313  rtx tmp2, tmp3, tmp4;
4314
4315  if (can_create_pseudo_p ())
4316    {
4317      tmp2 = gen_reg_rtx (V4SFmode);
4318      tmp3 = gen_reg_rtx (V4SFmode);
4319      tmp4 = gen_reg_rtx (V4SFmode);
4320    }
4321  else
4322    {
4323      tmp2 = operands[2];
4324      tmp3 = operands[3];
4325      tmp4 = tmp2;
4326    }
4327
4328  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4329  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4330  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4331  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4332  DONE;
4333}
4334  [(set_attr "length" "16")
4335   (set_attr "type" "veccomplex")])
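
;; Lane walk-through of the V4SF reduction above, using + and big-endian
;; word numbering (illustrative only):
;;   tmp2 = xxsldwi (v, v, 2)        = { v2, v3, v0, v1 }
;;   tmp3 = tmp2 + v                 = { v0+v2, v1+v3, v0+v2, v1+v3 }
;;   tmp4 = xxsldwi (tmp3, tmp3, 3)  rotates tmp3 by one word
;;   op0  = tmp4 + tmp3              leaves v0+v1+v2+v3 in every lane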
4336
4337;; Combiner patterns for the vector reduction patterns that know we can get
4338;; to the top element of the V2DF array without doing an extract.
4339
4340(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4341  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4342	(vec_select:DF
4343	 (VEC_reduc:V2DF
4344	  (vec_concat:V2DF
4345	   (vec_select:DF
4346	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4347	    (parallel [(const_int 1)]))
4348	   (vec_select:DF
4349	    (match_dup 1)
4350	    (parallel [(const_int 0)])))
4351	  (match_dup 1))
4352	 (parallel [(const_int 1)])))
4353   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4354  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4355  "#"
4356  ""
4357  [(const_int 0)]
4358{
4359  rtx hi = gen_highpart (DFmode, operands[1]);
4360  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4361	    ? gen_reg_rtx (DFmode)
4362	    : operands[2];
4363
4364  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4365  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4366  DONE;
4367}
4368  [(set_attr "length" "8")
4369   (set_attr "type" "veccomplex")])
4370
4371(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4372  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4373	(vec_select:SF
4374	 (VEC_reduc:V4SF
4375	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4376	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4377	 (parallel [(const_int 3)])))
4378   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4379   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4380   (clobber (match_scratch:V4SF 4 "=0,0"))]
4381  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4382  "#"
4383  ""
4384  [(const_int 0)]
4385{
4386  rtx op0 = operands[0];
4387  rtx op1 = operands[1];
4388  rtx tmp2, tmp3, tmp4, tmp5;
4389
4390  if (can_create_pseudo_p ())
4391    {
4392      tmp2 = gen_reg_rtx (V4SFmode);
4393      tmp3 = gen_reg_rtx (V4SFmode);
4394      tmp4 = gen_reg_rtx (V4SFmode);
4395      tmp5 = gen_reg_rtx (V4SFmode);
4396    }
4397  else
4398    {
4399      tmp2 = operands[2];
4400      tmp3 = operands[3];
4401      tmp4 = tmp2;
4402      tmp5 = operands[4];
4403    }
4404
4405  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4406  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4407  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4408  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4409  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4410  DONE;
4411}
4412  [(set_attr "length" "20")
4413   (set_attr "type" "veccomplex")])
4414
4415
4416;; Power8 Vector fusion.  The fused ops must be physically adjacent.
4417(define_peephole
4418  [(set (match_operand:P 0 "base_reg_operand")
4419	(match_operand:P 1 "short_cint_operand"))
4420   (set (match_operand:VSX_M 2 "vsx_register_operand")
4421	(mem:VSX_M (plus:P (match_dup 0)
4422			   (match_operand:P 3 "int_reg_operand"))))]
4423  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4424  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4425  [(set_attr "length" "8")
4426   (set_attr "type" "vecload")])
4427
4428(define_peephole
4429  [(set (match_operand:P 0 "base_reg_operand")
4430	(match_operand:P 1 "short_cint_operand"))
4431   (set (match_operand:VSX_M 2 "vsx_register_operand")
4432	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4433			   (match_dup 0))))]
4434  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4435  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4436  [(set_attr "length" "8")
4437   (set_attr "type" "vecload")])
4438
4439
4440;; ISA 3.0 vector sign extend support
4441
4442(define_insn "vsx_sign_extend_qi_<mode>"
4443  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4444	(unspec:VSINT_84
4445	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4446	 UNSPEC_VSX_SIGN_EXTEND))]
4447  "TARGET_P9_VECTOR"
4448  "vextsb2<wd> %0,%1"
4449  [(set_attr "type" "vecexts")])
4450
4451(define_insn "vsx_sign_extend_hi_<mode>"
4452  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4453	(unspec:VSINT_84
4454	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4455	 UNSPEC_VSX_SIGN_EXTEND))]
4456  "TARGET_P9_VECTOR"
4457  "vextsh2<wd> %0,%1"
4458  [(set_attr "type" "vecexts")])
4459
4460(define_insn "*vsx_sign_extend_si_v2di"
4461  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4462	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4463		     UNSPEC_VSX_SIGN_EXTEND))]
4464  "TARGET_P9_VECTOR"
4465  "vextsw2d %0,%1"
4466  [(set_attr "type" "vecexts")])
4467
4468
4469;; ISA 3.0 Binary Floating-Point Support
4470
4471;; VSX Scalar Extract Exponent Quad-Precision
4472(define_insn "xsxexpqp_<mode>"
4473  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4474	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4475	 UNSPEC_VSX_SXEXPDP))]
4476  "TARGET_P9_VECTOR"
4477  "xsxexpqp %0,%1"
4478  [(set_attr "type" "vecmove")])
4479
4480;; VSX Scalar Extract Exponent Double-Precision
4481(define_insn "xsxexpdp"
4482  [(set (match_operand:DI 0 "register_operand" "=r")
4483	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4484	 UNSPEC_VSX_SXEXPDP))]
4485  "TARGET_P9_VECTOR && TARGET_64BIT"
4486  "xsxexpdp %0,%x1"
4487  [(set_attr "type" "integer")])
4488
4489;; VSX Scalar Extract Significand Quad-Precision
4490(define_insn "xsxsigqp_<mode>"
4491  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4492	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4493	 UNSPEC_VSX_SXSIG))]
4494  "TARGET_P9_VECTOR"
4495  "xsxsigqp %0,%1"
4496  [(set_attr "type" "vecmove")])
4497
4498;; VSX Scalar Extract Significand Double-Precision
4499(define_insn "xsxsigdp"
4500  [(set (match_operand:DI 0 "register_operand" "=r")
4501	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4502	 UNSPEC_VSX_SXSIG))]
4503  "TARGET_P9_VECTOR && TARGET_64BIT"
4504  "xsxsigdp %0,%x1"
4505  [(set_attr "type" "integer")])
4506
4507;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4508(define_insn "xsiexpqpf_<mode>"
4509  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4510	(unspec:IEEE128
4511	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4512	  (match_operand:DI 2 "altivec_register_operand" "v")]
4513	 UNSPEC_VSX_SIEXPQP))]
4514  "TARGET_P9_VECTOR"
4515  "xsiexpqp %0,%1,%2"
4516  [(set_attr "type" "vecmove")])
4517
4518;; VSX Scalar Insert Exponent Quad-Precision
4519(define_insn "xsiexpqp_<mode>"
4520  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4521	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4522			 (match_operand:DI 2 "altivec_register_operand" "v")]
4523	 UNSPEC_VSX_SIEXPQP))]
4524  "TARGET_P9_VECTOR"
4525  "xsiexpqp %0,%1,%2"
4526  [(set_attr "type" "vecmove")])
4527
4528;; VSX Scalar Insert Exponent Double-Precision
4529(define_insn "xsiexpdp"
4530  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4531	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
4532		    (match_operand:DI 2 "register_operand" "r")]
4533	 UNSPEC_VSX_SIEXPDP))]
4534  "TARGET_P9_VECTOR && TARGET_64BIT"
4535  "xsiexpdp %x0,%1,%2"
4536  [(set_attr "type" "fpsimple")])
4537
4538;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4539(define_insn "xsiexpdpf"
4540  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4541	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
4542		    (match_operand:DI 2 "register_operand" "r")]
4543	 UNSPEC_VSX_SIEXPDP))]
4544  "TARGET_P9_VECTOR && TARGET_64BIT"
4545  "xsiexpdp %x0,%1,%2"
4546  [(set_attr "type" "fpsimple")])
4547
4548;; VSX Scalar Compare Exponents Double-Precision
4549(define_expand "xscmpexpdp_<code>"
4550  [(set (match_dup 3)
4551	(compare:CCFP
4552	 (unspec:DF
4553	  [(match_operand:DF 1 "vsx_register_operand" "wa")
4554	   (match_operand:DF 2 "vsx_register_operand" "wa")]
4555	  UNSPEC_VSX_SCMPEXPDP)
4556	 (const_int 0)))
4557   (set (match_operand:SI 0 "register_operand" "=r")
4558	(CMP_TEST:SI (match_dup 3)
4559		     (const_int 0)))]
4560  "TARGET_P9_VECTOR"
4561{
4562  operands[3] = gen_reg_rtx (CCFPmode);
4563})
4564
4565(define_insn "*xscmpexpdp"
4566  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4567	(compare:CCFP
4568	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4569		     (match_operand:DF 2 "vsx_register_operand" "wa")]
4570	  UNSPEC_VSX_SCMPEXPDP)
4571	 (match_operand:SI 3 "zero_constant" "j")))]
4572  "TARGET_P9_VECTOR"
4573  "xscmpexpdp %0,%x1,%x2"
4574  [(set_attr "type" "fpcompare")])
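
;; Usage sketch: the CMP_TEST code iterator generates one expander per
;; comparison, which the documented scalar_cmp_exp_eq/gt/lt/unordered
;; built-ins are assumed to map onto.
;;
;;	#include <altivec.h>
;;
;;	int
;;	exp_fields_equal (double a, double b)	/* hypothetical */
;;	{
;;	  /* Compares only the biased exponent fields of A and B.  */
;;	  return scalar_cmp_exp_eq (a, b);
;;	}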
4575
4576;; VSX Scalar Compare Exponents Quad-Precision
4577(define_expand "xscmpexpqp_<code>_<mode>"
4578  [(set (match_dup 3)
4579	(compare:CCFP
4580	 (unspec:IEEE128
4581	  [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4582	   (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4583	  UNSPEC_VSX_SCMPEXPQP)
4584	 (const_int 0)))
4585   (set (match_operand:SI 0 "register_operand" "=r")
4586	(CMP_TEST:SI (match_dup 3)
4587		     (const_int 0)))]
4588  "TARGET_P9_VECTOR"
4589{
4590  operands[3] = gen_reg_rtx (CCFPmode);
4591})
4592
4593(define_insn "*xscmpexpqp"
4594  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4595	(compare:CCFP
4596	 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4597		          (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4598	  UNSPEC_VSX_SCMPEXPQP)
4599	 (match_operand:SI 3 "zero_constant" "j")))]
4600  "TARGET_P9_VECTOR"
4601  "xscmpexpqp %0,%1,%2"
4602  [(set_attr "type" "fpcompare")])
4603
4604;; VSX Scalar Test Data Class Quad-Precision
4605;;  (Expansion for scalar_test_data_class (__ieee128, int))
4606;;   (Has side effect of setting the lt bit if operand 1 is negative,
4607;;    setting the eq bit if any of the conditions tested by operand 2
;;    are satisfied, and clearing the gt and unordered bits to zero.)
4609(define_expand "xststdcqp_<mode>"
4610  [(set (match_dup 3)
4611	(compare:CCFP
4612	 (unspec:IEEE128
4613	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4614	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4615	  UNSPEC_VSX_STSTDC)
4616	 (const_int 0)))
4617   (set (match_operand:SI 0 "register_operand" "=r")
4618	(eq:SI (match_dup 3)
4619	       (const_int 0)))]
4620  "TARGET_P9_VECTOR"
4621{
4622  operands[3] = gen_reg_rtx (CCFPmode);
4623})
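
;; Usage sketch, assuming the documented scalar_test_data_class built-in
;; and the ISA 3.0 DCMX encoding (0x40 NaN, 0x20 +inf, 0x10 -inf, 0x08
;; +zero, 0x04 -zero, 0x02 +denormal, 0x01 -denormal):
;;
;;	#include <altivec.h>
;;
;;	int
;;	is_nan_or_inf (__ieee128 x)	/* hypothetical helper name */
;;	{
;;	  return scalar_test_data_class (x, 0x70);
;;	}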
4624
4625;; VSX Scalar Test Data Class Double- and Single-Precision
4626;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4627;;   if any of the conditions tested by operand 2 are satisfied.
4628;;   The gt and unordered bits are cleared to zero.)
4629(define_expand "xststdc<Fvsx>"
4630  [(set (match_dup 3)
4631	(compare:CCFP
4632	 (unspec:SFDF
4633	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4634	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4635	  UNSPEC_VSX_STSTDC)
4636	 (match_dup 4)))
4637   (set (match_operand:SI 0 "register_operand" "=r")
4638	(eq:SI (match_dup 3)
4639	       (const_int 0)))]
4640  "TARGET_P9_VECTOR"
4641{
4642  operands[3] = gen_reg_rtx (CCFPmode);
4643  operands[4] = CONST0_RTX (SImode);
4644})
4645
;; VSX Scalar Test Negative Quad-Precision
4647(define_expand "xststdcnegqp_<mode>"
4648  [(set (match_dup 2)
4649	(compare:CCFP
4650	 (unspec:IEEE128
4651	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4652	   (const_int 0)]
4653	  UNSPEC_VSX_STSTDC)
4654	 (const_int 0)))
4655   (set (match_operand:SI 0 "register_operand" "=r")
4656	(lt:SI (match_dup 2)
4657	       (const_int 0)))]
4658  "TARGET_P9_VECTOR"
4659{
4660  operands[2] = gen_reg_rtx (CCFPmode);
4661})
4662
;; VSX Scalar Test Negative Double- and Single-Precision
4664(define_expand "xststdcneg<Fvsx>"
4665  [(set (match_dup 2)
4666	(compare:CCFP
4667	 (unspec:SFDF
4668	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4669	   (const_int 0)]
4670	  UNSPEC_VSX_STSTDC)
4671	 (match_dup 3)))
4672   (set (match_operand:SI 0 "register_operand" "=r")
4673	(lt:SI (match_dup 2)
4674	       (const_int 0)))]
4675  "TARGET_P9_VECTOR"
4676{
4677  operands[2] = gen_reg_rtx (CCFPmode);
4678  operands[3] = CONST0_RTX (SImode);
4679})
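
;; Usage sketch, assuming the documented scalar_test_neg built-in
;; expands through the test-negative patterns above:
;;
;;	#include <altivec.h>
;;
;;	int
;;	sign_bit_set (double x)		/* hypothetical helper name */
;;	{
;;	  return scalar_test_neg (x);	/* true even for -0.0 and -NaN */
;;	}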
4680
4681(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4683	(compare:CCFP
4684	 (unspec:IEEE128
4685	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4686	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4687	  UNSPEC_VSX_STSTDC)
4688	 (const_int 0)))]
4689  "TARGET_P9_VECTOR"
4690  "xststdcqp %0,%1,%2"
4691  [(set_attr "type" "fpcompare")])
4692
4693(define_insn "*xststdc<Fvsx>"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4695	(compare:CCFP
4696	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4697		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
4698	  UNSPEC_VSX_STSTDC)
4699	 (match_operand:SI 3 "zero_constant" "j")))]
4700  "TARGET_P9_VECTOR"
4701  "xststdc<Fvsx> %0,%x1,%2"
4702  [(set_attr "type" "fpcompare")])
4703
4704;; VSX Vector Extract Exponent Double and Single Precision
4705(define_insn "xvxexp<VSs>"
4706  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4707	(unspec:VSX_F
4708	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4709	 UNSPEC_VSX_VXEXP))]
4710  "TARGET_P9_VECTOR"
4711  "xvxexp<VSs> %x0,%x1"
4712  [(set_attr "type" "vecsimple")])
4713
4714;; VSX Vector Extract Significand Double and Single Precision
4715(define_insn "xvxsig<VSs>"
4716  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4717	(unspec:VSX_F
4718	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4719	 UNSPEC_VSX_VXSIG))]
4720  "TARGET_P9_VECTOR"
4721  "xvxsig<VSs> %x0,%x1"
4722  [(set_attr "type" "vecsimple")])
4723
4724;; VSX Vector Insert Exponent Double and Single Precision
4725(define_insn "xviexp<VSs>"
4726  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4727	(unspec:VSX_F
4728	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4729	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4730	 UNSPEC_VSX_VIEXP))]
4731  "TARGET_P9_VECTOR"
4732  "xviexp<VSs> %x0,%x1,%x2"
4733  [(set_attr "type" "vecsimple")])
4734
4735;; VSX Vector Test Data Class Double and Single Precision
4736;; The corresponding elements of the result vector are all ones
4737;; if any of the conditions tested by operand 3 are satisfied.
4738(define_insn "xvtstdc<VSs>"
4739  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4740	(unspec:<VSI>
4741	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4742	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
4743	 UNSPEC_VSX_VTSTDC))]
4744  "TARGET_P9_VECTOR"
4745  "xvtstdc<VSs> %x0,%x1,%2"
4746  [(set_attr "type" "vecsimple")])
4747
4748;; ISA 3.0 String Operations Support
4749
;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches the v16qi, v8hi,
;; and v4si modes; there is no need to match the v4sf, v2df, or v2di
;; modes because those comparisons are expanded to use Power8
;; instructions.
4755(define_insn "*vsx_ne_<mode>_p"
4756  [(set (reg:CC CR6_REGNO)
4757	(unspec:CC
4758	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4759		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4760	 UNSPEC_PREDICATE))
4761   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4762	(ne:VSX_EXTRACT_I (match_dup 1)
4763			  (match_dup 2)))]
4764  "TARGET_P9_VECTOR"
4765  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4766  [(set_attr "type" "vecsimple")])
4767
4768(define_insn "*vector_nez_<mode>_p"
4769  [(set (reg:CC CR6_REGNO)
4770	(unspec:CC [(unspec:VI
4771		     [(match_operand:VI 1 "gpc_reg_operand" "v")
4772		      (match_operand:VI 2 "gpc_reg_operand" "v")]
4773		     UNSPEC_NEZ_P)]
4774	 UNSPEC_PREDICATE))
4775   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4776	(unspec:VI [(match_dup 1)
4777		    (match_dup 2)]
4778	 UNSPEC_NEZ_P))]
4779  "TARGET_P9_VECTOR"
4780  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4781  [(set_attr "type" "vecsimple")])
4782
;; Return first position of match between vectors using natural element
;; order for both LE and BE execution modes.
4785(define_expand "first_match_index_<mode>"
4786  [(match_operand:SI 0 "register_operand")
4787   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4788	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4789  UNSPEC_VSX_FIRST_MATCH_INDEX)]
4790  "TARGET_P9_VECTOR"
4791{
4792  int sh;
4793
4794  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4795  rtx not_result = gen_reg_rtx (<MODE>mode);
4796
4797  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4798					     operands[2]));
4799  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4800
4801  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4802
4803  if (<MODE>mode == V16QImode)
4804    {
4805      if (!BYTES_BIG_ENDIAN)
4806        emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4807      else
4808        emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4809    }
4810  else
4811    {
4812      rtx tmp = gen_reg_rtx (SImode);
4813      if (!BYTES_BIG_ENDIAN)
4814        emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4815      else
4816        emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4817      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4818    }
4819  DONE;
4820})
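
;; Usage sketch, assuming the vec_first_match_index built-in described
;; in the Power Vector Intrinsic Programming Reference maps onto this
;; expander:
;;
;;	#include <altivec.h>
;;
;;	int
;;	first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  /* Index of first equal element pair; 16 if none match.  */
;;	  return vec_first_match_index (a, b);
;;	}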
4821
4822;; Return first position of match between vectors or end of string (EOS) using
4823;; natural element order for both LE and BE execution modes.
4824(define_expand "first_match_or_eos_index_<mode>"
4825  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4828  UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4829  "TARGET_P9_VECTOR"
4830{
4831  int sh;
4832  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4833  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4834  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4835  rtx and_result = gen_reg_rtx (<MODE>mode);
4836  rtx result = gen_reg_rtx (<MODE>mode);
4837  rtx vzero = gen_reg_rtx (<MODE>mode);
4838
4839  /* Vector with zeros in elements that correspond to zeros in operands.  */
4840  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4841  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4842  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4843  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4844
  /* Vector with ones in elements that do not match.  */
4846  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4847                                             operands[2]));
4848
4849  /* Create vector with ones in elements where there was a zero in one of
4850     the source elements or the elements that match.  */
4851  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4852  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4853
4854  if (<MODE>mode == V16QImode)
4855    {
4856      if (!BYTES_BIG_ENDIAN)
4857        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4858      else
4859        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4860    }
4861  else
4862    {
4863      rtx tmp = gen_reg_rtx (SImode);
4864      if (!BYTES_BIG_ENDIAN)
4865        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4866      else
4867        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4868      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4869    }
4870  DONE;
4871})
4872
4873;; Return first position of mismatch between vectors using natural
4874;; element order for both LE and BE execution modes.
4875(define_expand "first_mismatch_index_<mode>"
4876  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4879  UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4880  "TARGET_P9_VECTOR"
4881{
4882  int sh;
4883  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4884
4885  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4886					    operands[2]));
4887  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4888
4889  if (<MODE>mode == V16QImode)
4890    {
4891      if (!BYTES_BIG_ENDIAN)
4892        emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4893      else
4894        emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4895    }
4896  else
4897    {
4898      rtx tmp = gen_reg_rtx (SImode);
4899      if (!BYTES_BIG_ENDIAN)
4900        emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4901      else
4902        emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4903      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4904    }
4905  DONE;
4906})
4907
4908;; Return first position of mismatch between vectors or end of string (EOS)
4909;; using natural element order for both LE and BE execution modes.
4910(define_expand "first_mismatch_or_eos_index_<mode>"
4911  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4914  UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4915  "TARGET_P9_VECTOR"
4916{
4917  int sh;
4918  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4919  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4920  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4921  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4922  rtx and_result = gen_reg_rtx (<MODE>mode);
4923  rtx result = gen_reg_rtx (<MODE>mode);
4924  rtx vzero = gen_reg_rtx (<MODE>mode);
4925
4926  /* Vector with zeros in elements that correspond to zeros in operands.  */
4927  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4928
4929  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4930  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4931  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4932
  /* Vector with ones in elements that match.  */
4934  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4935                                             operands[2]));
4936  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4937
4938  /* Create vector with ones in elements where there was a zero in one of
4939     the source elements or the elements did not match.  */
4940  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4941  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4942
4943  if (<MODE>mode == V16QImode)
4944    {
4945      if (!BYTES_BIG_ENDIAN)
4946        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4947      else
4948        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4949    }
4950  else
4951    {
4952      rtx tmp = gen_reg_rtx (SImode);
4953      if (!BYTES_BIG_ENDIAN)
4954        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4955      else
4956        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4957      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4958    }
4959  DONE;
4960})
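
;; Usage sketch for the EOS-aware variants, assuming the documented
;; vec_first_match_or_eos_index, vec_first_mismatch_index, and
;; vec_first_mismatch_or_eos_index built-ins map onto the three
;; expanders above:
;;
;;	#include <altivec.h>
;;
;;	int
;;	break_index (vector unsigned char a, vector unsigned char b)
;;	{
;;	  /* First element where A and B differ or either is zero.  */
;;	  return vec_first_mismatch_or_eos_index (a, b);
;;	}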
4961
4962;; Load VSX Vector with Length
4963(define_expand "lxvl"
4964  [(set (match_dup 3)
4965        (ashift:DI (match_operand:DI 2 "register_operand")
4966                   (const_int 56)))
4967   (set (match_operand:V16QI 0 "vsx_register_operand")
4968	(unspec:V16QI
4969	 [(match_operand:DI 1 "gpc_reg_operand")
4970          (mem:V16QI (match_dup 1))
4971	  (match_dup 3)]
4972	 UNSPEC_LXVL))]
4973  "TARGET_P9_VECTOR && TARGET_64BIT"
4974{
4975  operands[3] = gen_reg_rtx (DImode);
4976})
4977
4978(define_insn "*lxvl"
4979  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4980	(unspec:V16QI
4981	 [(match_operand:DI 1 "gpc_reg_operand" "b")
4982	  (mem:V16QI (match_dup 1))
4983	  (match_operand:DI 2 "register_operand" "r")]
4984	 UNSPEC_LXVL))]
4985  "TARGET_P9_VECTOR && TARGET_64BIT"
4986  "lxvl %x0,%1,%2"
4987  [(set_attr "type" "vecload")])
4988
4989(define_insn "lxvll"
4990  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4991	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4992                       (mem:V16QI (match_dup 1))
4993		       (match_operand:DI 2 "register_operand" "r")]
4994		      UNSPEC_LXVLL))]
4995  "TARGET_P9_VECTOR"
4996  "lxvll %x0,%1,%2"
4997  [(set_attr "type" "vecload")])
4998
4999;; Expand for builtin xl_len_r
5000(define_expand "xl_len_r"
5001  [(match_operand:V16QI 0 "vsx_register_operand")
5002   (match_operand:DI 1 "register_operand")
5003   (match_operand:DI 2 "register_operand")]
5004  ""
5005{
5006  rtx shift_mask = gen_reg_rtx (V16QImode);
5007  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5008  rtx tmp = gen_reg_rtx (DImode);
5009
5010  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5011  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5012  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5013  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5014	     shift_mask));
5015  DONE;
5016})
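
;; Usage sketch, assuming the 64-bit vec_xl_len and vec_xl_len_r
;; built-ins documented for ISA 3.0; lxvl backs the former and the
;; xl_len_r expansion above backs the latter:
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	vector unsigned char
;;	load_partial (unsigned char *p, size_t n)	/* n <= 16 */
;;	{
;;	  return vec_xl_len (p, n);	/* bytes past N read as zero */
;;	}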
5017
5018(define_insn "stxvll"
5019  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5020	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5021		       (mem:V16QI (match_dup 1))
5022		       (match_operand:DI 2 "register_operand" "r")]
5023	              UNSPEC_STXVLL))]
5024  "TARGET_P9_VECTOR"
5025  "stxvll %x0,%1,%2"
5026  [(set_attr "type" "vecstore")])
5027
5028;; Store VSX Vector with Length
5029(define_expand "stxvl"
5030  [(set (match_dup 3)
5031	(ashift:DI (match_operand:DI 2 "register_operand")
5032		   (const_int 56)))
5033   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5034	(unspec:V16QI
5035	 [(match_operand:V16QI 0 "vsx_register_operand")
5036	  (mem:V16QI (match_dup 1))
5037	  (match_dup 3)]
5038	 UNSPEC_STXVL))]
5039  "TARGET_P9_VECTOR && TARGET_64BIT"
5040{
5041  operands[3] = gen_reg_rtx (DImode);
5042})
5043
5044(define_insn "*stxvl"
5045  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5046	(unspec:V16QI
5047	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5048	  (mem:V16QI (match_dup 1))
5049	  (match_operand:DI 2 "register_operand" "r")]
5050	 UNSPEC_STXVL))]
5051  "TARGET_P9_VECTOR && TARGET_64BIT"
5052  "stxvl %x0,%1,%2"
5053  [(set_attr "type" "vecstore")])
5054
5055;; Expand for builtin xst_len_r
5056(define_expand "xst_len_r"
5057  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5058   (match_operand:DI 1 "register_operand" "b")
5059   (match_operand:DI 2 "register_operand" "r")]
5060  "UNSPEC_XST_LEN_R"
5061{
5062  rtx shift_mask = gen_reg_rtx (V16QImode);
5063  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5064  rtx tmp = gen_reg_rtx (DImode);
5065
5066  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5067  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5068	     shift_mask));
5069  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5070  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5071  DONE;
5072})
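
;; Usage sketch for the store side, assuming the documented vec_xst_len
;; built-in (vec_xst_len_r is assumed to go through xst_len_r above):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	void
;;	store_partial (vector unsigned char v, unsigned char *p, size_t n)
;;	{
;;	  vec_xst_len (v, p, n);	/* stores only the first N bytes */
;;	}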
5073
;; Vector Compare Not Equal Byte (written as not+eq rather than an unspec)
5075(define_insn "vcmpneb"
5076  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5077	 (not:V16QI
5078	   (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5079		     (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5080  "TARGET_P9_VECTOR"
5081  "vcmpneb %0,%1,%2"
5082  [(set_attr "type" "vecsimple")])
5083
5084;; Vector Compare Not Equal or Zero Byte
5085(define_insn "vcmpnezb"
5086  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5087	(unspec:V16QI
5088	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5089	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5090	 UNSPEC_VCMPNEZB))]
5091  "TARGET_P9_VECTOR"
5092  "vcmpnezb %0,%1,%2"
5093  [(set_attr "type" "vecsimple")])
5094
;; Vector Compare Not Equal or Zero Byte predicate (record form, sets CR6)
5096(define_insn "vcmpnezb_p"
5097  [(set (reg:CC CR6_REGNO)
5098	(unspec:CC
5099	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5100	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5101	 UNSPEC_VCMPNEZB))
5102   (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5103	(unspec:V16QI
5104	 [(match_dup 1)
5105	  (match_dup 2)]
5106	 UNSPEC_VCMPNEZB))]
5107  "TARGET_P9_VECTOR"
5108  "vcmpnezb. %0,%1,%2"
5109  [(set_attr "type" "vecsimple")])
5110
;; Vector Compare Not Equal Half Word (written as not+eq rather than an unspec)
5112(define_insn "vcmpneh"
5113  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5114	(not:V8HI
5115	  (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5116		   (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5117  "TARGET_P9_VECTOR"
5118  "vcmpneh %0,%1,%2"
5119  [(set_attr "type" "vecsimple")])
5120
5121;; Vector Compare Not Equal or Zero Half Word
5122(define_insn "vcmpnezh"
5123  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5124	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5125		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
5126	 UNSPEC_VCMPNEZH))]
5127  "TARGET_P9_VECTOR"
5128  "vcmpnezh %0,%1,%2"
5129  [(set_attr "type" "vecsimple")])
5130
;; Vector Compare Not Equal Word (written as not+eq rather than an unspec)
5132(define_insn "vcmpnew"
5133  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5134	(not:V4SI
5135	  (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5136		   (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5137  "TARGET_P9_VECTOR"
5138  "vcmpnew %0,%1,%2"
5139  [(set_attr "type" "vecsimple")])
5140
5141;; Vector Compare Not Equal or Zero Word
5142(define_insn "vcmpnezw"
5143  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5144	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5145		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
5146	 UNSPEC_VCMPNEZW))]
5147  "TARGET_P9_VECTOR"
5148  "vcmpnezw %0,%1,%2"
5149  [(set_attr "type" "vecsimple")])
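
;; Usage sketch, assuming the documented vec_cmpnez built-in maps onto
;; the vcmpnezb/vcmpnezh/vcmpnezw patterns above:
;;
;;	#include <altivec.h>
;;
;;	vector bool char
;;	differ_or_zero (vector signed char a, vector signed char b)
;;	{
;;	  /* All-ones where a[i] != b[i] or either element is zero.  */
;;	  return vec_cmpnez (a, b);
;;	}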
5150
5151;; Vector Count Leading Zero Least-Significant Bits Byte
5152(define_insn "vclzlsbb_<mode>"
5153  [(set (match_operand:SI 0 "register_operand" "=r")
5154	(unspec:SI
5155	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5156	 UNSPEC_VCLZLSBB))]
5157  "TARGET_P9_VECTOR"
5158  "vclzlsbb %0,%1"
5159  [(set_attr "type" "vecsimple")])
5160
5161;; Vector Count Trailing Zero Least-Significant Bits Byte
5162(define_insn "vctzlsbb_<mode>"
5163  [(set (match_operand:SI 0 "register_operand" "=r")
5164	(unspec:SI
5165	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5166	 UNSPEC_VCTZLSBB))]
5167  "TARGET_P9_VECTOR"
5168  "vctzlsbb %0,%1"
5169  [(set_attr "type" "vecsimple")])
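
;; Usage sketch, assuming the documented vec_cntlz_lsbb and
;; vec_cnttz_lsbb built-ins.  These count leading/trailing byte elements
;; whose least-significant bit is zero, which is how the string
;; expanders above turn a compare-result mask into an element index:
;;
;;	#include <altivec.h>
;;
;;	int
;;	first_set_lsb (vector unsigned char mask)	/* hypothetical */
;;	{
;;	  return vec_cnttz_lsbb (mask);
;;	}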
5170
5171;; Vector Extract Unsigned Byte Left-Indexed
5172(define_insn "vextublx"
5173  [(set (match_operand:SI 0 "register_operand" "=r")
5174	(unspec:SI
5175	 [(match_operand:SI 1 "register_operand" "r")
5176	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5177	 UNSPEC_VEXTUBLX))]
5178  "TARGET_P9_VECTOR"
5179  "vextublx %0,%1,%2"
5180  [(set_attr "type" "vecsimple")])
5181
5182;; Vector Extract Unsigned Byte Right-Indexed
5183(define_insn "vextubrx"
5184  [(set (match_operand:SI 0 "register_operand" "=r")
5185	(unspec:SI
5186	 [(match_operand:SI 1 "register_operand" "r")
5187	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5188	 UNSPEC_VEXTUBRX))]
5189  "TARGET_P9_VECTOR"
5190  "vextubrx %0,%1,%2"
5191  [(set_attr "type" "vecsimple")])
5192
5193;; Vector Extract Unsigned Half Word Left-Indexed
5194(define_insn "vextuhlx"
5195  [(set (match_operand:SI 0 "register_operand" "=r")
5196	(unspec:SI
5197	 [(match_operand:SI 1 "register_operand" "r")
5198	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5199	 UNSPEC_VEXTUHLX))]
5200  "TARGET_P9_VECTOR"
5201  "vextuhlx %0,%1,%2"
5202  [(set_attr "type" "vecsimple")])
5203
5204;; Vector Extract Unsigned Half Word Right-Indexed
5205(define_insn "vextuhrx"
5206  [(set (match_operand:SI 0 "register_operand" "=r")
5207	(unspec:SI
5208	 [(match_operand:SI 1 "register_operand" "r")
5209	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5210	 UNSPEC_VEXTUHRX))]
5211  "TARGET_P9_VECTOR"
5212  "vextuhrx %0,%1,%2"
5213  [(set_attr "type" "vecsimple")])
5214
5215;; Vector Extract Unsigned Word Left-Indexed
5216(define_insn "vextuwlx"
5217  [(set (match_operand:SI 0 "register_operand" "=r")
5218	(unspec:SI
5219	 [(match_operand:SI 1 "register_operand" "r")
5220	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5221	 UNSPEC_VEXTUWLX))]
5222  "TARGET_P9_VECTOR"
5223  "vextuwlx %0,%1,%2"
5224  [(set_attr "type" "vecsimple")])
5225
5226;; Vector Extract Unsigned Word Right-Indexed
5227(define_insn "vextuwrx"
5228  [(set (match_operand:SI 0 "register_operand" "=r")
5229	(unspec:SI
5230	 [(match_operand:SI 1 "register_operand" "r")
5231	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5232	 UNSPEC_VEXTUWRX))]
5233  "TARGET_P9_VECTOR"
5234  "vextuwrx %0,%1,%2"
5235  [(set_attr "type" "vecsimple")])
5236
;; Vector insert/extract word at arbitrary byte values.  Note that the
;; little endian version needs to adjust the byte number and the V4SI
;; element used by insert4b.
5240(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5242       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5243                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
5244                    UNSPEC_XXEXTRACTUW))]
5245  "TARGET_P9_VECTOR"
5246{
5247  if (!BYTES_BIG_ENDIAN)
5248    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5249
5250  return "xxextractuw %x0,%x1,%2";
5251})
5252
5253(define_expand "insert4b"
5254  [(set (match_operand:V16QI 0 "vsx_register_operand")
5255	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5256		       (match_operand:V16QI 2 "vsx_register_operand")
5257		       (match_operand:QI 3 "const_0_to_12_operand")]
5258		   UNSPEC_XXINSERTW))]
5259  "TARGET_P9_VECTOR"
5260{
5261  if (!BYTES_BIG_ENDIAN)
5262    {
5263      rtx op1 = operands[1];
5264      rtx v4si_tmp = gen_reg_rtx (V4SImode);
5265      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5266      operands[1] = v4si_tmp;
5267      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5268    }
5269})
5270
5271(define_insn "*insert4b_internal"
5272  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5273	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5274		       (match_operand:V16QI 2 "vsx_register_operand" "0")
5275		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
5276		   UNSPEC_XXINSERTW))]
5277  "TARGET_P9_VECTOR"
5278  "xxinsertw %x0,%x1,%3"
5279  [(set_attr "type" "vecperm")])
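
;; Usage sketch, assuming the vec_insert4b and vec_extract4b built-ins
;; documented for ISA 3.0 map onto the insert4b/extract4b patterns:
;;
;;	#include <altivec.h>
;;
;;	vector unsigned char
;;	put_word (vector signed int w, vector unsigned char v)
;;	{
;;	  /* Insert a word from W at byte offset 4 of V.  */
;;	  return vec_insert4b (w, v, 4);
;;	}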
5280
5281
;; Extract four 32-bit float values from the left four elements of an
;; eight element vector of 16-bit float values.
5284(define_expand "vextract_fp_from_shorth"
5285  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5286	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5287   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5288  "TARGET_P9_VECTOR"
5289{
5290  int i;
5291  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5292  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5293
5294  rtx rvals[16];
5295  rtx mask = gen_reg_rtx (V16QImode);
5296  rtx tmp = gen_reg_rtx (V16QImode);
5297  rtvec v;
5298
5299  for (i = 0; i < 16; i++)
5300    if (!BYTES_BIG_ENDIAN)
5301      rvals[i] = GEN_INT (vals_le[i]);
5302    else
5303      rvals[i] = GEN_INT (vals_be[i]);
5304
5305  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5306     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5307     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5308     conversion instruction.  */
5309  v = gen_rtvec_v (16, rvals);
5310  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5311  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5312					  operands[1], mask));
5313  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5314  DONE;
5315})
5316
;; Extract four 32-bit float values from the right four elements of an
;; eight element vector of 16-bit float values.
5319(define_expand "vextract_fp_from_shortl"
5320  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5321	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5322	UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5323  "TARGET_P9_VECTOR"
5324{
5325  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5326  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5327
5328  int i;
5329  rtx rvals[16];
5330  rtx mask = gen_reg_rtx (V16QImode);
5331  rtx tmp = gen_reg_rtx (V16QImode);
5332  rtvec v;
5333
5334  for (i = 0; i < 16; i++)
5335    if (!BYTES_BIG_ENDIAN)
5336      rvals[i] = GEN_INT (vals_le[i]);
5337    else
5338      rvals[i] = GEN_INT (vals_be[i]);
5339
5340  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5341     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5342     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5343     conversion instruction.  */
5344  v = gen_rtvec_v (16, rvals);
5345  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5346  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5347					  operands[1], mask));
5348  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5349  DONE;
5350})
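
;; Usage sketch, assuming the documented vec_extract_fp32_from_shorth
;; and vec_extract_fp32_from_shortl built-ins map onto the two
;; expanders above:
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	widen_left_half (vector unsigned short h)	/* hypothetical */
;;	{
;;	  return vec_extract_fp32_from_shorth (h);
;;	}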
5351
5352;; Support for ISA 3.0 vector byte reverse
5353
;; Swap all bytes within a vector
5355(define_insn "p9_xxbrq_v1ti"
5356  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5357	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5358  "TARGET_P9_VECTOR"
5359  "xxbrq %x0,%x1"
5360  [(set_attr "type" "vecperm")])
5361
5362(define_expand "p9_xxbrq_v16qi"
5363  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5364   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5365  "TARGET_P9_VECTOR"
5366{
5367  rtx op0 = gen_reg_rtx (V1TImode);
5368  rtx op1 = gen_lowpart (V1TImode, operands[1]);
5369  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5370  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5371  DONE;
5372})
5373
5374;; Swap all bytes in each 64-bit element
5375(define_insn "p9_xxbrd_v2di"
5376  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5377	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5378  "TARGET_P9_VECTOR"
5379  "xxbrd %x0,%x1"
5380  [(set_attr "type" "vecperm")])
5381
5382(define_expand "p9_xxbrd_v2df"
5383  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5384   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5385  "TARGET_P9_VECTOR"
5386{
5387  rtx op0 = gen_reg_rtx (V2DImode);
5388  rtx op1 = gen_lowpart (V2DImode, operands[1]);
5389  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5390  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5391  DONE;
5392})
5393
5394;; Swap all bytes in each 32-bit element
5395(define_insn "p9_xxbrw_v4si"
5396  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5397	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5398  "TARGET_P9_VECTOR"
5399  "xxbrw %x0,%x1"
5400  [(set_attr "type" "vecperm")])
5401
5402(define_expand "p9_xxbrw_v4sf"
5403  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5404   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5405  "TARGET_P9_VECTOR"
5406{
5407  rtx op0 = gen_reg_rtx (V4SImode);
5408  rtx op1 = gen_lowpart (V4SImode, operands[1]);
5409  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5410  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5411  DONE;
5412})
5413
5414;; Swap all bytes in each element of vector
5415(define_expand "revb_<mode>"
5416  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5417   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5418  ""
5419{
5420  if (TARGET_P9_VECTOR)
5421    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5422  else
5423    {
5424      /* Want to have the elements in reverse order relative
5425	 to the endian mode in use, i.e. in LE mode, put elements
5426	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5428      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5429					   operands[1], sel));
5430    }
5431
5432  DONE;
5433})
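
;; Usage sketch, assuming the documented vec_revb built-in reaches this
;; expander (xxbr* when ISA 3.0 is available, an altivec vperm fallback
;; otherwise):
;;
;;	#include <altivec.h>
;;
;;	vector unsigned int
;;	byteswap_words (vector unsigned int v)	/* hypothetical */
;;	{
;;	  return vec_revb (v);	/* bswap each 32-bit element */
;;	}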
5434
5435;; Reversing bytes in vector char is just a NOP.
5436(define_expand "revb_v16qi"
5437  [(set (match_operand:V16QI 0 "vsx_register_operand")
5438	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5439  ""
5440{
5441  emit_move_insn (operands[0], operands[1]);
5442  DONE;
5443})
5444
5445;; Swap all bytes in each 16-bit element
5446(define_insn "p9_xxbrh_v8hi"
5447  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5448	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5449  "TARGET_P9_VECTOR"
5450  "xxbrh %x0,%x1"
5451  [(set_attr "type" "vecperm")])
5452
5453
5454;; Operand numbers for the following peephole2
5455(define_constants
5456  [(SFBOOL_TMP_GPR		 0)		;; GPR temporary
5457   (SFBOOL_TMP_VSX		 1)		;; vector temporary
5458   (SFBOOL_MFVSR_D		 2)		;; move to gpr dest
5459   (SFBOOL_MFVSR_A		 3)		;; move to gpr src
5460   (SFBOOL_BOOL_D		 4)		;; and/ior/xor dest
5461   (SFBOOL_BOOL_A1		 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2		 6)		;; and/ior/xor arg2
5463   (SFBOOL_SHL_D		 7)		;; shift left dest
5464   (SFBOOL_SHL_A		 8)		;; shift left arg
   (SFBOOL_MTVSR_D		 9)		;; move to vector dest
5466   (SFBOOL_MFVSR_A_V4SF		10)		;; SFBOOL_MFVSR_A as V4SFmode
5467   (SFBOOL_BOOL_A_DI		11)		;; SFBOOL_BOOL_A1/A2 as DImode
5468   (SFBOOL_TMP_VSX_DI		12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF		13)])		;; SFBOOL_MTVSR_D as V4SFmode
5470
5471;; Attempt to optimize some common GLIBC operations using logical operations to
5472;; pick apart SFmode operations.  For example, there is code from e_powf.c
5473;; after macro expansion that looks like:
5474;;
5475;;	typedef union {
5476;;	  float value;
5477;;	  uint32_t word;
5478;;	} ieee_float_shape_type;
5479;;
5480;;	float t1;
5481;;	int32_t is;
5482;;
5483;;	do {
5484;;	  ieee_float_shape_type gf_u;
5485;;	  gf_u.value = (t1);
5486;;	  (is) = gf_u.word;
5487;;	} while (0);
5488;;
5489;;	do {
5490;;	  ieee_float_shape_type sf_u;
5491;;	  sf_u.word = (is & 0xfffff000);
5492;;	  (t1) = sf_u.value;
5493;;	} while (0);
5494;;
5495;;
5496;; This would result in two direct move operations (convert to memory format,
5497;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5498;; scalar format).  With this peephole, we eliminate the direct move to the
5499;; GPR, and instead move the integer mask value to the vector register after a
5500;; shift and do the VSX logical operation.
5501
5502;; The insns for dealing with SFmode in GPR registers looks like:
5503;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5504;;
5505;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5506;;
5507;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5508;;
5509;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5510;;
5511;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5512;;
5513;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5514
5515(define_peephole2
5516  [(match_scratch:DI SFBOOL_TMP_GPR "r")
5517   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5518
5519   ;; MFVSRWZ (aka zero_extend)
5520   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5521	(zero_extend:DI
5522	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5523
5524   ;; AND/IOR/XOR operation on int
5525   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5526	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5527			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5528
5529   ;; SLDI
5530   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5531	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5532		   (const_int 32)))
5533
5534   ;; MTVSRD
5535   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5536	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5537
5538  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers even when their modes differ.  */
5541   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5542   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5543   && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
5544   && (REG_P (operands[SFBOOL_BOOL_A2])
5545       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5546   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5547       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5548   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5549       || (REG_P (operands[SFBOOL_BOOL_A2])
5550	   && REGNO (operands[SFBOOL_MFVSR_D])
5551		== REGNO (operands[SFBOOL_BOOL_A2])))
5552   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5553   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5554       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5555   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5556  [(set (match_dup SFBOOL_TMP_GPR)
5557	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5558		   (const_int 32)))
5559
5560   (set (match_dup SFBOOL_TMP_VSX_DI)
5561	(match_dup SFBOOL_TMP_GPR))
5562
5563   (set (match_dup SFBOOL_MTVSR_D_V4SF)
5564	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5565			  (match_dup SFBOOL_TMP_VSX)))]
5566{
5567  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5568  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5569  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5570  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5571  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5572  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5573
5574  if (CONST_INT_P (bool_a2))
5575    {
5576      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5577      emit_move_insn (tmp_gpr, bool_a2);
5578      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5579    }
5580  else
5581    {
5582      int regno_bool_a1 = REGNO (bool_a1);
5583      int regno_bool_a2 = REGNO (bool_a2);
5584      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5585			  ? regno_bool_a2 : regno_bool_a1);
5586      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5587    }
5588
5589  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5590  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5591  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5592})
5593