;; VSX patterns.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
				  (TF   "FLOAT128_VECTOR_P (TFmode)")
				  TI
				  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
			     V8HI
			     V4SI
			     V2DI
			     V4SF
			     V2DF
			     V1TI
			     TI
			     (KF	"FLOAT128_VECTOR_P (KFmode)")
			     (TF	"FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
			     V8HI
			     V4SI
			     V2DI
			     V4SF
			     V2DF
			     V1TI
			     (KF	"FLOAT128_VECTOR_P (KFmode)")
			     (TF	"FLOAT128_VECTOR_P (TFmode)")
			     TI])

(define_mode_attr VSX_XXBR  [(V8HI  "h")
			     (V4SI  "w")
			     (V4SF  "w")
			     (V2DF  "d")
			     (V2DI  "d")
			     (V1TI  "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm  [(V16QI "vw4")
			(V8HI  "vw4")
			(V4SI  "vw4")
			(V4SF  "vw4")
			(V2DF  "vd2")
			(V2DI  "vd2")
			(DF    "d")
			(TF    "vd2")
			(KF    "vd2")
			(V1TI  "vd2")
			(TI    "vd2")])

;; Map the register class used
(define_mode_attr VSr	[(V16QI "v")
			 (V8HI  "v")
			 (V4SI  "v")
			 (V4SF  "wa")
			 (V2DI  "wa")
			 (V2DF  "wa")
			 (DI	"wa")
			 (DF    "wa")
			 (SF	"wa")
			 (TF	"wa")
			 (KF	"wa")
			 (V1TI  "v")
			 (TI    "wa")])

;; The value we need in the "isa" field to make the IEEE QP float work.
(define_mode_attr VSisa	[(V16QI "*")
			 (V8HI  "*")
			 (V4SI  "*")
			 (V4SF  "*")
			 (V2DI  "*")
			 (V2DF  "*")
			 (DI	"*")
			 (DF    "*")
			 (SF	"*")
			 (V1TI	"*")
			 (TI    "*")
			 (TF	"p9tf")
			 (KF	"p9kf")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r	[(V16QI	"??r")
			 (V8HI	"??r")
			 (V4SI	"??r")
			 (V4SF	"??r")
			 (V2DI	"??r")
			 (V2DF	"??r")
			 (V1TI	"??r")
			 (KF	"??r")
			 (TF	"??r")
			 (TI	"r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW	[(V16QI	"W")
			 (V8HI	"W")
			 (V4SI	"W")
			 (V4SF	"W")
			 (V2DI	"W")
			 (V2DF	"W")
			 (V1TI	"W")
			 (KF	"W")
			 (TF	"W")
			 (TI	"n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF  "v4si")
		       (V2DF  "v2di")
		       (DF    "di")])

(define_mode_attr VSI [(V4SF  "V4SI")
		       (V2DF  "V2DI")
		       (DF    "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
		       (V2DF "d")
		       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv	[(V16QI "v")
			 (V8HI  "v")
			 (V4SI  "v")
			 (V4SF  "v")
			 (V2DI  "v")
			 (V2DF  "v")
			 (V1TI  "v")
			 (DF    "s")
			 (KF	"v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple	[(V2DF "vecdouble")
				 (V4SF "vecfloat")
				 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul	[(V2DF "vecdouble")
				 (V4SF "vecfloat")
				 (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div	[(V2DF "vecdiv")
				 (V4SF "vecfdiv")
				 (DF   "ddiv")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI	"TI")
			     (V2DF	"DF")
			     (V2DI	"DI")
			     (V4SF	"SF")
			     (V4SI	"SI")
			     (V8HI	"HI")
			     (V16QI	"QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI	"V8SI")
			     (V4SF	"V8SF")
			     (V2DI	"V4DI")
			     (V2DF	"V4DF")
			     (V1TI	"V2TI")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
				     (V8HI "h")
				     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
					 (V8HI  "const_0_to_7_operand")
					 (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
			  (V8HI  "v")
			  (V4SI  "wa")])

;; Mode iterator for binary floating point types other than double, used to
;; optimize conversions to those floating point types from an extract of an
;; integer type.
(define_mode_iterator VSX_EXTRACT_FL [SF
				      (IF "FLOAT128_2REG_P (IFmode)")
				      (KF "TARGET_FLOAT128_HW")
				      (TF "FLOAT128_2REG_P (TFmode)
					   || (FLOAT128_IEEE_P (TFmode)
					       && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
			       DF
			       (KF "TARGET_FLOAT128_HW")
			       (TF "TARGET_FLOAT128_HW
				    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_XVCVSPHP
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

(define_int_iterator XVCVBF16	[UNSPEC_VSX_XVCVSPBF16
				 UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16       [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
				 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
	  || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
	  || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
	  || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

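;; Pre-reload split of the LE permuted store.  When the access is known to be
;; 128-bit aligned and the source register is (or can still become) an Altivec
;; register, emit a stvx instead; otherwise fall through to an element swap
;; through a temporary followed by the store.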
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
	(rotate:VSX_TI
	 (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
	 (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

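;; Two back-to-back doubleword rotates cancel out.  Replace the pair with a
;; simple register copy, which is deleted outright after reload when source
;; and destination are the same register.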
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
	(rotate:VSX_TI
	 (rotate:VSX_TI
	  (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
	  (const_int 64))
	 (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

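;; LE permuted load for the 128-bit scalar and V1TI types: do the load with a
;; doubleword swap, then swap the halves back once the value is in a register.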
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
	     ? gen_reg_rtx_and_attrs (operands[0])
	     : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

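;; The matching LE permuted store for the 128-bit scalar and V1TI types.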
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

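;; The pre-reload split can permute through a fresh pseudo, so the source
;; register does not need to be re-permuted afterwards (compare the
;; post-reload split further below).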
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
	     ? gen_reg_rtx_and_attrs (operands[0])
	     : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
	(rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
		       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
	(rotate:VSX_TI (match_dup 0)
		       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
   [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
	(rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
		       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
	(rotate:VSX_TI (match_dup 0)
		       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
	(rotate:TI (match_operand:TI 1 "vsx_register_operand")
		   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
	(rotate:TI (match_dup 0)
		   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
   [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

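;; Wider integer vector constants that a single XXSPLTIB can materialize
;; directly because every byte of the constant is the same.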
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
	(match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

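;; Constants that take two instructions: an XXSPLTIB byte splat followed by a
;; sign extension of each byte to the element width.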
(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
	(match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
	     ? gen_reg_rtx (V16QImode)
	     : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
                ?wa,       v,         <??r>,     wZ,        v")

	(match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        we,        r,         r,
                wQ,        Y,         r,         r,         wE,        jwM,
                ?jwM,      W,         <nW>,      v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
                store,     load,      store,     *,         vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore,  vecload")
   (set_attr "num_insns"
               "*,         *,         *,         2,         *,         2,
                2,         2,         2,         2,         *,         *,
                *,         5,         2,         *,         *")
   (set_attr "max_prefixed_insns"
               "*,         *,         *,         *,         *,         2,
                2,         2,         2,         2,         *,         *,
                *,         *,         *,         *,         *")
   (set_attr "length"
               "*,         *,         *,         8,         *,         8,
                8,         8,         8,         8,         *,         *,
                *,         20,        8,         *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                *,         *,         *,         *,         p9v,       *,
                <VSisa>,   *,         *,         *,         *")])

;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,       v,         <??r>,
                wZ,        v")

	(match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        Y,         r,         r,
                wE,        jwM,       ?jwM,      W,         <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,      store,    *,
                vecsimple, vecsimple, vecsimple, *,         *,
                vecstore,  vecload")
   (set_attr "length"
               "*,         *,         *,         16,        16,        16,
                *,         *,         *,         20,        16,
                *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                p9v,       *,         <VSisa>,   *,         *,
                *,         *")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
	(match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
	(match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
	  (match_operand:V2DI 1 "memory_operand" "Z")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

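;; There is no element-reversing load for V1TI, so compose one from lxvd2x
;; plus a doubleword swap.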
(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
	  (match_operand:V1TI 1 "memory_operand" "Z")
	  (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
	  (match_operand:V2DF 1 "memory_operand" "Z")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
	  (match_operand:V4SI 1 "memory_operand" "Z")
	  (parallel [(const_int 3) (const_int 2)
	             (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
	  (match_operand:V4SF 1 "memory_operand" "Z")
	  (parallel [(const_int 3) (const_int 2)
	             (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
	  (match_operand:V8HI 1 "memory_operand" "Z")
	  (parallel [(const_int 7) (const_int 6)
	             (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
	             (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register.  */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register.  */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

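;; Likewise there is no element-reversing store for V1TI: swap the doublewords
;; in the register first, then store with stxvd2x.  The swap modifies the
;; input register, hence the clobber.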
1386(define_insn "vsx_st_elemrev_v1ti"
1387  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1388        (vec_select:V1TI
1389          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1390          (parallel [(const_int 0)])))
1391   (clobber (match_dup 1))]
1392  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1393{
1394  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1395}
1396  [(set_attr "type" "vecstore")])
1397
1398(define_insn "vsx_st_elemrev_v2df"
1399  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1400        (vec_select:V2DF
1401          (match_operand:V2DF 1 "vsx_register_operand" "wa")
1402          (parallel [(const_int 1) (const_int 0)])))]
1403  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1404  "stxvd2x %x1,%y0"
1405  [(set_attr "type" "vecstore")])
1406
1407(define_insn "vsx_st_elemrev_v2di"
1408  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1409        (vec_select:V2DI
1410          (match_operand:V2DI 1 "vsx_register_operand" "wa")
1411          (parallel [(const_int 1) (const_int 0)])))]
1412  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1413  "stxvd2x %x1,%y0"
1414  [(set_attr "type" "vecstore")])
1415
1416(define_insn "vsx_st_elemrev_v4sf"
1417  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1418        (vec_select:V4SF
1419          (match_operand:V4SF 1 "vsx_register_operand" "wa")
1420          (parallel [(const_int 3) (const_int 2)
1421                     (const_int 1) (const_int 0)])))]
1422  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1423  "stxvw4x %x1,%y0"
1424  [(set_attr "type" "vecstore")])
1425
1426(define_insn "vsx_st_elemrev_v4si"
1427  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1428        (vec_select:V4SI
1429	  (match_operand:V4SI 1 "vsx_register_operand" "wa")
1430	  (parallel [(const_int 3) (const_int 2)
1431	             (const_int 1) (const_int 0)])))]
1432  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1433  "stxvw4x %x1,%y0"
1434  [(set_attr "type" "vecstore")])
1435
1436(define_expand "vsx_st_elemrev_v8hi"
1437  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1438        (vec_select:V8HI
1439          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1440          (parallel [(const_int 7) (const_int 6)
1441                     (const_int 5) (const_int 4)
1442                     (const_int 3) (const_int 2)
1443                     (const_int 1) (const_int 0)])))]
1444  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1445{
1446  if (!TARGET_P9_VECTOR)
1447    {
1448      rtx mem_subreg, subreg, perm[16], pcv;
1449      rtx tmp = gen_reg_rtx (V8HImode);
1450      /* 2 is leftmost element in register */
1451      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1452      int i;
1453
1454      for (i = 0; i < 16; ++i)
1455      	perm[i] = GEN_INT (reorder[i]);
1456
1457      pcv = force_reg (V16QImode,
1458                       gen_rtx_CONST_VECTOR (V16QImode,
1459                                             gen_rtvec_v (16, perm)));
1460      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1461                                                operands[1], pcv));
1462      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1463      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1464      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1465      DONE;
1466    }
1467})
1468
1469(define_insn "*vsx_st_elemrev_v2di_internal"
1470  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1471        (vec_select:V2DI
1472          (match_operand:V2DI 1 "vsx_register_operand" "wa")
1473          (parallel [(const_int 1) (const_int 0)])))]
1474  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1475  "stxvd2x %x1,%y0"
1476  [(set_attr "type" "vecstore")])
1477
1478(define_insn "*vsx_st_elemrev_v8hi_internal"
1479  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1480        (vec_select:V8HI
1481          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1482          (parallel [(const_int 7) (const_int 6)
1483                     (const_int 5) (const_int 4)
1484                     (const_int 3) (const_int 2)
1485                     (const_int 1) (const_int 0)])))]
1486  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1487  "stxvh8x %x1,%y0"
1488  [(set_attr "type" "vecstore")])
1489
1490(define_expand "vsx_st_elemrev_v16qi"
1491  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1492        (vec_select:V16QI
1493          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1494          (parallel [(const_int 15) (const_int 14)
1495                     (const_int 13) (const_int 12)
1496                     (const_int 11) (const_int 10)
1497                     (const_int  9) (const_int  8)
1498                     (const_int  7) (const_int  6)
1499                     (const_int  5) (const_int  4)
1500                     (const_int  3) (const_int  2)
1501                     (const_int  1) (const_int  0)])))]
1502  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1503{
1504  if (!TARGET_P9_VECTOR)
1505    {
1506      rtx mem_subreg, subreg, perm[16], pcv;
1507      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is the leftmost element in the register.  */
1509      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1510      int i;
1511
1512      for (i = 0; i < 16; ++i)
1513      	perm[i] = GEN_INT (reorder[i]);
1514
1515      pcv = force_reg (V16QImode,
1516                       gen_rtx_CONST_VECTOR (V16QImode,
1517                                             gen_rtvec_v (16, perm)));
1518      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1519                                                 operands[1], pcv));
1520      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1521      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1522      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1523      DONE;
1524    }
1525})
1526
1527(define_insn "*vsx_st_elemrev_v16qi_internal"
1528  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1529        (vec_select:V16QI
1530          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1531          (parallel [(const_int 15) (const_int 14)
1532                     (const_int 13) (const_int 12)
1533                     (const_int 11) (const_int 10)
1534                     (const_int  9) (const_int  8)
1535                     (const_int  7) (const_int  6)
1536                     (const_int  5) (const_int  4)
1537                     (const_int  3) (const_int  2)
1538                     (const_int  1) (const_int  0)])))]
1539  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1540  "stxvb16x %x1,%y0"
1541  [(set_attr "type" "vecstore")])
1542
1543
1544;; VSX vector floating point arithmetic instructions.  The VSX scalar
1545;; instructions are now combined with the insn for the traditional floating
1546;; point unit.
1547(define_insn "*vsx_add<mode>3"
1548  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1549        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1550		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1551  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1552  "xvadd<sd>p %x0,%x1,%x2"
1553  [(set_attr "type" "<VStype_simple>")])
1554
1555(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1557        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1558		     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1559  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1560  "xvsub<sd>p %x0,%x1,%x2"
1561  [(set_attr "type" "<VStype_simple>")])
1562
1563(define_insn "*vsx_mul<mode>3"
1564  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1565        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1566		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1567  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1568  "xvmul<sd>p %x0,%x1,%x2"
1569  [(set_attr "type" "<VStype_simple>")])
1570
;; Emulate vector with scalar for vec_mul in V2DImode
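;; The split below computes the two doubleword products with scalar code,
;; roughly (a sketch, not the exact RTL):
;;   dst[0] = src1[0] * src2[0];
;;   dst[1] = src1[1] * src2[1];
;; using muldi3 on 64-bit targets and expand_mult (which may emit a libcall)
;; on 32-bit targets.  The division patterns below follow the same scheme
;; with divdi3/udivdi3 or the libgcc division libcalls.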
1572(define_insn_and_split "vsx_mul_v2di"
1573  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1574        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1575                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1576                     UNSPEC_VSX_MULSD))]
1577  "VECTOR_MEM_VSX_P (V2DImode)"
1578  "#"
1579  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1580  [(const_int 0)]
1581{
1582  rtx op0 = operands[0];
1583  rtx op1 = operands[1];
1584  rtx op2 = operands[2];
1585  rtx op3 = gen_reg_rtx (DImode);
1586  rtx op4 = gen_reg_rtx (DImode);
1587  rtx op5 = gen_reg_rtx (DImode);
1588  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1589  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1590  if (TARGET_POWERPC64)
1591    emit_insn (gen_muldi3 (op5, op3, op4));
1592  else
1593    {
1594      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1595      emit_move_insn (op5, ret);
1596    }
1597  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1598  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1599  if (TARGET_POWERPC64)
1600    emit_insn (gen_muldi3 (op3, op3, op4));
1601  else
1602    {
1603      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1604      emit_move_insn (op3, ret);
1605    }
1606  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1607  DONE;
1608}
1609  [(set_attr "type" "mul")])
1610
1611(define_insn "*vsx_div<mode>3"
1612  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1613        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1614		   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1615  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1616  "xvdiv<sd>p %x0,%x1,%x2"
1617  [(set_attr "type" "<VStype_div>")])
1618
;; Emulate vector with scalar for vec_div in V2DImode
1620(define_insn_and_split "vsx_div_v2di"
1621  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1622        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1623                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1624                     UNSPEC_VSX_DIVSD))]
1625  "VECTOR_MEM_VSX_P (V2DImode)"
1626  "#"
1627  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1628  [(const_int 0)]
1629{
1630  rtx op0 = operands[0];
1631  rtx op1 = operands[1];
1632  rtx op2 = operands[2];
1633  rtx op3 = gen_reg_rtx (DImode);
1634  rtx op4 = gen_reg_rtx (DImode);
1635  rtx op5 = gen_reg_rtx (DImode);
1636  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1637  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1638  if (TARGET_POWERPC64)
1639    emit_insn (gen_divdi3 (op5, op3, op4));
1640  else
1641    {
1642      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1643      rtx target = emit_library_call_value (libfunc,
1644					    op5, LCT_NORMAL, DImode,
1645					    op3, DImode,
1646					    op4, DImode);
1647      emit_move_insn (op5, target);
1648    }
1649  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1650  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1651  if (TARGET_POWERPC64)
1652    emit_insn (gen_divdi3 (op3, op3, op4));
1653  else
1654    {
1655      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1656      rtx target = emit_library_call_value (libfunc,
1657					    op3, LCT_NORMAL, DImode,
1658					    op3, DImode,
1659					    op4, DImode);
1660      emit_move_insn (op3, target);
1661    }
1662  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1663  DONE;
1664}
1665  [(set_attr "type" "div")])
1666
1667(define_insn_and_split "vsx_udiv_v2di"
1668  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1669        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1670                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1671                     UNSPEC_VSX_DIVUD))]
1672  "VECTOR_MEM_VSX_P (V2DImode)"
1673  "#"
1674  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1675  [(const_int 0)]
1676{
1677  rtx op0 = operands[0];
1678  rtx op1 = operands[1];
1679  rtx op2 = operands[2];
1680  rtx op3 = gen_reg_rtx (DImode);
1681  rtx op4 = gen_reg_rtx (DImode);
1682  rtx op5 = gen_reg_rtx (DImode);
1683  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1684  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1685  if (TARGET_POWERPC64)
1686    emit_insn (gen_udivdi3 (op5, op3, op4));
1687  else
1688    {
1689      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1690      rtx target = emit_library_call_value (libfunc,
1691					    op5, LCT_NORMAL, DImode,
1692					    op3, DImode,
1693					    op4, DImode);
1694      emit_move_insn (op5, target);
1695    }
1696  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1697  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1698  if (TARGET_POWERPC64)
1699    emit_insn (gen_udivdi3 (op3, op3, op4));
1700  else
1701    {
1702      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1703      rtx target = emit_library_call_value (libfunc,
1704					    op3, LCT_NORMAL, DImode,
1705					    op3, DImode,
1706					    op4, DImode);
1707      emit_move_insn (op3, target);
1708    }
1709  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1710  DONE;
1711}
1712  [(set_attr "type" "div")])
1713
1714;; *tdiv* instruction returning the FG flag
1715(define_expand "vsx_tdiv<mode>3_fg"
1716  [(set (match_dup 3)
1717	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1718		      (match_operand:VSX_B 2 "vsx_register_operand")]
1719		     UNSPEC_VSX_TDIV))
1720   (set (match_operand:SI 0 "gpc_reg_operand")
1721	(gt:SI (match_dup 3)
1722	       (const_int 0)))]
1723  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1724{
1725  operands[3] = gen_reg_rtx (CCFPmode);
1726})
1727
1728;; *tdiv* instruction returning the FE flag
1729(define_expand "vsx_tdiv<mode>3_fe"
1730  [(set (match_dup 3)
1731	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1732		      (match_operand:VSX_B 2 "vsx_register_operand")]
1733		     UNSPEC_VSX_TDIV))
1734   (set (match_operand:SI 0 "gpc_reg_operand")
1735	(eq:SI (match_dup 3)
1736	       (const_int 0)))]
1737  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1738{
1739  operands[3] = gen_reg_rtx (CCFPmode);
1740})
1741
1742(define_insn "*vsx_tdiv<mode>3_internal"
1743  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1744	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1745		      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1746		   UNSPEC_VSX_TDIV))]
1747  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1748  "x<VSv>tdiv<sd>p %0,%x1,%x2"
1749  [(set_attr "type" "<VStype_simple>")])
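;; Note: the test-for-software-divide instruction sets a CR field; the two
;; expanders above read it back either as "greater than 0" (the FG bit) or
;; as "equal to 0" (the FE bit), giving callers a SImode 0/1 value they can
;; branch on before committing to a fast reciprocal sequence.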
1750
1751(define_insn "vsx_fre<mode>2"
1752  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1753	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1754		      UNSPEC_FRES))]
1755  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1756  "xvre<sd>p %x0,%x1"
1757  [(set_attr "type" "<VStype_simple>")])
1758
1759(define_insn "*vsx_neg<mode>2"
1760  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1761        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1762  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1763  "xvneg<sd>p %x0,%x1"
1764  [(set_attr "type" "<VStype_simple>")])
1765
1766(define_insn "*vsx_abs<mode>2"
1767  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1768        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1769  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1770  "xvabs<sd>p %x0,%x1"
1771  [(set_attr "type" "<VStype_simple>")])
1772
1773(define_insn "vsx_nabs<mode>2"
1774  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1775        (neg:VSX_F
1776	 (abs:VSX_F
1777	  (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1778  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1779  "xvnabs<sd>p %x0,%x1"
1780  [(set_attr "type" "<VStype_simple>")])
1781
1782(define_insn "vsx_smax<mode>3"
1783  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1784        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1785		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1786  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1787  "xvmax<sd>p %x0,%x1,%x2"
1788  [(set_attr "type" "<VStype_simple>")])
1789
1790(define_insn "*vsx_smin<mode>3"
1791  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1792        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1793		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1794  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1795  "xvmin<sd>p %x0,%x1,%x2"
1796  [(set_attr "type" "<VStype_simple>")])
1797
1798(define_insn "*vsx_sqrt<mode>2"
1799  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1800        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1801  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1802  "xvsqrt<sd>p %x0,%x1"
1803  [(set_attr "type" "<sd>sqrt")])
1804
1805(define_insn "*vsx_rsqrte<mode>2"
1806  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1807	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1808		      UNSPEC_RSQRT))]
1809  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1810  "xvrsqrte<sd>p %x0,%x1"
1811  [(set_attr "type" "<VStype_simple>")])
1812
;; *tsqrt* instruction returning the FG flag
1814(define_expand "vsx_tsqrt<mode>2_fg"
1815  [(set (match_dup 2)
1816	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1817		     UNSPEC_VSX_TSQRT))
1818   (set (match_operand:SI 0 "gpc_reg_operand")
1819	(gt:SI (match_dup 2)
1820	       (const_int 0)))]
1821  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1822{
1823  operands[2] = gen_reg_rtx (CCFPmode);
1824})
1825
;; *tsqrt* instruction returning the FE flag
1827(define_expand "vsx_tsqrt<mode>2_fe"
1828  [(set (match_dup 2)
1829	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1830		     UNSPEC_VSX_TSQRT))
1831   (set (match_operand:SI 0 "gpc_reg_operand")
1832	(eq:SI (match_dup 2)
1833	       (const_int 0)))]
1834  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1835{
1836  operands[2] = gen_reg_rtx (CCFPmode);
1837})
1838
1839(define_insn "*vsx_tsqrt<mode>2_internal"
1840  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1841	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1842		     UNSPEC_VSX_TSQRT))]
1843  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1844  "x<VSv>tsqrt<sd>p %0,%x1"
1845  [(set_attr "type" "<VStype_simple>")])
1846
;; Fused vector multiply/add instructions.  Do not generate the Altivec
;; versions of fma (vmaddfp and vnmsubfp).  Those instructions allow the
;; target to be a separate register from the 3 inputs, but they have
;; different rounding behaviors than the VSX instructions.
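;; The two alternatives in each FMA pattern below match the two hardware
;; forms: when the addend is tied to the output the "a" (accumulate) form is
;; used, and when a multiplicand is tied to the output the "m" (multiply)
;; form is used.  For example:
;;   xvmaddadp x0,x1,x2   computes  x0 = x1*x2 + x0
;;   xvmaddmdp x0,x1,x3   computes  x0 = x1*x0 + x3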
1851(define_insn "*vsx_fmav4sf4"
1852  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
1853	(fma:V4SF
1854	  (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
1855	  (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
1856	  (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
1857  "VECTOR_UNIT_VSX_P (V4SFmode)"
1858  "@
1859   xvmaddasp %x0,%x1,%x2
1860   xvmaddmsp %x0,%x1,%x3"
1861  [(set_attr "type" "vecfloat")])
1862
1863(define_insn "*vsx_fmav2df4"
1864  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1865	(fma:V2DF
1866	  (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1867	  (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1868	  (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1869  "VECTOR_UNIT_VSX_P (V2DFmode)"
1870  "@
1871   xvmaddadp %x0,%x1,%x2
1872   xvmaddmdp %x0,%x1,%x3"
1873  [(set_attr "type" "vecdouble")])
1874
1875(define_insn "*vsx_fms<mode>4"
1876  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1877	(fma:VSX_F
1878	  (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1879	  (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1880	  (neg:VSX_F
1881	    (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1882  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1883  "@
1884   xvmsuba<sd>p %x0,%x1,%x2
1885   xvmsubm<sd>p %x0,%x1,%x3"
1886  [(set_attr "type" "<VStype_mul>")])
1887
1888(define_insn "*vsx_nfma<mode>4"
1889  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1890	(neg:VSX_F
1891	 (fma:VSX_F
1892	  (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1893	  (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1894	  (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1895  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1896  "@
1897   xvnmadda<sd>p %x0,%x1,%x2
1898   xvnmaddm<sd>p %x0,%x1,%x3"
1899  [(set_attr "type" "<VStype_mul>")])
1900
1901(define_insn "*vsx_nfmsv4sf4"
1902  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
1903	(neg:V4SF
1904	 (fma:V4SF
1905	   (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
1906	   (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
1907	   (neg:V4SF
1908	     (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
1909  "VECTOR_UNIT_VSX_P (V4SFmode)"
1910  "@
1911   xvnmsubasp %x0,%x1,%x2
1912   xvnmsubmsp %x0,%x1,%x3"
1913  [(set_attr "type" "vecfloat")])
1914
1915(define_insn "*vsx_nfmsv2df4"
1916  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1917	(neg:V2DF
1918	 (fma:V2DF
1919	   (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1920	   (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1921	   (neg:V2DF
1922	     (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1923  "VECTOR_UNIT_VSX_P (V2DFmode)"
1924  "@
1925   xvnmsubadp %x0,%x1,%x2
1926   xvnmsubmdp %x0,%x1,%x3"
1927  [(set_attr "type" "vecdouble")])
1928
1929;; Vector conditional expressions (no scalar version for these instructions)
1930(define_insn "vsx_eq<mode>"
1931  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1932	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1933		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1934  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1935  "xvcmpeq<sd>p %x0,%x1,%x2"
1936  [(set_attr "type" "<VStype_simple>")])
1937
1938(define_insn "vsx_gt<mode>"
1939  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1940	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1941		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1942  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1943  "xvcmpgt<sd>p %x0,%x1,%x2"
1944  [(set_attr "type" "<VStype_simple>")])
1945
1946(define_insn "*vsx_ge<mode>"
1947  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1948	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1949		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1950  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1951  "xvcmpge<sd>p %x0,%x1,%x2"
1952  [(set_attr "type" "<VStype_simple>")])
1953
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status (all elements true / all elements false).
1956(define_insn "*vsx_eq_<mode>_p"
1957  [(set (reg:CC CR6_REGNO)
1958	(unspec:CC
1959	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1960		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1961	 UNSPEC_PREDICATE))
1962   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1963	(eq:VSX_F (match_dup 1)
1964		  (match_dup 2)))]
1965  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1966  "xvcmpeq<sd>p. %x0,%x1,%x2"
1967  [(set_attr "type" "<VStype_simple>")])
1968
1969(define_insn "*vsx_gt_<mode>_p"
1970  [(set (reg:CC CR6_REGNO)
1971	(unspec:CC
1972	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1973		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1974	 UNSPEC_PREDICATE))
1975   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1976	(gt:VSX_F (match_dup 1)
1977		  (match_dup 2)))]
1978  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1979  "xvcmpgt<sd>p. %x0,%x1,%x2"
1980  [(set_attr "type" "<VStype_simple>")])
1981
1982(define_insn "*vsx_ge_<mode>_p"
1983  [(set (reg:CC CR6_REGNO)
1984	(unspec:CC
1985	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1986		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1987	 UNSPEC_PREDICATE))
1988   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1989	(ge:VSX_F (match_dup 1)
1990		  (match_dup 2)))]
1991  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1992  "xvcmpge<sd>p. %x0,%x1,%x2"
1993  [(set_attr "type" "<VStype_simple>")])
1994
1995;; Vector select
1996(define_insn "*vsx_xxsel<mode>"
1997  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1998	(if_then_else:VSX_L
1999	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2000		(match_operand:VSX_L 4 "zero_constant" ""))
2001	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2002	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2003  "VECTOR_MEM_VSX_P (<MODE>mode)"
2004  "xxsel %x0,%x3,%x2,%x1"
2005  [(set_attr "type" "vecmove")
2006   (set_attr "isa" "<VSisa>")])
2007
2008(define_insn "*vsx_xxsel<mode>_uns"
2009  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2010	(if_then_else:VSX_L
2011	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2012		   (match_operand:VSX_L 4 "zero_constant" ""))
2013	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2014	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2015  "VECTOR_MEM_VSX_P (<MODE>mode)"
2016  "xxsel %x0,%x3,%x2,%x1"
2017  [(set_attr "type" "vecmove")
2018   (set_attr "isa" "<VSisa>")])
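;; xxsel is a bitwise select:
;;   xxsel xt,xa,xb,xc   computes  xt = (xa & ~xc) | (xb & xc)
;; Each result bit comes from the second data source where the corresponding
;; mask bit is set, which is why the mask (operand 1) appears last and the
;; "else" value (operand 3) appears first in the templates above.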
2019
2020;; Copy sign
2021(define_insn "vsx_copysign<mode>3"
2022  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2023	(unspec:VSX_F
2024	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2025	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2026	 UNSPEC_COPYSIGN))]
2027  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2028  "xvcpsgn<sd>p %x0,%x2,%x1"
2029  [(set_attr "type" "<VStype_simple>")])
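;; Note the swapped operands in the template above: the hardware instruction
;; takes the sign source first, so copysign (magnitude from operand 1, sign
;; from operand 2) becomes "xvcpsgn<sd>p %x0,%x2,%x1".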
2030
2031;; For the conversions, limit the register class for the integer value to be
2032;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2033;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md, so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2035;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2036;; in allowing virtual registers.
2037(define_insn "vsx_float<VSi><mode>2"
2038  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2039	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2040  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2041  "xvcvsx<VSc><sd>p %x0,%x1"
2042  [(set_attr "type" "<VStype_simple>")])
2043
2044(define_insn "vsx_floatuns<VSi><mode>2"
2045  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2046	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2047  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2048  "xvcvux<VSc><sd>p %x0,%x1"
2049  [(set_attr "type" "<VStype_simple>")])
2050
2051(define_insn "vsx_fix_trunc<mode><VSi>2"
2052  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2053	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2054  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2055  "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2056  [(set_attr "type" "<VStype_simple>")])
2057
2058(define_insn "vsx_fixuns_trunc<mode><VSi>2"
2059  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2060	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2061  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2062  "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2063  [(set_attr "type" "<VStype_simple>")])
2064
2065;; Math rounding functions
2066(define_insn "vsx_x<VSv>r<sd>pi"
2067  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2068	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2069		      UNSPEC_VSX_ROUND_I))]
2070  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2071  "x<VSv>r<sd>pi %x0,%x1"
2072  [(set_attr "type" "<VStype_simple>")])
2073
2074(define_insn "vsx_x<VSv>r<sd>pic"
2075  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2076	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2077		      UNSPEC_VSX_ROUND_IC))]
2078  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2079  "x<VSv>r<sd>pic %x0,%x1"
2080  [(set_attr "type" "<VStype_simple>")])
2081
2082(define_insn "vsx_btrunc<mode>2"
2083  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2084	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2085  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2086  "xvr<sd>piz %x0,%x1"
2087  [(set_attr "type" "<VStype_simple>")])
2088
2089(define_insn "*vsx_b2trunc<mode>2"
2090  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2091	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2092		      UNSPEC_FRIZ))]
2093  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2094  "x<VSv>r<sd>piz %x0,%x1"
2095  [(set_attr "type" "<VStype_simple>")])
2096
2097(define_insn "vsx_floor<mode>2"
2098  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2099	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2100		      UNSPEC_FRIM))]
2101  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2102  "xvr<sd>pim %x0,%x1"
2103  [(set_attr "type" "<VStype_simple>")])
2104
2105(define_insn "vsx_ceil<mode>2"
2106  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2107	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2108		      UNSPEC_FRIP))]
2109  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2110  "xvr<sd>pip %x0,%x1"
2111  [(set_attr "type" "<VStype_simple>")])
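;; For reference, the round-to-integral forms above map to these rounding
;; modes:
;;   x<VSv>r<sd>pi    round to nearest, ties away from zero
;;   x<VSv>r<sd>pic   round using the current rounding mode
;;   x<VSv>r<sd>piz   round toward zero (btrunc)
;;   xvr<sd>pim       round toward minus infinity (floor)
;;   xvr<sd>pip       round toward plus infinity (ceil)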
2112
2113
2114;; VSX convert to/from double vector
2115
;; Convert between single and double precision.
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the Altivec registers, since we will likely need to do a vperm.
2120(define_insn "vsx_xscvdpsp"
2121  [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2122	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2123			      UNSPEC_VSX_CVSPDP))]
2124  "VECTOR_UNIT_VSX_P (DFmode)"
2125  "xscvdpsp %x0,%x1"
2126  [(set_attr "type" "fp")])
2127
2128(define_insn "vsx_xvcvspdp_be"
2129  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2130     (float_extend:V2DF
2131       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2132	 (parallel [(const_int 0) (const_int 2)]))))]
2133  "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2134  "xvcvspdp %x0,%x1"
2135  [(set_attr "type" "vecdouble")])
2136
2137(define_insn "vsx_xvcvspdp_le"
2138  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2139     (float_extend:V2DF
2140       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2141	 (parallel [(const_int 1) (const_int 3)]))))]
2142  "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2143  "xvcvspdp %x0,%x1"
2144  [(set_attr "type" "vecdouble")])
2145
2146(define_expand "vsx_xvcvspdp"
2147  [(match_operand:V2DF 0 "vsx_register_operand")
2148   (match_operand:V4SF 1 "vsx_register_operand")]
2149  "VECTOR_UNIT_VSX_P (V4SFmode)"
2150{
2151  if (BYTES_BIG_ENDIAN)
2152    emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2153  else
2154    emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2155  DONE;
2156})
2157
2158(define_insn "vsx_xvcvdpsp"
2159  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2160	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2161			      UNSPEC_VSX_CVSPDP))]
2162  "VECTOR_UNIT_VSX_P (V2DFmode)"
2163  "xvcvdpsp %x0,%x1"
2164  [(set_attr "type" "vecdouble")])
2165
;; xscvspdp, representing the scalar SF type as V4SF
2167(define_insn "vsx_xscvspdp"
2168  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2169	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2170		   UNSPEC_VSX_CVSPDP))]
2171  "VECTOR_UNIT_VSX_P (V4SFmode)"
2172  "xscvspdp %x0,%x1"
2173  [(set_attr "type" "fp")])
2174
2175;; Same as vsx_xscvspdp, but use SF as the type
2176(define_insn "vsx_xscvspdp_scalar2"
2177  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2178	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2179		   UNSPEC_VSX_CVSPDP))]
2180  "VECTOR_UNIT_VSX_P (V4SFmode)"
2181  "xscvspdp %x0,%x1"
2182  [(set_attr "type" "fp")])
2183
2184;; Generate xvcvhpsp instruction
2185(define_insn "vsx_xvcvhpsp"
2186  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2188		     UNSPEC_VSX_CVHPSP))]
2189  "TARGET_P9_VECTOR"
2190  "xvcvhpsp %x0,%x1"
2191  [(set_attr "type" "vecfloat")])
2192
2193;; Generate xvcvsphp
2194(define_insn "vsx_xvcvsphp"
2195  [(set (match_operand:V4SI 0 "register_operand" "=wa")
2196	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2197		     UNSPEC_VSX_XVCVSPHP))]
2198  "TARGET_P9_VECTOR"
2199  "xvcvsphp %x0,%x1"
  [(set_attr "type" "vecfloat")])
2201
;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2203;; format of scalars is actually DF.
2204(define_insn "vsx_xscvdpsp_scalar"
2205  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2206	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2207		     UNSPEC_VSX_CVSPDP))]
2208  "VECTOR_UNIT_VSX_P (V4SFmode)"
2209  "xscvdpsp %x0,%x1"
2210  [(set_attr "type" "fp")])
2211
;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise an error on signalling NaNs
2213(define_insn "vsx_xscvdpspn"
2214  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2215	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2216		     UNSPEC_VSX_CVDPSPN))]
2217  "TARGET_XSCVDPSPN"
2218  "xscvdpspn %x0,%x1"
2219  [(set_attr "type" "fp")])
2220
2221(define_insn "vsx_xscvspdpn"
2222  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2223	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2224		   UNSPEC_VSX_CVSPDPN))]
2225  "TARGET_XSCVSPDPN"
2226  "xscvspdpn %x0,%x1"
2227  [(set_attr "type" "fp")])
2228
2229(define_insn "vsx_xscvdpspn_scalar"
2230  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2231	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2232		     UNSPEC_VSX_CVDPSPN))]
2233  "TARGET_XSCVDPSPN"
2234  "xscvdpspn %x0,%x1"
2235  [(set_attr "type" "fp")])
2236
2237;; Used by direct move to move a SFmode value from GPR to VSX register
2238(define_insn "vsx_xscvspdpn_directmove"
2239  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2240	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2241		   UNSPEC_VSX_CVSPDPN))]
2242  "TARGET_XSCVSPDPN"
2243  "xscvspdpn %x0,%x1"
2244  [(set_attr "type" "fp")])
2245
2246;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2247
2248(define_insn "vsx_xvcv<su>xwsp"
2249  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2250     (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2251  "VECTOR_UNIT_VSX_P (V4SFmode)"
2252  "xvcv<su>xwsp %x0,%x1"
2253  [(set_attr "type" "vecfloat")])
2254
2255(define_insn "vsx_xvcv<su>xddp"
2256  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2257        (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2258  "VECTOR_UNIT_VSX_P (V2DFmode)"
2259  "xvcv<su>xddp %x0,%x1"
2260  [(set_attr "type" "vecdouble")])
2261
2262(define_insn "vsx_xvcvsp<su>xws"
2263  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2264        (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2265  "VECTOR_UNIT_VSX_P (V4SFmode)"
2266  "xvcvsp<su>xws %x0,%x1"
2267  [(set_attr "type" "vecfloat")])
2268
2269(define_insn "vsx_xvcvdp<su>xds"
2270  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2271        (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2272  "VECTOR_UNIT_VSX_P (V2DFmode)"
2273  "xvcvdp<su>xds %x0,%x1"
2274  [(set_attr "type" "vecdouble")])
2275
2276(define_expand "vsx_xvcvsxddp_scale"
2277  [(match_operand:V2DF 0 "vsx_register_operand")
2278   (match_operand:V2DI 1 "vsx_register_operand")
2279   (match_operand:QI 2 "immediate_operand")]
2280  "VECTOR_UNIT_VSX_P (V2DFmode)"
2281{
2282  rtx op0 = operands[0];
2283  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
2285  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2286  if (scale != 0)
2287    rs6000_scale_v2df (op0, op0, -scale);
2288  DONE;
2289})
2290
2291(define_expand "vsx_xvcvuxddp_scale"
2292  [(match_operand:V2DF 0 "vsx_register_operand")
2293   (match_operand:V2DI 1 "vsx_register_operand")
2294   (match_operand:QI 2 "immediate_operand")]
2295  "VECTOR_UNIT_VSX_P (V2DFmode)"
2296{
2297  rtx op0 = operands[0];
2298  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
2300  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2301  if (scale != 0)
2302    rs6000_scale_v2df (op0, op0, -scale);
2303  DONE;
2304})
2305
2306(define_expand "vsx_xvcvdpsxds_scale"
2307  [(match_operand:V2DI 0 "vsx_register_operand")
2308   (match_operand:V2DF 1 "vsx_register_operand")
2309   (match_operand:QI 2 "immediate_operand")]
2310  "VECTOR_UNIT_VSX_P (V2DFmode)"
2311{
2312  rtx op0 = operands[0];
2313  rtx op1 = operands[1];
2314  rtx tmp;
2315  int scale = INTVAL (operands[2]);
2316  if (scale == 0)
2317    tmp = op1;
2318  else
2319    {
2320      tmp  = gen_reg_rtx (V2DFmode);
2321      rs6000_scale_v2df (tmp, op1, scale);
2322    }
2323  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2324  DONE;
2325})
2326
;; Convert a vector of 64-bit floating point numbers to a vector of
;; 64-bit unsigned integers.
2329(define_expand "vsx_xvcvdpuxds_scale"
2330  [(match_operand:V2DI 0 "vsx_register_operand")
2331   (match_operand:V2DF 1 "vsx_register_operand")
2332   (match_operand:QI 2 "immediate_operand")]
2333  "VECTOR_UNIT_VSX_P (V2DFmode)"
2334{
2335  rtx op0 = operands[0];
2336  rtx op1 = operands[1];
2337  rtx tmp;
2338  int scale = INTVAL (operands[2]);
2339  if (scale == 0)
2340    tmp = op1;
2341  else
2342    {
2343      tmp = gen_reg_rtx (V2DFmode);
2344      rs6000_scale_v2df (tmp, op1, scale);
2345    }
2346  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2347  DONE;
2348})
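;; In the four *_scale expanders above, the QImode operand is the AltiVec
;; scale factor: vec_ctf divides the converted result by 2**scale (hence the
;; negated scale passed to rs6000_scale_v2df), while vec_cts and vec_ctu
;; multiply the input by 2**scale before converting.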
2349
2350;; Convert from 64-bit to 32-bit types
;; Note: favor the Altivec registers, since the usual use of these instructions
2352;; is in vector converts and we need to use the Altivec vperm instruction.
2353
2354(define_insn "vsx_xvcvdpsxws"
2355  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2356	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2357		     UNSPEC_VSX_CVDPSXWS))]
2358  "VECTOR_UNIT_VSX_P (V2DFmode)"
2359  "xvcvdpsxws %x0,%x1"
2360  [(set_attr "type" "vecdouble")])
2361
2362(define_insn "vsx_xvcvdpuxws"
2363  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2364	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2365		     UNSPEC_VSX_CVDPUXWS))]
2366  "VECTOR_UNIT_VSX_P (V2DFmode)"
2367  "xvcvdpuxws %x0,%x1"
2368  [(set_attr "type" "vecdouble")])
2369
2370(define_insn "vsx_xvcvsxdsp"
2371  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2372	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2373		     UNSPEC_VSX_CVSXDSP))]
2374  "VECTOR_UNIT_VSX_P (V2DFmode)"
2375  "xvcvsxdsp %x0,%x1"
2376  [(set_attr "type" "vecfloat")])
2377
2378(define_insn "vsx_xvcvuxdsp"
2379  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2380	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2381		     UNSPEC_VSX_CVUXDSP))]
2382  "VECTOR_UNIT_VSX_P (V2DFmode)"
2383  "xvcvuxdsp %x0,%x1"
2384  [(set_attr "type" "vecdouble")])
2385
2386;; Convert vector of 32-bit signed/unsigned integers to vector of
2387;; 64-bit floating point numbers.
2388(define_insn "vsx_xvcv<su>xwdp_be"
2389  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2390     (any_float:V2DF
2391       (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2392	 (parallel [(const_int 0) (const_int 2)]))))]
2393  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2394  "xvcv<su>xwdp %x0,%x1"
2395  [(set_attr "type" "vecdouble")])
2396
2397(define_insn "vsx_xvcv<su>xwdp_le"
2398  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2399     (any_float:V2DF
2400       (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2401	 (parallel [(const_int 1) (const_int 3)]))))]
2402  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2403  "xvcv<su>xwdp %x0,%x1"
2404  [(set_attr "type" "vecdouble")])
2405
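;; The (any_float (pc)) below is a dummy expression whose only purpose is to
;; mention the code iterator, so that both the signed and unsigned variants
;; of the expander are generated.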
2406(define_expand "vsx_xvcv<su>xwdp"
2407  [(match_operand:V2DF 0 "vsx_register_operand")
2408   (match_operand:V4SI 1 "vsx_register_operand")
2409   (any_float (pc))]
2410  "VECTOR_UNIT_VSX_P (V2DFmode)"
2411{
2412  if (BYTES_BIG_ENDIAN)
2413    emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2414  else
2415    emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2416  DONE;
2417})
2418
2419(define_insn "vsx_xvcvsxwdp_df"
2420  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2421	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2422		   UNSPEC_VSX_CVSXWDP))]
2423  "TARGET_VSX"
2424  "xvcvsxwdp %x0,%x1"
2425  [(set_attr "type" "vecdouble")])
2426
2427(define_insn "vsx_xvcvuxwdp_df"
2428  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2429	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2430		   UNSPEC_VSX_CVUXWDP))]
2431  "TARGET_VSX"
2432  "xvcvuxwdp %x0,%x1"
2433  [(set_attr "type" "vecdouble")])
2434
2435;; Convert vector of 32-bit floating point numbers to vector of
2436;; 64-bit signed/unsigned integers.
2437(define_insn "vsx_xvcvsp<su>xds_be"
2438  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2439     (any_fix:V2DI
2440       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2441	 (parallel [(const_int 0) (const_int 2)]))))]
2442  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2443  "xvcvsp<su>xds %x0,%x1"
2444  [(set_attr "type" "vecdouble")])
2445
2446(define_insn "vsx_xvcvsp<su>xds_le"
2447  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2448     (any_fix:V2DI
2449       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2450	 (parallel [(const_int 1) (const_int 3)]))))]
2451  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2452  "xvcvsp<su>xds %x0,%x1"
2453  [(set_attr "type" "vecdouble")])
2454
2455(define_expand "vsx_xvcvsp<su>xds"
2456  [(match_operand:V2DI 0 "vsx_register_operand")
2457   (match_operand:V4SF 1 "vsx_register_operand")
2458   (any_fix (pc))]
2459  "VECTOR_UNIT_VSX_P (V2DFmode)"
2460{
2461  if (BYTES_BIG_ENDIAN)
2462    emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2463  else
2464    emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2465  DONE;
2466})
2467
2468;; Generate float2 double
;; convert two vectors of double to a vector of float
2470(define_expand "float2_v2df"
2471  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2472   (use (match_operand:V2DF 1 "register_operand" "wa"))
2473   (use (match_operand:V2DF 2 "register_operand" "wa"))]
2474 "VECTOR_UNIT_VSX_P (V4SFmode)"
2475{
2476  rtx rtx_src1, rtx_src2, rtx_dst;
2477
2478  rtx_dst = operands[0];
2479  rtx_src1 = operands[1];
2480  rtx_src2 = operands[2];
2481
2482  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2483  DONE;
2484})
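;; e.g., assuming the usual vec_float2 element order, the expansion computes
;;   dst = { (float) src1[0], (float) src1[1],
;;           (float) src2[0], (float) src2[1] }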
2485
2486;; Generate float2
2487;; convert two long long signed ints to float
2488(define_expand "float2_v2di"
2489  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2490   (use (match_operand:V2DI 1 "register_operand" "wa"))
2491   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2492 "VECTOR_UNIT_VSX_P (V4SFmode)"
2493{
2494  rtx rtx_src1, rtx_src2, rtx_dst;
2495
2496  rtx_dst = operands[0];
2497  rtx_src1 = operands[1];
2498  rtx_src2 = operands[2];
2499
2500  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2501  DONE;
2502})
2503
2504;; Generate uns_float2
2505;; convert two long long unsigned ints to float
2506(define_expand "uns_float2_v2di"
2507  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2508   (use (match_operand:V2DI 1 "register_operand" "wa"))
2509   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2510 "VECTOR_UNIT_VSX_P (V4SFmode)"
2511{
2512  rtx rtx_src1, rtx_src2, rtx_dst;
2513
2514  rtx_dst = operands[0];
2515  rtx_src1 = operands[1];
2516  rtx_src2 = operands[2];
2517
  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2519  DONE;
2520})
2521
2522;; Generate floate
;; convert double or long long signed to float
2524;; (Only even words are valid, BE numbering)
2525(define_expand "floate<mode>"
2526  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2527   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2528  "VECTOR_UNIT_VSX_P (V4SFmode)"
2529{
2530  if (BYTES_BIG_ENDIAN)
2531    {
      /* Shift left one word to put the even words in the correct location.  */
2533      rtx rtx_tmp;
2534      rtx rtx_val = GEN_INT (4);
2535
2536      rtx_tmp = gen_reg_rtx (V4SFmode);
2537      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2538      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2539		 rtx_tmp, rtx_tmp, rtx_val));
2540    }
2541  else
2542    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2543
2544  DONE;
2545})
2546
2547;; Generate uns_floate
2548;; convert long long unsigned to float
2549;; (Only even words are valid, BE numbering)
2550(define_expand "unsfloatev2di"
2551  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2552   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2553  "VECTOR_UNIT_VSX_P (V4SFmode)"
2554{
2555  if (BYTES_BIG_ENDIAN)
2556    {
      /* Shift left one word to put the even words in the correct location.  */
2558      rtx rtx_tmp;
2559      rtx rtx_val = GEN_INT (4);
2560
2561      rtx_tmp = gen_reg_rtx (V4SFmode);
2562      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2563      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2564		 rtx_tmp, rtx_tmp, rtx_val));
2565    }
2566  else
2567    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2568
2569  DONE;
2570})
2571
2572;; Generate floato
2573;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
2575(define_expand "floato<mode>"
2576  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2577   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2578  "VECTOR_UNIT_VSX_P (V4SFmode)"
2579{
2580  if (BYTES_BIG_ENDIAN)
2581    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2582  else
2583    {
      /* Shift left one word to put the odd words in the correct location.  */
2585      rtx rtx_tmp;
2586      rtx rtx_val = GEN_INT (4);
2587
2588      rtx_tmp = gen_reg_rtx (V4SFmode);
2589      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2590      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2591		 rtx_tmp, rtx_tmp, rtx_val));
2592    }
2593  DONE;
2594})
2595
2596;; Generate uns_floato
2597;; convert long long unsigned to float
2598;; (Only odd words are valid, BE numbering)
2599(define_expand "unsfloatov2di"
2600 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2601  (use (match_operand:V2DI 1 "register_operand" "wa"))]
2602 "VECTOR_UNIT_VSX_P (V4SFmode)"
2603{
2604  if (BYTES_BIG_ENDIAN)
2605    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2606  else
2607    {
      /* Shift left one word to put the odd words in the correct location.  */
2609      rtx rtx_tmp;
2610      rtx rtx_val = GEN_INT (4);
2611
2612      rtx_tmp = gen_reg_rtx (V4SFmode);
2613      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2614      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2615		 rtx_tmp, rtx_tmp, rtx_val));
2616    }
2617  DONE;
2618})
2619
2620;; Generate vsigned2
2621;; convert two double float vectors to a vector of single precision ints
2622(define_expand "vsigned2_v2df"
2623  [(match_operand:V4SI 0 "register_operand" "=wa")
2624   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2625		 (match_operand:V2DF 2 "register_operand" "wa")]
2626  UNSPEC_VSX_VSIGNED2)]
2627  "TARGET_VSX"
2628{
2629  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;
2631
2632  rtx_dst = operands[0];
2633  rtx_src1 = operands[1];
2634  rtx_src2 = operands[2];
2635
2636  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2637  DONE;
2638})
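;; e.g., assuming the usual vec_signed2 element order, the expansion computes
;;   dst = { (int) src1[0], (int) src1[1], (int) src2[0], (int) src2[1] }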
2639
2640;; Generate vsignedo_v2df
;; signed double float to int conversion, odd words
2642(define_expand "vsignedo_v2df"
2643  [(set (match_operand:V4SI 0 "register_operand" "=wa")
2644	(match_operand:V2DF 1 "register_operand" "wa"))]
2645  "TARGET_VSX"
2646{
2647  if (BYTES_BIG_ENDIAN)
2648    {
2649      rtx rtx_tmp;
2650      rtx rtx_val = GEN_INT (12);
2651      rtx_tmp = gen_reg_rtx (V4SImode);
2652
2653      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2654
      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word
	 0 1 2 3    0 1 2 3  =>  1 2 3 0
	 Words 1 and 3 are now where they need to be for the result.  */
2659
2660      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2661		 rtx_tmp, rtx_val));
2662    }
2663  else
2664    /* Little endian word numbering for operand is 3 2 1 0.
2665       Result words 3 and 1 are where they need to be.  */
2666    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2667
2668  DONE;
2669}
2670  [(set_attr "type" "veccomplex")])
2671
2672;; Generate vsignede_v2df
;; signed double float to int conversion, even words
2674(define_expand "vsignede_v2df"
2675  [(set (match_operand:V4SI 0 "register_operand" "=v")
2676	(match_operand:V2DF 1 "register_operand" "v"))]
2677  "TARGET_VSX"
2678{
2679  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
2682    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2683
2684  else
2685    {
2686      rtx rtx_tmp;
2687      rtx rtx_val = GEN_INT (12);
2688      rtx_tmp = gen_reg_rtx (V4SImode);
2689
2690      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2691
2692      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words
2694	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2695	 Words 0 and 2 are now where they need to be for the result.  */
2696      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2697		 rtx_tmp, rtx_val));
2698    }
2699  DONE;
2700}
2701  [(set_attr "type" "veccomplex")])
2702
;; Generate vunsigned2
2704;; convert two double float vectors to a vector of single precision
2705;; unsigned ints
2706(define_expand "vunsigned2_v2df"
2707[(match_operand:V4SI 0 "register_operand" "=v")
2708 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2709	       (match_operand:V2DF 2 "register_operand" "v")]
2710	      UNSPEC_VSX_VSIGNED2)]
2711 "TARGET_VSX"
2712{
2713  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;
2715
2716  rtx_dst = operands[0];
2717  rtx_src1 = operands[1];
2718  rtx_src2 = operands[2];
2719
2720  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2721  DONE;
2722})
2723
2724;; Generate vunsignedo_v2df
;; unsigned double float to int conversion, odd words
2726(define_expand "vunsignedo_v2df"
2727  [(set (match_operand:V4SI 0 "register_operand" "=v")
2728	(match_operand:V2DF 1 "register_operand" "v"))]
2729  "TARGET_VSX"
2730{
2731  if (BYTES_BIG_ENDIAN)
2732    {
2733      rtx rtx_tmp;
2734      rtx rtx_val = GEN_INT (12);
2735      rtx_tmp = gen_reg_rtx (V4SImode);
2736
2737      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2738
      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word
	 0 1 2 3    0 1 2 3  =>  1 2 3 0
	 Words 1 and 3 are now where they need to be for the result.  */
2743
2744      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2745		 rtx_tmp, rtx_val));
2746    }
2747  else
2748    /* Little endian word numbering for operand is 3 2 1 0.
2749       Result words 3 and 1 are where they need to be.  */
2750    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2751
2752  DONE;
2753}
2754  [(set_attr "type" "veccomplex")])
2755
2756;; Generate vunsignede_v2df
;; unsigned double float to int conversion, even words
2758(define_expand "vunsignede_v2df"
2759  [(set (match_operand:V4SI 0 "register_operand" "=v")
2760	(match_operand:V2DF 1 "register_operand" "v"))]
2761  "TARGET_VSX"
2762{
2763  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
2766    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2767
2768  else
2769    {
2770      rtx rtx_tmp;
2771      rtx rtx_val = GEN_INT (12);
2772      rtx_tmp = gen_reg_rtx (V4SImode);
2773
2774      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2775
2776      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words
2778	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2779	 Words 0 and 2 are now where they need to be for the result.  */
2780      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2781		 rtx_tmp, rtx_val));
2782    }
2783  DONE;
2784}
2785  [(set_attr "type" "veccomplex")])
2786
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
2789;; point value is < LONG_MIN or > LONG_MAX.
2790(define_insn "*vsx_float_fix_v2df2"
2791  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2792	(float:V2DF
2793	 (fix:V2DI
2794	  (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2795  "TARGET_HARD_FLOAT
2796   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2797   && !flag_trapping_math && TARGET_FRIZ"
2798  "xvrdpiz %x0,%x1"
2799  [(set_attr "type" "vecdouble")])
2800
2801
2802;; Permute operations
2803
2804;; Build a V2DF/V2DI vector from two scalars
2805(define_insn "vsx_concat_<mode>"
2806  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2807	(vec_concat:VSX_D
2808	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2809	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2810  "VECTOR_MEM_VSX_P (<MODE>mode)"
2811{
2812  if (which_alternative == 0)
2813    return (BYTES_BIG_ENDIAN
2814	    ? "xxpermdi %x0,%x1,%x2,0"
2815	    : "xxpermdi %x0,%x2,%x1,0");
2816
2817  else if (which_alternative == 1)
2818    return (BYTES_BIG_ENDIAN
2819	    ? "mtvsrdd %x0,%1,%2"
2820	    : "mtvsrdd %x0,%2,%1");
2821
2822  else
2823    gcc_unreachable ();
2824}
2825  [(set_attr "type" "vecperm")])
2826
;; Combiner patterns to allow creating XXPERMDIs to access either double
2828;; word element in a vector register.
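;; The xxpermdi immediate DM is two bits: the high bit selects which
;; doubleword of the first source becomes the first (BE-numbered) doubleword
;; of the result, and the low bit selects the doubleword of the second
;; source, i.e. DM = 2*dw1 + dw2.  The endian handling below renumbers the
;; selected elements and swaps the two sources so that the same instruction
;; implements the RTL on both BE and LE.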
2829(define_insn "*vsx_concat_<mode>_1"
2830  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2831	(vec_concat:VSX_D
2832	 (vec_select:<VS_scalar>
2833	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2834	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2835	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2836  "VECTOR_MEM_VSX_P (<MODE>mode)"
2837{
2838  HOST_WIDE_INT dword = INTVAL (operands[2]);
2839  if (BYTES_BIG_ENDIAN)
2840    {
      operands[4] = GEN_INT (2 * dword);
2842      return "xxpermdi %x0,%x1,%x3,%4";
2843    }
2844  else
2845    {
2846      operands[4] = GEN_INT (!dword);
2847      return "xxpermdi %x0,%x3,%x1,%4";
2848    }
2849}
2850  [(set_attr "type" "vecperm")])
2851
2852(define_insn "*vsx_concat_<mode>_2"
2853  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2854	(vec_concat:VSX_D
2855	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2856	 (vec_select:<VS_scalar>
2857	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2858	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2859  "VECTOR_MEM_VSX_P (<MODE>mode)"
2860{
2861  HOST_WIDE_INT dword = INTVAL (operands[3]);
2862  if (BYTES_BIG_ENDIAN)
2863    {
2864      operands[4] = GEN_INT (dword);
2865      return "xxpermdi %x0,%x1,%x2,%4";
2866    }
2867  else
2868    {
2869      operands[4] = GEN_INT (2 * !dword);
2870      return "xxpermdi %x0,%x2,%x1,%4";
2871    }
2872}
2873  [(set_attr "type" "vecperm")])
2874
2875(define_insn "*vsx_concat_<mode>_3"
2876  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2877	(vec_concat:VSX_D
2878	 (vec_select:<VS_scalar>
2879	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2880	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2881	 (vec_select:<VS_scalar>
2882	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2883	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2884  "VECTOR_MEM_VSX_P (<MODE>mode)"
2885{
2886  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2887  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2888  if (BYTES_BIG_ENDIAN)
2889    {
2890      operands[5] = GEN_INT ((2 * dword1) + dword2);
2891      return "xxpermdi %x0,%x1,%x3,%5";
2892    }
2893  else
2894    {
2895      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2896      return "xxpermdi %x0,%x3,%x1,%5";
2897    }
2898}
2899  [(set_attr "type" "vecperm")])
2900
2901;; Special purpose concat using xxpermdi to glue two single precision values
2902;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
2904(define_insn "vsx_concat_v2sf"
2905  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2906	(unspec:V2DF
2907	 [(match_operand:SF 1 "vsx_register_operand" "wa")
2908	  (match_operand:SF 2 "vsx_register_operand" "wa")]
2909	 UNSPEC_VSX_CONCAT))]
2910  "VECTOR_MEM_VSX_P (V2DFmode)"
2911{
2912  if (BYTES_BIG_ENDIAN)
2913    return "xxpermdi %x0,%x1,%x2,0";
2914  else
2915    return "xxpermdi %x0,%x2,%x1,0";
2916}
2917  [(set_attr "type" "vecperm")])
2918
2919;; Concatenate 4 SImode elements into a V4SImode reg.
2920(define_expand "vsx_init_v4si"
2921  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2922   (use (match_operand:SI 1 "gpc_reg_operand"))
2923   (use (match_operand:SI 2 "gpc_reg_operand"))
2924   (use (match_operand:SI 3 "gpc_reg_operand"))
2925   (use (match_operand:SI 4 "gpc_reg_operand"))]
2926   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2927{
2928  rtx a = gen_reg_rtx (DImode);
2929  rtx b = gen_reg_rtx (DImode);
2930  rtx c = gen_reg_rtx (DImode);
2931  rtx d = gen_reg_rtx (DImode);
2932  emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2933  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2934  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2935  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2936  if (!BYTES_BIG_ENDIAN)
2937    {
2938      std::swap (a, b);
2939      std::swap (c, d);
2940    }
2941
2942  rtx aa = gen_reg_rtx (DImode);
2943  rtx ab = gen_reg_rtx (DImode);
2944  rtx cc = gen_reg_rtx (DImode);
2945  rtx cd = gen_reg_rtx (DImode);
2946  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2947  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2948  emit_insn (gen_iordi3 (ab, aa, b));
2949  emit_insn (gen_iordi3 (cd, cc, d));
2950
2951  rtx abcd = gen_reg_rtx (V2DImode);
2952  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2953  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2954  DONE;
2955})
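;; e.g., building { 1, 2, 3, 4 } on a big-endian target forms the GPR pairs
;; ab = (1 << 32) | 2 and cd = (3 << 32) | 4 and glues them together with
;; vsx_concat_v2di (mtvsrdd); on little endian the elements of each pair are
;; swapped first so the in-register layout still matches the V4SI element
;; order.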
2956
2957;; xxpermdi for little endian loads and stores.  We need several of
2958;; these since the form of the PARALLEL differs by mode.
2959(define_insn "*vsx_xxpermdi2_le_<mode>"
2960  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2961        (vec_select:VSX_D
2962          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2963          (parallel [(const_int 1) (const_int 0)])))]
2964  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2965  "xxpermdi %x0,%x1,%x1,2"
2966  [(set_attr "type" "vecperm")])
2967
2968(define_insn "xxswapd_v16qi"
2969  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2970	(vec_select:V16QI
2971	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
2972	  (parallel [(const_int 8) (const_int 9)
2973		     (const_int 10) (const_int 11)
2974		     (const_int 12) (const_int 13)
2975		     (const_int 14) (const_int 15)
2976		     (const_int 0) (const_int 1)
2977		     (const_int 2) (const_int 3)
2978		     (const_int 4) (const_int 5)
2979		     (const_int 6) (const_int 7)])))]
2980  "TARGET_VSX"
2981;; AIX does not support the extended mnemonic xxswapd.  Use the basic
2982;; mnemonic xxpermdi instead.
2983  "xxpermdi %x0,%x1,%x1,2"
2984  [(set_attr "type" "vecperm")])
2985
2986(define_insn "xxswapd_v8hi"
2987  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2988	(vec_select:V8HI
2989	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
2990	  (parallel [(const_int 4) (const_int 5)
2991		     (const_int 6) (const_int 7)
2992		     (const_int 0) (const_int 1)
2993		     (const_int 2) (const_int 3)])))]
2994  "TARGET_VSX"
2995;; AIX does not support the extended mnemonic xxswapd.  Use the basic
2996;; mnemonic xxpermdi instead.
2997  "xxpermdi %x0,%x1,%x1,2"
2998  [(set_attr "type" "vecperm")])
2999
3000(define_insn "xxswapd_<mode>"
3001  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3002	(vec_select:VSX_W
3003	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3004	  (parallel [(const_int 2) (const_int 3)
3005		     (const_int 0) (const_int 1)])))]
3006  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
3008;; mnemonic xxpermdi instead.
3009  "xxpermdi %x0,%x1,%x1,2"
3010  [(set_attr "type" "vecperm")])
3011
3012(define_insn "xxswapd_<mode>"
3013  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3014	(vec_select:VSX_D
3015	  (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3016	  (parallel [(const_int 1) (const_int 0)])))]
3017  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
3020  "xxpermdi %x0,%x1,%x1,2"
3021  [(set_attr "type" "vecperm")])
3022
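;; A worked example of the xxswapd idiom (illustrative): with %x1 holding
;; the V2DI { a, b }, "xxpermdi %x0,%x1,%x1,2" selects doubleword 1 of the
;; first copy and doubleword 0 of the second, giving { b, a }.  This is the
;; doubleword swap used below to fix up element order around lxvd2x and
;; stxvd2x on little endian.
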
3023;; lxvd2x for little endian loads.  We need several of
3024;; these since the form of the PARALLEL differs by mode.
3025(define_insn "*vsx_lxvd2x2_le_<mode>"
3026  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3027        (vec_select:VSX_D
3028          (match_operand:VSX_D 1 "memory_operand" "Z")
3029          (parallel [(const_int 1) (const_int 0)])))]
3030  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3031  "lxvd2x %x0,%y1"
3032  [(set_attr "type" "vecload")])
3033
3034(define_insn "*vsx_lxvd2x4_le_<mode>"
3035  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3036        (vec_select:VSX_W
3037          (match_operand:VSX_W 1 "memory_operand" "Z")
3038          (parallel [(const_int 2) (const_int 3)
3039                     (const_int 0) (const_int 1)])))]
3040  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3041  "lxvd2x %x0,%y1"
3042  [(set_attr "type" "vecload")])
3043
3044(define_insn "*vsx_lxvd2x8_le_V8HI"
3045  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3046        (vec_select:V8HI
3047          (match_operand:V8HI 1 "memory_operand" "Z")
3048          (parallel [(const_int 4) (const_int 5)
3049                     (const_int 6) (const_int 7)
3050                     (const_int 0) (const_int 1)
3051                     (const_int 2) (const_int 3)])))]
3052  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3053  "lxvd2x %x0,%y1"
3054  [(set_attr "type" "vecload")])
3055
3056(define_insn "*vsx_lxvd2x16_le_V16QI"
3057  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3058        (vec_select:V16QI
3059          (match_operand:V16QI 1 "memory_operand" "Z")
3060          (parallel [(const_int 8) (const_int 9)
3061                     (const_int 10) (const_int 11)
3062                     (const_int 12) (const_int 13)
3063                     (const_int 14) (const_int 15)
3064                     (const_int 0) (const_int 1)
3065                     (const_int 2) (const_int 3)
3066                     (const_int 4) (const_int 5)
3067                     (const_int 6) (const_int 7)])))]
3068  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3069  "lxvd2x %x0,%y1"
3070  [(set_attr "type" "vecload")])
3071
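;; A hedged example of the code these patterns produce (assuming the later
;; swap-removal optimization does not elide the permute): on little endian
;; Power8,
;;
;;   #include <altivec.h>
;;   vector int
;;   load4 (vector int *p)
;;   {
;;     return *p;
;;   }
;;
;; can become "lxvd2x 34,0,3" followed by "xxpermdi 34,34,34,2".  The
;; lxvd2x leaves the two doublewords swapped relative to the vector's
;; element order, and the xxpermdi (matched by the patterns above against
;; the explicit vec_select) swaps them back.
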
3072;; stxvd2x for little endian stores.  We need several of
3073;; these since the form of the PARALLEL differs by mode.
3074(define_insn "*vsx_stxvd2x2_le_<mode>"
3075  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3076        (vec_select:VSX_D
3077          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3078          (parallel [(const_int 1) (const_int 0)])))]
3079  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3080  "stxvd2x %x1,%y0"
3081  [(set_attr "type" "vecstore")])
3082
3083(define_insn "*vsx_stxvd2x4_le_<mode>"
3084  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3085        (vec_select:VSX_W
3086          (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3087          (parallel [(const_int 2) (const_int 3)
3088                     (const_int 0) (const_int 1)])))]
3089  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3090  "stxvd2x %x1,%y0"
3091  [(set_attr "type" "vecstore")])
3092
3093(define_insn "*vsx_stxvd2x8_le_V8HI"
3094  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3095        (vec_select:V8HI
3096          (match_operand:V8HI 1 "vsx_register_operand" "wa")
3097          (parallel [(const_int 4) (const_int 5)
3098                     (const_int 6) (const_int 7)
3099                     (const_int 0) (const_int 1)
3100                     (const_int 2) (const_int 3)])))]
3101  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3102  "stxvd2x %x1,%y0"
3103  [(set_attr "type" "vecstore")])
3104
3105(define_insn "*vsx_stxvd2x16_le_V16QI"
3106  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3107        (vec_select:V16QI
3108          (match_operand:V16QI 1 "vsx_register_operand" "wa")
3109          (parallel [(const_int 8) (const_int 9)
3110                     (const_int 10) (const_int 11)
3111                     (const_int 12) (const_int 13)
3112                     (const_int 14) (const_int 15)
3113                     (const_int 0) (const_int 1)
3114                     (const_int 2) (const_int 3)
3115                     (const_int 4) (const_int 5)
3116                     (const_int 6) (const_int 7)])))]
3117  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3118  "stxvd2x %x1,%y0"
3119  [(set_attr "type" "vecstore")])
3120
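;; The store side mirrors the loads (a sketch): on little endian,
;; "void store4 (vector int *p, vector int v) { *p = v; }" is split into an
;; xxpermdi that pre-swaps the doublewords into a temporary and one of the
;; stxvd2x patterns above, e.g. "xxpermdi 0,34,34,2" then "stxvd2x 0,0,3".
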
3121;; Convert a TImode value into V1TImode
3122(define_expand "vsx_set_v1ti"
3123  [(match_operand:V1TI 0 "nonimmediate_operand")
3124   (match_operand:V1TI 1 "nonimmediate_operand")
3125   (match_operand:TI 2 "input_operand")
3126   (match_operand:QI 3 "u5bit_cint_operand")]
3127  "VECTOR_MEM_VSX_P (V1TImode)"
3128{
3129  if (operands[3] != const0_rtx)
3130    gcc_unreachable ();
3131
  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3133  DONE;
3134})
3135
3136;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3137(define_expand "vsx_set_<mode>"
3138  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3139   (use (match_operand:VSX_D 1 "vsx_register_operand"))
3140   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3141   (use (match_operand:QI 3 "const_0_to_1_operand"))]
3142  "VECTOR_MEM_VSX_P (<MODE>mode)"
3143{
3144  rtx dest = operands[0];
3145  rtx vec_reg = operands[1];
3146  rtx value = operands[2];
3147  rtx ele = operands[3];
3148  rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3149
3150  if (ele == const0_rtx)
3151    {
3152      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3153      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3154      DONE;
3155    }
3156  else if (ele == const1_rtx)
3157    {
3158      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3159      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3160      DONE;
3161    }
3162  else
3163    gcc_unreachable ();
3164})
3165
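;; For instance (a hedged sketch of the rewrite above): setting element 0
;; of a V2DF, as in
;;
;;   #include <altivec.h>
;;   vector double
;;   set0 (vector double v, double x)
;;   {
;;     return vec_insert (x, v, 0);
;;   }
;;
;; extracts the surviving element 1 into a temporary and then concatenates,
;; emitting vec_concat (x, tmp) with no trip through memory.
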
3166;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move,
;; or see if we can avoid doing the move at all.
3169
3170;; There are some unresolved problems with reload that show up if an Altivec
3171;; register was picked.  Limit the scalar value to FPRs for now.
3172
3173(define_insn "vsx_extract_<mode>"
3174  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d,  wr, wr")
3175	(vec_select:<VS_scalar>
3176	 (match_operand:VSX_D 1 "gpc_reg_operand"      "wa, wa, wa, wa")
3177	 (parallel
3178	  [(match_operand:QI 2 "const_0_to_1_operand"  "wD, n,  wD, n")])))]
3179  "VECTOR_MEM_VSX_P (<MODE>mode)"
3180{
3181  int element = INTVAL (operands[2]);
3182  int op0_regno = REGNO (operands[0]);
3183  int op1_regno = REGNO (operands[1]);
3184  int fldDM;
3185
3186  gcc_assert (IN_RANGE (element, 0, 1));
3187  gcc_assert (VSX_REGNO_P (op1_regno));
3188
3189  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3190    {
3191      if (op0_regno == op1_regno)
3192	return ASM_COMMENT_START " vec_extract to same register";
3193
3194      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3195	       && TARGET_POWERPC64)
3196	return "mfvsrd %0,%x1";
3197
3198      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3199	return "fmr %0,%1";
3200
3201      else if (VSX_REGNO_P (op0_regno))
3202	return "xxlor %x0,%x1,%x1";
3203
3204      else
3205	gcc_unreachable ();
3206    }
3207
3208  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3209	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3210    return "mfvsrld %0,%x1";
3211
3212  else if (VSX_REGNO_P (op0_regno))
3213    {
3214      fldDM = element << 1;
3215      if (!BYTES_BIG_ENDIAN)
3216	fldDM = 3 - fldDM;
3217      operands[3] = GEN_INT (fldDM);
3218      return "xxpermdi %x0,%x1,%x1,%3";
3219    }
3220
3221  else
3222    gcc_unreachable ();
3223}
3224  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
3225   (set_attr "isa" "*,*,p8v,p9v")])
3226
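;; A worked view of the templates returned above (illustrative): asking for
;; the element that already sits in the scalar doubleword (0 on big endian,
;; 1 on little endian) costs at most a copy -- nothing at all if the
;; registers match, "fmr"/"xxlor" between VSX registers, or "mfvsrd" to a
;; GPR.  The other element needs "xxpermdi %x0,%x1,%x1,<dm>" or, on ISA 3.0
;; 64-bit targets, "mfvsrld" straight to a GPR.
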
3227;; Optimize extracting a single scalar element from memory.
3228(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3229  [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3230	(vec_select:<VSX_D:VS_scalar>
3231	 (match_operand:VSX_D 1 "memory_operand" "m,m")
3232	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3233   (clobber (match_scratch:P 3 "=&b,&b"))]
3234  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3235  "#"
3236  "&& reload_completed"
3237  [(set (match_dup 0) (match_dup 4))]
3238{
3239  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3240					   operands[3], <VSX_D:VS_scalar>mode);
3241}
3242  [(set_attr "type" "fpload,load")
3243   (set_attr "length" "8")])
3244
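;; E.g. (a sketch): "double g (vector double *p) { return vec_extract (*p, 1); }"
;; never loads the whole vector; rs6000_adjust_vec_address folds the 8-byte
;; element offset into the address (using the base-register scratch if
;; needed) and a single lfd/ld finishes the job.
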
;; Optimize storing to memory a single scalar element that is already in
;; the right location.
3247(define_insn "*vsx_extract_<mode>_store"
3248  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3249	(vec_select:<VS_scalar>
3250	 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3251	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3252  "VECTOR_MEM_VSX_P (<MODE>mode)"
3253  "@
3254   stfd%U0%X0 %1,%0
3255   stxsdx %x1,%y0
3256   stxsd %1,%0"
3257  [(set_attr "type" "fpstore")
3258   (set_attr "isa" "*,p7v,p9v")])
3259
3260;; Variable V2DI/V2DF extract shift
3261(define_insn "vsx_vslo_<mode>"
3262  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3263	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3264			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3265			    UNSPEC_VSX_VSLO))]
3266  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3267  "vslo %0,%1,%2"
3268  [(set_attr "type" "vecperm")])
3269
3270;; Variable V2DI/V2DF extract from a register
3271(define_insn_and_split "vsx_extract_<mode>_var"
3272  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3273	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3274			     (match_operand:DI 2 "gpc_reg_operand" "r")]
3275			    UNSPEC_VSX_EXTRACT))
3276   (clobber (match_scratch:DI 3 "=r"))
3277   (clobber (match_scratch:V2DI 4 "=&v"))]
3278  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3279  "#"
3280  "&& reload_completed"
3281  [(const_int 0)]
3282{
3283  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3284				operands[3], operands[4]);
3285  DONE;
3286})
3287
3288;; Variable V2DI/V2DF extract from memory
3289(define_insn_and_split "*vsx_extract_<mode>_var_load"
3290  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3291	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3292			     (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3293			    UNSPEC_VSX_EXTRACT))
3294   (clobber (match_scratch:DI 3 "=&b,&b"))]
3295  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3296  "#"
3297  "&& reload_completed"
3298  [(set (match_dup 0) (match_dup 4))]
3299{
3300  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3301					   operands[3], <VS_scalar>mode);
3302}
3303  [(set_attr "type" "fpload,load")])
3304
3305;; Extract a SF element from V4SF
3306(define_insn_and_split "vsx_extract_v4sf"
3307  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3308	(vec_select:SF
3309	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3310	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3311   (clobber (match_scratch:V4SF 3 "=0"))]
3312  "VECTOR_UNIT_VSX_P (V4SFmode)"
3313  "#"
3314  "&& 1"
3315  [(const_int 0)]
3316{
3317  rtx op0 = operands[0];
3318  rtx op1 = operands[1];
3319  rtx op2 = operands[2];
3320  rtx op3 = operands[3];
3321  rtx tmp;
3322  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3323
3324  if (ele == 0)
3325    tmp = op1;
3326  else
3327    {
3328      if (GET_CODE (op3) == SCRATCH)
3329	op3 = gen_reg_rtx (V4SFmode);
3330      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3331      tmp = op3;
3332    }
3333  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3334  DONE;
3335}
3336  [(set_attr "length" "8")
3337   (set_attr "type" "fp")])
3338
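;; E.g. (a sketch): "float e2 (vector float v) { return vec_extract (v, 2); }"
;; on big endian rotates the element to the front with
;; "xxsldwi %x0,%x1,%x1,2" and converts it with xscvspdp; extracting
;; element 0 (or element 3 on little endian) skips the rotate entirely.
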
3339(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3340  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3341	(vec_select:SF
3342	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3343	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3344   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3345  "VECTOR_MEM_VSX_P (V4SFmode)"
3346  "#"
3347  "&& reload_completed"
3348  [(set (match_dup 0) (match_dup 4))]
3349{
3350  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3351					   operands[3], SFmode);
3352}
3353  [(set_attr "type" "fpload,fpload,fpload,load")
3354   (set_attr "length" "8")
3355   (set_attr "isa" "*,p7v,p9v,*")])
3356
3357;; Variable V4SF extract from a register
3358(define_insn_and_split "vsx_extract_v4sf_var"
3359  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3360	(unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3361		    (match_operand:DI 2 "gpc_reg_operand" "r")]
3362		   UNSPEC_VSX_EXTRACT))
3363   (clobber (match_scratch:DI 3 "=r"))
3364   (clobber (match_scratch:V2DI 4 "=&v"))]
3365  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3366  "#"
3367  "&& reload_completed"
3368  [(const_int 0)]
3369{
3370  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3371				operands[3], operands[4]);
3372  DONE;
3373})
3374
3375;; Variable V4SF extract from memory
3376(define_insn_and_split "*vsx_extract_v4sf_var_load"
3377  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3378	(unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3379		    (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3380		   UNSPEC_VSX_EXTRACT))
3381   (clobber (match_scratch:DI 3 "=&b,&b"))]
3382  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3383  "#"
3384  "&& reload_completed"
3385  [(set (match_dup 0) (match_dup 4))]
3386{
3387  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3388					   operands[3], SFmode);
3389}
3390  [(set_attr "type" "fpload,load")])
3391
3392;; Expand the builtin form of xxpermdi to canonical rtl.
3393(define_expand "vsx_xxpermdi_<mode>"
3394  [(match_operand:VSX_L 0 "vsx_register_operand")
3395   (match_operand:VSX_L 1 "vsx_register_operand")
3396   (match_operand:VSX_L 2 "vsx_register_operand")
3397   (match_operand:QI 3 "u5bit_cint_operand")]
3398  "VECTOR_MEM_VSX_P (<MODE>mode)"
3399{
3400  rtx target = operands[0];
3401  rtx op0 = operands[1];
3402  rtx op1 = operands[2];
3403  int mask = INTVAL (operands[3]);
3404  rtx perm0 = GEN_INT ((mask >> 1) & 1);
3405  rtx perm1 = GEN_INT ((mask & 1) + 2);
3406  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3407
3408  if (<MODE>mode == V2DFmode)
3409    gen = gen_vsx_xxpermdi2_v2df_1;
3410  else
3411    {
3412      gen = gen_vsx_xxpermdi2_v2di_1;
3413      if (<MODE>mode != V2DImode)
3414	{
3415	  target = gen_lowpart (V2DImode, target);
3416	  op0 = gen_lowpart (V2DImode, op0);
3417	  op1 = gen_lowpart (V2DImode, op1);
3418	}
3419    }
3420  emit_insn (gen (target, op0, op1, perm0, perm1));
3421  DONE;
3422})
3423
3424;; Special version of xxpermdi that retains big-endian semantics.
3425(define_expand "vsx_xxpermdi_<mode>_be"
3426  [(match_operand:VSX_L 0 "vsx_register_operand")
3427   (match_operand:VSX_L 1 "vsx_register_operand")
3428   (match_operand:VSX_L 2 "vsx_register_operand")
3429   (match_operand:QI 3 "u5bit_cint_operand")]
3430  "VECTOR_MEM_VSX_P (<MODE>mode)"
3431{
3432  rtx target = operands[0];
3433  rtx op0 = operands[1];
3434  rtx op1 = operands[2];
3435  int mask = INTVAL (operands[3]);
3436  rtx perm0 = GEN_INT ((mask >> 1) & 1);
3437  rtx perm1 = GEN_INT ((mask & 1) + 2);
3438  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3439
3440  if (<MODE>mode == V2DFmode)
3441    gen = gen_vsx_xxpermdi2_v2df_1;
3442  else
3443    {
3444      gen = gen_vsx_xxpermdi2_v2di_1;
3445      if (<MODE>mode != V2DImode)
3446	{
3447	  target = gen_lowpart (V2DImode, target);
3448	  op0 = gen_lowpart (V2DImode, op0);
3449	  op1 = gen_lowpart (V2DImode, op1);
3450	}
3451    }
3452  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3453     transformation we don't want; it is necessary for
3454     rs6000_expand_vec_perm_const_1 but not for this use.  So we
3455     prepare for that by reversing the transformation here.  */
3456  if (BYTES_BIG_ENDIAN)
3457    emit_insn (gen (target, op0, op1, perm0, perm1));
3458  else
3459    {
3460      rtx p0 = GEN_INT (3 - INTVAL (perm1));
3461      rtx p1 = GEN_INT (3 - INTVAL (perm0));
3462      emit_insn (gen (target, op1, op0, p0, p1));
3463    }
3464  DONE;
3465})
3466
3467(define_insn "vsx_xxpermdi2_<mode>_1"
3468  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3469	(vec_select:VSX_D
3470	  (vec_concat:<VS_double>
3471	    (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3472	    (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3473	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
3474		     (match_operand 4 "const_2_to_3_operand" "")])))]
3475  "VECTOR_MEM_VSX_P (<MODE>mode)"
3476{
3477  int op3, op4, mask;
3478
3479  /* For little endian, swap operands and invert/swap selectors
3480     to get the correct xxpermdi.  The operand swap sets up the
3481     inputs as a little endian array.  The selectors are swapped
3482     because they are defined to use big endian ordering.  The
3483     selectors are inverted to get the correct doublewords for
3484     little endian ordering.  */
3485  if (BYTES_BIG_ENDIAN)
3486    {
3487      op3 = INTVAL (operands[3]);
3488      op4 = INTVAL (operands[4]);
3489    }
3490  else
3491    {
3492      op3 = 3 - INTVAL (operands[4]);
3493      op4 = 3 - INTVAL (operands[3]);
3494    }
3495
3496  mask = (op3 << 1) | (op4 - 2);
3497  operands[3] = GEN_INT (mask);
3498
3499  if (BYTES_BIG_ENDIAN)
3500    return "xxpermdi %x0,%x1,%x2,%3";
3501  else
3502    return "xxpermdi %x0,%x2,%x1,%3";
3503}
3504  [(set_attr "type" "vecperm")])
3505
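;; A worked instance of the selector arithmetic above (illustrative): on
;; big endian with selectors {1, 2}, op3 = 1 and op4 = 2 give
;; mask = (1 << 1) | (2 - 2) = 2, so "xxpermdi %x0,%x1,%x2,2" picks
;; doubleword 1 of operand 1 and doubleword 0 of operand 2.  On little
;; endian the same RTL yields op3 = 3 - 2 = 1 and op4 = 3 - 1 = 2, mask 2
;; again, but with the swapped-operand form "xxpermdi %x0,%x2,%x1,2".
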
;; Extraction of a single element from a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract the element to DImode and either do a direct move or a store.
3509(define_expand  "vsx_extract_<mode>"
3510  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3511		   (vec_select:<VS_scalar>
3512		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3513		    (parallel [(match_operand:QI 2 "const_int_operand")])))
3514	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3515  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3516{
  /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3518  if (TARGET_P9_VECTOR)
3519    {
3520      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3521					    operands[2]));
3522      DONE;
3523    }
3524})
3525
3526(define_insn "vsx_extract_<mode>_p9"
3527  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3528	(vec_select:<VS_scalar>
3529	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3530	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3531   (clobber (match_scratch:SI 3 "=r,X"))]
3532  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3533{
3534  if (which_alternative == 0)
3535    return "#";
3536
3537  else
3538    {
3539      HOST_WIDE_INT elt = INTVAL (operands[2]);
3540      HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3541			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3542			       : elt);
3543
3544      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3545      HOST_WIDE_INT offset = unit_size * elt_adj;
3546
3547      operands[2] = GEN_INT (offset);
3548      if (unit_size == 4)
3549	return "xxextractuw %x0,%x1,%2";
3550      else
3551	return "vextractu<wd> %0,%1,%2";
3552    }
3553}
3554  [(set_attr "type" "vecsimple")
3555   (set_attr "isa" "p9v,*")])
3556
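;; Worked example of the offset computation (a sketch): extracting element
;; 2 of a V8HI on little endian gives elt_adj = 8 - 1 - 2 = 5 and a byte
;; offset of 2 * 5 = 10, so the vector-destination alternative prints
;; "vextractuh %0,%1,10"; a V4SI extract would use xxextractuw instead.
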
3557(define_split
3558  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3559	(vec_select:<VS_scalar>
3560	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3561	 (parallel [(match_operand:QI 2 "const_int_operand")])))
3562   (clobber (match_operand:SI 3 "int_reg_operand"))]
3563  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3564  [(const_int 0)]
3565{
3566  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3567  rtx op1 = operands[1];
3568  rtx op2 = operands[2];
3569  rtx op3 = operands[3];
3570  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3571
3572  emit_move_insn (op3, GEN_INT (offset));
3573  if (BYTES_BIG_ENDIAN)
3574    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3575  else
3576    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3577  DONE;
3578})
3579
3580;; Optimize zero extracts to eliminate the AND after the extract.
3581(define_insn_and_split "*vsx_extract_<mode>_di_p9"
3582  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3583	(zero_extend:DI
3584	 (vec_select:<VS_scalar>
3585	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3586	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3587   (clobber (match_scratch:SI 3 "=r,X"))]
3588  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3589  "#"
3590  "&& reload_completed"
3591  [(parallel [(set (match_dup 4)
3592		   (vec_select:<VS_scalar>
3593		    (match_dup 1)
3594		    (parallel [(match_dup 2)])))
3595	      (clobber (match_dup 3))])]
3596{
3597  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3598}
3599  [(set_attr "isa" "p9v,*")])
3600
3601;; Optimize stores to use the ISA 3.0 scalar store instructions
3602(define_insn_and_split "*vsx_extract_<mode>_store_p9"
3603  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3604	(vec_select:<VS_scalar>
3605	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3606	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3607   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3608   (clobber (match_scratch:SI 4 "=X,&r"))]
3609  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3610  "#"
3611  "&& reload_completed"
3612  [(parallel [(set (match_dup 3)
3613		   (vec_select:<VS_scalar>
3614		    (match_dup 1)
3615		    (parallel [(match_dup 2)])))
3616	      (clobber (match_dup 4))])
3617   (set (match_dup 0)
3618	(match_dup 3))])
3619
3620(define_insn_and_split  "*vsx_extract_si"
3621  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3622	(vec_select:SI
3623	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3624	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3625   (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3626  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3627  "#"
3628  "&& reload_completed"
3629  [(const_int 0)]
3630{
3631  rtx dest = operands[0];
3632  rtx src = operands[1];
3633  rtx element = operands[2];
3634  rtx vec_tmp = operands[3];
3635  int value;
3636
3637  if (!BYTES_BIG_ENDIAN)
3638    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3639
3640  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3641     instruction.  */
3642  value = INTVAL (element);
3643  if (value != 1)
3644    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3645  else
3646    vec_tmp = src;
3647
3648  if (MEM_P (operands[0]))
3649    {
3650      if (can_create_pseudo_p ())
3651	dest = rs6000_force_indexed_or_indirect_mem (dest);
3652
3653      if (TARGET_P8_VECTOR)
3654	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3655      else
3656	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3657    }
3658
3659  else if (TARGET_P8_VECTOR)
3660    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3661  else
3662    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3663		    gen_rtx_REG (DImode, REGNO (vec_tmp)));
3664
3665  DONE;
3666}
3667  [(set_attr "type" "mftgpr,vecperm,fpstore")
3668   (set_attr "length" "8")
3669   (set_attr "isa" "*,p8v,*")])
3670
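;; E.g. (a sketch): "int e1 (vector int v) { return vec_extract (v, 2); }"
;; on little endian remaps the index to 4 - 1 - 2 = 1, which is exactly the
;; word the direct move reads, so the vspltw is skipped and only the SImode
;; move to the GPR (an mfvsrwz-style direct move), or the stfiwx for the
;; memory alternative, remains.
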
3671(define_insn_and_split  "*vsx_extract_<mode>_p8"
3672  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3673	(vec_select:<VS_scalar>
3674	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3675	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3676   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3677  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3678   && !TARGET_P9_VECTOR"
3679  "#"
3680  "&& reload_completed"
3681  [(const_int 0)]
3682{
3683  rtx dest = operands[0];
3684  rtx src = operands[1];
3685  rtx element = operands[2];
3686  rtx vec_tmp = operands[3];
3687  int value;
3688
3689  if (!BYTES_BIG_ENDIAN)
3690    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3691
3692  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3693     instruction.  */
3694  value = INTVAL (element);
3695  if (<MODE>mode == V16QImode)
3696    {
3697      if (value != 7)
3698	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3699      else
3700	vec_tmp = src;
3701    }
3702  else if (<MODE>mode == V8HImode)
3703    {
3704      if (value != 3)
3705	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3706      else
3707	vec_tmp = src;
3708    }
3709  else
3710    gcc_unreachable ();
3711
3712  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3713		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
3714  DONE;
3715}
3716  [(set_attr "type" "mftgpr")])
3717
3718;; Optimize extracting a single scalar element from memory.
3719(define_insn_and_split "*vsx_extract_<mode>_load"
3720  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3721	(vec_select:<VS_scalar>
3722	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3723	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3724   (clobber (match_scratch:DI 3 "=&b"))]
3725  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3726  "#"
3727  "&& reload_completed"
3728  [(set (match_dup 0) (match_dup 4))]
3729{
3730  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3731					   operands[3], <VS_scalar>mode);
3732}
3733  [(set_attr "type" "load")
3734   (set_attr "length" "8")])
3735
3736;; Variable V16QI/V8HI/V4SI extract from a register
3737(define_insn_and_split "vsx_extract_<mode>_var"
3738  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3739	(unspec:<VS_scalar>
3740	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3741	  (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3742	 UNSPEC_VSX_EXTRACT))
3743   (clobber (match_scratch:DI 3 "=r,r"))
3744   (clobber (match_scratch:V2DI 4 "=X,&v"))]
3745  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3746  "#"
3747  "&& reload_completed"
3748  [(const_int 0)]
3749{
3750  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3751				operands[3], operands[4]);
3752  DONE;
3753}
3754  [(set_attr "isa" "p9v,*")])
3755
3756;; Variable V16QI/V8HI/V4SI extract from memory
3757(define_insn_and_split "*vsx_extract_<mode>_var_load"
3758  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3759	(unspec:<VS_scalar>
3760	 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3761	  (match_operand:DI 2 "gpc_reg_operand" "r")]
3762	 UNSPEC_VSX_EXTRACT))
3763   (clobber (match_scratch:DI 3 "=&b"))]
3764  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3765  "#"
3766  "&& reload_completed"
3767  [(set (match_dup 0) (match_dup 4))]
3768{
3769  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3770					   operands[3], <VS_scalar>mode);
3771}
3772  [(set_attr "type" "load")])
3773
3774;; VSX_EXTRACT optimizations
3775;; Optimize double d = (double) vec_extract (vi, <n>)
3776;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3777(define_insn_and_split "*vsx_extract_si_<uns>float_df"
3778  [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
3779	(any_float:DF
3780	 (vec_select:SI
3781	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
3782	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3783   (clobber (match_scratch:V4SI 3 "=v"))]
3784  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3785  "#"
3786  "&& 1"
3787  [(const_int 0)]
3788{
3789  rtx dest = operands[0];
3790  rtx src = operands[1];
3791  rtx element = operands[2];
3792  rtx v4si_tmp = operands[3];
3793  int value;
3794
3795  if (!BYTES_BIG_ENDIAN)
3796    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3797
3798  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3799     instruction.  */
3800  value = INTVAL (element);
3801  if (value != 0)
3802    {
3803      if (GET_CODE (v4si_tmp) == SCRATCH)
3804	v4si_tmp = gen_reg_rtx (V4SImode);
3805      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3806    }
3807  else
3808    v4si_tmp = src;
3809
3810  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3811  DONE;
3812})
3813
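;; E.g. (a sketch): "double d2 (vector int v) { return vec_extract (v, 0); }"
;; on little endian first splats word 3 - 0 = 3 with vspltw, then a single
;; xvcvsxwdp-style convert produces the double in place -- no GPR round
;; trip is needed.
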
3814;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type supported by the hardware that is
3816;; not double.  First convert the value to double, and then to the desired
3817;; type.
3818(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3819  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
3820	(any_float:VSX_EXTRACT_FL
3821	 (vec_select:SI
3822	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
3823	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3824   (clobber (match_scratch:V4SI 3 "=v"))
3825   (clobber (match_scratch:DF 4 "=wa"))]
3826  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3827  "#"
3828  "&& 1"
3829  [(const_int 0)]
3830{
3831  rtx dest = operands[0];
3832  rtx src = operands[1];
3833  rtx element = operands[2];
3834  rtx v4si_tmp = operands[3];
3835  rtx df_tmp = operands[4];
3836  int value;
3837
3838  if (!BYTES_BIG_ENDIAN)
3839    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3840
3841  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3842     instruction.  */
3843  value = INTVAL (element);
3844  if (value != 0)
3845    {
3846      if (GET_CODE (v4si_tmp) == SCRATCH)
3847	v4si_tmp = gen_reg_rtx (V4SImode);
3848      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3849    }
3850  else
3851    v4si_tmp = src;
3852
3853  if (GET_CODE (df_tmp) == SCRATCH)
3854    df_tmp = gen_reg_rtx (DFmode);
3855
3856  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3857
3858  if (<MODE>mode == SFmode)
3859    emit_insn (gen_truncdfsf2 (dest, df_tmp));
3860  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3861    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3862  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3863	   && TARGET_FLOAT128_HW)
3864    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3865  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3866    emit_insn (gen_extenddfif2 (dest, df_tmp));
3867  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3868    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3869  else
3870    gcc_unreachable ();
3871
3872  DONE;
3873})
3874
;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>),
;; where <ftype> is SFmode or DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short, or vector unsigned short.
3879(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3880  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3881	(float:FL_CONV
3882	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3883	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3884	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3885   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3886  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3887   && TARGET_P9_VECTOR"
3888  "#"
3889  "&& reload_completed"
3890  [(parallel [(set (match_dup 3)
3891		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
3892		    (match_dup 1)
3893		    (parallel [(match_dup 2)])))
3894	      (clobber (scratch:SI))])
3895   (set (match_dup 4)
3896	(sign_extend:DI (match_dup 3)))
3897   (set (match_dup 0)
3898	(float:<FL_CONV:MODE> (match_dup 4)))]
3899{
3900  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3901}
3902  [(set_attr "isa" "<FL_CONV:VSisa>")])
3903
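;; A hedged sketch of the split above:
;;
;;   #include <altivec.h>
;;   float
;;   conv (vector signed char v)
;;   {
;;     return (float) vec_extract (v, 3);
;;   }
;;
;; becomes a vextractub into the vector scratch, a vextsb2d-style sign
;; extension to DImode, and one xscvsxdsp-style convert, all without
;; touching the GPRs.
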
3904(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3905  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3906	(unsigned_float:FL_CONV
3907	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3908	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3909	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3910   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3911  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3912   && TARGET_P9_VECTOR"
3913  "#"
3914  "&& reload_completed"
3915  [(parallel [(set (match_dup 3)
3916		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
3917		    (match_dup 1)
3918		    (parallel [(match_dup 2)])))
3919	      (clobber (scratch:SI))])
3920   (set (match_dup 0)
3921	(float:<FL_CONV:MODE> (match_dup 4)))]
3922{
3923  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3924}
3925  [(set_attr "isa" "<FL_CONV:VSisa>")])
3926
3927;; V4SI/V8HI/V16QI set operation on ISA 3.0
3928(define_insn "vsx_set_<mode>_p9"
3929  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3930	(unspec:VSX_EXTRACT_I
3931	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3932	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3933	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3934	 UNSPEC_VSX_SET))]
3935  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3936{
3937  int ele = INTVAL (operands[3]);
3938  int nunits = GET_MODE_NUNITS (<MODE>mode);
3939
3940  if (!BYTES_BIG_ENDIAN)
3941    ele = nunits - 1 - ele;
3942
3943  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3944  if (<MODE>mode == V4SImode)
3945    return "xxinsertw %x0,%x2,%3";
3946  else
3947    return "vinsert<wd> %0,%2,%3";
3948}
3949  [(set_attr "type" "vecperm")])
3950
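;; Worked index example (illustrative): inserting into element 2 of a V4SI
;; on little endian remaps ele to 4 - 1 - 2 = 1 and scales by the 4-byte
;; unit size, so the pattern prints "xxinsertw %x0,%x2,4".
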
3951(define_insn_and_split "vsx_set_v4sf_p9"
3952  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3953	(unspec:V4SF
3954	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3955	  (match_operand:SF 2 "gpc_reg_operand" "wa")
3956	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
3957	 UNSPEC_VSX_SET))
3958   (clobber (match_scratch:SI 4 "=&wa"))]
3959  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3960  "#"
3961  "&& reload_completed"
3962  [(set (match_dup 5)
3963	(unspec:V4SF [(match_dup 2)]
3964		     UNSPEC_VSX_CVDPSPN))
3965   (parallel [(set (match_dup 4)
3966		   (vec_select:SI (match_dup 6)
3967				  (parallel [(match_dup 7)])))
3968	      (clobber (scratch:SI))])
3969   (set (match_dup 8)
3970	(unspec:V4SI [(match_dup 8)
3971		      (match_dup 4)
3972		      (match_dup 3)]
3973		     UNSPEC_VSX_SET))]
3974{
3975  unsigned int tmp_regno = reg_or_subregno (operands[4]);
3976
3977  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3978  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3979  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
3980  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3981}
3982  [(set_attr "type" "vecperm")
3983   (set_attr "length" "12")
3984   (set_attr "isa" "p9v")])
3985
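;; In effect (a sketch): a vec_insert of an SF value converts the scalar
;; with xscvdpspn, pulls the resulting word into the SImode temporary with
;; an xxextractuw-style extract, and places it with xxinsertw -- the three
;; instructions behind the "length" of 12 above, with no memory round trip.
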
3986;; Special case setting 0.0f to a V4SF element
3987(define_insn_and_split "*vsx_set_v4sf_p9_zero"
3988  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3989	(unspec:V4SF
3990	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3991	  (match_operand:SF 2 "zero_fp_constant" "j")
3992	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
3993	 UNSPEC_VSX_SET))
3994   (clobber (match_scratch:SI 4 "=&wa"))]
3995  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3996  "#"
3997  "&& reload_completed"
3998  [(set (match_dup 4)
3999	(const_int 0))
4000   (set (match_dup 5)
4001	(unspec:V4SI [(match_dup 5)
4002		      (match_dup 4)
4003		      (match_dup 3)]
4004		     UNSPEC_VSX_SET))]
4005{
4006  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4007}
4008  [(set_attr "type" "vecperm")
4009   (set_attr "length" "8")
4010   (set_attr "isa" "p9v")])
4011
4012;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4013;; that is in the default scalar position (1 for big endian, 2 for little
4014;; endian).  We just need to do an xxinsertw since the element is in the
4015;; correct location.
4016
4017(define_insn "*vsx_insert_extract_v4sf_p9"
4018  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4019	(unspec:V4SF
4020	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4021	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4022			 (parallel
4023			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4024	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
4025	 UNSPEC_VSX_SET))]
4026  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4027   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4028{
4029  int ele = INTVAL (operands[4]);
4030
4031  if (!BYTES_BIG_ENDIAN)
4032    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4033
4034  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4035  return "xxinsertw %x0,%x2,%4";
4036}
4037  [(set_attr "type" "vecperm")])
4038
4039;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4040;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Do the insert/extract on the integer (V4SI) view of the vectors
;; so that no float conversion is needed.
4042
4043(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4044  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4045	(unspec:V4SF
4046	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4047	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4048			 (parallel
4049			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4050	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
4051	 UNSPEC_VSX_SET))
4052   (clobber (match_scratch:SI 5 "=&wa"))]
4053  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4054   && TARGET_P9_VECTOR && TARGET_POWERPC64
4055   && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4056  "#"
4057  "&& 1"
4058  [(parallel [(set (match_dup 5)
4059		   (vec_select:SI (match_dup 6)
4060				  (parallel [(match_dup 3)])))
4061	      (clobber (scratch:SI))])
4062   (set (match_dup 7)
4063	(unspec:V4SI [(match_dup 8)
4064		      (match_dup 5)
4065		      (match_dup 4)]
4066		     UNSPEC_VSX_SET))]
4067{
4068  if (GET_CODE (operands[5]) == SCRATCH)
4069    operands[5] = gen_reg_rtx (SImode);
4070
4071  operands[6] = gen_lowpart (V4SImode, operands[2]);
4072  operands[7] = gen_lowpart (V4SImode, operands[0]);
4073  operands[8] = gen_lowpart (V4SImode, operands[1]);
4074}
4075  [(set_attr "type" "vecperm")
4076   (set_attr "isa" "p9v")])
4077
4078;; Expanders for builtins
4079(define_expand "vsx_mergel_<mode>"
4080  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4081   (use (match_operand:VSX_D 1 "vsx_register_operand"))
4082   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4083  "VECTOR_MEM_VSX_P (<MODE>mode)"
4084{
4085  rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4086  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4087  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4088  emit_insn (gen_rtx_SET (operands[0], x));
4089  DONE;
4090})
4091
4092(define_expand "vsx_mergeh_<mode>"
4093  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4094   (use (match_operand:VSX_D 1 "vsx_register_operand"))
4095   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4096  "VECTOR_MEM_VSX_P (<MODE>mode)"
4097{
4098  rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4099  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4100  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4101  emit_insn (gen_rtx_SET (operands[0], x));
4102  DONE;
4103})
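;; E.g. (a sketch): vec_mergel on two V2DF values { a, b } and { c, d }
;; builds the vec_select of their concatenation with selectors {1, 3},
;; i.e. { b, d }; vsx_xxpermdi2_<mode>_1 above then emits the correct
;; xxpermdi for either endianness.
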
4104
4105;; V2DF/V2DI splat
4106;; We separate the register splat insn from the memory splat insn to force the
4107;; register allocator to generate the indexed form of the SPLAT when it is
4108;; given an offsettable memory reference.  Otherwise, if the register and
4109;; memory insns were combined into a single insn, the register allocator will
4110;; load the value into a register, and then do a double word permute.
4111(define_expand "vsx_splat_<mode>"
4112  [(set (match_operand:VSX_D 0 "vsx_register_operand")
4113	(vec_duplicate:VSX_D
4114	 (match_operand:<VS_scalar> 1 "input_operand")))]
4115  "VECTOR_MEM_VSX_P (<MODE>mode)"
4116{
4117  rtx op1 = operands[1];
4118  if (MEM_P (op1))
4119    operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4120  else if (!REG_P (op1))
4121    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4122})
4123
4124(define_insn "vsx_splat_<mode>_reg"
4125  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4126	(vec_duplicate:VSX_D
4127	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4128  "VECTOR_MEM_VSX_P (<MODE>mode)"
4129  "@
4130   xxpermdi %x0,%x1,%x1,0
4131   mtvsrdd %x0,%1,%1"
4132  [(set_attr "type" "vecperm")])
4133
4134(define_insn "vsx_splat_<mode>_mem"
4135  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4136	(vec_duplicate:VSX_D
4137	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4138  "VECTOR_MEM_VSX_P (<MODE>mode)"
4139  "lxvdsx %x0,%y1"
4140  [(set_attr "type" "vecload")])
4141
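;; E.g. (a sketch): "vector double s (double *p) { return vec_splats (*p); }"
;; keeps the splat in its memory form so register allocation can emit the
;; single indexed "lxvdsx 34,0,3"; a register input instead uses xxpermdi
;; or mtvsrdd via vsx_splat_<mode>_reg above.
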
4142;; V4SI splat support
4143(define_insn "vsx_splat_v4si"
4144  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4145	(vec_duplicate:V4SI
4146	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4147  "TARGET_P9_VECTOR"
4148  "@
4149   mtvsrws %x0,%1
4150   lxvwsx %x0,%y1"
4151  [(set_attr "type" "vecperm,vecload")])
4152
4153;; SImode is not currently allowed in vector registers.  This pattern
4154;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW.
4156(define_insn "vsx_splat_v4si_di"
4157  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4158	(vec_duplicate:V4SI
4159	 (truncate:SI
4160	  (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4161  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4162  "@
4163   xxspltw %x0,%x1,1
4164   mtvsrws %x0,%1"
4165  [(set_attr "type" "vecperm")
4166   (set_attr "isa" "p8v,*")])
4167
4168;; V4SF splat (ISA 3.0)
4169(define_insn_and_split "vsx_splat_v4sf"
4170  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4171	(vec_duplicate:V4SF
4172	 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4173  "TARGET_P9_VECTOR"
4174  "@
4175   lxvwsx %x0,%y1
4176   #
4177   mtvsrws %x0,%1"
4178  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4179  [(set (match_dup 0)
4180	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4181   (set (match_dup 0)
4182	(unspec:V4SF [(match_dup 0)
4183		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4184  ""
4185  [(set_attr "type" "vecload,vecperm,mftgpr")
4186   (set_attr "length" "*,8,*")
4187   (set_attr "isa" "*,p8v,*")])
4188
4189;; V4SF/V4SI splat from a vector element
4190(define_insn "vsx_xxspltw_<mode>"
4191  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4192	(vec_duplicate:VSX_W
4193	 (vec_select:<VS_scalar>
4194	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4195	  (parallel
4196	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4197  "VECTOR_MEM_VSX_P (<MODE>mode)"
4198{
4199  if (!BYTES_BIG_ENDIAN)
4200    operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4201
4202  return "xxspltw %x0,%x1,%2";
4203}
4204  [(set_attr "type" "vecperm")])
4205
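;; For example (illustrative): vec_splat (v, 1) on a V4SF reaches this
;; pattern with operand 2 = 1; on little endian the index is remapped to
;; 3 - 1 = 2 so that "xxspltw %x0,%x1,2" replicates the same architectural
;; word the big-endian form would.
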
4206(define_insn "vsx_xxspltw_<mode>_direct"
4207  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4208        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4209                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
4210                      UNSPEC_VSX_XXSPLTW))]
4211  "VECTOR_MEM_VSX_P (<MODE>mode)"
4212  "xxspltw %x0,%x1,%2"
4213  [(set_attr "type" "vecperm")])
4214
4215;; V16QI/V8HI splat support on ISA 2.07
4216(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4217  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4218	(vec_duplicate:VSX_SPLAT_I
4219	 (truncate:<VS_scalar>
4220	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
4221  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4222  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4223  [(set_attr "type" "vecperm")])
4224
4225;; V2DF/V2DI splat for use by vec_splat builtin
4226(define_insn "vsx_xxspltd_<mode>"
4227  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4228        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4229	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
4230                      UNSPEC_VSX_XXSPLTD))]
4231  "VECTOR_MEM_VSX_P (<MODE>mode)"
4232{
4233  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4234      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4235    return "xxpermdi %x0,%x1,%x1,0";
4236  else
4237    return "xxpermdi %x0,%x1,%x1,3";
4238}
4239  [(set_attr "type" "vecperm")])
4240
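;; Worked example (a sketch): splatting element 1 of a V2DF on big endian
;; falls through to "xxpermdi %x0,%x1,%x1,3" (both result doublewords taken
;; from doubleword 1), while the same request on little endian maps to
;; "xxpermdi %x0,%x1,%x1,0".
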
4241;; V4SF/V4SI interleave
4242(define_insn "vsx_xxmrghw_<mode>"
4243  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4244        (vec_select:VSX_W
4245	  (vec_concat:<VS_double>
4246	    (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4247	    (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4248	  (parallel [(const_int 0) (const_int 4)
4249		     (const_int 1) (const_int 5)])))]
4250  "VECTOR_MEM_VSX_P (<MODE>mode)"
4251{
4252  if (BYTES_BIG_ENDIAN)
4253    return "xxmrghw %x0,%x1,%x2";
4254  else
4255    return "xxmrglw %x0,%x2,%x1";
4256}
4257  [(set_attr "type" "vecperm")])
4258
4259(define_insn "vsx_xxmrglw_<mode>"
4260  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4261	(vec_select:VSX_W
4262	  (vec_concat:<VS_double>
4263	    (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4264	    (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4265	  (parallel [(const_int 2) (const_int 6)
4266		     (const_int 3) (const_int 7)])))]
4267  "VECTOR_MEM_VSX_P (<MODE>mode)"
4268{
4269  if (BYTES_BIG_ENDIAN)
4270    return "xxmrglw %x0,%x1,%x2";
4271  else
4272    return "xxmrghw %x0,%x2,%x1";
4273}
4274  [(set_attr "type" "vecperm")])
4275
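;; Sketch of the endian handling above: for V4SI inputs {0,1,2,3} and
;; {4,5,6,7}, the merge-high RTL selects {0,4,1,5}.  Big endian emits
;; xxmrghw directly; little endian gets the same result from
;; "xxmrglw %x0,%x2,%x1", since both the element numbering and the operand
;; order are reversed.
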
4276;; Shift left double by word immediate
4277(define_insn "vsx_xxsldwi_<mode>"
4278  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4279	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4280		       (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4281		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
4282		      UNSPEC_VSX_SLDWI))]
4283  "VECTOR_MEM_VSX_P (<MODE>mode)"
4284  "xxsldwi %x0,%x1,%x2,%3"
4285  [(set_attr "type" "vecperm")
4286   (set_attr "isa" "<VSisa>")])
4287
4288
4289;; Vector reduction insns and splitters
4290
4291(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4292  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4293	(VEC_reduc:V2DF
4294	 (vec_concat:V2DF
4295	  (vec_select:DF
4296	   (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4297	   (parallel [(const_int 1)]))
4298	  (vec_select:DF
4299	   (match_dup 1)
4300	   (parallel [(const_int 0)])))
4301	 (match_dup 1)))
4302   (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4303  "VECTOR_UNIT_VSX_P (V2DFmode)"
4304  "#"
4305  ""
4306  [(const_int 0)]
4307{
4308  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4309	     ? gen_reg_rtx (V2DFmode)
4310	     : operands[2];
4311  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4312  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4313  DONE;
4314}
4315  [(set_attr "length" "8")
4316   (set_attr "type" "veccomplex")])
4317
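;; Worked example of the split (illustrative): for v = { a, b },
;; "xxsldwi tmp,v,v,2" produces tmp = { b, a }, and the following
;; add/min/max of tmp and v leaves the full reduction (e.g. a + b) in both
;; doublewords, ready for the scalar-extraction combiner patterns below.
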
4318(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4319  [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4320	(VEC_reduc:V4SF
4321	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4322	 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4323   (clobber (match_scratch:V4SF 2 "=&wa"))
4324   (clobber (match_scratch:V4SF 3 "=&wa"))]
4325  "VECTOR_UNIT_VSX_P (V4SFmode)"
4326  "#"
4327  ""
4328  [(const_int 0)]
4329{
4330  rtx op0 = operands[0];
4331  rtx op1 = operands[1];
4332  rtx tmp2, tmp3, tmp4;
4333
4334  if (can_create_pseudo_p ())
4335    {
4336      tmp2 = gen_reg_rtx (V4SFmode);
4337      tmp3 = gen_reg_rtx (V4SFmode);
4338      tmp4 = gen_reg_rtx (V4SFmode);
4339    }
4340  else
4341    {
4342      tmp2 = operands[2];
4343      tmp3 = operands[3];
4344      tmp4 = tmp2;
4345    }
4346
4347  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4348  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4349  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4350  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4351  DONE;
4352}
4353  [(set_attr "length" "16")
4354   (set_attr "type" "veccomplex")])
4355
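;; Likewise for V4SF (a sketch): with v = { a, b, c, d }, rotating by two
;; words and combining gives { a+c, b+d, c+a, d+b } for the add case;
;; rotating that by three words and combining again leaves a+b+c+d in
;; every lane.
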
;; Combiner patterns for the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.
4358
4359(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4360  [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4361	(vec_select:DF
4362	 (VEC_reduc:V2DF
4363	  (vec_concat:V2DF
4364	   (vec_select:DF
4365	    (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4366	    (parallel [(const_int 1)]))
4367	   (vec_select:DF
4368	    (match_dup 1)
4369	    (parallel [(const_int 0)])))
4370	  (match_dup 1))
4371	 (parallel [(const_int 1)])))
4372   (clobber (match_scratch:DF 2 "=0,&wa"))]
4373  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4374  "#"
4375  ""
4376  [(const_int 0)]
4377{
4378  rtx hi = gen_highpart (DFmode, operands[1]);
4379  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4380	    ? gen_reg_rtx (DFmode)
4381	    : operands[2];
4382
4383  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4384  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4385  DONE;
4386}
4387  [(set_attr "length" "8")
4388   (set_attr "type" "veccomplex")])
4389
4390(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4391  [(set (match_operand:SF 0 "vfloat_operand" "=f")
4392	(vec_select:SF
4393	 (VEC_reduc:V4SF
4394	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4395	  (match_operand:V4SF 1 "vfloat_operand" "wa"))
4396	 (parallel [(const_int 3)])))
4397   (clobber (match_scratch:V4SF 2 "=&wa"))
4398   (clobber (match_scratch:V4SF 3 "=&wa"))
4399   (clobber (match_scratch:V4SF 4 "=0"))]
4400  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4401  "#"
4402  ""
4403  [(const_int 0)]
4404{
4405  rtx op0 = operands[0];
4406  rtx op1 = operands[1];
4407  rtx tmp2, tmp3, tmp4, tmp5;
4408
4409  if (can_create_pseudo_p ())
4410    {
4411      tmp2 = gen_reg_rtx (V4SFmode);
4412      tmp3 = gen_reg_rtx (V4SFmode);
4413      tmp4 = gen_reg_rtx (V4SFmode);
4414      tmp5 = gen_reg_rtx (V4SFmode);
4415    }
4416  else
4417    {
4418      tmp2 = operands[2];
4419      tmp3 = operands[3];
4420      tmp4 = tmp2;
4421      tmp5 = operands[4];
4422    }
4423
4424  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4425  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4426  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4427  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4428  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4429  DONE;
4430}
4431  [(set_attr "length" "20")
4432   (set_attr "type" "veccomplex")])
4433
4434
4435;; Power8 Vector fusion.  The fused ops must be physically adjacent.
4436(define_peephole
4437  [(set (match_operand:P 0 "base_reg_operand")
4438	(match_operand:P 1 "short_cint_operand"))
4439   (set (match_operand:VSX_M 2 "vsx_register_operand")
4440	(mem:VSX_M (plus:P (match_dup 0)
4441			   (match_operand:P 3 "int_reg_operand"))))]
4442  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4443  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4444  [(set_attr "length" "8")
4445   (set_attr "type" "vecload")])
4446
4447(define_peephole
4448  [(set (match_operand:P 0 "base_reg_operand")
4449	(match_operand:P 1 "short_cint_operand"))
4450   (set (match_operand:VSX_M 2 "vsx_register_operand")
4451	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4452			   (match_dup 0))))]
4453  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4454  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4455  [(set_attr "length" "8")
4456   (set_attr "type" "vecload")])
4457
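;; E.g. (a sketch): the peepholes rewrite the adjacent pair "li 9,16" and
;; "lxvd2x 0,9,3" into the single two-instruction template above, which
;; guarantees the load-immediate and the indexed vector load stay back to
;; back so Power8 can fuse them.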
4458
;; ISA 3.0 vector sign extension support
4460
4461(define_insn "vsx_sign_extend_qi_<mode>"
4462  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4463	(unspec:VSINT_84
4464	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4465	 UNSPEC_VSX_SIGN_EXTEND))]
4466  "TARGET_P9_VECTOR"
4467  "vextsb2<wd> %0,%1"
4468  [(set_attr "type" "vecexts")])
4469
4470(define_insn "vsx_sign_extend_hi_<mode>"
4471  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4472	(unspec:VSINT_84
4473	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4474	 UNSPEC_VSX_SIGN_EXTEND))]
4475  "TARGET_P9_VECTOR"
4476  "vextsh2<wd> %0,%1"
4477  [(set_attr "type" "vecexts")])
4478
4479(define_insn "*vsx_sign_extend_si_v2di"
4480  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4481	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4482		     UNSPEC_VSX_SIGN_EXTEND))]
4483  "TARGET_P9_VECTOR"
4484  "vextsw2d %0,%1"
4485  [(set_attr "type" "vecexts")])
4486
4487
4488;; ISA 3.0 Binary Floating-Point Support
4489
4490;; VSX Scalar Extract Exponent Quad-Precision
4491(define_insn "xsxexpqp_<mode>"
4492  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4493	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4494	 UNSPEC_VSX_SXEXPDP))]
4495  "TARGET_P9_VECTOR"
4496  "xsxexpqp %0,%1"
4497  [(set_attr "type" "vecmove")])
4498
4499;; VSX Scalar Extract Exponent Double-Precision
4500(define_insn "xsxexpdp"
4501  [(set (match_operand:DI 0 "register_operand" "=r")
4502	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4503	 UNSPEC_VSX_SXEXPDP))]
4504  "TARGET_P9_VECTOR && TARGET_64BIT"
4505  "xsxexpdp %0,%x1"
4506  [(set_attr "type" "integer")])
4507
4508;; VSX Scalar Extract Significand Quad-Precision
4509(define_insn "xsxsigqp_<mode>"
4510  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4511	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4512	 UNSPEC_VSX_SXSIG))]
4513  "TARGET_P9_VECTOR"
4514  "xsxsigqp %0,%1"
4515  [(set_attr "type" "vecmove")])
4516
4517;; VSX Scalar Extract Significand Double-Precision
4518(define_insn "xsxsigdp"
4519  [(set (match_operand:DI 0 "register_operand" "=r")
4520	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4521	 UNSPEC_VSX_SXSIG))]
4522  "TARGET_P9_VECTOR && TARGET_64BIT"
4523  "xsxsigdp %0,%x1"
4524  [(set_attr "type" "integer")])
4525
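;; These are reached from built-ins such as scalar_extract_exp and
;; scalar_extract_sig (a hedged note; see the GCC PowerPC built-in
;; documentation), each mapping to its single named instruction, e.g.
;; "xsxexpdp 3,1" to pull the biased exponent of a double into GPR 3.
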
4526;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4527(define_insn "xsiexpqpf_<mode>"
4528  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4529	(unspec:IEEE128
4530	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4531	  (match_operand:DI 2 "altivec_register_operand" "v")]
4532	 UNSPEC_VSX_SIEXPQP))]
4533  "TARGET_P9_VECTOR"
4534  "xsiexpqp %0,%1,%2"
4535  [(set_attr "type" "vecmove")])
4536
4537;; VSX Scalar Insert Exponent Quad-Precision
4538(define_insn "xsiexpqp_<mode>"
4539  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4540	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4541			 (match_operand:DI 2 "altivec_register_operand" "v")]
4542	 UNSPEC_VSX_SIEXPQP))]
4543  "TARGET_P9_VECTOR"
4544  "xsiexpqp %0,%1,%2"
4545  [(set_attr "type" "vecmove")])
4546
4547;; VSX Scalar Insert Exponent Double-Precision
4548(define_insn "xsiexpdp"
4549  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4550	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
4551		    (match_operand:DI 2 "register_operand" "r")]
4552	 UNSPEC_VSX_SIEXPDP))]
4553  "TARGET_P9_VECTOR && TARGET_64BIT"
4554  "xsiexpdp %x0,%1,%2"
4555  [(set_attr "type" "fpsimple")])
4556
4557;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4558(define_insn "xsiexpdpf"
4559  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4560	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
4561		    (match_operand:DI 2 "register_operand" "r")]
4562	 UNSPEC_VSX_SIEXPDP))]
4563  "TARGET_P9_VECTOR && TARGET_64BIT"
4564  "xsiexpdp %x0,%1,%2"
4565  [(set_attr "type" "fpsimple")])
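
;; A hedged sketch (assuming the documented scalar_insert_exp built-in;
;; -mcpu=power9 -m64): building the constant 2.0 directly from raw
;; significand and biased-exponent fields.
;;
;;	#include <altivec.h>
;;
;;	double
;;	make_two (void)
;;	{
;;	  /* Zero fraction (implicit 1.0) with biased exponent 1024.  */
;;	  return scalar_insert_exp (0ULL, 1024ULL);	/* xsiexpdp  */
;;	}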
4566
4567;; VSX Scalar Compare Exponents Double-Precision
4568(define_expand "xscmpexpdp_<code>"
4569  [(set (match_dup 3)
4570	(compare:CCFP
4571	 (unspec:DF
4572	  [(match_operand:DF 1 "vsx_register_operand" "wa")
4573	   (match_operand:DF 2 "vsx_register_operand" "wa")]
4574	  UNSPEC_VSX_SCMPEXPDP)
4575	 (const_int 0)))
4576   (set (match_operand:SI 0 "register_operand" "=r")
4577	(CMP_TEST:SI (match_dup 3)
4578		     (const_int 0)))]
4579  "TARGET_P9_VECTOR"
4580{
4581  if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4582    {
4583      emit_move_insn (operands[0], const0_rtx);
4584      DONE;
4585    }
4586
4587  operands[3] = gen_reg_rtx (CCFPmode);
4588})
4589
4590(define_insn "*xscmpexpdp"
4591  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4592	(compare:CCFP
4593	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4594		     (match_operand:DF 2 "vsx_register_operand" "wa")]
4595	  UNSPEC_VSX_SCMPEXPDP)
4596	 (match_operand:SI 3 "zero_constant" "j")))]
4597  "TARGET_P9_VECTOR"
4598  "xscmpexpdp %0,%x1,%x2"
4599  [(set_attr "type" "fpcompare")])
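
;; A hedged usage sketch (assuming the documented scalar_cmp_exp_* built-ins;
;; -mcpu=power9): compare only the biased exponent fields of two doubles.
;;
;;	#include <altivec.h>
;;
;;	int
;;	exp_gt (double a, double b)
;;	{
;;	  return scalar_cmp_exp_gt (a, b);	/* xscmpexpdp + CR test  */
;;	}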
4600
4601;; VSX Scalar Compare Exponents Quad-Precision
4602(define_expand "xscmpexpqp_<code>_<mode>"
4603  [(set (match_dup 3)
4604	(compare:CCFP
4605	 (unspec:IEEE128
4606	  [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4607	   (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4608	  UNSPEC_VSX_SCMPEXPQP)
4609	 (const_int 0)))
4610   (set (match_operand:SI 0 "register_operand" "=r")
4611	(CMP_TEST:SI (match_dup 3)
4612		     (const_int 0)))]
4613  "TARGET_P9_VECTOR"
4614{
4615  if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
4616    {
4617      emit_move_insn (operands[0], const0_rtx);
4618      DONE;
4619    }
4620
4621  operands[3] = gen_reg_rtx (CCFPmode);
4622})
4623
4624(define_insn "*xscmpexpqp"
4625  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4626	(compare:CCFP
4627	 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4628		          (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4629	  UNSPEC_VSX_SCMPEXPQP)
4630	 (match_operand:SI 3 "zero_constant" "j")))]
4631  "TARGET_P9_VECTOR"
4632  "xscmpexpqp %0,%1,%2"
4633  [(set_attr "type" "fpcompare")])
4634
4635;; VSX Scalar Test Data Class Quad-Precision
4636;;  (Expansion for scalar_test_data_class (__ieee128, int))
4637;;   (Has side effect of setting the lt bit if operand 1 is negative,
4638;;    setting the eq bit if any of the conditions tested by operand 2
4639;;    are satisfied, and clearing the gt and unordered bits to zero.)
4640(define_expand "xststdcqp_<mode>"
4641  [(set (match_dup 3)
4642	(compare:CCFP
4643	 (unspec:IEEE128
4644	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4645	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4646	  UNSPEC_VSX_STSTDC)
4647	 (const_int 0)))
4648   (set (match_operand:SI 0 "register_operand" "=r")
4649	(eq:SI (match_dup 3)
4650	       (const_int 0)))]
4651  "TARGET_P9_VECTOR"
4652{
4653  operands[3] = gen_reg_rtx (CCFPmode);
4654})
4655
4656;; VSX Scalar Test Data Class Double- and Single-Precision
4657;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4658;;   if any of the conditions tested by operand 2 are satisfied.
4659;;   The gt and unordered bits are cleared to zero.)
4660(define_expand "xststdc<sd>p"
4661  [(set (match_dup 3)
4662	(compare:CCFP
4663	 (unspec:SFDF
4664	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4665	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4666	  UNSPEC_VSX_STSTDC)
4667	 (match_dup 4)))
4668   (set (match_operand:SI 0 "register_operand" "=r")
4669	(eq:SI (match_dup 3)
4670	       (const_int 0)))]
4671  "TARGET_P9_VECTOR"
4672{
4673  operands[3] = gen_reg_rtx (CCFPmode);
4674  operands[4] = CONST0_RTX (SImode);
4675})
4676
4677;; The VSX Scalar Test Negative Quad-Precision
4678(define_expand "xststdcnegqp_<mode>"
4679  [(set (match_dup 2)
4680	(compare:CCFP
4681	 (unspec:IEEE128
4682	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4683	   (const_int 0)]
4684	  UNSPEC_VSX_STSTDC)
4685	 (const_int 0)))
4686   (set (match_operand:SI 0 "register_operand" "=r")
4687	(lt:SI (match_dup 2)
4688	       (const_int 0)))]
4689  "TARGET_P9_VECTOR"
4690{
4691  operands[2] = gen_reg_rtx (CCFPmode);
4692})
4693
4694;; The VSX Scalar Test Negative Double- and Single-Precision
4695(define_expand "xststdcneg<sd>p"
4696  [(set (match_dup 2)
4697	(compare:CCFP
4698	 (unspec:SFDF
4699	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4700	   (const_int 0)]
4701	  UNSPEC_VSX_STSTDC)
4702	 (match_dup 3)))
4703   (set (match_operand:SI 0 "register_operand" "=r")
4704	(lt:SI (match_dup 2)
4705	       (const_int 0)))]
4706  "TARGET_P9_VECTOR"
4707{
4708  operands[2] = gen_reg_rtx (CCFPmode);
4709  operands[3] = CONST0_RTX (SImode);
4710})
4711
4712(define_insn "*xststdcqp_<mode>"
4713  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4714	(compare:CCFP
4715	 (unspec:IEEE128
4716	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4717	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4718	  UNSPEC_VSX_STSTDC)
4719	 (const_int 0)))]
4720  "TARGET_P9_VECTOR"
4721  "xststdcqp %0,%1,%2"
4722  [(set_attr "type" "fpcompare")])
4723
4724(define_insn "*xststdc<sd>p"
4725  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4726	(compare:CCFP
4727	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4728		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
4729	  UNSPEC_VSX_STSTDC)
4730	 (match_operand:SI 3 "zero_constant" "j")))]
4731  "TARGET_P9_VECTOR"
4732  "xststdc<sd>p %0,%x1,%2"
4733  [(set_attr "type" "fpcompare")])
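
;; A hedged usage sketch (assuming the documented scalar_test_data_class and
;; scalar_test_neg built-ins and the usual DCMX encoding, where 0x40 tests
;; NaN, 0x20 tests +inf and 0x10 tests -inf; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	int
;;	is_nan_or_inf (double x)
;;	{
;;	  return scalar_test_data_class (x, 0x70);	/* xststdcdp  */
;;	}
;;
;;	int
;;	is_neg (double x)
;;	{
;;	  return scalar_test_neg (x);	/* tests the sign, including -0.0  */
;;	}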
4734
4735;; VSX Vector Extract Exponent Double and Single Precision
4736(define_insn "xvxexp<sd>p"
4737  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4738	(unspec:VSX_F
4739	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4740	 UNSPEC_VSX_VXEXP))]
4741  "TARGET_P9_VECTOR"
4742  "xvxexp<sd>p %x0,%x1"
4743  [(set_attr "type" "vecsimple")])
4744
4745;; VSX Vector Extract Significand Double and Single Precision
4746(define_insn "xvxsig<sd>p"
4747  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4748	(unspec:VSX_F
4749	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4750	 UNSPEC_VSX_VXSIG))]
4751  "TARGET_P9_VECTOR"
4752  "xvxsig<sd>p %x0,%x1"
4753  [(set_attr "type" "vecsimple")])
4754
4755;; VSX Vector Insert Exponent Double and Single Precision
4756(define_insn "xviexp<sd>p"
4757  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4758	(unspec:VSX_F
4759	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4760	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4761	 UNSPEC_VSX_VIEXP))]
4762  "TARGET_P9_VECTOR"
4763  "xviexp<sd>p %x0,%x1,%x2"
4764  [(set_attr "type" "vecsimple")])
4765
4766;; VSX Vector Test Data Class Double and Single Precision
4767;; The corresponding elements of the result vector are all ones
4768;; if any of the conditions tested by operand 3 are satisfied.
4769(define_insn "xvtstdc<sd>p"
4770  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4771	(unspec:<VSI>
4772	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4773	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
4774	 UNSPEC_VSX_VTSTDC))]
4775  "TARGET_P9_VECTOR"
4776  "xvtstdc<sd>p %x0,%x1,%2"
4777  [(set_attr "type" "vecsimple")])
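
;; A hedged sketch (assuming the documented vec_test_data_class built-in and
;; the same DCMX encoding as the scalar forms; -mcpu=power9): mark the lanes
;; holding a NaN or an infinity.
;;
;;	#include <altivec.h>
;;
;;	vector bool int
;;	lanes_nan_or_inf (vector float v)
;;	{
;;	  return vec_test_data_class (v, 0x70);	/* xvtstdcsp  */
;;	}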
4778
4779;; ISA 3.0 String Operations Support
4780
4781;; Compare vectors producing a vector result and a predicate, setting CR6
4782;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4783;; v4si modes.  There is no need to match the v4sf, v2df, or v2di modes,
4784;; because not-equal comparisons on those are expanded using Power8
4785;; instructions instead.
4786(define_insn "*vsx_ne_<mode>_p"
4787  [(set (reg:CC CR6_REGNO)
4788	(unspec:CC
4789	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4790		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4791	 UNSPEC_PREDICATE))
4792   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4793	(ne:VSX_EXTRACT_I (match_dup 1)
4794			  (match_dup 2)))]
4795  "TARGET_P9_VECTOR"
4796  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4797  [(set_attr "type" "vecsimple")])
4798
4799(define_insn "*vector_nez_<mode>_p"
4800  [(set (reg:CC CR6_REGNO)
4801	(unspec:CC [(unspec:VI
4802		     [(match_operand:VI 1 "gpc_reg_operand" "v")
4803		      (match_operand:VI 2 "gpc_reg_operand" "v")]
4804		     UNSPEC_NEZ_P)]
4805	 UNSPEC_PREDICATE))
4806   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4807	(unspec:VI [(match_dup 1)
4808		    (match_dup 2)]
4809	 UNSPEC_NEZ_P))]
4810  "TARGET_P9_VECTOR"
4811  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4812  [(set_attr "type" "vecsimple")])
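
;; A hedged usage sketch (assuming the documented vec_cmpnez built-in;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	vector bool char
;;	ne_or_zero (vector unsigned char a, vector unsigned char b)
;;	{
;;	  /* All-ones lanes where a and b differ or either byte is zero,
;;	     which is what string searches want.  */
;;	  return vec_cmpnez (a, b);	/* vcmpnezb  */
;;	}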
4813
4814;; Return first position of match between vectors using natural element
4815;; order for both LE and BE execution modes.
4816(define_expand "first_match_index_<mode>"
4817  [(match_operand:SI 0 "register_operand")
4818   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4819	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4820  UNSPEC_VSX_FIRST_MATCH_INDEX)]
4821  "TARGET_P9_VECTOR"
4822{
4823  int sh;
4824
4825  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4826  rtx not_result = gen_reg_rtx (<MODE>mode);
4827
4828  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4829					     operands[2]));
4830  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4831
4832  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4833
4834  if (<MODE>mode == V16QImode)
4835    {
4836      if (!BYTES_BIG_ENDIAN)
4837        emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4838      else
4839        emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4840    }
4841  else
4842    {
4843      rtx tmp = gen_reg_rtx (SImode);
4844      if (!BYTES_BIG_ENDIAN)
4845        emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4846      else
4847        emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4848      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4849    }
4850  DONE;
4851})
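
;; A hedged usage sketch (assuming the documented vec_first_match_index
;; built-in; -mcpu=power9): the result is an element index in natural
;; order, or the element count when there is no match.
;;
;;	#include <altivec.h>
;;
;;	int
;;	first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  return vec_first_match_index (a, b);	/* 0..15, or 16  */
;;	}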
4852
4853;; Return first position of match between vectors or end of string (EOS) using
4854;; natural element order for both LE and BE execution modes.
4855(define_expand "first_match_or_eos_index_<mode>"
4856  [(match_operand:SI 0 "register_operand")
4857   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4858	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4859	      UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4860  "TARGET_P9_VECTOR"
4861{
4862  int sh;
4863  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4864  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4865  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4866  rtx and_result = gen_reg_rtx (<MODE>mode);
4867  rtx result = gen_reg_rtx (<MODE>mode);
4868  rtx vzero = gen_reg_rtx (<MODE>mode);
4869
4870  /* Vector with zeros in elements that correspond to zeros in operands.  */
4871  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4872  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4873  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4874  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4875
4876  /* Vector with ones in elements that do not match or are zero.  */
4877  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4878                                             operands[2]));
4879
4880  /* Create vector with ones in elements where there was a zero in one of
4881     the source elements or the elements match.  */
4882  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4883  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4884
4885  if (<MODE>mode == V16QImode)
4886    {
4887      if (!BYTES_BIG_ENDIAN)
4888        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4889      else
4890        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4891    }
4892  else
4893    {
4894      rtx tmp = gen_reg_rtx (SImode);
4895      if (!BYTES_BIG_ENDIAN)
4896        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4897      else
4898        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4899      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4900    }
4901  DONE;
4902})
4903
4904;; Return first position of mismatch between vectors using natural
4905;; element order for both LE and BE execution modes.
4906(define_expand "first_mismatch_index_<mode>"
4907  [(match_operand:SI 0 "register_operand")
4908   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4909	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4910	      UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4911  "TARGET_P9_VECTOR"
4912{
4913  int sh;
4914  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4915
4916  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4917					    operands[2]));
4918  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4919
4920  if (<MODE>mode == V16QImode)
4921    {
4922      if (!BYTES_BIG_ENDIAN)
4923        emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4924      else
4925        emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4926    }
4927  else
4928    {
4929      rtx tmp = gen_reg_rtx (SImode);
4930      if (!BYTES_BIG_ENDIAN)
4931        emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4932      else
4933        emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4934      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4935    }
4936  DONE;
4937})
4938
4939;; Return first position of mismatch between vectors or end of string (EOS)
4940;; using natural element order for both LE and BE execution modes.
4941(define_expand "first_mismatch_or_eos_index_<mode>"
4942  [(match_operand:SI 0 "register_operand")
4943   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4944	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4945	      UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4946  "TARGET_P9_VECTOR"
4947{
4948  int sh;
4949  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4950  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4951  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4952  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4953  rtx and_result = gen_reg_rtx (<MODE>mode);
4954  rtx result = gen_reg_rtx (<MODE>mode);
4955  rtx vzero = gen_reg_rtx (<MODE>mode);
4956
4957  /* Vector with zeros in elements that correspond to zeros in operands.  */
4958  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4959
4960  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4961  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4962  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4963
4964  /* Vector with ones in elements that match (complement of vcmpnez).  */
4965  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4966                                             operands[2]));
4967  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4968
4969  /* Create vector with ones in elements where there was a zero in one of
4970     the source elements or the elements did not match.  */
4971  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4972  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4973
4974  if (<MODE>mode == V16QImode)
4975    {
4976      if (!BYTES_BIG_ENDIAN)
4977        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4978      else
4979        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4980    }
4981  else
4982    {
4983      rtx tmp = gen_reg_rtx (SImode);
4984      if (!BYTES_BIG_ENDIAN)
4985        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4986      else
4987        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4988      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4989    }
4990  DONE;
4991})
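
;; A hedged sketch of the EOS-aware variants (assuming the documented
;; vec_first_mismatch_or_eos_index built-in; -mcpu=power9), e.g. for a
;; strncmp-style inner loop over 16-byte chunks:
;;
;;	#include <altivec.h>
;;
;;	int
;;	chunk_pos (vector unsigned char a, vector unsigned char b)
;;	{
;;	  /* First index where a and b differ or either string ends
;;	     (a zero byte); 16 when neither occurs.  */
;;	  return vec_first_mismatch_or_eos_index (a, b);
;;	}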
4992
4993;; Load VSX Vector with Length
4994(define_expand "lxvl"
4995  [(set (match_dup 3)
4996        (ashift:DI (match_operand:DI 2 "register_operand")
4997                   (const_int 56)))
4998   (set (match_operand:V16QI 0 "vsx_register_operand")
4999	(unspec:V16QI
5000	 [(match_operand:DI 1 "gpc_reg_operand")
5001          (mem:V16QI (match_dup 1))
5002	  (match_dup 3)]
5003	 UNSPEC_LXVL))]
5004  "TARGET_P9_VECTOR && TARGET_64BIT"
5005{
5006  operands[3] = gen_reg_rtx (DImode);
5007})
5008
5009(define_insn "*lxvl"
5010  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5011	(unspec:V16QI
5012	 [(match_operand:DI 1 "gpc_reg_operand" "b")
5013	  (mem:V16QI (match_dup 1))
5014	  (match_operand:DI 2 "register_operand" "r")]
5015	 UNSPEC_LXVL))]
5016  "TARGET_P9_VECTOR && TARGET_64BIT"
5017  "lxvl %x0,%1,%2"
5018  [(set_attr "type" "vecload")])
5019
5020(define_insn "lxvll"
5021  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5022	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5023                       (mem:V16QI (match_dup 1))
5024		       (match_operand:DI 2 "register_operand" "r")]
5025		      UNSPEC_LXVLL))]
5026  "TARGET_P9_VECTOR"
5027  "lxvll %x0,%1,%2"
5028  [(set_attr "type" "vecload")])
5029
5030;; Expand for builtin xl_len_r
5031(define_expand "xl_len_r"
5032  [(match_operand:V16QI 0 "vsx_register_operand")
5033   (match_operand:DI 1 "register_operand")
5034   (match_operand:DI 2 "register_operand")]
5035  ""
5036{
5037  rtx shift_mask = gen_reg_rtx (V16QImode);
5038  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5039  rtx tmp = gen_reg_rtx (DImode);
5040
5041  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5042  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5043  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5044  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5045	     shift_mask));
5046  DONE;
5047})
5048
5049(define_insn "stxvll"
5050  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5051	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5052		       (mem:V16QI (match_dup 1))
5053		       (match_operand:DI 2 "register_operand" "r")]
5054	              UNSPEC_STXVLL))]
5055  "TARGET_P9_VECTOR"
5056  "stxvll %x0,%1,%2"
5057  [(set_attr "type" "vecstore")])
5058
5059;; Store VSX Vector with Length
5060(define_expand "stxvl"
5061  [(set (match_dup 3)
5062	(ashift:DI (match_operand:DI 2 "register_operand")
5063		   (const_int 56)))
5064   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5065	(unspec:V16QI
5066	 [(match_operand:V16QI 0 "vsx_register_operand")
5067	  (mem:V16QI (match_dup 1))
5068	  (match_dup 3)]
5069	 UNSPEC_STXVL))]
5070  "TARGET_P9_VECTOR && TARGET_64BIT"
5071{
5072  operands[3] = gen_reg_rtx (DImode);
5073})
5074
5075(define_insn "*stxvl"
5076  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5077	(unspec:V16QI
5078	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5079	  (mem:V16QI (match_dup 1))
5080	  (match_operand:DI 2 "register_operand" "r")]
5081	 UNSPEC_STXVL))]
5082  "TARGET_P9_VECTOR && TARGET_64BIT"
5083  "stxvl %x0,%1,%2"
5084  [(set_attr "type" "vecstore")])
5085
5086;; Expand for builtin xst_len_r
5087(define_expand "xst_len_r"
5088  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5089   (match_operand:DI 1 "register_operand" "b")
5090   (match_operand:DI 2 "register_operand" "r")]
5091  "UNSPEC_XST_LEN_R"
5092{
5093  rtx shift_mask = gen_reg_rtx (V16QImode);
5094  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5095  rtx tmp = gen_reg_rtx (DImode);
5096
5097  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5098  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5099	     shift_mask));
5100  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5101  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5102  DONE;
5103})
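
;; A hedged usage sketch (assuming the documented vec_xl_len and vec_xst_len
;; built-ins; -mcpu=power9 -m64): copy up to 16 bytes without touching
;; storage beyond the requested length.
;;
;;	#include <stddef.h>
;;	#include <altivec.h>
;;
;;	void
;;	copy_upto_16 (unsigned char *dst, const unsigned char *src, size_t n)
;;	{
;;	  vector unsigned char v = vec_xl_len ((unsigned char *) src, n);
;;	  vec_xst_len (v, dst, n);	/* lxvl/stxvl underneath  */
;;	}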
5104
5105;; Vector Compare Not Equal Byte (expressed as not+eq rather than an unspec)
5106(define_insn "vcmpneb"
5107  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5108	 (not:V16QI
5109	   (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5110		     (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5111  "TARGET_P9_VECTOR"
5112  "vcmpneb %0,%1,%2"
5113  [(set_attr "type" "vecsimple")])
5114
5115;; Vector Compare Not Equal or Zero Byte
5116(define_insn "vcmpnezb"
5117  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5118	(unspec:V16QI
5119	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5120	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5121	 UNSPEC_VCMPNEZB))]
5122  "TARGET_P9_VECTOR"
5123  "vcmpnezb %0,%1,%2"
5124  [(set_attr "type" "vecsimple")])
5125
5126;; Vector Compare Not Equal or Zero Byte, record form (also sets CR6)
5127(define_insn "vcmpnezb_p"
5128  [(set (reg:CC CR6_REGNO)
5129	(unspec:CC
5130	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5131	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5132	 UNSPEC_VCMPNEZB))
5133   (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5134	(unspec:V16QI
5135	 [(match_dup 1)
5136	  (match_dup 2)]
5137	 UNSPEC_VCMPNEZB))]
5138  "TARGET_P9_VECTOR"
5139  "vcmpnezb. %0,%1,%2"
5140  [(set_attr "type" "vecsimple")])
5141
5142;; Vector Compare Not Equal Half Word (expressed as not+eq rather than an unspec)
5143(define_insn "vcmpneh"
5144  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5145	(not:V8HI
5146	  (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5147		   (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5148  "TARGET_P9_VECTOR"
5149  "vcmpneh %0,%1,%2"
5150  [(set_attr "type" "vecsimple")])
5151
5152;; Vector Compare Not Equal or Zero Half Word
5153(define_insn "vcmpnezh"
5154  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5155	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5156		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
5157	 UNSPEC_VCMPNEZH))]
5158  "TARGET_P9_VECTOR"
5159  "vcmpnezh %0,%1,%2"
5160  [(set_attr "type" "vecsimple")])
5161
5162;; Vector Compare Not Equal Word (expressed as not+eq rather than an unspec)
5163(define_insn "vcmpnew"
5164  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5165	(not:V4SI
5166	  (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5167		   (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5168  "TARGET_P9_VECTOR"
5169  "vcmpnew %0,%1,%2"
5170  [(set_attr "type" "vecsimple")])
5171
5172;; Vector Compare Not Equal or Zero Word
5173(define_insn "vcmpnezw"
5174  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5175	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5176		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
5177	 UNSPEC_VCMPNEZW))]
5178  "TARGET_P9_VECTOR"
5179  "vcmpnezw %0,%1,%2"
5180  [(set_attr "type" "vecsimple")])
5181
5182;; Vector Count Leading Zero Least-Significant Bits Byte
5183(define_insn "vclzlsbb_<mode>"
5184  [(set (match_operand:SI 0 "register_operand" "=r")
5185	(unspec:SI
5186	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5187	 UNSPEC_VCLZLSBB))]
5188  "TARGET_P9_VECTOR"
5189  "vclzlsbb %0,%1"
5190  [(set_attr "type" "vecsimple")])
5191
5192;; Vector Count Trailing Zero Least-Significant Bits Byte
5193(define_insn "vctzlsbb_<mode>"
5194  [(set (match_operand:SI 0 "register_operand" "=r")
5195	(unspec:SI
5196	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5197	 UNSPEC_VCTZLSBB))]
5198  "TARGET_P9_VECTOR"
5199  "vctzlsbb %0,%1"
5200  [(set_attr "type" "vecsimple")])
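
;; A hedged sketch (assuming the documented vec_cnttz_lsbb built-in;
;; -mcpu=power9): count the bytes before the first one whose
;; least-significant bit is set, e.g. to turn a vcmpnez result into a byte
;; position.  (The LE/BE choice between the ctz and clz forms is handled by
;; the expanders above.)
;;
;;	#include <altivec.h>
;;
;;	int
;;	mask_pos (vector unsigned char mask)
;;	{
;;	  return vec_cnttz_lsbb (mask);	/* vctzlsbb  */
;;	}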
5201
5202;; Vector Extract Unsigned Byte Left-Indexed
5203(define_insn "vextublx"
5204  [(set (match_operand:SI 0 "register_operand" "=r")
5205	(unspec:SI
5206	 [(match_operand:SI 1 "register_operand" "r")
5207	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5208	 UNSPEC_VEXTUBLX))]
5209  "TARGET_P9_VECTOR"
5210  "vextublx %0,%1,%2"
5211  [(set_attr "type" "vecsimple")])
5212
5213;; Vector Extract Unsigned Byte Right-Indexed
5214(define_insn "vextubrx"
5215  [(set (match_operand:SI 0 "register_operand" "=r")
5216	(unspec:SI
5217	 [(match_operand:SI 1 "register_operand" "r")
5218	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5219	 UNSPEC_VEXTUBRX))]
5220  "TARGET_P9_VECTOR"
5221  "vextubrx %0,%1,%2"
5222  [(set_attr "type" "vecsimple")])
5223
5224;; Vector Extract Unsigned Half Word Left-Indexed
5225(define_insn "vextuhlx"
5226  [(set (match_operand:SI 0 "register_operand" "=r")
5227	(unspec:SI
5228	 [(match_operand:SI 1 "register_operand" "r")
5229	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5230	 UNSPEC_VEXTUHLX))]
5231  "TARGET_P9_VECTOR"
5232  "vextuhlx %0,%1,%2"
5233  [(set_attr "type" "vecsimple")])
5234
5235;; Vector Extract Unsigned Half Word Right-Indexed
5236(define_insn "vextuhrx"
5237  [(set (match_operand:SI 0 "register_operand" "=r")
5238	(unspec:SI
5239	 [(match_operand:SI 1 "register_operand" "r")
5240	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5241	 UNSPEC_VEXTUHRX))]
5242  "TARGET_P9_VECTOR"
5243  "vextuhrx %0,%1,%2"
5244  [(set_attr "type" "vecsimple")])
5245
5246;; Vector Extract Unsigned Word Left-Indexed
5247(define_insn "vextuwlx"
5248  [(set (match_operand:SI 0 "register_operand" "=r")
5249	(unspec:SI
5250	 [(match_operand:SI 1 "register_operand" "r")
5251	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5252	 UNSPEC_VEXTUWLX))]
5253  "TARGET_P9_VECTOR"
5254  "vextuwlx %0,%1,%2"
5255  [(set_attr "type" "vecsimple")])
5256
5257;; Vector Extract Unsigned Word Right-Indexed
5258(define_insn "vextuwrx"
5259  [(set (match_operand:SI 0 "register_operand" "=r")
5260	(unspec:SI
5261	 [(match_operand:SI 1 "register_operand" "r")
5262	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5263	 UNSPEC_VEXTUWRX))]
5264  "TARGET_P9_VECTOR"
5265  "vextuwrx %0,%1,%2"
5266  [(set_attr "type" "vecsimple")])
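
;; A hedged illustration: with -mcpu=power9 a variable-index extract such as
;; the following can be expanded through these left/right indexed
;; instructions instead of spilling the vector to memory.
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	get_lane (vector unsigned int v, int i)
;;	{
;;	  return vec_extract (v, i);	/* e.g. vextuwrx on LE  */
;;	}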
5267
5268;; Vector insert/extract word at arbitrary byte offsets.  Note, the little
5269;; endian version needs to adjust the byte number, and the V4SI element
5270;; order in insert4b.
5271(define_insn "extract4b"
5272  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5273       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5274                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
5275                    UNSPEC_XXEXTRACTUW))]
5276  "TARGET_P9_VECTOR"
5277{
5278  if (!BYTES_BIG_ENDIAN)
5279    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5280
5281  return "xxextractuw %x0,%x1,%2";
5282})
5283
5284(define_expand "insert4b"
5285  [(set (match_operand:V16QI 0 "vsx_register_operand")
5286	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5287		       (match_operand:V16QI 2 "vsx_register_operand")
5288		       (match_operand:QI 3 "const_0_to_12_operand")]
5289		   UNSPEC_XXINSERTW))]
5290  "TARGET_P9_VECTOR"
5291{
5292  if (!BYTES_BIG_ENDIAN)
5293    {
5294      rtx op1 = operands[1];
5295      rtx v4si_tmp = gen_reg_rtx (V4SImode);
5296      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5297      operands[1] = v4si_tmp;
5298      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5299    }
5300})
5301
5302(define_insn "*insert4b_internal"
5303  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5304	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5305		       (match_operand:V16QI 2 "vsx_register_operand" "0")
5306		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
5307		   UNSPEC_XXINSERTW))]
5308  "TARGET_P9_VECTOR"
5309  "xxinsertw %x0,%x1,%3"
5310  [(set_attr "type" "vecperm")])
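
;; A rough C model of the xxextractuw form used by extract4b above, for
;; illustration only (model_* is a made-up name; b is the byte offset in
;; left-to-right numbering, before the LE adjustment):
;;
;;	#include <string.h>
;;
;;	void
;;	model_xxextractuw (unsigned char dst[16],
;;			   const unsigned char src[16], unsigned int b)
;;	{
;;	  /* The 4 bytes at offset b land in bytes 4..7 of the target;
;;	     everything else is zeroed.  */
;;	  memset (dst, 0, 16);
;;	  memcpy (dst + 4, src + b, 4);
;;	}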
5311
5312
5313;; Extract four 32-bit float values from the left four elements of an
5314;; eight-element vector of 16-bit float values.
5315(define_expand "vextract_fp_from_shorth"
5316  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5317	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5318   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5319  "TARGET_P9_VECTOR"
5320{
5321  int i;
5322  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5323  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5324
5325  rtx rvals[16];
5326  rtx mask = gen_reg_rtx (V16QImode);
5327  rtx tmp = gen_reg_rtx (V16QImode);
5328  rtvec v;
5329
5330  for (i = 0; i < 16; i++)
5331    if (!BYTES_BIG_ENDIAN)
5332      rvals[i] = GEN_INT (vals_le[i]);
5333    else
5334      rvals[i] = GEN_INT (vals_be[i]);
5335
5336  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5337     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5338     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5339     conversion instruction.  */
5340  v = gen_rtvec_v (16, rvals);
5341  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5342  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5343					  operands[1], mask));
5344  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5345  DONE;
5346})
5347
5348;; Extract four 32-bit float values from the right four elements of an
5349;; eight-element vector of 16-bit float values.
5350(define_expand "vextract_fp_from_shortl"
5351  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5352	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5353	UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5354  "TARGET_P9_VECTOR"
5355{
5356  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5357  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5358
5359  int i;
5360  rtx rvals[16];
5361  rtx mask = gen_reg_rtx (V16QImode);
5362  rtx tmp = gen_reg_rtx (V16QImode);
5363  rtvec v;
5364
5365  for (i = 0; i < 16; i++)
5366    if (!BYTES_BIG_ENDIAN)
5367      rvals[i] = GEN_INT (vals_le[i]);
5368    else
5369      rvals[i] = GEN_INT (vals_be[i]);
5370
5371  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5372     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5373     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5374     conversion instruction.  */
5375  v = gen_rtvec_v (16, rvals);
5376  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5377  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5378					  operands[1], mask));
5379  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5380  DONE;
5381})
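
;; A hedged usage sketch (assuming the documented
;; vec_extract_fp32_from_shorth built-in; -mcpu=power9): widen four IEEE
;; half-precision values to float.
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	widen_left4 (vector unsigned short h)
;;	{
;;	  return vec_extract_fp32_from_shorth (h);
;;	}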
5382
5383;; Support for ISA 3.0 vector byte reverse
5384
5385;; Swap all bytes within a vector
5386(define_insn "p9_xxbrq_v1ti"
5387  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5388	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5389  "TARGET_P9_VECTOR"
5390  "xxbrq %x0,%x1"
5391  [(set_attr "type" "vecperm")])
5392
5393(define_expand "p9_xxbrq_v16qi"
5394  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5395   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5396  "TARGET_P9_VECTOR"
5397{
5398  rtx op0 = gen_reg_rtx (V1TImode);
5399  rtx op1 = gen_lowpart (V1TImode, operands[1]);
5400  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5401  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5402  DONE;
5403})
5404
5405;; Swap all bytes in each 64-bit element
5406(define_insn "p9_xxbrd_v2di"
5407  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5408	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5409  "TARGET_P9_VECTOR"
5410  "xxbrd %x0,%x1"
5411  [(set_attr "type" "vecperm")])
5412
5413(define_expand "p9_xxbrd_v2df"
5414  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5415   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5416  "TARGET_P9_VECTOR"
5417{
5418  rtx op0 = gen_reg_rtx (V2DImode);
5419  rtx op1 = gen_lowpart (V2DImode, operands[1]);
5420  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5421  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5422  DONE;
5423})
5424
5425;; Swap all bytes in each 32-bit element
5426(define_insn "p9_xxbrw_v4si"
5427  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5428	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5429  "TARGET_P9_VECTOR"
5430  "xxbrw %x0,%x1"
5431  [(set_attr "type" "vecperm")])
5432
5433(define_expand "p9_xxbrw_v4sf"
5434  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5435   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5436  "TARGET_P9_VECTOR"
5437{
5438  rtx op0 = gen_reg_rtx (V4SImode);
5439  rtx op1 = gen_lowpart (V4SImode, operands[1]);
5440  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5441  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5442  DONE;
5443})
5444
5445;; Swap all bytes in each element of vector
5446(define_expand "revb_<mode>"
5447  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5448   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5449  ""
5450{
5451  if (TARGET_P9_VECTOR)
5452    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5453  else
5454    {
5455      /* Want to have the elements in reverse order relative
5456	 to the endian mode in use, i.e. in LE mode, put elements
5457	 in BE order.  */
5458      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5459      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5460					   operands[1], sel));
5461    }
5462
5463  DONE;
5464})
5465
5466;; Reversing bytes in vector char is just a NOP.
5467(define_expand "revb_v16qi"
5468  [(set (match_operand:V16QI 0 "vsx_register_operand")
5469	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5470  ""
5471{
5472  emit_move_insn (operands[0], operands[1]);
5473  DONE;
5474})
5475
5476;; Swap all bytes in each 16-bit element
5477(define_insn "p9_xxbrh_v8hi"
5478  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5479	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5480  "TARGET_P9_VECTOR"
5481  "xxbrh %x0,%x1"
5482  [(set_attr "type" "vecperm")])
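
;; A hedged usage sketch (assuming the documented vec_revb built-in): byte
;; swap each element, e.g. to read big-endian 32-bit data on a little-endian
;; system.  With -mcpu=power9 this is a single xxbrw.
;;
;;	#include <altivec.h>
;;
;;	vector unsigned int
;;	bswap_lanes (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}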
5483
5484
5485;; Operand numbers for the following peephole2
5486(define_constants
5487  [(SFBOOL_TMP_GPR		 0)		;; GPR temporary
5488   (SFBOOL_TMP_VSX		 1)		;; vector temporary
5489   (SFBOOL_MFVSR_D		 2)		;; move to gpr dest
5490   (SFBOOL_MFVSR_A		 3)		;; move to gpr src
5491   (SFBOOL_BOOL_D		 4)		;; and/ior/xor dest
5492   (SFBOOL_BOOL_A1		 5)		;; and/ior/xor arg1
5493   (SFBOOL_BOOL_A2		 6)		;; and/ior/xor arg2
5494   (SFBOOL_SHL_D		 7)		;; shift left dest
5495   (SFBOOL_SHL_A		 8)		;; shift left arg
5496   (SFBOOL_MTVSR_D		 9)		;; move to vector dest
5497   (SFBOOL_MFVSR_A_V4SF		10)		;; SFBOOL_MFVSR_A as V4SFmode
5498   (SFBOOL_BOOL_A_DI		11)		;; SFBOOL_BOOL_A1/A2 as DImode
5499   (SFBOOL_TMP_VSX_DI		12)		;; SFBOOL_TMP_VSX as DImode
5500   (SFBOOL_MTVSR_D_V4SF		13)])		;; SFBOOL_MTVSR_D as V4SFmode
5501
5502;; Attempt to optimize some common GLIBC operations using logical operations to
5503;; pick apart SFmode operations.  For example, there is code from e_powf.c
5504;; after macro expansion that looks like:
5505;;
5506;;	typedef union {
5507;;	  float value;
5508;;	  uint32_t word;
5509;;	} ieee_float_shape_type;
5510;;
5511;;	float t1;
5512;;	int32_t is;
5513;;
5514;;	do {
5515;;	  ieee_float_shape_type gf_u;
5516;;	  gf_u.value = (t1);
5517;;	  (is) = gf_u.word;
5518;;	} while (0);
5519;;
5520;;	do {
5521;;	  ieee_float_shape_type sf_u;
5522;;	  sf_u.word = (is & 0xfffff000);
5523;;	  (t1) = sf_u.value;
5524;;	} while (0);
5525;;
5526;;
5527;; This would result in two direct move operations (convert to memory format,
5528;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5529;; scalar format).  With this peephole, we eliminate the direct move to the
5530;; GPR, and instead move the integer mask value to the vector register after a
5531;; shift and do the VSX logical operation.
5532
5533;; The insns for dealing with SFmode in GPR registers looks like:
5534;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5535;;
5536;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5537;;
5538;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5539;;
5540;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5541;;
5542;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5543;;
5544;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5545
5546(define_peephole2
5547  [(match_scratch:DI SFBOOL_TMP_GPR "r")
5548   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5549
5550   ;; MFVSRWZ (aka zero_extend)
5551   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5552	(zero_extend:DI
5553	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5554
5555   ;; AND/IOR/XOR operation on int
5556   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5557	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5558			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5559
5560   ;; SLDI
5561   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5562	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5563		   (const_int 32)))
5564
5565   ;; MTVSRD
5566   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5567	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5568
5569  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5570   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5571      to compare registers when the modes differ.  */
5572   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5573   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5574   && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
5575   && (REG_P (operands[SFBOOL_BOOL_A2])
5576       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5577   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5578       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5579   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5580       || (REG_P (operands[SFBOOL_BOOL_A2])
5581	   && REGNO (operands[SFBOOL_MFVSR_D])
5582		== REGNO (operands[SFBOOL_BOOL_A2])))
5583   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5584   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5585       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5586   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5587  [(set (match_dup SFBOOL_TMP_GPR)
5588	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5589		   (const_int 32)))
5590
5591   (set (match_dup SFBOOL_TMP_VSX_DI)
5592	(match_dup SFBOOL_TMP_GPR))
5593
5594   (set (match_dup SFBOOL_MTVSR_D_V4SF)
5595	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5596			  (match_dup SFBOOL_TMP_VSX)))]
5597{
5598  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5599  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5600  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5601  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5602  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5603  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5604
5605  if (CONST_INT_P (bool_a2))
5606    {
5607      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5608      emit_move_insn (tmp_gpr, bool_a2);
5609      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5610    }
5611  else
5612    {
5613      int regno_bool_a1 = REGNO (bool_a1);
5614      int regno_bool_a2 = REGNO (bool_a2);
5615      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5616			  ? regno_bool_a2 : regno_bool_a1);
5617      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5618    }
5619
5620  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5621  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5622  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5623})
5624
5625;; Support signed/unsigned long long to float conversion vectorization.
5626;; Note that any_float (pc) here is just for code attribute <su>.
5627(define_expand "vec_pack<su>_float_v2di"
5628  [(match_operand:V4SF 0 "vfloat_operand")
5629   (match_operand:V2DI 1 "vint_operand")
5630   (match_operand:V2DI 2 "vint_operand")
5631   (any_float (pc))]
5632  "TARGET_VSX"
5633{
5634  rtx r1 = gen_reg_rtx (V4SFmode);
5635  rtx r2 = gen_reg_rtx (V4SFmode);
5636  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
5637  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
5638  rs6000_expand_extract_even (operands[0], r1, r2);
5639  DONE;
5640})
5641
5642;; Support float to signed/unsigned long long conversion vectorization.
5643;; Note that any_fix (pc) here is just for code attribute <su>.
5644(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
5645  [(match_operand:V2DI 0 "vint_operand")
5646   (match_operand:V4SF 1 "vfloat_operand")
5647   (any_fix (pc))]
5648  "TARGET_VSX"
5649{
5650  rtx reg = gen_reg_rtx (V4SFmode);
5651  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
5652  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5653  DONE;
5654})
5655
5656;; Note that any_fix (pc) here is just for code attribute <su>.
5657(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
5658  [(match_operand:V2DI 0 "vint_operand")
5659   (match_operand:V4SF 1 "vfloat_operand")
5660   (any_fix (pc))]
5661  "TARGET_VSX"
5662{
5663  rtx reg = gen_reg_rtx (V4SFmode);
5664  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
5665  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5666  DONE;
5667})
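
;; A hedged illustration: the conversion expanders above let loops like the
;; following vectorize (e.g. at -O3 -mcpu=power9); the exact instruction
;; selection is up to the vectorizer.
;;
;;	void
;;	narrow (float *restrict f, const long long *restrict ll, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    f[i] = (float) ll[i];	/* pack: two xvcvsxdsp + merge  */
;;	}
;;
;;	void
;;	widen (long long *restrict ll, const float *restrict f, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    ll[i] = (long long) f[i];	/* unpack: interleave + xvcvspsxds  */
;;	}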
5668
5669(define_insn "vsx_<xvcvbf16>"
5670  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5671	(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
5672		      XVCVBF16))]
5673  "TARGET_POWER10"
5674  "<xvcvbf16> %x0,%x1"
5675  [(set_attr "type" "vecfloat")])
5676