xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/sync.md (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj;; GCC machine description for i386 synchronization instructions.
2*38fd1498Szrj;; Copyright (C) 2005-2018 Free Software Foundation, Inc.
3*38fd1498Szrj;;
4*38fd1498Szrj;; This file is part of GCC.
5*38fd1498Szrj;;
6*38fd1498Szrj;; GCC is free software; you can redistribute it and/or modify
7*38fd1498Szrj;; it under the terms of the GNU General Public License as published by
8*38fd1498Szrj;; the Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj;; any later version.
10*38fd1498Szrj;;
11*38fd1498Szrj;; GCC is distributed in the hope that it will be useful,
12*38fd1498Szrj;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*38fd1498Szrj;; GNU General Public License for more details.
15*38fd1498Szrj;;
16*38fd1498Szrj;; You should have received a copy of the GNU General Public License
17*38fd1498Szrj;; along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj;; <http://www.gnu.org/licenses/>.
19*38fd1498Szrj
;; Marker codes for the UNSPEC / UNSPEC_VOLATILE RTXes used by the
;; synchronization patterns in this file.
20*38fd1498Szrj(define_c_enum "unspec" [
21*38fd1498Szrj  UNSPEC_LFENCE
22*38fd1498Szrj  UNSPEC_SFENCE
23*38fd1498Szrj  UNSPEC_MFENCE
24*38fd1498Szrj
25*38fd1498Szrj  UNSPEC_FILD_ATOMIC
26*38fd1498Szrj  UNSPEC_FIST_ATOMIC
27*38fd1498Szrj
28*38fd1498Szrj  UNSPEC_LDX_ATOMIC
29*38fd1498Szrj  UNSPEC_STX_ATOMIC
30*38fd1498Szrj
31*38fd1498Szrj  ;; __atomic support
32*38fd1498Szrj  UNSPEC_LDA
33*38fd1498Szrj  UNSPEC_STA
34*38fd1498Szrj])
35*38fd1498Szrj
;; Volatile unspecs: operations that must never be deleted, duplicated,
;; combined or moved by the optimizers.
36*38fd1498Szrj(define_c_enum "unspecv" [
37*38fd1498Szrj  UNSPECV_CMPXCHG
38*38fd1498Szrj  UNSPECV_XCHG
39*38fd1498Szrj  UNSPECV_LOCK
40*38fd1498Szrj])
41*38fd1498Szrj
;; Memory fences.  Each expander materializes a volatile BLKmode MEM on
;; a SCRATCH address, so the fence appears to read and write arbitrary
;; memory; this keeps the optimizers from deleting the fence or moving
;; other memory accesses across it.
42*38fd1498Szrj(define_expand "sse2_lfence"
43*38fd1498Szrj  [(set (match_dup 0)
44*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
45*38fd1498Szrj  "TARGET_SSE2"
46*38fd1498Szrj{
47*38fd1498Szrj  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
48*38fd1498Szrj  MEM_VOLATILE_P (operands[0]) = 1;
49*38fd1498Szrj})
50*38fd1498Szrj
51*38fd1498Szrj(define_insn "*sse2_lfence"
52*38fd1498Szrj  [(set (match_operand:BLK 0)
53*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
54*38fd1498Szrj  "TARGET_SSE2"
55*38fd1498Szrj  "lfence"
56*38fd1498Szrj  [(set_attr "type" "sse")
57*38fd1498Szrj   (set_attr "length_address" "0")
58*38fd1498Szrj   (set_attr "atom_sse_attr" "lfence")
59*38fd1498Szrj   (set_attr "memory" "unknown")])
60*38fd1498Szrj
;; SFENCE is available from SSE on, and also on Athlon-class 3DNow!
;; (TARGET_3DNOW_A) parts.
61*38fd1498Szrj(define_expand "sse_sfence"
62*38fd1498Szrj  [(set (match_dup 0)
63*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
64*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
65*38fd1498Szrj{
66*38fd1498Szrj  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
67*38fd1498Szrj  MEM_VOLATILE_P (operands[0]) = 1;
68*38fd1498Szrj})
69*38fd1498Szrj
70*38fd1498Szrj(define_insn "*sse_sfence"
71*38fd1498Szrj  [(set (match_operand:BLK 0)
72*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
73*38fd1498Szrj  "TARGET_SSE || TARGET_3DNOW_A"
74*38fd1498Szrj  "sfence"
75*38fd1498Szrj  [(set_attr "type" "sse")
76*38fd1498Szrj   (set_attr "length_address" "0")
77*38fd1498Szrj   (set_attr "atom_sse_attr" "fence")
78*38fd1498Szrj   (set_attr "memory" "unknown")])
79*38fd1498Szrj
80*38fd1498Szrj(define_expand "sse2_mfence"
81*38fd1498Szrj  [(set (match_dup 0)
82*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
83*38fd1498Szrj  "TARGET_SSE2"
84*38fd1498Szrj{
85*38fd1498Szrj  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
86*38fd1498Szrj  MEM_VOLATILE_P (operands[0]) = 1;
87*38fd1498Szrj})
88*38fd1498Szrj
;; Real MFENCE.  The condition also accepts TARGET_64BIT alone, since
;; every 64-bit target implies SSE2 availability.
89*38fd1498Szrj(define_insn "mfence_sse2"
90*38fd1498Szrj  [(set (match_operand:BLK 0)
91*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
92*38fd1498Szrj  "TARGET_64BIT || TARGET_SSE2"
93*38fd1498Szrj  "mfence"
94*38fd1498Szrj  [(set_attr "type" "sse")
95*38fd1498Szrj   (set_attr "length_address" "0")
96*38fd1498Szrj   (set_attr "atom_sse_attr" "fence")
97*38fd1498Szrj   (set_attr "memory" "unknown")])
98*38fd1498Szrj
;; Full-barrier substitute when MFENCE is unavailable (32-bit, no SSE2):
;; a locked read-modify-write of the stack top is itself a full barrier.
;; Note the "or" modifies the flags, hence the CC clobber.
99*38fd1498Szrj(define_insn "mfence_nosse"
100*38fd1498Szrj  [(set (match_operand:BLK 0)
101*38fd1498Szrj	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
102*38fd1498Szrj   (clobber (reg:CC FLAGS_REG))]
103*38fd1498Szrj  "!(TARGET_64BIT || TARGET_SSE2)"
104*38fd1498Szrj  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
105*38fd1498Szrj  [(set_attr "memory" "unknown")])
106*38fd1498Szrj
;; Expand __atomic_thread_fence.  Operand 0 holds the C11/C++11 memory
;; model as a constant integer; only SEQ_CST requires a hardware fence
;; on x86 (TSO handles all weaker models).
107*38fd1498Szrj(define_expand "mem_thread_fence"
108*38fd1498Szrj  [(match_operand:SI 0 "const_int_operand")]		;; model
109*38fd1498Szrj  ""
110*38fd1498Szrj{
111*38fd1498Szrj  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));
112*38fd1498Szrj
113*38fd1498Szrj  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
114*38fd1498Szrj     enough not to require barriers of any kind.  */
115*38fd1498Szrj  if (is_mm_seq_cst (model))
116*38fd1498Szrj    {
117*38fd1498Szrj      rtx (*mfence_insn)(rtx);
118*38fd1498Szrj      rtx mem;
119*38fd1498Szrj
      /* Pick the real MFENCE when available, else the lock-or fallback.  */
120*38fd1498Szrj      if (TARGET_64BIT || TARGET_SSE2)
121*38fd1498Szrj	mfence_insn = gen_mfence_sse2;
122*38fd1498Szrj      else
123*38fd1498Szrj	mfence_insn = gen_mfence_nosse;
124*38fd1498Szrj
125*38fd1498Szrj      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
126*38fd1498Szrj      MEM_VOLATILE_P (mem) = 1;
127*38fd1498Szrj
128*38fd1498Szrj      emit_insn (mfence_insn (mem));
129*38fd1498Szrj    }
130*38fd1498Szrj  DONE;
131*38fd1498Szrj})
132*38fd1498Szrj
133*38fd1498Szrj;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations,
134*38fd1498Szrj;; Only beginning at Pentium family processors do we get any guarantee of
135*38fd1498Szrj;; atomicity in aligned 64-bit quantities.  Beginning at P6, we get a
136*38fd1498Szrj;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
137*38fd1498Szrj;;
138*38fd1498Szrj;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
139*38fd1498Szrj;;
140*38fd1498Szrj;; Importantly, *no* processor makes atomicity guarantees for larger
141*38fd1498Szrj;; accesses.  In particular, there's no way to perform an atomic TImode
142*38fd1498Szrj;; move, despite the apparent applicability of MOVDQA et al.
143*38fd1498Szrj
;; Modes that support direct atomic load/store.  DImode qualifies on
;; 64-bit always; on 32-bit only when a 64-bit move can be synthesized
;; through the FPU or SSE on a Pentium-class CPU (TARGET_CMPXCHG8B is
;; the stand-in for "Pentium", per the comment above).
144*38fd1498Szrj(define_mode_iterator ATOMIC
145*38fd1498Szrj   [QI HI SI
146*38fd1498Szrj    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
147*38fd1498Szrj   ])
148*38fd1498Szrj
;; Atomic load.  Operand 2 is the memory model (unused here: an aligned
;; plain load is already atomic for these modes).
149*38fd1498Szrj(define_expand "atomic_load<mode>"
150*38fd1498Szrj  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
151*38fd1498Szrj	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
152*38fd1498Szrj			(match_operand:SI 2 "const_int_operand")]
153*38fd1498Szrj		       UNSPEC_LDA))]
154*38fd1498Szrj  ""
155*38fd1498Szrj{
156*38fd1498Szrj  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
157*38fd1498Szrj  if (<MODE>mode == DImode && !TARGET_64BIT)
158*38fd1498Szrj    emit_insn (gen_atomic_loaddi_fpu
159*38fd1498Szrj	       (operands[0], operands[1],
160*38fd1498Szrj	        assign_386_stack_local (DImode, SLOT_TEMP)));
161*38fd1498Szrj  else
162*38fd1498Szrj    {
163*38fd1498Szrj      rtx dst = operands[0];
164*38fd1498Szrj
      /* The atomic access itself must be a single load into a register;
	 a mem->mem move would not be atomic.  */
165*38fd1498Szrj      if (MEM_P (dst))
166*38fd1498Szrj	dst = gen_reg_rtx (<MODE>mode);
167*38fd1498Szrj
168*38fd1498Szrj      emit_move_insn (dst, operands[1]);
169*38fd1498Szrj
170*38fd1498Szrj      /* Fix up the destination if needed.  */
171*38fd1498Szrj      if (dst != operands[0])
172*38fd1498Szrj	emit_move_insn (operands[0], dst);
173*38fd1498Szrj    }
174*38fd1498Szrj  DONE;
175*38fd1498Szrj})
176*38fd1498Szrj
;; Atomic 64-bit load on 32-bit targets: a single FILD (x87) or movq/
;; movlps (SSE) reads the whole quadword at once.  Alternatives:
;;   0: dest is SSE reg  -> direct SSE load, no scratches needed
;;   1: dest is memory   -> bounce through FP/SSE scratch (op 3)
;;   2: dest is GP regs  -> also needs the stack slot (op 2) to land
;;                          the value before splitting into a reg pair
177*38fd1498Szrj(define_insn_and_split "atomic_loaddi_fpu"
178*38fd1498Szrj  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
179*38fd1498Szrj	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
180*38fd1498Szrj		   UNSPEC_LDA))
181*38fd1498Szrj   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
182*38fd1498Szrj   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
183*38fd1498Szrj  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
184*38fd1498Szrj  "#"
185*38fd1498Szrj  "&& reload_completed"
186*38fd1498Szrj  [(const_int 0)]
187*38fd1498Szrj{
188*38fd1498Szrj  rtx dst = operands[0], src = operands[1];
189*38fd1498Szrj  rtx mem = operands[2], tmp = operands[3];
190*38fd1498Szrj
191*38fd1498Szrj  if (SSE_REG_P (dst))
192*38fd1498Szrj    emit_move_insn (dst, src);
193*38fd1498Szrj  else
194*38fd1498Szrj    {
      /* If the final destination is memory, store straight there
	 instead of through the stack temporary.  */
195*38fd1498Szrj      if (MEM_P (dst))
196*38fd1498Szrj	mem = dst;
197*38fd1498Szrj
198*38fd1498Szrj      if (STACK_REG_P (tmp))
199*38fd1498Szrj        {
200*38fd1498Szrj	  emit_insn (gen_loaddi_via_fpu (tmp, src));
201*38fd1498Szrj	  emit_insn (gen_storedi_via_fpu (mem, tmp));
202*38fd1498Szrj	}
203*38fd1498Szrj      else
204*38fd1498Szrj	{
205*38fd1498Szrj	  emit_insn (gen_loaddi_via_sse (tmp, src));
206*38fd1498Szrj	  emit_insn (gen_storedi_via_sse (mem, tmp));
207*38fd1498Szrj	}
208*38fd1498Szrj
      /* GP-register destination: reload the pair from the slot.  */
209*38fd1498Szrj      if (mem != dst)
210*38fd1498Szrj	emit_move_insn (dst, mem);
211*38fd1498Szrj    }
212*38fd1498Szrj  DONE;
213*38fd1498Szrj})
214*38fd1498Szrj
;; Peephole2s for the load side: when an atomic DImode load done via
;; FILD/FIST (or the SSE LDX/STX equivalents) is immediately followed
;; by reloading the spilled value into an FP register from the same
;; address, and the intermediate register is dead, replace the whole
;; sequence with one direct DFmode load of the original memory.
215*38fd1498Szrj(define_peephole2
216*38fd1498Szrj  [(set (match_operand:DF 0 "fp_register_operand")
217*38fd1498Szrj	(unspec:DF [(match_operand:DI 1 "memory_operand")]
218*38fd1498Szrj		   UNSPEC_FILD_ATOMIC))
219*38fd1498Szrj   (set (match_operand:DI 2 "memory_operand")
220*38fd1498Szrj	(unspec:DI [(match_dup 0)]
221*38fd1498Szrj		   UNSPEC_FIST_ATOMIC))
222*38fd1498Szrj   (set (match_operand:DF 3 "any_fp_register_operand")
223*38fd1498Szrj	(match_operand:DF 4 "memory_operand"))]
224*38fd1498Szrj  "!TARGET_64BIT
225*38fd1498Szrj   && peep2_reg_dead_p (2, operands[0])
226*38fd1498Szrj   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
227*38fd1498Szrj  [(set (match_dup 3) (match_dup 5))]
228*38fd1498Szrj  "operands[5] = gen_lowpart (DFmode, operands[1]);")
229*38fd1498Szrj
;; Same, but with an intervening memory blockage; keep the blockage,
;; re-emitted after the folded load.
230*38fd1498Szrj(define_peephole2
231*38fd1498Szrj  [(set (match_operand:DF 0 "fp_register_operand")
232*38fd1498Szrj	(unspec:DF [(match_operand:DI 1 "memory_operand")]
233*38fd1498Szrj		   UNSPEC_FILD_ATOMIC))
234*38fd1498Szrj   (set (match_operand:DI 2 "memory_operand")
235*38fd1498Szrj	(unspec:DI [(match_dup 0)]
236*38fd1498Szrj		   UNSPEC_FIST_ATOMIC))
237*38fd1498Szrj   (set (mem:BLK (scratch:SI))
238*38fd1498Szrj	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
239*38fd1498Szrj   (set (match_operand:DF 3 "any_fp_register_operand")
240*38fd1498Szrj	(match_operand:DF 4 "memory_operand"))]
241*38fd1498Szrj  "!TARGET_64BIT
242*38fd1498Szrj   && peep2_reg_dead_p (2, operands[0])
243*38fd1498Szrj   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
244*38fd1498Szrj  [(const_int 0)]
245*38fd1498Szrj{
246*38fd1498Szrj  emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
247*38fd1498Szrj  emit_insn (gen_memory_blockage ());
248*38fd1498Szrj  DONE;
249*38fd1498Szrj})
250*38fd1498Szrj
;; SSE variants of the same folds (UNSPEC_LDX/STX_ATOMIC).
251*38fd1498Szrj(define_peephole2
252*38fd1498Szrj  [(set (match_operand:DF 0 "sse_reg_operand")
253*38fd1498Szrj	(unspec:DF [(match_operand:DI 1 "memory_operand")]
254*38fd1498Szrj		   UNSPEC_LDX_ATOMIC))
255*38fd1498Szrj   (set (match_operand:DI 2 "memory_operand")
256*38fd1498Szrj	(unspec:DI [(match_dup 0)]
257*38fd1498Szrj		   UNSPEC_STX_ATOMIC))
258*38fd1498Szrj   (set (match_operand:DF 3 "any_fp_register_operand")
259*38fd1498Szrj	(match_operand:DF 4 "memory_operand"))]
260*38fd1498Szrj  "!TARGET_64BIT
261*38fd1498Szrj   && peep2_reg_dead_p (2, operands[0])
262*38fd1498Szrj   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
263*38fd1498Szrj  [(set (match_dup 3) (match_dup 5))]
264*38fd1498Szrj  "operands[5] = gen_lowpart (DFmode, operands[1]);")
265*38fd1498Szrj
266*38fd1498Szrj(define_peephole2
267*38fd1498Szrj  [(set (match_operand:DF 0 "sse_reg_operand")
268*38fd1498Szrj	(unspec:DF [(match_operand:DI 1 "memory_operand")]
269*38fd1498Szrj		   UNSPEC_LDX_ATOMIC))
270*38fd1498Szrj   (set (match_operand:DI 2 "memory_operand")
271*38fd1498Szrj	(unspec:DI [(match_dup 0)]
272*38fd1498Szrj		   UNSPEC_STX_ATOMIC))
273*38fd1498Szrj   (set (mem:BLK (scratch:SI))
274*38fd1498Szrj	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
275*38fd1498Szrj   (set (match_operand:DF 3 "any_fp_register_operand")
276*38fd1498Szrj	(match_operand:DF 4 "memory_operand"))]
277*38fd1498Szrj  "!TARGET_64BIT
278*38fd1498Szrj   && peep2_reg_dead_p (2, operands[0])
279*38fd1498Szrj   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
280*38fd1498Szrj  [(const_int 0)]
281*38fd1498Szrj{
282*38fd1498Szrj  emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
283*38fd1498Szrj  emit_insn (gen_memory_blockage ());
284*38fd1498Szrj  DONE;
285*38fd1498Szrj})
286*38fd1498Szrj
;; Atomic store.  Operand 2 is the memory model.  A plain aligned store
;; is atomic for these modes (see the comment above the ATOMIC mode
;; iterator); SEQ_CST additionally needs a trailing full barrier, or an
;; XCHG when no MFENCE is available.
287*38fd1498Szrj(define_expand "atomic_store<mode>"
288*38fd1498Szrj  [(set (match_operand:ATOMIC 0 "memory_operand")
289*38fd1498Szrj	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
290*38fd1498Szrj			(match_operand:SI 2 "const_int_operand")]
291*38fd1498Szrj		       UNSPEC_STA))]
292*38fd1498Szrj  ""
293*38fd1498Szrj{
294*38fd1498Szrj  enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
295*38fd1498Szrj
296*38fd1498Szrj  if (<MODE>mode == DImode && !TARGET_64BIT)
297*38fd1498Szrj    {
298*38fd1498Szrj      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
299*38fd1498Szrj      /* Note that while we could perform a cmpxchg8b loop, that turns
300*38fd1498Szrj	 out to be significantly larger than this plus a barrier.  */
301*38fd1498Szrj      emit_insn (gen_atomic_storedi_fpu
302*38fd1498Szrj		 (operands[0], operands[1],
303*38fd1498Szrj	          assign_386_stack_local (DImode, SLOT_TEMP)));
304*38fd1498Szrj    }
305*38fd1498Szrj  else
306*38fd1498Szrj    {
307*38fd1498Szrj      operands[1] = force_reg (<MODE>mode, operands[1]);
308*38fd1498Szrj
309*38fd1498Szrj      /* For seq-cst stores, when we lack MFENCE, use XCHG.  */
310*38fd1498Szrj      if (is_mm_seq_cst (model) && !(TARGET_64BIT || TARGET_SSE2))
311*38fd1498Szrj	{
	  /* XCHG with a memory operand is implicitly locked and hence a
	     full barrier; the old value is discarded into a dead reg.  */
312*38fd1498Szrj	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
313*38fd1498Szrj						operands[0], operands[1],
314*38fd1498Szrj						operands[2]));
315*38fd1498Szrj	  DONE;
316*38fd1498Szrj	}
317*38fd1498Szrj
318*38fd1498Szrj      /* Otherwise use a store.  */
319*38fd1498Szrj      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
320*38fd1498Szrj					   operands[2]));
321*38fd1498Szrj    }
322*38fd1498Szrj  /* ... followed by an MFENCE, if required.  */
323*38fd1498Szrj  if (is_mm_seq_cst (model))
324*38fd1498Szrj    emit_insn (gen_mem_thread_fence (operands[2]));
325*38fd1498Szrj  DONE;
326*38fd1498Szrj})
327*38fd1498Szrj
;; The store itself: a single mov.  The UNSPEC wrapper keeps it from
;; being split or combined with other accesses.
328*38fd1498Szrj(define_insn "atomic_store<mode>_1"
329*38fd1498Szrj  [(set (match_operand:SWI 0 "memory_operand" "=m")
330*38fd1498Szrj	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
331*38fd1498Szrj		     (match_operand:SI 2 "const_int_operand")]
332*38fd1498Szrj		    UNSPEC_STA))]
333*38fd1498Szrj  ""
334*38fd1498Szrj  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")
335*38fd1498Szrj
;; Atomic 64-bit store on 32-bit targets, mirroring atomic_loaddi_fpu:
;; a single FISTP (x87) or movq/movlps (SSE) writes the whole quadword.
;; Alternatives: 0: src in SSE reg (direct), 1: src in memory,
;; 2: src in GP register pair (spilled through stack slot, operand 2).
336*38fd1498Szrj(define_insn_and_split "atomic_storedi_fpu"
337*38fd1498Szrj  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
338*38fd1498Szrj	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
339*38fd1498Szrj		   UNSPEC_STA))
340*38fd1498Szrj   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
341*38fd1498Szrj   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
342*38fd1498Szrj  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
343*38fd1498Szrj  "#"
344*38fd1498Szrj  "&& reload_completed"
345*38fd1498Szrj  [(const_int 0)]
346*38fd1498Szrj{
347*38fd1498Szrj  rtx dst = operands[0], src = operands[1];
348*38fd1498Szrj  rtx mem = operands[2], tmp = operands[3];
349*38fd1498Szrj
350*38fd1498Szrj  if (SSE_REG_P (src))
351*38fd1498Szrj    emit_move_insn (dst, src);
352*38fd1498Szrj  else
353*38fd1498Szrj    {
      /* Register-pair source: park it in the stack temporary first so
	 the FPU/SSE unit can load it as one 64-bit quantity.  */
354*38fd1498Szrj      if (REG_P (src))
355*38fd1498Szrj	{
356*38fd1498Szrj	  emit_move_insn (mem, src);
357*38fd1498Szrj	  src = mem;
358*38fd1498Szrj	}
359*38fd1498Szrj
360*38fd1498Szrj      if (STACK_REG_P (tmp))
361*38fd1498Szrj	{
362*38fd1498Szrj	  emit_insn (gen_loaddi_via_fpu (tmp, src));
363*38fd1498Szrj	  emit_insn (gen_storedi_via_fpu (dst, tmp));
364*38fd1498Szrj	}
365*38fd1498Szrj      else
366*38fd1498Szrj	{
367*38fd1498Szrj	  emit_insn (gen_loaddi_via_sse (tmp, src));
368*38fd1498Szrj	  emit_insn (gen_storedi_via_sse (dst, tmp));
369*38fd1498Szrj	}
370*38fd1498Szrj    }
371*38fd1498Szrj  DONE;
372*38fd1498Szrj})
373*38fd1498Szrj
;; Peephole2s for the store side: a DFmode store from an FP register to
;; the spill slot, followed by the atomic FILD/FIST (or SSE LDX/STX)
;; copy from that slot to the real destination, collapses into one
;; direct DFmode store to the destination once the intermediate FP
;; register is dead.
374*38fd1498Szrj(define_peephole2
375*38fd1498Szrj  [(set (match_operand:DF 0 "memory_operand")
376*38fd1498Szrj	(match_operand:DF 1 "any_fp_register_operand"))
377*38fd1498Szrj   (set (match_operand:DF 2 "fp_register_operand")
378*38fd1498Szrj	(unspec:DF [(match_operand:DI 3 "memory_operand")]
379*38fd1498Szrj		   UNSPEC_FILD_ATOMIC))
380*38fd1498Szrj   (set (match_operand:DI 4 "memory_operand")
381*38fd1498Szrj	(unspec:DI [(match_dup 2)]
382*38fd1498Szrj		   UNSPEC_FIST_ATOMIC))]
383*38fd1498Szrj  "!TARGET_64BIT
384*38fd1498Szrj   && peep2_reg_dead_p (3, operands[2])
385*38fd1498Szrj   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
386*38fd1498Szrj  [(set (match_dup 5) (match_dup 1))]
387*38fd1498Szrj  "operands[5] = gen_lowpart (DFmode, operands[4]);")
388*38fd1498Szrj
;; Same with an intervening memory blockage: keep the blockage,
;; re-emitted before the folded store.
389*38fd1498Szrj(define_peephole2
390*38fd1498Szrj  [(set (match_operand:DF 0 "memory_operand")
391*38fd1498Szrj	(match_operand:DF 1 "any_fp_register_operand"))
392*38fd1498Szrj   (set (mem:BLK (scratch:SI))
393*38fd1498Szrj	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
394*38fd1498Szrj   (set (match_operand:DF 2 "fp_register_operand")
395*38fd1498Szrj	(unspec:DF [(match_operand:DI 3 "memory_operand")]
396*38fd1498Szrj		   UNSPEC_FILD_ATOMIC))
397*38fd1498Szrj   (set (match_operand:DI 4 "memory_operand")
398*38fd1498Szrj	(unspec:DI [(match_dup 2)]
399*38fd1498Szrj		   UNSPEC_FIST_ATOMIC))]
400*38fd1498Szrj  "!TARGET_64BIT
401*38fd1498Szrj   && peep2_reg_dead_p (4, operands[2])
402*38fd1498Szrj   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
403*38fd1498Szrj  [(const_int 0)]
404*38fd1498Szrj{
405*38fd1498Szrj  emit_insn (gen_memory_blockage ());
406*38fd1498Szrj  emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
407*38fd1498Szrj  DONE;
408*38fd1498Szrj})
409*38fd1498Szrj
;; SSE variants of the same folds (UNSPEC_LDX/STX_ATOMIC).
410*38fd1498Szrj(define_peephole2
411*38fd1498Szrj  [(set (match_operand:DF 0 "memory_operand")
412*38fd1498Szrj	(match_operand:DF 1 "any_fp_register_operand"))
413*38fd1498Szrj   (set (match_operand:DF 2 "sse_reg_operand")
414*38fd1498Szrj	(unspec:DF [(match_operand:DI 3 "memory_operand")]
415*38fd1498Szrj		   UNSPEC_LDX_ATOMIC))
416*38fd1498Szrj   (set (match_operand:DI 4 "memory_operand")
417*38fd1498Szrj	(unspec:DI [(match_dup 2)]
418*38fd1498Szrj		   UNSPEC_STX_ATOMIC))]
419*38fd1498Szrj  "!TARGET_64BIT
420*38fd1498Szrj   && peep2_reg_dead_p (3, operands[2])
421*38fd1498Szrj   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
422*38fd1498Szrj  [(set (match_dup 5) (match_dup 1))]
423*38fd1498Szrj  "operands[5] = gen_lowpart (DFmode, operands[4]);")
424*38fd1498Szrj
425*38fd1498Szrj(define_peephole2
426*38fd1498Szrj  [(set (match_operand:DF 0 "memory_operand")
427*38fd1498Szrj	(match_operand:DF 1 "any_fp_register_operand"))
428*38fd1498Szrj   (set (mem:BLK (scratch:SI))
429*38fd1498Szrj	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
430*38fd1498Szrj   (set (match_operand:DF 2 "sse_reg_operand")
431*38fd1498Szrj	(unspec:DF [(match_operand:DI 3 "memory_operand")]
432*38fd1498Szrj		   UNSPEC_LDX_ATOMIC))
433*38fd1498Szrj   (set (match_operand:DI 4 "memory_operand")
434*38fd1498Szrj	(unspec:DI [(match_dup 2)]
435*38fd1498Szrj		   UNSPEC_STX_ATOMIC))]
436*38fd1498Szrj  "!TARGET_64BIT
437*38fd1498Szrj   && peep2_reg_dead_p (4, operands[2])
438*38fd1498Szrj   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
439*38fd1498Szrj  [(const_int 0)]
440*38fd1498Szrj{
441*38fd1498Szrj  emit_insn (gen_memory_blockage ());
442*38fd1498Szrj  emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
443*38fd1498Szrj  DONE;
444*38fd1498Szrj})
445*38fd1498Szrj
446*38fd1498Szrj;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
447*38fd1498Szrj;; operations.  But the fix_trunc patterns want way more setup than we want
448*38fd1498Szrj;; to provide.  Note that the scratch is DFmode instead of XFmode in order
449*38fd1498Szrj;; to make it easy to allocate a scratch in either SSE or FP_REGs above.
450*38fd1498Szrj
;; fild: one 64-bit integer load into the x87 stack top.
451*38fd1498Szrj(define_insn "loaddi_via_fpu"
452*38fd1498Szrj  [(set (match_operand:DF 0 "register_operand" "=f")
453*38fd1498Szrj	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
454*38fd1498Szrj		   UNSPEC_FILD_ATOMIC))]
455*38fd1498Szrj  "TARGET_80387"
456*38fd1498Szrj  "fild%Z1\t%1"
457*38fd1498Szrj  [(set_attr "type" "fmov")
458*38fd1498Szrj   (set_attr "mode" "DF")
459*38fd1498Szrj   (set_attr "fp_int_src" "true")])
460*38fd1498Szrj
;; fistp: one 64-bit integer store.  fistp pops the x87 stack, so the
;; source value must be dead here -- asserted via its REG_DEAD note.
461*38fd1498Szrj(define_insn "storedi_via_fpu"
462*38fd1498Szrj  [(set (match_operand:DI 0 "memory_operand" "=m")
463*38fd1498Szrj	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
464*38fd1498Szrj		   UNSPEC_FIST_ATOMIC))]
465*38fd1498Szrj  "TARGET_80387"
466*38fd1498Szrj{
467*38fd1498Szrj  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);
468*38fd1498Szrj
469*38fd1498Szrj  return "fistp%Z0\t%0";
470*38fd1498Szrj}
471*38fd1498Szrj  [(set_attr "type" "fmov")
472*38fd1498Szrj   (set_attr "mode" "DI")])
473*38fd1498Szrj
;; SSE 64-bit load: movq with SSE2, else the SSE1 movlps form.
474*38fd1498Szrj(define_insn "loaddi_via_sse"
475*38fd1498Szrj  [(set (match_operand:DF 0 "register_operand" "=x")
476*38fd1498Szrj	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
477*38fd1498Szrj		   UNSPEC_LDX_ATOMIC))]
478*38fd1498Szrj  "TARGET_SSE"
479*38fd1498Szrj{
480*38fd1498Szrj  if (TARGET_SSE2)
481*38fd1498Szrj    return "%vmovq\t{%1, %0|%0, %1}";
482*38fd1498Szrj  return "movlps\t{%1, %0|%0, %1}";
483*38fd1498Szrj}
484*38fd1498Szrj  [(set_attr "type" "ssemov")
485*38fd1498Szrj   (set_attr "mode" "DI")])
486*38fd1498Szrj
;; SSE 64-bit store, matching loaddi_via_sse.
487*38fd1498Szrj(define_insn "storedi_via_sse"
488*38fd1498Szrj  [(set (match_operand:DI 0 "memory_operand" "=m")
489*38fd1498Szrj	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
490*38fd1498Szrj		   UNSPEC_STX_ATOMIC))]
491*38fd1498Szrj  "TARGET_SSE"
492*38fd1498Szrj{
493*38fd1498Szrj  if (TARGET_SSE2)
494*38fd1498Szrj    return "%vmovq\t{%1, %0|%0, %1}";
495*38fd1498Szrj  return "movlps\t{%1, %0|%0, %1}";
496*38fd1498Szrj}
497*38fd1498Szrj  [(set_attr "type" "ssemov")
498*38fd1498Szrj   (set_attr "mode" "DI")])
499*38fd1498Szrj
;; Compare-and-swap for QI/HI/SI: a single cmpxchg, then convert the
;; resulting ZF into the bool success output.  Operands 5 (is_weak) and
;; 7 (failure model) are accepted but unused: cmpxchg is always strong
;; and always a full barrier.
500*38fd1498Szrj(define_expand "atomic_compare_and_swap<mode>"
501*38fd1498Szrj  [(match_operand:QI 0 "register_operand")	;; bool success output
502*38fd1498Szrj   (match_operand:SWI124 1 "register_operand")	;; oldval output
503*38fd1498Szrj   (match_operand:SWI124 2 "memory_operand")	;; memory
504*38fd1498Szrj   (match_operand:SWI124 3 "register_operand")	;; expected input
505*38fd1498Szrj   (match_operand:SWI124 4 "register_operand")	;; newval input
506*38fd1498Szrj   (match_operand:SI 5 "const_int_operand")	;; is_weak
507*38fd1498Szrj   (match_operand:SI 6 "const_int_operand")	;; success model
508*38fd1498Szrj   (match_operand:SI 7 "const_int_operand")]	;; failure model
509*38fd1498Szrj  "TARGET_CMPXCHG"
510*38fd1498Szrj{
511*38fd1498Szrj  emit_insn
512*38fd1498Szrj   (gen_atomic_compare_and_swap<mode>_1
513*38fd1498Szrj    (operands[1], operands[2], operands[3], operands[4], operands[6]));
514*38fd1498Szrj  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
515*38fd1498Szrj		     const0_rtx);
516*38fd1498Szrj  DONE;
517*38fd1498Szrj})
518*38fd1498Szrj
;; Double-word CAS modes: DImode via cmpxchg8b (or plain cmpxchg on
;; 64-bit), TImode via cmpxchg16b.  CASHMODE is the half-word mode used
;; to split the new value.
519*38fd1498Szrj(define_mode_iterator CASMODE
520*38fd1498Szrj  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
521*38fd1498Szrj   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
522*38fd1498Szrj(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])
523*38fd1498Szrj
524*38fd1498Szrj(define_expand "atomic_compare_and_swap<mode>"
525*38fd1498Szrj  [(match_operand:QI 0 "register_operand")	;; bool success output
526*38fd1498Szrj   (match_operand:CASMODE 1 "register_operand")	;; oldval output
527*38fd1498Szrj   (match_operand:CASMODE 2 "memory_operand")	;; memory
528*38fd1498Szrj   (match_operand:CASMODE 3 "register_operand")	;; expected input
529*38fd1498Szrj   (match_operand:CASMODE 4 "register_operand")	;; newval input
530*38fd1498Szrj   (match_operand:SI 5 "const_int_operand")	;; is_weak
531*38fd1498Szrj   (match_operand:SI 6 "const_int_operand")	;; success model
532*38fd1498Szrj   (match_operand:SI 7 "const_int_operand")]	;; failure model
533*38fd1498Szrj  "TARGET_CMPXCHG"
534*38fd1498Szrj{
  /* DImode on 64-bit is an ordinary single-word cmpxchg.  */
535*38fd1498Szrj  if (<MODE>mode == DImode && TARGET_64BIT)
536*38fd1498Szrj    {
537*38fd1498Szrj      emit_insn
538*38fd1498Szrj       (gen_atomic_compare_and_swapdi_1
539*38fd1498Szrj	(operands[1], operands[2], operands[3], operands[4], operands[6]));
540*38fd1498Szrj    }
541*38fd1498Szrj  else
542*38fd1498Szrj    {
543*38fd1498Szrj      machine_mode hmode = <CASHMODE>mode;
544*38fd1498Szrj
      /* Split newval into low/high halves for the CX:BX register pair
	 required by cmpxchg8b/16b (see comment below).  */
545*38fd1498Szrj      emit_insn
546*38fd1498Szrj       (gen_atomic_compare_and_swap<mode>_doubleword
547*38fd1498Szrj        (operands[1], operands[2], operands[3],
548*38fd1498Szrj	 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
549*38fd1498Szrj	 operands[6]));
550*38fd1498Szrj    }
551*38fd1498Szrj
552*38fd1498Szrj  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
553*38fd1498Szrj		     const0_rtx);
554*38fd1498Szrj  DONE;
555*38fd1498Szrj})
556*38fd1498Szrj
557*38fd1498Szrj;; For double-word compare and swap, we are obliged to play tricks with
558*38fd1498Szrj;; the input newval (op3:op4) because the Intel register numbering does
559*38fd1498Szrj;; not match the gcc register numbering, so the pair must be CX:BX.
560*38fd1498Szrj
561*38fd1498Szrj(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])
562*38fd1498Szrj
;; lock cmpxchg8b/16b: expected/old value in the "A" (DX:AX / RDX:RAX)
;; pair, new value in CX:BX (operands 3/4 = low/high).  ZF is set on
;; success.  %K5 may prepend an HLE XACQUIRE/XRELEASE hint depending on
;; the memory-model operand -- presumably; verify against i386.c's 'K'
;; output modifier.
563*38fd1498Szrj(define_insn "atomic_compare_and_swap<dwi>_doubleword"
564*38fd1498Szrj  [(set (match_operand:<DWI> 0 "register_operand" "=A")
565*38fd1498Szrj	(unspec_volatile:<DWI>
566*38fd1498Szrj	  [(match_operand:<DWI> 1 "memory_operand" "+m")
567*38fd1498Szrj	   (match_operand:<DWI> 2 "register_operand" "0")
568*38fd1498Szrj	   (match_operand:DWIH 3 "register_operand" "b")
569*38fd1498Szrj	   (match_operand:DWIH 4 "register_operand" "c")
570*38fd1498Szrj	   (match_operand:SI 5 "const_int_operand")]
571*38fd1498Szrj	  UNSPECV_CMPXCHG))
572*38fd1498Szrj   (set (match_dup 1)
573*38fd1498Szrj	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
574*38fd1498Szrj   (set (reg:CCZ FLAGS_REG)
575*38fd1498Szrj        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
576*38fd1498Szrj  "TARGET_CMPXCHG<doublemodesuffix>B"
577*38fd1498Szrj  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")
578*38fd1498Szrj
;; Single-word lock cmpxchg: expected/old value in AX ("=a"/"0"),
;; new value in any register; ZF indicates success.
579*38fd1498Szrj(define_insn "atomic_compare_and_swap<mode>_1"
580*38fd1498Szrj  [(set (match_operand:SWI 0 "register_operand" "=a")
581*38fd1498Szrj	(unspec_volatile:SWI
582*38fd1498Szrj	  [(match_operand:SWI 1 "memory_operand" "+m")
583*38fd1498Szrj	   (match_operand:SWI 2 "register_operand" "0")
584*38fd1498Szrj	   (match_operand:SWI 3 "register_operand" "<r>")
585*38fd1498Szrj	   (match_operand:SI 4 "const_int_operand")]
586*38fd1498Szrj	  UNSPECV_CMPXCHG))
587*38fd1498Szrj   (set (match_dup 1)
588*38fd1498Szrj	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
589*38fd1498Szrj   (set (reg:CCZ FLAGS_REG)
590*38fd1498Szrj        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
591*38fd1498Szrj  "TARGET_CMPXCHG"
592*38fd1498Szrj  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
593*38fd1498Szrj
594*38fd1498Szrj;; For operand 2 nonmemory_operand predicate is used instead of
595*38fd1498Szrj;; register_operand to allow combiner to better optimize atomic
596*38fd1498Szrj;; additions of constants.
;; lock xadd: exchanges operand 0 with the memory word while adding;
;; the "0" constraint ties the addend (operand 2) to the output reg,
;; which receives the pre-add value of the memory location.
597*38fd1498Szrj(define_insn "atomic_fetch_add<mode>"
598*38fd1498Szrj  [(set (match_operand:SWI 0 "register_operand" "=<r>")
599*38fd1498Szrj	(unspec_volatile:SWI
600*38fd1498Szrj	  [(match_operand:SWI 1 "memory_operand" "+m")
601*38fd1498Szrj	   (match_operand:SI 3 "const_int_operand")]		;; model
602*38fd1498Szrj	  UNSPECV_XCHG))
603*38fd1498Szrj   (set (match_dup 1)
604*38fd1498Szrj	(plus:SWI (match_dup 1)
605*38fd1498Szrj		  (match_operand:SWI 2 "nonmemory_operand" "0")))
606*38fd1498Szrj   (clobber (reg:CC FLAGS_REG))]
607*38fd1498Szrj  "TARGET_XADD"
608*38fd1498Szrj  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
609*38fd1498Szrj
610*38fd1498Szrj;; This peephole2 and following insn optimize
611*38fd1498Szrj;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
612*38fd1498Szrj;; followed by testing of flags instead of lock xadd and comparisons.
;; The condition requires op2 == -op3 (the addend is the negation of
;; the compared value) and that the xadd result register is dead, so
;; only the ZF outcome of the comparison is observable.
613*38fd1498Szrj(define_peephole2
614*38fd1498Szrj  [(set (match_operand:SWI 0 "register_operand")
615*38fd1498Szrj	(match_operand:SWI 2 "const_int_operand"))
616*38fd1498Szrj   (parallel [(set (match_dup 0)
617*38fd1498Szrj		   (unspec_volatile:SWI
618*38fd1498Szrj		     [(match_operand:SWI 1 "memory_operand")
619*38fd1498Szrj		      (match_operand:SI 4 "const_int_operand")]
620*38fd1498Szrj		     UNSPECV_XCHG))
621*38fd1498Szrj	      (set (match_dup 1)
622*38fd1498Szrj		   (plus:SWI (match_dup 1)
623*38fd1498Szrj			     (match_dup 0)))
624*38fd1498Szrj	      (clobber (reg:CC FLAGS_REG))])
625*38fd1498Szrj   (set (reg:CCZ FLAGS_REG)
626*38fd1498Szrj	(compare:CCZ (match_dup 0)
627*38fd1498Szrj		     (match_operand:SWI 3 "const_int_operand")))]
628*38fd1498Szrj  "peep2_reg_dead_p (3, operands[0])
629*38fd1498Szrj   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
630*38fd1498Szrj      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
631*38fd1498Szrj   && !reg_overlap_mentioned_p (operands[0], operands[1])"
632*38fd1498Szrj  [(parallel [(set (reg:CCZ FLAGS_REG)
633*38fd1498Szrj		   (compare:CCZ
634*38fd1498Szrj		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
635*38fd1498Szrj					  UNSPECV_XCHG)
636*38fd1498Szrj		     (match_dup 3)))
637*38fd1498Szrj	      (set (match_dup 1)
638*38fd1498Szrj		   (plus:SWI (match_dup 1)
639*38fd1498Szrj			     (match_dup 2)))])])
640*38fd1498Szrj
641*38fd1498Szrj;; Likewise, but for the -Os special case of *mov<mode>_or.
;; Here the -1 constant was materialized with an or-based move that
;; clobbers the flags, hence the extra clobber in the first parallel.
642*38fd1498Szrj(define_peephole2
643*38fd1498Szrj  [(parallel [(set (match_operand:SWI 0 "register_operand")
644*38fd1498Szrj		   (match_operand:SWI 2 "constm1_operand"))
645*38fd1498Szrj	      (clobber (reg:CC FLAGS_REG))])
646*38fd1498Szrj   (parallel [(set (match_dup 0)
647*38fd1498Szrj		   (unspec_volatile:SWI
648*38fd1498Szrj		     [(match_operand:SWI 1 "memory_operand")
649*38fd1498Szrj		      (match_operand:SI 4 "const_int_operand")]
650*38fd1498Szrj		     UNSPECV_XCHG))
651*38fd1498Szrj	      (set (match_dup 1)
652*38fd1498Szrj		   (plus:SWI (match_dup 1)
653*38fd1498Szrj			     (match_dup 0)))
654*38fd1498Szrj	      (clobber (reg:CC FLAGS_REG))])
655*38fd1498Szrj   (set (reg:CCZ FLAGS_REG)
656*38fd1498Szrj	(compare:CCZ (match_dup 0)
657*38fd1498Szrj		     (match_operand:SWI 3 "const_int_operand")))]
658*38fd1498Szrj  "peep2_reg_dead_p (3, operands[0])
659*38fd1498Szrj   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
660*38fd1498Szrj      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
661*38fd1498Szrj   && !reg_overlap_mentioned_p (operands[0], operands[1])"
662*38fd1498Szrj  [(parallel [(set (reg:CCZ FLAGS_REG)
663*38fd1498Szrj		   (compare:CCZ
664*38fd1498Szrj		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
665*38fd1498Szrj					  UNSPECV_XCHG)
666*38fd1498Szrj		     (match_dup 3)))
667*38fd1498Szrj	      (set (match_dup 1)
668*38fd1498Szrj		   (plus:SWI (match_dup 1)
669*38fd1498Szrj			     (match_dup 2)))])])
670*38fd1498Szrj
;; The insn produced by the peephole2s above: a locked add/sub/inc/dec
;; whose ZF matches what the xadd + compare sequence would have set.
;; The condition requires op1 == -op2, mirroring the peephole2 check.
671*38fd1498Szrj(define_insn "*atomic_fetch_add_cmp<mode>"
672*38fd1498Szrj  [(set (reg:CCZ FLAGS_REG)
673*38fd1498Szrj	(compare:CCZ
674*38fd1498Szrj	  (unspec_volatile:SWI
675*38fd1498Szrj	    [(match_operand:SWI 0 "memory_operand" "+m")
676*38fd1498Szrj	     (match_operand:SI 3 "const_int_operand")]		;; model
677*38fd1498Szrj	    UNSPECV_XCHG)
678*38fd1498Szrj	  (match_operand:SWI 2 "const_int_operand" "i")))
679*38fd1498Szrj   (set (match_dup 0)
680*38fd1498Szrj	(plus:SWI (match_dup 0)
681*38fd1498Szrj		  (match_operand:SWI 1 "const_int_operand" "i")))]
682*38fd1498Szrj  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
683*38fd1498Szrj   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
684*38fd1498Szrj{
  /* +1/-1 get the short inc/dec encodings.  */
685*38fd1498Szrj  if (incdec_operand (operands[1], <MODE>mode))
686*38fd1498Szrj    {
687*38fd1498Szrj      if (operands[1] == const1_rtx)
688*38fd1498Szrj	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
689*38fd1498Szrj      else
690*38fd1498Szrj	{
691*38fd1498Szrj	  gcc_assert (operands[1] == constm1_rtx);
692*38fd1498Szrj	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
693*38fd1498Szrj	}
694*38fd1498Szrj    }
695*38fd1498Szrj
  /* Prefer sub of the negated constant when it encodes smaller.  */
696*38fd1498Szrj  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
697*38fd1498Szrj    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";
698*38fd1498Szrj
699*38fd1498Szrj  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
700*38fd1498Szrj})
701*38fd1498Szrj
702*38fd1498Szrj;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
703*38fd1498Szrj;; In addition, it is always a full barrier, so we can ignore the memory model.
;;
;; op0 <- old *op1; *op1 <- op2.  The "0" constraint ties the input
;; register to the output, matching xchg's read-modify-write of one
;; register operand.  op3 (the model) is consumed only by the %K3
;; output modifier.
704*38fd1498Szrj(define_insn "atomic_exchange<mode>"
705*38fd1498Szrj  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
706*38fd1498Szrj	(unspec_volatile:SWI
707*38fd1498Szrj	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
708*38fd1498Szrj	   (match_operand:SI 3 "const_int_operand")]		;; model
709*38fd1498Szrj	  UNSPECV_XCHG))
710*38fd1498Szrj   (set (match_dup 1)
711*38fd1498Szrj	(match_operand:SWI 2 "register_operand" "0"))]		;; input
712*38fd1498Szrj  ""
713*38fd1498Szrj  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
714*38fd1498Szrj
;; Atomic *op0 += op1, result value unused (side effect only), so a
;; plain locked add/sub/inc/dec suffices -- no xadd, no output register.
;; FLAGS are clobbered by the locked arithmetic.  op2 is the memory
;; model, consumed only by the %K2 output modifier.
715*38fd1498Szrj(define_insn "atomic_add<mode>"
716*38fd1498Szrj  [(set (match_operand:SWI 0 "memory_operand" "+m")
717*38fd1498Szrj	(unspec_volatile:SWI
718*38fd1498Szrj	  [(plus:SWI (match_dup 0)
719*38fd1498Szrj		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
720*38fd1498Szrj	   (match_operand:SI 2 "const_int_operand")]		;; model
721*38fd1498Szrj	  UNSPECV_LOCK))
722*38fd1498Szrj   (clobber (reg:CC FLAGS_REG))]
723*38fd1498Szrj  ""
724*38fd1498Szrj{
;; +1/-1: prefer inc/dec for the shorter encoding.
725*38fd1498Szrj  if (incdec_operand (operands[1], <MODE>mode))
726*38fd1498Szrj    {
727*38fd1498Szrj      if (operands[1] == const1_rtx)
728*38fd1498Szrj	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
729*38fd1498Szrj      else
730*38fd1498Szrj	{
731*38fd1498Szrj	  gcc_assert (operands[1] == constm1_rtx);
732*38fd1498Szrj	  return "lock{%;} %K2dec{<imodesuffix>}\t%0";
733*38fd1498Szrj	}
734*38fd1498Szrj    }
735*38fd1498Szrj
;; Negate the immediate when that encodes better and emit sub instead;
;; the helper rewrites operands[1] in place when it returns true.
736*38fd1498Szrj  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
737*38fd1498Szrj    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
738*38fd1498Szrj
739*38fd1498Szrj  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
740*38fd1498Szrj})
741*38fd1498Szrj
;; Atomic *op0 -= op1, result value unused; mirror image of
;; atomic_add<mode>: inc/dec choices and the add/sub fallback are
;; swapped because the operation is a subtraction.  FLAGS clobbered;
;; op2 (model) feeds only the %K2 output modifier.
742*38fd1498Szrj(define_insn "atomic_sub<mode>"
743*38fd1498Szrj  [(set (match_operand:SWI 0 "memory_operand" "+m")
744*38fd1498Szrj	(unspec_volatile:SWI
745*38fd1498Szrj	  [(minus:SWI (match_dup 0)
746*38fd1498Szrj		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
747*38fd1498Szrj	   (match_operand:SI 2 "const_int_operand")]		;; model
748*38fd1498Szrj	  UNSPECV_LOCK))
749*38fd1498Szrj   (clobber (reg:CC FLAGS_REG))]
750*38fd1498Szrj  ""
751*38fd1498Szrj{
;; Subtracting +1 is dec; subtracting -1 is inc.
752*38fd1498Szrj  if (incdec_operand (operands[1], <MODE>mode))
753*38fd1498Szrj    {
754*38fd1498Szrj      if (operands[1] == const1_rtx)
755*38fd1498Szrj	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
756*38fd1498Szrj      else
757*38fd1498Szrj	{
758*38fd1498Szrj	  gcc_assert (operands[1] == constm1_rtx);
759*38fd1498Szrj	  return "lock{%;} %K2inc{<imodesuffix>}\t%0";
760*38fd1498Szrj	}
761*38fd1498Szrj    }
762*38fd1498Szrj
;; If the negated immediate encodes better, emit add of the negation.
763*38fd1498Szrj  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
764*38fd1498Szrj    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
765*38fd1498Szrj
766*38fd1498Szrj  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
767*38fd1498Szrj})
768*38fd1498Szrj
;; Atomic *op0 <logic>= op1 (and/or/xor via the any_logic iterator),
;; result value unused; emits the matching lock-prefixed logic insn.
;; FLAGS clobbered; op2 (model) feeds only the %K2 output modifier.
769*38fd1498Szrj(define_insn "atomic_<logic><mode>"
770*38fd1498Szrj  [(set (match_operand:SWI 0 "memory_operand" "+m")
771*38fd1498Szrj	(unspec_volatile:SWI
772*38fd1498Szrj	  [(any_logic:SWI (match_dup 0)
773*38fd1498Szrj			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
774*38fd1498Szrj	   (match_operand:SI 2 "const_int_operand")]		;; model
775*38fd1498Szrj	  UNSPECV_LOCK))
776*38fd1498Szrj   (clobber (reg:CC FLAGS_REG))]
777*38fd1498Szrj  ""
778*38fd1498Szrj  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
779*38fd1498Szrj
;; Atomically set bit op2 of *op1 and return the bit's old value in op0.
;; op3 is the memory model (forwarded to the _1 insn); op4 == 0 means
;; the caller wants the old bit in its original position, so the 0/1
;; flag is shifted left by op2; op4 != 0 means any nonzero value will
;; do and the shift is skipped.
780*38fd1498Szrj(define_expand "atomic_bit_test_and_set<mode>"
781*38fd1498Szrj  [(match_operand:SWI248 0 "register_operand")
782*38fd1498Szrj   (match_operand:SWI248 1 "memory_operand")
783*38fd1498Szrj   (match_operand:SWI248 2 "nonmemory_operand")
784*38fd1498Szrj   (match_operand:SI 3 "const_int_operand") ;; model
785*38fd1498Szrj   (match_operand:SI 4 "const_int_operand")]
786*38fd1498Szrj  ""
787*38fd1498Szrj{
;; Emit the lock bts; the old bit lands in the carry flag (CCC).
788*38fd1498Szrj  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
789*38fd1498Szrj						  operands[3]));
;; Capture the old bit as a 0/1 QImode value, then zero-extend to the
;; operation mode.  NOTE(review): relies on the i386 convention that
;; EQ on CCCmode reads the carry flag -- confirm in i386.md/i386.c.
790*38fd1498Szrj  rtx tem = gen_reg_rtx (QImode);
791*38fd1498Szrj  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
792*38fd1498Szrj  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
793*38fd1498Szrj  if (operands[4] == const0_rtx)
794*38fd1498Szrj    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
795*38fd1498Szrj				  operands[2], operands[0], 0, OPTAB_DIRECT);
796*38fd1498Szrj  if (result != operands[0])
797*38fd1498Szrj    emit_move_insn (operands[0], result);
798*38fd1498Szrj  DONE;
799*38fd1498Szrj})
800*38fd1498Szrj
;; lock bts: atomically sets bit op1 of *op0 to 1 and copies the old
;; bit into CF (per the x86 BTS definition); the CCC compare against 0
;; models that carry-flag result.  op2 (model) feeds only the %K2
;; output modifier.  The "rN" constraint allows a register or an
;; immediate bit index (presumably the i386 'N' small-constant
;; constraint -- NOTE(review): confirm in constraints.md).
801*38fd1498Szrj(define_insn "atomic_bit_test_and_set<mode>_1"
802*38fd1498Szrj  [(set (reg:CCC FLAGS_REG)
803*38fd1498Szrj	(compare:CCC
804*38fd1498Szrj	  (unspec_volatile:SWI248
805*38fd1498Szrj	    [(match_operand:SWI248 0 "memory_operand" "+m")
806*38fd1498Szrj	     (match_operand:SI 2 "const_int_operand")]		;; model
807*38fd1498Szrj	    UNSPECV_XCHG)
808*38fd1498Szrj	  (const_int 0)))
809*38fd1498Szrj   (set (zero_extract:SWI248 (match_dup 0)
810*38fd1498Szrj			     (const_int 1)
811*38fd1498Szrj			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
812*38fd1498Szrj	(const_int 1))]
813*38fd1498Szrj  ""
814*38fd1498Szrj  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")
815*38fd1498Szrj
;; Atomically complement bit op2 of *op1 and return the bit's old value
;; in op0.  Same operand contract as atomic_bit_test_and_set<mode>:
;; op3 is the memory model; op4 == 0 requests the old bit shifted back
;; to its original position, op4 != 0 accepts any nonzero encoding.
816*38fd1498Szrj(define_expand "atomic_bit_test_and_complement<mode>"
817*38fd1498Szrj  [(match_operand:SWI248 0 "register_operand")
818*38fd1498Szrj   (match_operand:SWI248 1 "memory_operand")
819*38fd1498Szrj   (match_operand:SWI248 2 "nonmemory_operand")
820*38fd1498Szrj   (match_operand:SI 3 "const_int_operand") ;; model
821*38fd1498Szrj   (match_operand:SI 4 "const_int_operand")]
822*38fd1498Szrj  ""
823*38fd1498Szrj{
;; Emit the lock btc; the old bit lands in the carry flag (CCC).
824*38fd1498Szrj  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
825*38fd1498Szrj							 operands[2],
826*38fd1498Szrj							 operands[3]));
;; Capture the old bit as 0/1 and widen.  NOTE(review): relies on EQ on
;; CCCmode reading the carry flag -- confirm in i386.md/i386.c.
827*38fd1498Szrj  rtx tem = gen_reg_rtx (QImode);
828*38fd1498Szrj  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
829*38fd1498Szrj  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
830*38fd1498Szrj  if (operands[4] == const0_rtx)
831*38fd1498Szrj    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
832*38fd1498Szrj				  operands[2], operands[0], 0, OPTAB_DIRECT);
833*38fd1498Szrj  if (result != operands[0])
834*38fd1498Szrj    emit_move_insn (operands[0], result);
835*38fd1498Szrj  DONE;
836*38fd1498Szrj})
837*38fd1498Szrj
;; lock btc: atomically complements bit op1 of *op0 and copies the old
;; bit into CF (per the x86 BTC definition); the CCC compare against 0
;; models that carry-flag result, and the not/zero_extract set models
;; the in-memory complement.  op2 (model) feeds only the %K2 modifier.
838*38fd1498Szrj(define_insn "atomic_bit_test_and_complement<mode>_1"
839*38fd1498Szrj  [(set (reg:CCC FLAGS_REG)
840*38fd1498Szrj	(compare:CCC
841*38fd1498Szrj	  (unspec_volatile:SWI248
842*38fd1498Szrj	    [(match_operand:SWI248 0 "memory_operand" "+m")
843*38fd1498Szrj	     (match_operand:SI 2 "const_int_operand")]		;; model
844*38fd1498Szrj	    UNSPECV_XCHG)
845*38fd1498Szrj	  (const_int 0)))
846*38fd1498Szrj   (set (zero_extract:SWI248 (match_dup 0)
847*38fd1498Szrj			     (const_int 1)
848*38fd1498Szrj			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
849*38fd1498Szrj	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
850*38fd1498Szrj					 (const_int 1)
851*38fd1498Szrj					 (match_dup 1))))]
852*38fd1498Szrj  ""
853*38fd1498Szrj  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")
854*38fd1498Szrj
;; Atomically clear bit op2 of *op1 and return the bit's old value in
;; op0.  Same operand contract as atomic_bit_test_and_set<mode>:
;; op3 is the memory model; op4 == 0 requests the old bit shifted back
;; to its original position, op4 != 0 accepts any nonzero encoding.
855*38fd1498Szrj(define_expand "atomic_bit_test_and_reset<mode>"
856*38fd1498Szrj  [(match_operand:SWI248 0 "register_operand")
857*38fd1498Szrj   (match_operand:SWI248 1 "memory_operand")
858*38fd1498Szrj   (match_operand:SWI248 2 "nonmemory_operand")
859*38fd1498Szrj   (match_operand:SI 3 "const_int_operand") ;; model
860*38fd1498Szrj   (match_operand:SI 4 "const_int_operand")]
861*38fd1498Szrj  ""
862*38fd1498Szrj{
;; Emit the lock btr; the old bit lands in the carry flag (CCC).
863*38fd1498Szrj  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
864*38fd1498Szrj						    operands[3]));
;; Capture the old bit as 0/1 and widen.  NOTE(review): relies on EQ on
;; CCCmode reading the carry flag -- confirm in i386.md/i386.c.
865*38fd1498Szrj  rtx tem = gen_reg_rtx (QImode);
866*38fd1498Szrj  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
867*38fd1498Szrj  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
868*38fd1498Szrj  if (operands[4] == const0_rtx)
869*38fd1498Szrj    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
870*38fd1498Szrj				  operands[2], operands[0], 0, OPTAB_DIRECT);
871*38fd1498Szrj  if (result != operands[0])
872*38fd1498Szrj    emit_move_insn (operands[0], result);
873*38fd1498Szrj  DONE;
874*38fd1498Szrj})
875*38fd1498Szrj
;; lock btr: atomically clears bit op1 of *op0 and copies the old bit
;; into CF (per the x86 BTR definition); the CCC compare against 0
;; models that carry-flag result.  op2 (model) feeds only the %K2
;; output modifier.
876*38fd1498Szrj(define_insn "atomic_bit_test_and_reset<mode>_1"
877*38fd1498Szrj  [(set (reg:CCC FLAGS_REG)
878*38fd1498Szrj	(compare:CCC
879*38fd1498Szrj	  (unspec_volatile:SWI248
880*38fd1498Szrj	    [(match_operand:SWI248 0 "memory_operand" "+m")
881*38fd1498Szrj	     (match_operand:SI 2 "const_int_operand")]		;; model
882*38fd1498Szrj	    UNSPECV_XCHG)
883*38fd1498Szrj	  (const_int 0)))
884*38fd1498Szrj   (set (zero_extract:SWI248 (match_dup 0)
885*38fd1498Szrj			     (const_int 1)
886*38fd1498Szrj			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
887*38fd1498Szrj	(const_int 0))]
888*38fd1498Szrj  ""
889*38fd1498Szrj  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
890