;; GCC machine description for i386 synchronization instructions.
;; Copyright (C) 2005-2018 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE

  UNSPEC_FILD_ATOMIC
  UNSPEC_FIST_ATOMIC

  UNSPEC_LDX_ATOMIC
  UNSPEC_STX_ATOMIC

  ;; __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG
  UNSPECV_XCHG
  UNSPECV_LOCK
])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])

(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
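
;; Editorial illustration (not part of the original sources): the three
;; fence expanders above are, to the best of our knowledge, what the SSE
;; fence intrinsics map to, so a use such as
;;
;;   #include <emmintrin.h>
;;   void
;;   publish (void)
;;   {
;;     _mm_sfence ();   /* sse_sfence  -> "sfence"  */
;;     _mm_lfence ();   /* sse2_lfence -> "lfence"  */
;;     _mm_mfence ();   /* sse2_mfence -> "mfence"  */
;;   }
;;
;; should emit the three fence instructions directly, assuming -msse2.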

(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  "!(TARGET_64BIT || TARGET_SSE2)"
  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
  [(set_attr "memory" "unknown")])

(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand")]	;; model
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));

  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (is_mm_seq_cst (model))
    {
      rtx (*mfence_insn)(rtx);
      rtx mem;

      if (TARGET_64BIT || TARGET_SSE2)
	mfence_insn = gen_mfence_sse2;
      else
	mfence_insn = gen_mfence_nosse;

      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
      MEM_VOLATILE_P (mem) = 1;

      emit_insn (mfence_insn (mem));
    }
  DONE;
})

;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations,
;; Only beginning at Pentium family processors do we get any guarantee of
;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a
;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
;;
;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
;;
;; Importantly, *no* processor makes atomicity guarantees for larger
;; accesses. In particular, there's no way to perform an atomic TImode
;; move, despite the apparent applicability of MOVDQA et al.

(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])

(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_LDA))]
  ""
{
  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
  if (<MODE>mode == DImode && !TARGET_64BIT)
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
		assign_386_stack_local (DImode, SLOT_TEMP)));
  else
    {
      rtx dst = operands[0];

      if (MEM_P (dst))
	dst = gen_reg_rtx (<MODE>mode);

      emit_move_insn (dst, operands[1]);

      /* Fix up the destination if needed.  */
      if (dst != operands[0])
	emit_move_insn (operands[0], dst);
    }
  DONE;
})

(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_LDA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      if (MEM_P (dst))
	mem = dst;

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (mem, tmp));
	}

      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "fp_register_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 2 "memory_operand")
	(unspec:DI [(match_dup 0)]
		   UNSPEC_FIST_ATOMIC))
   (set (match_operand:DF 3 "any_fp_register_operand")
	(match_operand:DF 4 "memory_operand"))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])
   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
  [(set (match_dup 3) (match_dup 5))]
  "operands[5] = gen_lowpart (DFmode, operands[1]);")

(define_peephole2
  [(set (match_operand:DF 0 "fp_register_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 2 "memory_operand")
	(unspec:DI [(match_dup 0)]
		   UNSPEC_FIST_ATOMIC))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 3 "any_fp_register_operand")
	(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT 242*38fd1498Szrj && peep2_reg_dead_p (2, operands[0]) 243*38fd1498Szrj && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" 244*38fd1498Szrj [(const_int 0)] 245*38fd1498Szrj{ 246*38fd1498Szrj emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1])); 247*38fd1498Szrj emit_insn (gen_memory_blockage ()); 248*38fd1498Szrj DONE; 249*38fd1498Szrj}) 250*38fd1498Szrj 251*38fd1498Szrj(define_peephole2 252*38fd1498Szrj [(set (match_operand:DF 0 "sse_reg_operand") 253*38fd1498Szrj (unspec:DF [(match_operand:DI 1 "memory_operand")] 254*38fd1498Szrj UNSPEC_LDX_ATOMIC)) 255*38fd1498Szrj (set (match_operand:DI 2 "memory_operand") 256*38fd1498Szrj (unspec:DI [(match_dup 0)] 257*38fd1498Szrj UNSPEC_STX_ATOMIC)) 258*38fd1498Szrj (set (match_operand:DF 3 "any_fp_register_operand") 259*38fd1498Szrj (match_operand:DF 4 "memory_operand"))] 260*38fd1498Szrj "!TARGET_64BIT 261*38fd1498Szrj && peep2_reg_dead_p (2, operands[0]) 262*38fd1498Szrj && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" 263*38fd1498Szrj [(set (match_dup 3) (match_dup 5))] 264*38fd1498Szrj "operands[5] = gen_lowpart (DFmode, operands[1]);") 265*38fd1498Szrj 266*38fd1498Szrj(define_peephole2 267*38fd1498Szrj [(set (match_operand:DF 0 "sse_reg_operand") 268*38fd1498Szrj (unspec:DF [(match_operand:DI 1 "memory_operand")] 269*38fd1498Szrj UNSPEC_LDX_ATOMIC)) 270*38fd1498Szrj (set (match_operand:DI 2 "memory_operand") 271*38fd1498Szrj (unspec:DI [(match_dup 0)] 272*38fd1498Szrj UNSPEC_STX_ATOMIC)) 273*38fd1498Szrj (set (mem:BLK (scratch:SI)) 274*38fd1498Szrj (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) 275*38fd1498Szrj (set (match_operand:DF 3 "any_fp_register_operand") 276*38fd1498Szrj (match_operand:DF 4 "memory_operand"))] 277*38fd1498Szrj "!TARGET_64BIT 278*38fd1498Szrj && peep2_reg_dead_p (2, operands[0]) 279*38fd1498Szrj && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" 280*38fd1498Szrj [(const_int 0)] 281*38fd1498Szrj{ 282*38fd1498Szrj emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1])); 283*38fd1498Szrj emit_insn (gen_memory_blockage ()); 284*38fd1498Szrj DONE; 285*38fd1498Szrj}) 286*38fd1498Szrj 287*38fd1498Szrj(define_expand "atomic_store<mode>" 288*38fd1498Szrj [(set (match_operand:ATOMIC 0 "memory_operand") 289*38fd1498Szrj (unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand") 290*38fd1498Szrj (match_operand:SI 2 "const_int_operand")] 291*38fd1498Szrj UNSPEC_STA))] 292*38fd1498Szrj "" 293*38fd1498Szrj{ 294*38fd1498Szrj enum memmodel model = memmodel_from_int (INTVAL (operands[2])); 295*38fd1498Szrj 296*38fd1498Szrj if (<MODE>mode == DImode && !TARGET_64BIT) 297*38fd1498Szrj { 298*38fd1498Szrj /* For DImode on 32-bit, we can use the FPU to perform the store. */ 299*38fd1498Szrj /* Note that while we could perform a cmpxchg8b loop, that turns 300*38fd1498Szrj out to be significantly larger than this plus a barrier. */ 301*38fd1498Szrj emit_insn (gen_atomic_storedi_fpu 302*38fd1498Szrj (operands[0], operands[1], 303*38fd1498Szrj assign_386_stack_local (DImode, SLOT_TEMP))); 304*38fd1498Szrj } 305*38fd1498Szrj else 306*38fd1498Szrj { 307*38fd1498Szrj operands[1] = force_reg (<MODE>mode, operands[1]); 308*38fd1498Szrj 309*38fd1498Szrj /* For seq-cst stores, when we lack MFENCE, use XCHG. 
      if (is_mm_seq_cst (model) && !(TARGET_64BIT || TARGET_SSE2))
	{
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a store.  */
      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
					   operands[2]));
    }
  /* ... followed by an MFENCE, if required.  */
  if (is_mm_seq_cst (model))
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})

(define_insn "atomic_store<mode>_1"
  [(set (match_operand:SWI 0 "memory_operand" "=m")
	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
		     (match_operand:SI 2 "const_int_operand")]
		    UNSPEC_STA))]
  ""
  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
		   UNSPEC_STA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (src))
    emit_move_insn (dst, src);
  else
    {
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (dst, tmp));
	}
    }
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (match_operand:DF 2 "fp_register_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_FIST_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (3, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(set (match_dup 5) (match_dup 1))]
  "operands[5] = gen_lowpart (DFmode, operands[4]);")

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 2 "fp_register_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_FIST_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (4, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(const_int 0)]
{
  emit_insn (gen_memory_blockage ());
  emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (match_operand:DF 2 "sse_reg_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_LDX_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_STX_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (3, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(set (match_dup 5) (match_dup 1))]
  "operands[5] = gen_lowpart (DFmode, operands[4]);")

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 2 "sse_reg_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_LDX_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_STX_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (4, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(const_int 0)]
{
  emit_insn (gen_memory_blockage ());
  emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
  DONE;
})

;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations. But the fix_trunc patterns want way more setup than we want
;; to provide. Note that the scratch is DFmode instead of XFmode in order
;; to make it easy to allocate a scratch in either SSE or FP_REGs above.
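
;; Editorial illustration (not part of the original sources), assuming a
;; 32-bit target with the x87 and cmpxchg8b but no SSE: an atomic 64-bit
;; load such as
;;
;;   long long
;;   get (long long *p)
;;   {
;;     return __atomic_load_n (p, __ATOMIC_RELAXED);
;;   }
;;
;; is expected to go through atomic_loaddi_fpu above, i.e. roughly an
;; fild from *p, an fistp into the SLOT_TEMP stack slot, and two 32-bit
;; integer loads from that slot, so that the memory access itself is a
;; single 64-bit FPU access.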

(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_FILD_ATOMIC))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
		   UNSPEC_FIST_ATOMIC))]
  "TARGET_80387"
{
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])

(define_insn "loaddi_via_sse"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_LDX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_insn "storedi_via_sse"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
		   UNSPEC_STX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:SWI124 1 "register_operand")	;; oldval output
   (match_operand:SWI124 2 "memory_operand")	;; memory
   (match_operand:SWI124 3 "register_operand")	;; expected input
   (match_operand:SWI124 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  emit_insn
    (gen_atomic_compare_and_swap<mode>_1
     (operands[1], operands[2], operands[3], operands[4], operands[6]));
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})

(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:CASMODE 1 "register_operand")	;; oldval output
1 "register_operand") ;; oldval output 527*38fd1498Szrj (match_operand:CASMODE 2 "memory_operand") ;; memory 528*38fd1498Szrj (match_operand:CASMODE 3 "register_operand") ;; expected input 529*38fd1498Szrj (match_operand:CASMODE 4 "register_operand") ;; newval input 530*38fd1498Szrj (match_operand:SI 5 "const_int_operand") ;; is_weak 531*38fd1498Szrj (match_operand:SI 6 "const_int_operand") ;; success model 532*38fd1498Szrj (match_operand:SI 7 "const_int_operand")] ;; failure model 533*38fd1498Szrj "TARGET_CMPXCHG" 534*38fd1498Szrj{ 535*38fd1498Szrj if (<MODE>mode == DImode && TARGET_64BIT) 536*38fd1498Szrj { 537*38fd1498Szrj emit_insn 538*38fd1498Szrj (gen_atomic_compare_and_swapdi_1 539*38fd1498Szrj (operands[1], operands[2], operands[3], operands[4], operands[6])); 540*38fd1498Szrj } 541*38fd1498Szrj else 542*38fd1498Szrj { 543*38fd1498Szrj machine_mode hmode = <CASHMODE>mode; 544*38fd1498Szrj 545*38fd1498Szrj emit_insn 546*38fd1498Szrj (gen_atomic_compare_and_swap<mode>_doubleword 547*38fd1498Szrj (operands[1], operands[2], operands[3], 548*38fd1498Szrj gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), 549*38fd1498Szrj operands[6])); 550*38fd1498Szrj } 551*38fd1498Szrj 552*38fd1498Szrj ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), 553*38fd1498Szrj const0_rtx); 554*38fd1498Szrj DONE; 555*38fd1498Szrj}) 556*38fd1498Szrj 557*38fd1498Szrj;; For double-word compare and swap, we are obliged to play tricks with 558*38fd1498Szrj;; the input newval (op3:op4) because the Intel register numbering does 559*38fd1498Szrj;; not match the gcc register numbering, so the pair must be CX:BX. 560*38fd1498Szrj 561*38fd1498Szrj(define_mode_attr doublemodesuffix [(SI "8") (DI "16")]) 562*38fd1498Szrj 563*38fd1498Szrj(define_insn "atomic_compare_and_swap<dwi>_doubleword" 564*38fd1498Szrj [(set (match_operand:<DWI> 0 "register_operand" "=A") 565*38fd1498Szrj (unspec_volatile:<DWI> 566*38fd1498Szrj [(match_operand:<DWI> 1 "memory_operand" "+m") 567*38fd1498Szrj (match_operand:<DWI> 2 "register_operand" "0") 568*38fd1498Szrj (match_operand:DWIH 3 "register_operand" "b") 569*38fd1498Szrj (match_operand:DWIH 4 "register_operand" "c") 570*38fd1498Szrj (match_operand:SI 5 "const_int_operand")] 571*38fd1498Szrj UNSPECV_CMPXCHG)) 572*38fd1498Szrj (set (match_dup 1) 573*38fd1498Szrj (unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG)) 574*38fd1498Szrj (set (reg:CCZ FLAGS_REG) 575*38fd1498Szrj (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))] 576*38fd1498Szrj "TARGET_CMPXCHG<doublemodesuffix>B" 577*38fd1498Szrj "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1") 578*38fd1498Szrj 579*38fd1498Szrj(define_insn "atomic_compare_and_swap<mode>_1" 580*38fd1498Szrj [(set (match_operand:SWI 0 "register_operand" "=a") 581*38fd1498Szrj (unspec_volatile:SWI 582*38fd1498Szrj [(match_operand:SWI 1 "memory_operand" "+m") 583*38fd1498Szrj (match_operand:SWI 2 "register_operand" "0") 584*38fd1498Szrj (match_operand:SWI 3 "register_operand" "<r>") 585*38fd1498Szrj (match_operand:SI 4 "const_int_operand")] 586*38fd1498Szrj UNSPECV_CMPXCHG)) 587*38fd1498Szrj (set (match_dup 1) 588*38fd1498Szrj (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG)) 589*38fd1498Szrj (set (reg:CCZ FLAGS_REG) 590*38fd1498Szrj (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))] 591*38fd1498Szrj "TARGET_CMPXCHG" 592*38fd1498Szrj "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}") 593*38fd1498Szrj 594*38fd1498Szrj;; For operand 2 nonmemory_operand predicate is used instead of 595*38fd1498Szrj;; 
;; additions of constants.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SI 3 "const_int_operand")]	;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(plus:SWI (match_dup 1)
		  (match_operand:SWI 2 "nonmemory_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_XADD"
  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")

;; This peephole2 and following insn optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
;; followed by testing of flags instead of lock xadd and comparisons.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 2 "const_int_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

;; Likewise, but for the -Os special case of *mov<mode>_or.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operand:SWI 2 "constm1_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec_volatile:SWI
	    [(match_operand:SWI 0 "memory_operand" "+m")
	     (match_operand:SI 3 "const_int_operand")]	;; model
	    UNSPECV_XCHG)
	  (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
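
;; Editorial illustration (not part of the original sources) of the idiom
;; the two peephole2s and *atomic_fetch_add_cmp<mode> above are after: a
;; reference-count release such as
;;
;;   int
;;   release (int *count)
;;   {
;;     return __sync_fetch_and_add (count, -1) == 1;
;;   }
;;
;; can, under these peepholes, drop the "lock xadd" plus separate compare
;; in favour of a single "lock dec" on the memory operand whose ZF result
;; feeds the comparison directly.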

;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")	;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2inc{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_<logic><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand")	;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]	;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand")	;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]	;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
0 "register_operand") 857*38fd1498Szrj (match_operand:SWI248 1 "memory_operand") 858*38fd1498Szrj (match_operand:SWI248 2 "nonmemory_operand") 859*38fd1498Szrj (match_operand:SI 3 "const_int_operand") ;; model 860*38fd1498Szrj (match_operand:SI 4 "const_int_operand")] 861*38fd1498Szrj "" 862*38fd1498Szrj{ 863*38fd1498Szrj emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2], 864*38fd1498Szrj operands[3])); 865*38fd1498Szrj rtx tem = gen_reg_rtx (QImode); 866*38fd1498Szrj ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); 867*38fd1498Szrj rtx result = convert_modes (<MODE>mode, QImode, tem, 1); 868*38fd1498Szrj if (operands[4] == const0_rtx) 869*38fd1498Szrj result = expand_simple_binop (<MODE>mode, ASHIFT, result, 870*38fd1498Szrj operands[2], operands[0], 0, OPTAB_DIRECT); 871*38fd1498Szrj if (result != operands[0]) 872*38fd1498Szrj emit_move_insn (operands[0], result); 873*38fd1498Szrj DONE; 874*38fd1498Szrj}) 875*38fd1498Szrj 876*38fd1498Szrj(define_insn "atomic_bit_test_and_reset<mode>_1" 877*38fd1498Szrj [(set (reg:CCC FLAGS_REG) 878*38fd1498Szrj (compare:CCC 879*38fd1498Szrj (unspec_volatile:SWI248 880*38fd1498Szrj [(match_operand:SWI248 0 "memory_operand" "+m") 881*38fd1498Szrj (match_operand:SI 2 "const_int_operand")] ;; model 882*38fd1498Szrj UNSPECV_XCHG) 883*38fd1498Szrj (const_int 0))) 884*38fd1498Szrj (set (zero_extract:SWI248 (match_dup 0) 885*38fd1498Szrj (const_int 1) 886*38fd1498Szrj (match_operand:SWI248 1 "nonmemory_operand" "rN")) 887*38fd1498Szrj (const_int 0))] 888*38fd1498Szrj "" 889*38fd1498Szrj "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}") 890