; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600 %s

define amdgpu_kernel void @xor_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: xor_v2i32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 1 @6
; R600-NEXT:    ALU 3, @12, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_64 T1.XY, T1.X, 0, #1
; R600-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 10:
; R600-NEXT:    MOV T0.X, KC0[2].Z,
; R600-NEXT:    MOV * T1.X, KC0[2].W,
; R600-NEXT:    ALU clause starting at 12:
; R600-NEXT:    XOR_INT * T0.Y, T0.Y, T1.Y,
; R600-NEXT:    XOR_INT T0.X, T0.X, T1.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load <2 x i32>, ptr addrspace(1) %in0
  %b = load <2 x i32>, ptr addrspace(1) %in1
  %result = xor <2 x i32> %a, %b
  store <2 x i32> %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @xor_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: xor_v4i32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 1 @6
; R600-NEXT:    ALU 5, @12, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_128 T1.XYZW, T1.X, 0, #1
; R600-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 10:
; R600-NEXT:    MOV T0.X, KC0[2].Z,
; R600-NEXT:    MOV * T1.X, KC0[2].W,
; R600-NEXT:    ALU clause starting at 12:
; R600-NEXT:    XOR_INT * T0.W, T0.W, T1.W,
; R600-NEXT:    XOR_INT * T0.Z, T0.Z, T1.Z,
; R600-NEXT:    XOR_INT * T0.Y, T0.Y, T1.Y,
; R600-NEXT:    XOR_INT T0.X, T0.X, T1.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load <4 x i32>, ptr addrspace(1) %in0
  %b = load <4 x i32>, ptr addrspace(1) %in1
  %result = xor <4 x i32> %a, %b
  store <4 x i32> %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @xor_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: xor_i1:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @8
; R600-NEXT:    ALU 0, @13, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @10
; R600-NEXT:    ALU 5, @14, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 8:
; R600-NEXT:    VTX_READ_32 T0.X, T0.X, 0, #1
; R600-NEXT:    Fetch clause starting at 10:
; R600-NEXT:    VTX_READ_32 T1.X, T1.X, 0, #1
; R600-NEXT:    ALU clause starting at 12:
; R600-NEXT:    MOV * T0.X, KC0[2].W,
; R600-NEXT:    ALU clause starting at 13:
; R600-NEXT:    MOV * T1.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 14:
; R600-NEXT:    SETGE_DX10 T0.W, T0.X, 1.0,
; R600-NEXT:    SETGE_DX10 * T1.W, T1.X, 0.0,
; R600-NEXT:    XOR_INT * T0.W, PS, PV.W,
; R600-NEXT:    CNDE_INT T0.X, PV.W, T0.X, T1.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load float, ptr addrspace(1) %in0
  %b = load float, ptr addrspace(1) %in1
  %acmp = fcmp oge float %a, 0.000000e+00
  %bcmp = fcmp oge float %b, 1.000000e+00
  %xor = xor i1 %acmp, %bcmp
  %result = select i1 %xor, float %a, float %b
  store float %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @v_xor_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: v_xor_i1:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @8
; R600-NEXT:    ALU 0, @13, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @10
; R600-NEXT:    ALU 12, @14, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 8:
; R600-NEXT:    VTX_READ_8 T0.X, T0.X, 0, #1
; R600-NEXT:    Fetch clause starting at 10:
; R600-NEXT:    VTX_READ_8 T1.X, T1.X, 0, #1
; R600-NEXT:    ALU clause starting at 12:
; R600-NEXT:    MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 13:
; R600-NEXT:    MOV * T1.X, KC0[2].W,
; R600-NEXT:    ALU clause starting at 14:
; R600-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; R600-NEXT:    XOR_INT * T1.W, T0.X, T1.X,
; R600-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; R600-NEXT:    AND_INT T1.W, PS, 1,
; R600-NEXT:    LSHL * T0.W, PV.W, literal.x,
; R600-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; R600-NEXT:    LSHL T0.X, PV.W, PS,
; R600-NEXT:    LSHL * T0.W, literal.x, PS,
; R600-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; R600-NEXT:    MOV T0.Y, 0.0,
; R600-NEXT:    MOV * T0.Z, 0.0,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load volatile i1, ptr addrspace(1) %in0
  %b = load volatile i1, ptr addrspace(1) %in1
  %xor = xor i1 %a, %b
  store i1 %xor, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: vector_xor_i32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 1 @6
; R600-NEXT:    ALU 2, @12, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_32 T1.X, T1.X, 0, #1
; R600-NEXT:    VTX_READ_32 T0.X, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 10:
; R600-NEXT:    MOV T0.X, KC0[2].Z,
; R600-NEXT:    MOV * T1.X, KC0[2].W,
; R600-NEXT:    ALU clause starting at 12:
; R600-NEXT:    XOR_INT T0.X, T0.X, T1.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i32, ptr addrspace(1) %in0
  %b = load i32, ptr addrspace(1) %in1
  %result = xor i32 %a, %b
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
; R600-LABEL: scalar_xor_i32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:    LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    NOT_INT * T1.X, KC0[2].Z,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = xor i32 %a, -1
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_not_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: vector_not_i32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @6
; R600-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_32 T0.X, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 8:
; R600-NEXT:    MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 9:
; R600-NEXT:    NOT_INT T0.X, T0.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i32, ptr addrspace(1) %in0
  %b = load i32, ptr addrspace(1) %in1
  %result = xor i32 %a, -1
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: vector_xor_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 1 @6
; R600-NEXT:    ALU 3, @12, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_64 T1.XY, T1.X, 0, #1
; R600-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 10:
; R600-NEXT:    MOV T0.X, KC0[2].Z,
; R600-NEXT:    MOV * T1.X, KC0[2].W,
; R600-NEXT:    ALU clause starting at 12:
; R600-NEXT:    XOR_INT * T0.Y, T0.Y, T1.Y,
; R600-NEXT:    XOR_INT T0.X, T0.X, T1.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i64, ptr addrspace(1) %in0
  %b = load i64, ptr addrspace(1) %in1
  %result = xor i64 %a, %b
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; R600-LABEL: scalar_xor_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:    XOR_INT * T0.Y, KC0[3].X, KC0[3].Z,
; R600-NEXT:    XOR_INT * T0.X, KC0[2].W, KC0[3].Y,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = xor i64 %a, %b
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_not_i64(ptr addrspace(1) %out, i64 %a) {
; R600-LABEL: scalar_not_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:    NOT_INT * T0.Y, KC0[3].X,
; R600-NEXT:    NOT_INT T0.X, KC0[2].W,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = xor i64 %a, -1
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_not_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; R600-LABEL: vector_not_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @6
; R600-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 8:
; R600-NEXT:    MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 9:
; R600-NEXT:    NOT_INT * T0.Y, T0.Y,
; R600-NEXT:    NOT_INT T0.X, T0.X,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i64, ptr addrspace(1) %in0
  %b = load i64, ptr addrspace(1) %in1
  %result = xor i64 %a, -1
  store i64 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @xor_cf(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %a, i64 %b) {
; R600-LABEL: xor_cf:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU_PUSH_BEFORE 4, @14, KC0[CB0:0-32], KC1[]
; R600-NEXT:    JUMP @5 POP:1
; R600-NEXT:    ALU 0, @19, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @12
; R600-NEXT:    ALU_POP_AFTER 1, @20, KC0[], KC1[]
; R600-NEXT:    ALU_PUSH_BEFORE 2, @22, KC0[CB0:0-32], KC1[]
; R600-NEXT:    JUMP @8 POP:1
; R600-NEXT:    ALU_POP_AFTER 5, @25, KC0[CB0:0-32], KC1[]
; R600-NEXT:    ALU 1, @31, KC0[], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 12:
; R600-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 14:
; R600-NEXT:    OR_INT T0.W, KC0[2].W, KC0[3].X,
; R600-NEXT:    MOV * T1.W, literal.x,
; R600-NEXT:    1(1.401298e-45), 0(0.000000e+00)
; R600-NEXT:    SETNE_INT * T0.W, PV.W, 0.0,
; R600-NEXT:    PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
; R600-NEXT:    ALU clause starting at 19:
; R600-NEXT:    MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 20:
; R600-NEXT:    MOV * T1.W, literal.x,
; R600-NEXT:    0(0.000000e+00), 0(0.000000e+00)
; R600-NEXT:    ALU clause starting at 22:
; R600-NEXT:    MOV T0.W, KC0[2].Y,
; R600-NEXT:    SETE_INT * T1.W, T1.W, 0.0,
; R600-NEXT:    PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
; R600-NEXT:    ALU clause starting at 25:
; R600-NEXT:    MOV T1.W, KC0[2].W,
; R600-NEXT:    MOV * T2.W, KC0[3].Y,
; R600-NEXT:    XOR_INT T0.X, PV.W, PS,
; R600-NEXT:    MOV T1.W, KC0[3].X,
; R600-NEXT:    MOV * T2.W, KC0[3].Z,
; R600-NEXT:    XOR_INT * T0.Y, PV.W, PS,
; R600-NEXT:    ALU clause starting at 31:
; R600-NEXT:    LSHR * T1.X, T0.W, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = xor i64 %a, %b
  br label %endif

else:
  %2 = load i64, ptr addrspace(1) %in
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_literal_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; R600-LABEL: scalar_xor_literal_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 4, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:    XOR_INT * T0.Y, KC0[5].X, literal.x,
; R600-NEXT:    992123(1.390260e-39), 0(0.000000e+00)
; R600-NEXT:    XOR_INT T0.X, KC0[4].W, literal.x,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT:    12345(1.729903e-41), 2(2.802597e-45)
  %or = xor i64 %a, 4261135838621753
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_literal_multi_use_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, i64 %b) {
; R600-LABEL: scalar_xor_literal_multi_use_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 12, @6, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T4.X, 0
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T2.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 6:
; R600-NEXT:    ADDC_UINT * T0.W, KC0[5].Y, literal.x,
; R600-NEXT:    12345(1.729903e-41), 0(0.000000e+00)
; R600-NEXT:    ADD_INT T0.X, KC0[5].Y, literal.x,
; R600-NEXT:    ADD_INT * T0.W, KC0[5].Z, PV.W,
; R600-NEXT:    12345(1.729903e-41), 0(0.000000e+00)
; R600-NEXT:    ADD_INT T1.X, PV.W, literal.x,
; R600-NEXT:    MOV * T2.X, literal.y,
; R600-NEXT:    992123(1.390260e-39), 0(0.000000e+00)
; R600-NEXT:    XOR_INT * T3.Y, KC0[5].X, literal.x,
; R600-NEXT:    992123(1.390260e-39), 0(0.000000e+00)
; R600-NEXT:    XOR_INT T3.X, KC0[4].W, literal.x,
; R600-NEXT:    LSHR * T4.X, KC0[2].Y, literal.y,
; R600-NEXT:    12345(1.729903e-41), 2(2.802597e-45)
  %or = xor i64 %a, 4261135838621753
  store i64 %or, ptr addrspace(1) %out

  %foo = add i64 %b, 4261135838621753
  store volatile i64 %foo, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @scalar_xor_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; R600-LABEL: scalar_xor_inline_imm_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:    MOV * T0.Y, KC0[5].X,
; R600-NEXT:    XOR_INT T0.X, KC0[4].W, literal.x,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT:    63(8.828180e-44), 2(2.802597e-45)
  %or = xor i64 %a, 63
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @scalar_xor_neg_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; R600-LABEL: scalar_xor_neg_inline_imm_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:    NOT_INT * T0.Y, KC0[5].X,
; R600-NEXT:    XOR_INT T0.X, KC0[4].W, literal.x,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT:    -8(nan), 2(2.802597e-45)
  %or = xor i64 %a, -8
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_i64_neg_inline_imm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; R600-LABEL: vector_xor_i64_neg_inline_imm:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @6
; R600-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 8:
; R600-NEXT:    MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 9:
; R600-NEXT:    NOT_INT * T0.Y, T0.Y,
; R600-NEXT:    XOR_INT T0.X, T0.X, literal.x,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT:    -8(nan), 2(2.802597e-45)
  %loada = load i64, ptr addrspace(1) %a, align 8
  %or = xor i64 %loada, -8
  store i64 %or, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @vector_xor_literal_i64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; R600-LABEL: vector_xor_literal_i64:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @6
; R600-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 8:
; R600-NEXT:    MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 9:
; R600-NEXT:    XOR_INT * T0.Y, T0.Y, literal.x,
; R600-NEXT:    5231(7.330192e-42), 0(0.000000e+00)
; R600-NEXT:    XOR_INT T0.X, T0.X, literal.x,
; R600-NEXT:    LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT:    -545810305(-1.784115e+19), 2(2.802597e-45)
  %loada = load i64, ptr addrspace(1) %a, align 8
  %or = xor i64 %loada, 22470723082367
  store i64 %or, ptr addrspace(1) %out
  ret void
}