; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-GISEL %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10-GISEL %s

; Reading guide: the first three invocations above use the default instruction
; selector (prefixes GFX7, GFX8, GFX10); the last two add -global-isel
; (prefixes GFX8-GISEL, GFX10-GISEL). Each function below spells a bit-select
; style expression in IR and the checks record whether the expected assembly
; uses v_bfi_b32 or a plain and/or/xor sequence. In the amdgpu_ps functions the
; name prefix (for example v_s_s_) lists, per argument, whether it is a plain
; argument (v) or an inreg argument (s); in the expected assembly the inreg
; arguments appear as s-registers and the others as v-registers.

; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
; Kernel form, result stored to %out. All five prefixes expect the scalar
; s_andn2_b32 / s_and_b32 / s_or_b32 sequence and no v_bfi_b32.
define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_def_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; GFX7-NEXT: s_load_dword s6, s[0:1], 0xd
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_andn2_b32 s6, s6, s4
; GFX7-NEXT: s_and_b32 s4, s5, s4
; GFX7-NEXT: s_or_b32 s4, s6, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_def_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_andn2_b32 s4, s4, s2
; GFX8-NEXT: s_and_b32 s2, s3, s2
; GFX8-NEXT: s_or_b32 s2, s4, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_def_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_andn2_b32 s4, s4, s2
; GFX10-NEXT: s_and_b32 s2, s3, s2
; GFX10-NEXT: s_or_b32 s2, s4, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_def_i32:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX8-GISEL-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_andn2_b32 s4, s4, s2
; GFX8-GISEL-NEXT: s_and_b32 s2, s3, s2
; GFX8-GISEL-NEXT: s_or_b32 s2, s4, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_def_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x2
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_andn2_b32 s4, s4, s2
; GFX10-GISEL-NEXT: s_and_b32 s2, s3, s2
; GFX10-GISEL-NEXT: s_or_b32 s2, s4, s2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; Same (y & x) | (z & ~x) expression in a non-kernel function that returns the
; value. All five prefixes expect a single v_bfi_b32 v0, v0, v1, v2.
define i32 @v_bfi_def_i32(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_def_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_def_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_def_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_def_i32:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_def_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  ret i32 %3
}

; SHA-256 Ch function
; z ^ (x & (y ^ z))
; Kernel form, result stored to %out. All five prefixes expect the scalar
; s_xor_b32 / s_and_b32 / s_xor_b32 sequence and no v_bfi_b32.
define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; GFX7-NEXT: s_load_dword s6, s[0:1], 0xd
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_xor_b32 s5, s5, s6
; GFX7-NEXT: s_and_b32 s4, s4, s5
; GFX7-NEXT: s_xor_b32 s4, s6, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_xor_b32 s3, s3, s4
; GFX8-NEXT: s_and_b32 s2, s2, s3
; GFX8-NEXT: s_xor_b32 s2, s4, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_xor_b32 s3, s3, s4
; GFX10-NEXT: s_and_b32 s2, s2, s3
; GFX10-NEXT: s_xor_b32 s2, s4, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX8-GISEL-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_xor_b32 s3, s3, s4
; GFX8-GISEL-NEXT: s_and_b32 s2, s2, s3
; GFX8-GISEL-NEXT: s_xor_b32 s2, s4, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x2
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_xor_b32 s3, s3, s4
; GFX10-GISEL-NEXT: s_and_b32 s2, s2, s3
; GFX10-GISEL-NEXT: s_xor_b32 s2, s4, s2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Ch expression in a non-kernel function returning the value. All five
; prefixes expect a single v_bfi_b32 v0, v0, v1, v2.
define i32 @v_bfi_sha256_ch(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  ret i32 %2
}

; Ch expression with %y and %z inreg. The GFX7 and GFX8 checks expect s1 to be
; copied into v1 before v_bfi_b32; the GFX10 checks use s0 and s1 directly in
; v_bfi_b32. Both -global-isel prefixes expect s_xor / v_and / v_xor instead of
; v_bfi_b32.
define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z) {
; GFX7-LABEL: v_s_s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_xor_b32 s0, s0, s1
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s1, v0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, s1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s1, v0
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch expression with %x and %z inreg. The GFX7 and GFX8 checks copy s1 into v1
; first; the GFX10 checks use both s-registers directly in v_bfi_b32. Both
; -global-isel prefixes expect v_xor / v_and / v_xor instead of v_bfi_b32.
define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z) {
; GFX7-LABEL: s_v_s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s1, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s1, v0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s1, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s1, v0
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch expression with %x and %y inreg. Every prefix expects v_bfi_b32 here; the
; GFX7, GFX8 and GFX8-GISEL checks copy one s-register into v1 first, while both
; gfx1031 prefixes use s0 and s1 directly.
define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
; GFX7-LABEL: s_s_v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_s_v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_bfi_b32 v0, s0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_s_v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, s0, s1, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s0
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v1, s1, v0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, s1, v0
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch expression with only %x inreg. All five prefixes expect a single
; v_bfi_b32 v0, s0, v0, v1.
define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_v_v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch expression with only %y inreg. All five prefixes expect a single
; v_bfi_b32 v0, v0, s0, v1.
define amdgpu_ps float @v_s_v_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 %z) {
; GFX7-LABEL: v_s_v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch expression with only %z inreg. The three default-selector prefixes expect
; v_bfi_b32 v0, v0, v1, s0; both -global-isel prefixes expect
; v_xor / v_and / v_xor instead.
define amdgpu_ps float @v_v_s_bfi_sha256_ch(i32 %x, i32 %y, i32 inreg %z) {
; GFX7-LABEL: v_v_s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_v_s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_v_s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_v_s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s0, v1
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_v_s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s0, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
; Kernel form, result stored to %out. All five prefixes expect a scalar
; sequence of two s_and_b32 and two s_or_b32 and no v_bfi_b32.
define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ma:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; GFX7-NEXT: s_load_dword s6, s[0:1], 0xd
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_and_b32 s7, s4, s6
; GFX7-NEXT: s_or_b32 s4, s4, s6
; GFX7-NEXT: s_and_b32 s4, s5, s4
; GFX7-NEXT: s_or_b32 s4, s7, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ma:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s5, s2, s4
; GFX8-NEXT: s_or_b32 s2, s2, s4
; GFX8-NEXT: s_and_b32 s2, s3, s2
; GFX8-NEXT: s_or_b32 s2, s5, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ma:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_or_b32 s5, s2, s4
; GFX10-NEXT: s_and_b32 s2, s2, s4
; GFX10-NEXT: s_and_b32 s3, s3, s5
; GFX10-NEXT: s_or_b32 s2, s2, s3
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ma:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX8-GISEL-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_and_b32 s5, s2, s4
; GFX8-GISEL-NEXT: s_or_b32 s2, s2, s4
; GFX8-GISEL-NEXT: s_and_b32 s2, s3, s2
; GFX8-GISEL-NEXT: s_or_b32 s2, s5, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ma:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x2
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_or_b32 s5, s2, s4
; GFX10-GISEL-NEXT: s_and_b32 s2, s2, s4
; GFX10-GISEL-NEXT: s_and_b32 s3, s3, s5
; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s3
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; Ma expression in a non-kernel function returning the value. All five
; prefixes expect v_xor_b32 of v0 and v1 followed by v_bfi_b32 v0, v0, v2, v1.
define i32 @v_bfi_sha256_ma(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_sha256_ma:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_sha256_ma:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_sha256_ma:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_sha256_ma:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ma:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  ret i32 %3
}

; <2 x i32> bit-select written as ((a ^ mask) & b) ^ mask. The three
; default-selector prefixes expect one v_bfi_b32 per element; both
; -global-isel prefixes expect v_xor / v_and / v_xor per element.
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
; GFX7-LABEL: v_bitselect_v2i32_pat1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_v2i32_pat1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX8-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_v2i32_pat1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
  %xor.0 = xor <2 x i32> %a, %mask
  %and = and <2 x i32> %xor.0, %b
  %bitselect = xor <2 x i32> %and, %mask
  ret <2 x i32> %bitselect
}

; i64 bit-select written as (a & b) | (~a & mask). The three default-selector
; prefixes expect two v_bfi_b32 (one per 32-bit half); both -global-isel
; prefixes expect separate v_and / v_xor with -1 / v_and / v_or per half.
define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX7-LABEL: v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX7-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX8-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX10-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_and_b32_e32 v2, v0, v2
; GFX8-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v4
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v5
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v6, -1, v0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v7, -1, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v6, v4
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v7, v5
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  ret i64 %bitselect
}

; i64 (a & b) | (~a & mask) with %b and %mask inreg. The GFX7 and GFX8 checks
; copy the mask halves into v2 before each v_bfi_b32; the GFX10 checks use two
; s-registers per v_bfi_b32. Both -global-isel prefixes expect
; v_and / v_xor / v_and / v_or per half and no v_bfi_b32.
define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 inreg %mask) {
; GFX7-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v2
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s2
; GFX10-NEXT: v_bfi_b32 v1, v1, s1, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v1
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s2, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s3, v1
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, -1, v0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v3, -1, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 (a & b) | (~a & mask) with %a and %mask inreg. The default-selector
; prefixes expect two v_bfi_b32 (GFX7 and GFX8 with a v_mov_b32 before each).
; Both -global-isel prefixes expect one 64-bit s_andn2_b64 plus v_and / v_or
; per half.
define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 inreg %mask) {
; GFX7-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v2
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s2
; GFX10-NEXT: v_bfi_b32 v1, s1, v1, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 (a & b) | (~a & mask) with %a and %b inreg. The default-selector prefixes
; expect two v_bfi_b32 (GFX7 and GFX8 with a v_mov_b32 before each). Both
; -global-isel prefixes expect 64-bit s_and_b64 and s_not_b64 plus
; v_and / v_or per half.
define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_0(i64 inreg %a, i64 inreg %b, i64 %mask) {
; GFX7-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, s1, v2, v1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, s0, v2, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, s1, v2, v1
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, s0, v2, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, s0, s2, v0
; GFX10-NEXT: v_bfi_b32 v1, s1, s3, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; GFX8-GISEL-NEXT: s_not_b64 s[0:1], s[0:1]
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s2, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s3, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_not_b64 s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s5, v1
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 (a & b) | (~a & mask) with only %mask inreg. The three default-selector
; prefixes expect two v_bfi_b32 with the s-register as the last operand; both
; -global-isel prefixes expect v_and / v_xor / v_and / v_or per half.
define amdgpu_ps <2 x float> @v_v_s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 inreg %mask) {
; GFX7-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX7-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX8-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX10-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_and_b32_e32 v2, v0, v2
; GFX8-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_xor_b32_e32 v4, -1, v0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v5, -1, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v4
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v5
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @v_s_v_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 %mask) {
; GFX7-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX10-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_and_b32_e32 v4, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v5, s1, v1
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_xor_b32_e32 v4, -1, v0
; GFX10-GISEL-NEXT: v_xor_b32_e32 v5, -1, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v4, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v5, v3
; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2
x float> %cast 969} 970 971define amdgpu_ps <2 x float> @s_v_v_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 %mask) { 972; GFX7-LABEL: s_v_v_bitselect_i64_pat_0: 973; GFX7: ; %bb.0: 974; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v3 975; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2 976; GFX7-NEXT: ; return to shader part epilog 977; 978; GFX8-LABEL: s_v_v_bitselect_i64_pat_0: 979; GFX8: ; %bb.0: 980; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v3 981; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2 982; GFX8-NEXT: ; return to shader part epilog 983; 984; GFX10-LABEL: s_v_v_bitselect_i64_pat_0: 985; GFX10: ; %bb.0: 986; GFX10-NEXT: v_bfi_b32 v0, s0, v0, v2 987; GFX10-NEXT: v_bfi_b32 v1, s1, v1, v3 988; GFX10-NEXT: ; return to shader part epilog 989; 990; GFX8-GISEL-LABEL: s_v_v_bitselect_i64_pat_0: 991; GFX8-GISEL: ; %bb.0: 992; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 993; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 994; GFX8-GISEL-NEXT: s_not_b64 s[0:1], s[0:1] 995; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v2 996; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v3 997; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 998; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 999; GFX8-GISEL-NEXT: ; return to shader part epilog 1000; 1001; GFX10-GISEL-LABEL: s_v_v_bitselect_i64_pat_0: 1002; GFX10-GISEL: ; %bb.0: 1003; GFX10-GISEL-NEXT: s_not_b64 s[2:3], s[0:1] 1004; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1005; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1006; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1007; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 1008; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1009; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1010; GFX10-GISEL-NEXT: ; return to shader part epilog 1011 %and0 = and i64 %a, %b 1012 %not.a = xor i64 %a, -1 1013 %and1 = and i64 %not.a, %mask 1014 %bitselect = or i64 %and0, %and1 1015 %cast = bitcast i64 %bitselect to <2 x float> 1016 ret <2 x float> %cast 1017} 1018 1019define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) { 1020; GFX7-LABEL: v_bitselect_i64_pat_1: 1021; GFX7: ; 
%bb.0: 1022; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1023; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5 1024; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4 1025; GFX7-NEXT: s_setpc_b64 s[30:31] 1026; 1027; GFX8-LABEL: v_bitselect_i64_pat_1: 1028; GFX8: ; %bb.0: 1029; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1030; GFX8-NEXT: v_bfi_b32 v1, v3, v1, v5 1031; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4 1032; GFX8-NEXT: s_setpc_b64 s[30:31] 1033; 1034; GFX10-LABEL: v_bitselect_i64_pat_1: 1035; GFX10: ; %bb.0: 1036; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1037; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1038; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 1039; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 1040; GFX10-NEXT: s_setpc_b64 s[30:31] 1041; 1042; GFX8-GISEL-LABEL: v_bitselect_i64_pat_1: 1043; GFX8-GISEL: ; %bb.0: 1044; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1045; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1046; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1047; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1048; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1049; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1050; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1051; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1052; 1053; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1: 1054; GFX10-GISEL: ; %bb.0: 1055; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1056; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1057; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1058; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1059; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1060; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1061; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1062; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1063; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1064 %xor.0 = xor i64 %a, %mask 1065 %and = and i64 %xor.0, %b 1066 %bitselect = xor i64 %and, %mask 1067 ret i64 %bitselect 1068} 1069 1070define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i64 inreg %mask) { 1071; GFX7-LABEL: 
v_s_s_bitselect_i64_pat_1: 1072; GFX7: ; %bb.0: 1073; GFX7-NEXT: v_mov_b32_e32 v2, s3 1074; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v2 1075; GFX7-NEXT: v_mov_b32_e32 v2, s2 1076; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2 1077; GFX7-NEXT: ; return to shader part epilog 1078; 1079; GFX8-LABEL: v_s_s_bitselect_i64_pat_1: 1080; GFX8: ; %bb.0: 1081; GFX8-NEXT: v_mov_b32_e32 v2, s3 1082; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v2 1083; GFX8-NEXT: v_mov_b32_e32 v2, s2 1084; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2 1085; GFX8-NEXT: ; return to shader part epilog 1086; 1087; GFX10-LABEL: v_s_s_bitselect_i64_pat_1: 1088; GFX10: ; %bb.0: 1089; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s2 1090; GFX10-NEXT: v_bfi_b32 v1, s1, v1, s3 1091; GFX10-NEXT: ; return to shader part epilog 1092; 1093; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_1: 1094; GFX8-GISEL: ; %bb.0: 1095; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1096; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1097; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1098; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1099; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1100; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1101; GFX8-GISEL-NEXT: ; return to shader part epilog 1102; 1103; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_1: 1104; GFX10-GISEL: ; %bb.0: 1105; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1106; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1107; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1108; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1109; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1110; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1111; GFX10-GISEL-NEXT: ; return to shader part epilog 1112 %xor.0 = xor i64 %a, %mask 1113 %and = and i64 %xor.0, %b 1114 %bitselect = xor i64 %and, %mask 1115 %cast = bitcast i64 %bitselect to <2 x float> 1116 ret <2 x float> %cast 1117} 1118 1119define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg %b, i64 %mask) { 1120; GFX7-LABEL: s_s_v_bitselect_i64_pat_1: 1121; GFX7: ; %bb.0: 1122; GFX7-NEXT: v_mov_b32_e32 v2, 
s1 1123; GFX7-NEXT: v_bfi_b32 v1, s3, v2, v1 1124; GFX7-NEXT: v_mov_b32_e32 v2, s0 1125; GFX7-NEXT: v_bfi_b32 v0, s2, v2, v0 1126; GFX7-NEXT: ; return to shader part epilog 1127; 1128; GFX8-LABEL: s_s_v_bitselect_i64_pat_1: 1129; GFX8: ; %bb.0: 1130; GFX8-NEXT: v_mov_b32_e32 v2, s1 1131; GFX8-NEXT: v_bfi_b32 v1, s3, v2, v1 1132; GFX8-NEXT: v_mov_b32_e32 v2, s0 1133; GFX8-NEXT: v_bfi_b32 v0, s2, v2, v0 1134; GFX8-NEXT: ; return to shader part epilog 1135; 1136; GFX10-LABEL: s_s_v_bitselect_i64_pat_1: 1137; GFX10: ; %bb.0: 1138; GFX10-NEXT: v_bfi_b32 v0, s2, s0, v0 1139; GFX10-NEXT: v_bfi_b32 v1, s3, s1, v1 1140; GFX10-NEXT: ; return to shader part epilog 1141; 1142; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1: 1143; GFX8-GISEL: ; %bb.0: 1144; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s0, v0 1145; GFX8-GISEL-NEXT: v_xor_b32_e32 v3, s1, v1 1146; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1147; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 1148; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v2, v0 1149; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1 1150; GFX8-GISEL-NEXT: ; return to shader part epilog 1151; 1152; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_1: 1153; GFX10-GISEL: ; %bb.0: 1154; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, s0, v0 1155; GFX10-GISEL-NEXT: v_xor_b32_e32 v3, s1, v1 1156; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1157; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 1158; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v2, v0 1159; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1 1160; GFX10-GISEL-NEXT: ; return to shader part epilog 1161 %xor.0 = xor i64 %a, %mask 1162 %and = and i64 %xor.0, %b 1163 %bitselect = xor i64 %and, %mask 1164 %cast = bitcast i64 %bitselect to <2 x float> 1165 ret <2 x float> %cast 1166} 1167 1168define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_1(i64 inreg %a, i64 %b, i64 inreg %mask) { 1169; GFX7-LABEL: s_v_s_bitselect_i64_pat_1: 1170; GFX7: ; %bb.0: 1171; GFX7-NEXT: v_mov_b32_e32 v2, s3 1172; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v2 1173; GFX7-NEXT: v_mov_b32_e32 v2, s2 
1174; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2 1175; GFX7-NEXT: ; return to shader part epilog 1176; 1177; GFX8-LABEL: s_v_s_bitselect_i64_pat_1: 1178; GFX8: ; %bb.0: 1179; GFX8-NEXT: v_mov_b32_e32 v2, s3 1180; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v2 1181; GFX8-NEXT: v_mov_b32_e32 v2, s2 1182; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2 1183; GFX8-NEXT: ; return to shader part epilog 1184; 1185; GFX10-LABEL: s_v_s_bitselect_i64_pat_1: 1186; GFX10: ; %bb.0: 1187; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s2 1188; GFX10-NEXT: v_bfi_b32 v1, v1, s1, s3 1189; GFX10-NEXT: ; return to shader part epilog 1190; 1191; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_1: 1192; GFX8-GISEL: ; %bb.0: 1193; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 1194; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1195; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1196; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1197; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1198; GFX8-GISEL-NEXT: ; return to shader part epilog 1199; 1200; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_1: 1201; GFX10-GISEL: ; %bb.0: 1202; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 1203; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1204; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1205; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1206; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1207; GFX10-GISEL-NEXT: ; return to shader part epilog 1208 %xor.0 = xor i64 %a, %mask 1209 %and = and i64 %xor.0, %b 1210 %bitselect = xor i64 %and, %mask 1211 %cast = bitcast i64 %bitselect to <2 x float> 1212 ret <2 x float> %cast 1213} 1214 1215define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) { 1216; GFX7-LABEL: v_bitselect_i64_pat_2: 1217; GFX7: ; %bb.0: 1218; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1219; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5 1220; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4 1221; GFX7-NEXT: s_setpc_b64 s[30:31] 1222; 1223; GFX8-LABEL: v_bitselect_i64_pat_2: 1224; GFX8: ; %bb.0: 1225; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1226; GFX8-NEXT: 
v_bfi_b32 v1, v3, v1, v5 1227; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4 1228; GFX8-NEXT: s_setpc_b64 s[30:31] 1229; 1230; GFX10-LABEL: v_bitselect_i64_pat_2: 1231; GFX10: ; %bb.0: 1232; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1233; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1234; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 1235; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 1236; GFX10-NEXT: s_setpc_b64 s[30:31] 1237; 1238; GFX8-GISEL-LABEL: v_bitselect_i64_pat_2: 1239; GFX8-GISEL: ; %bb.0: 1240; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1241; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1242; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1243; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1244; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1245; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1246; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1247; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1248; 1249; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2: 1250; GFX10-GISEL: ; %bb.0: 1251; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1252; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1253; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1254; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1255; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1256; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1257; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1258; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1259; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1260 %xor.0 = xor i64 %a, %mask 1261 %and = and i64 %xor.0, %b 1262 %bitselect = xor i64 %and, %mask 1263 ret i64 %bitselect 1264} 1265 1266define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) { 1267; GFX7-LABEL: v_bfi_sha256_ma_i64: 1268; GFX7: ; %bb.0: ; %entry 1269; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1270; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3 1271; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2 1272; GFX7-NEXT: v_bfi_b32 v1, v1, v5, v3 1273; GFX7-NEXT: v_bfi_b32 v0, v0, v4, v2 1274; GFX7-NEXT: s_setpc_b64 s[30:31] 1275; 1276; GFX8-LABEL: v_bfi_sha256_ma_i64: 1277; GFX8: 
; %bb.0: ; %entry 1278; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1279; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3 1280; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2 1281; GFX8-NEXT: v_bfi_b32 v1, v1, v5, v3 1282; GFX8-NEXT: v_bfi_b32 v0, v0, v4, v2 1283; GFX8-NEXT: s_setpc_b64 s[30:31] 1284; 1285; GFX10-LABEL: v_bfi_sha256_ma_i64: 1286; GFX10: ; %bb.0: ; %entry 1287; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1288; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1289; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 1290; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 1291; GFX10-NEXT: v_bfi_b32 v0, v0, v4, v2 1292; GFX10-NEXT: v_bfi_b32 v1, v1, v5, v3 1293; GFX10-NEXT: s_setpc_b64 s[30:31] 1294; 1295; GFX8-GISEL-LABEL: v_bfi_sha256_ma_i64: 1296; GFX8-GISEL: ; %bb.0: ; %entry 1297; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1298; GFX8-GISEL-NEXT: v_and_b32_e32 v6, v0, v4 1299; GFX8-GISEL-NEXT: v_and_b32_e32 v7, v1, v5 1300; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 1301; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v5 1302; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v2, v0 1303; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v3, v1 1304; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0 1305; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v7, v1 1306; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1307; 1308; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64: 1309; GFX10-GISEL: ; %bb.0: ; %entry 1310; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1311; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1312; GFX10-GISEL-NEXT: v_or_b32_e32 v6, v0, v4 1313; GFX10-GISEL-NEXT: v_or_b32_e32 v7, v1, v5 1314; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v4 1315; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v5 1316; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v2, v6 1317; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v3, v7 1318; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1319; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1320; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1321entry: 1322 %and0 = and i64 %x, %z 1323 %or0 = or i64 %x, %z 1324 %and1 = and i64 %y, %or0 1325 %or1 = or i64 
%and0, %and1 1326 ret i64 %or1 1327} 1328 1329define amdgpu_ps <2 x float> @v_s_s_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 inreg %z) { 1330; GFX7-LABEL: v_s_s_bfi_sha256_ma_i64: 1331; GFX7: ; %bb.0: ; %entry 1332; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1 1333; GFX7-NEXT: v_mov_b32_e32 v2, s1 1334; GFX7-NEXT: v_bfi_b32 v1, v1, s3, v2 1335; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 1336; GFX7-NEXT: v_mov_b32_e32 v2, s0 1337; GFX7-NEXT: v_bfi_b32 v0, v0, s2, v2 1338; GFX7-NEXT: ; return to shader part epilog 1339; 1340; GFX8-LABEL: v_s_s_bfi_sha256_ma_i64: 1341; GFX8: ; %bb.0: ; %entry 1342; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 1343; GFX8-NEXT: v_mov_b32_e32 v2, s1 1344; GFX8-NEXT: v_bfi_b32 v1, v1, s3, v2 1345; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1346; GFX8-NEXT: v_mov_b32_e32 v2, s0 1347; GFX8-NEXT: v_bfi_b32 v0, v0, s2, v2 1348; GFX8-NEXT: ; return to shader part epilog 1349; 1350; GFX10-LABEL: v_s_s_bfi_sha256_ma_i64: 1351; GFX10: ; %bb.0: ; %entry 1352; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 1353; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 1354; GFX10-NEXT: v_bfi_b32 v0, v0, s2, s0 1355; GFX10-NEXT: v_bfi_b32 v1, v1, s3, s1 1356; GFX10-NEXT: ; return to shader part epilog 1357; 1358; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64: 1359; GFX8-GISEL: ; %bb.0: ; %entry 1360; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s2, v0 1361; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s3, v1 1362; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s2, v0 1363; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s3, v1 1364; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1365; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1366; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 1367; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 1368; GFX8-GISEL-NEXT: ; return to shader part epilog 1369; 1370; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64: 1371; GFX10-GISEL: ; %bb.0: ; %entry 1372; GFX10-GISEL-NEXT: v_or_b32_e32 v2, s2, v0 1373; GFX10-GISEL-NEXT: v_or_b32_e32 v3, s3, v1 1374; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 1375; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s3, v1 1376; 
GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v2 1377; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v3 1378; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1379; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1380; GFX10-GISEL-NEXT: ; return to shader part epilog 1381entry: 1382 %and0 = and i64 %x, %z 1383 %or0 = or i64 %x, %z 1384 %and1 = and i64 %y, %or0 1385 %or1 = or i64 %and0, %and1 1386 %cast = bitcast i64 %or1 to <2 x float> 1387 ret <2 x float> %cast 1388} 1389 1390define amdgpu_ps <2 x float> @s_v_s_bfi_sha256_ma_i64(i64 inreg %x, i64 %y, i64 inreg %z) { 1391; GFX7-LABEL: s_v_s_bfi_sha256_ma_i64: 1392; GFX7: ; %bb.0: ; %entry 1393; GFX7-NEXT: v_xor_b32_e32 v2, s1, v1 1394; GFX7-NEXT: v_bfi_b32 v1, v2, s3, v1 1395; GFX7-NEXT: v_xor_b32_e32 v2, s0, v0 1396; GFX7-NEXT: v_bfi_b32 v0, v2, s2, v0 1397; GFX7-NEXT: ; return to shader part epilog 1398; 1399; GFX8-LABEL: s_v_s_bfi_sha256_ma_i64: 1400; GFX8: ; %bb.0: ; %entry 1401; GFX8-NEXT: v_xor_b32_e32 v2, s1, v1 1402; GFX8-NEXT: v_bfi_b32 v1, v2, s3, v1 1403; GFX8-NEXT: v_xor_b32_e32 v2, s0, v0 1404; GFX8-NEXT: v_bfi_b32 v0, v2, s2, v0 1405; GFX8-NEXT: ; return to shader part epilog 1406; 1407; GFX10-LABEL: s_v_s_bfi_sha256_ma_i64: 1408; GFX10: ; %bb.0: ; %entry 1409; GFX10-NEXT: v_xor_b32_e32 v2, s0, v0 1410; GFX10-NEXT: v_xor_b32_e32 v3, s1, v1 1411; GFX10-NEXT: v_bfi_b32 v0, v2, s2, v0 1412; GFX10-NEXT: v_bfi_b32 v1, v3, s3, v1 1413; GFX10-NEXT: ; return to shader part epilog 1414; 1415; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64: 1416; GFX8-GISEL: ; %bb.0: ; %entry 1417; GFX8-GISEL-NEXT: s_and_b64 s[4:5], s[0:1], s[2:3] 1418; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1419; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1420; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1421; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s4, v0 1422; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s5, v1 1423; GFX8-GISEL-NEXT: ; return to shader part epilog 1424; 1425; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64: 1426; GFX10-GISEL: ; %bb.0: ; %entry 1427; GFX10-GISEL-NEXT: 
s_or_b64 s[4:5], s[0:1], s[2:3] 1428; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1429; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s4, v0 1430; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s5, v1 1431; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0 1432; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1 1433; GFX10-GISEL-NEXT: ; return to shader part epilog 1434entry: 1435 %and0 = and i64 %x, %z 1436 %or0 = or i64 %x, %z 1437 %and1 = and i64 %y, %or0 1438 %or1 = or i64 %and0, %and1 1439 %cast = bitcast i64 %or1 to <2 x float> 1440 ret <2 x float> %cast 1441} 1442 1443define amdgpu_ps <2 x float> @s_s_v_bfi_sha256_ma_i64(i64 inreg %x, i64 inreg %y, i64 %z) { 1444; GFX7-LABEL: s_s_v_bfi_sha256_ma_i64: 1445; GFX7: ; %bb.0: ; %entry 1446; GFX7-NEXT: v_mov_b32_e32 v2, s3 1447; GFX7-NEXT: v_xor_b32_e32 v2, s1, v2 1448; GFX7-NEXT: v_bfi_b32 v1, v2, v1, s3 1449; GFX7-NEXT: v_mov_b32_e32 v2, s2 1450; GFX7-NEXT: v_xor_b32_e32 v2, s0, v2 1451; GFX7-NEXT: v_bfi_b32 v0, v2, v0, s2 1452; GFX7-NEXT: ; return to shader part epilog 1453; 1454; GFX8-LABEL: s_s_v_bfi_sha256_ma_i64: 1455; GFX8: ; %bb.0: ; %entry 1456; GFX8-NEXT: v_mov_b32_e32 v2, s3 1457; GFX8-NEXT: v_xor_b32_e32 v2, s1, v2 1458; GFX8-NEXT: v_bfi_b32 v1, v2, v1, s3 1459; GFX8-NEXT: v_mov_b32_e32 v2, s2 1460; GFX8-NEXT: v_xor_b32_e32 v2, s0, v2 1461; GFX8-NEXT: v_bfi_b32 v0, v2, v0, s2 1462; GFX8-NEXT: ; return to shader part epilog 1463; 1464; GFX10-LABEL: s_s_v_bfi_sha256_ma_i64: 1465; GFX10: ; %bb.0: ; %entry 1466; GFX10-NEXT: v_xor_b32_e64 v2, s0, s2 1467; GFX10-NEXT: v_xor_b32_e64 v3, s1, s3 1468; GFX10-NEXT: v_bfi_b32 v0, v2, v0, s2 1469; GFX10-NEXT: v_bfi_b32 v1, v3, v1, s3 1470; GFX10-NEXT: ; return to shader part epilog 1471; 1472; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64: 1473; GFX8-GISEL: ; %bb.0: ; %entry 1474; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v0 1475; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v1 1476; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s0, v0 1477; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s1, v1 1478; GFX8-GISEL-NEXT: v_and_b32_e32 
v0, s2, v0 1479; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s3, v1 1480; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 1481; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 1482; GFX8-GISEL-NEXT: ; return to shader part epilog 1483; 1484; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64: 1485; GFX10-GISEL: ; %bb.0: ; %entry 1486; GFX10-GISEL-NEXT: v_or_b32_e32 v2, s0, v0 1487; GFX10-GISEL-NEXT: v_or_b32_e32 v3, s1, v1 1488; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1489; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1490; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1491; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 1492; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1493; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1494; GFX10-GISEL-NEXT: ; return to shader part epilog 1495entry: 1496 %and0 = and i64 %x, %z 1497 %or0 = or i64 %x, %z 1498 %and1 = and i64 %y, %or0 1499 %or1 = or i64 %and0, %and1 1500 %cast = bitcast i64 %or1 to <2 x float> 1501 ret <2 x float> %cast 1502} 1503 1504define amdgpu_ps <2 x float> @v_s_v_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 %z) { 1505; GFX7-LABEL: v_s_v_bfi_sha256_ma_i64: 1506; GFX7: ; %bb.0: ; %entry 1507; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1 1508; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 1509; GFX7-NEXT: v_bfi_b32 v1, v1, v3, s1 1510; GFX7-NEXT: v_bfi_b32 v0, v0, v2, s0 1511; GFX7-NEXT: ; return to shader part epilog 1512; 1513; GFX8-LABEL: v_s_v_bfi_sha256_ma_i64: 1514; GFX8: ; %bb.0: ; %entry 1515; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 1516; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1517; GFX8-NEXT: v_bfi_b32 v1, v1, v3, s1 1518; GFX8-NEXT: v_bfi_b32 v0, v0, v2, s0 1519; GFX8-NEXT: ; return to shader part epilog 1520; 1521; GFX10-LABEL: v_s_v_bfi_sha256_ma_i64: 1522; GFX10: ; %bb.0: ; %entry 1523; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 1524; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 1525; GFX10-NEXT: v_bfi_b32 v0, v0, v2, s0 1526; GFX10-NEXT: v_bfi_b32 v1, v1, v3, s1 1527; GFX10-NEXT: ; return to shader part epilog 1528; 1529; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64: 1530; 
GFX8-GISEL: ; %bb.0: ; %entry 1531; GFX8-GISEL-NEXT: v_and_b32_e32 v4, v0, v2 1532; GFX8-GISEL-NEXT: v_and_b32_e32 v5, v1, v3 1533; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1534; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1535; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1536; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1537; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 1538; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1 1539; GFX8-GISEL-NEXT: ; return to shader part epilog 1540; 1541; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64: 1542; GFX10-GISEL: ; %bb.0: ; %entry 1543; GFX10-GISEL-NEXT: v_or_b32_e32 v4, v0, v2 1544; GFX10-GISEL-NEXT: v_or_b32_e32 v5, v1, v3 1545; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1546; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1547; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v4 1548; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v5 1549; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1550; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1551; GFX10-GISEL-NEXT: ; return to shader part epilog 1552entry: 1553 %and0 = and i64 %x, %z 1554 %or0 = or i64 %x, %z 1555 %and1 = and i64 %y, %or0 1556 %or1 = or i64 %and0, %and1 1557 %cast = bitcast i64 %or1 to <2 x float> 1558 ret <2 x float> %cast 1559} 1560 1561define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) { 1562; GFX7-LABEL: s_bitselect_i64_pat_0: 1563; GFX7: ; %bb.0: 1564; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1565; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 1566; GFX7-NEXT: s_mov_b32 s3, 0xf000 1567; GFX7-NEXT: s_mov_b32 s2, -1 1568; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1569; GFX7-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 1570; GFX7-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1571; GFX7-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] 1572; GFX7-NEXT: s_add_u32 s0, s0, 10 1573; GFX7-NEXT: s_addc_u32 s1, s1, 0 1574; GFX7-NEXT: v_mov_b32_e32 v0, s0 1575; GFX7-NEXT: v_mov_b32_e32 v1, s1 1576; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1577; GFX7-NEXT: s_endpgm 1578; 1579; GFX8-LABEL: 
s_bitselect_i64_pat_0: 1580; GFX8: ; %bb.0: 1581; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1582; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1583; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1584; GFX8-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1585; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1586; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1587; GFX8-NEXT: s_add_u32 s0, s0, 10 1588; GFX8-NEXT: s_addc_u32 s1, s1, 0 1589; GFX8-NEXT: v_mov_b32_e32 v0, s0 1590; GFX8-NEXT: v_mov_b32_e32 v1, s1 1591; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1592; GFX8-NEXT: s_endpgm 1593; 1594; GFX10-LABEL: s_bitselect_i64_pat_0: 1595; GFX10: ; %bb.0: 1596; GFX10-NEXT: s_clause 0x1 1597; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1598; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1599; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1600; GFX10-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1601; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1602; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1603; GFX10-NEXT: s_add_u32 s0, s0, 10 1604; GFX10-NEXT: s_addc_u32 s1, s1, 0 1605; GFX10-NEXT: v_mov_b32_e32 v0, s0 1606; GFX10-NEXT: v_mov_b32_e32 v1, s1 1607; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1608; GFX10-NEXT: s_endpgm 1609; 1610; GFX8-GISEL-LABEL: s_bitselect_i64_pat_0: 1611; GFX8-GISEL: ; %bb.0: 1612; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1613; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1614; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1615; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1616; GFX8-GISEL-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1617; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1618; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1619; GFX8-GISEL-NEXT: s_cselect_b32 s2, 1, 0 1620; GFX8-GISEL-NEXT: s_and_b32 s2, s2, 1 1621; GFX8-GISEL-NEXT: s_cmp_lg_u32 s2, 0 1622; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1623; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1624; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1625; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1626; 
GFX8-GISEL-NEXT: s_endpgm 1627; 1628; GFX10-GISEL-LABEL: s_bitselect_i64_pat_0: 1629; GFX10-GISEL: ; %bb.0: 1630; GFX10-GISEL-NEXT: s_clause 0x1 1631; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1632; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1633; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1634; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1635; GFX10-GISEL-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1636; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1637; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1638; GFX10-GISEL-NEXT: s_cselect_b32 s2, 1, 0 1639; GFX10-GISEL-NEXT: s_and_b32 s2, s2, 1 1640; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0 1641; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0 1642; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1643; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 1644; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1645; GFX10-GISEL-NEXT: s_endpgm 1646 %and0 = and i64 %a, %b 1647 %not.a = xor i64 %a, -1 1648 %and1 = and i64 %not.a, %mask 1649 %bitselect = or i64 %and0, %and1 1650 %scalar.use = add i64 %bitselect, 10 1651 store i64 %scalar.use, i64 addrspace(1)* undef 1652 ret void 1653} 1654 1655define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) { 1656; GFX7-LABEL: s_bitselect_i64_pat_1: 1657; GFX7: ; %bb.0: 1658; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1659; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 1660; GFX7-NEXT: s_mov_b32 s3, 0xf000 1661; GFX7-NEXT: s_mov_b32 s2, -1 1662; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1663; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1] 1664; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 1665; GFX7-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] 1666; GFX7-NEXT: s_add_u32 s0, s0, 10 1667; GFX7-NEXT: s_addc_u32 s1, s1, 0 1668; GFX7-NEXT: v_mov_b32_e32 v0, s0 1669; GFX7-NEXT: v_mov_b32_e32 v1, s1 1670; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1671; GFX7-NEXT: s_endpgm 1672; 1673; GFX8-LABEL: s_bitselect_i64_pat_1: 1674; GFX8: ; %bb.0: 1675; GFX8-NEXT: s_load_dwordx4 s[4:7], 
s[0:1], 0x24 1676; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1677; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1678; GFX8-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1679; GFX8-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1680; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1681; GFX8-NEXT: s_add_u32 s0, s0, 10 1682; GFX8-NEXT: s_addc_u32 s1, s1, 0 1683; GFX8-NEXT: v_mov_b32_e32 v0, s0 1684; GFX8-NEXT: v_mov_b32_e32 v1, s1 1685; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1686; GFX8-NEXT: s_endpgm 1687; 1688; GFX10-LABEL: s_bitselect_i64_pat_1: 1689; GFX10: ; %bb.0: 1690; GFX10-NEXT: s_clause 0x1 1691; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1692; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1693; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1694; GFX10-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1695; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1696; GFX10-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1697; GFX10-NEXT: s_add_u32 s0, s0, 10 1698; GFX10-NEXT: s_addc_u32 s1, s1, 0 1699; GFX10-NEXT: v_mov_b32_e32 v0, s0 1700; GFX10-NEXT: v_mov_b32_e32 v1, s1 1701; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1702; GFX10-NEXT: s_endpgm 1703; 1704; GFX8-GISEL-LABEL: s_bitselect_i64_pat_1: 1705; GFX8-GISEL: ; %bb.0: 1706; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1707; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1708; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1709; GFX8-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1710; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1711; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1712; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1713; GFX8-GISEL-NEXT: s_cselect_b32 s2, 1, 0 1714; GFX8-GISEL-NEXT: s_and_b32 s2, s2, 1 1715; GFX8-GISEL-NEXT: s_cmp_lg_u32 s2, 0 1716; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1717; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1718; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1719; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1720; GFX8-GISEL-NEXT: s_endpgm 1721; 1722; GFX10-GISEL-LABEL: s_bitselect_i64_pat_1: 1723; 
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_clause 0x1
; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-GISEL-NEXT:    s_endpgm
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}

; Scalar 64-bit bitselect written as ((a ^ mask) & b) ^ mask.
; The result is consumed by a scalar add of 10 before being stored, and all
; five run configurations expect the s_xor_b64 / s_and_b64 / s_xor_b64
; sequence to be kept as-is (no BFI instruction appears in the output).
; The GlobalISel runs additionally rebuild the carry with
; s_cselect_b32 / s_and_b32 / s_cmp_lg_u32 ahead of s_addc_u32.
; NOTE(review): the instruction sequence below is identical to the body of
; s_bitselect_i64_pat_1; presumably a different operand arrangement was
; intended for this variant - confirm before relying on it as extra coverage.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
;
define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; GFX7-LABEL: s_bitselect_i64_pat_2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], s[0:1]
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX7-NEXT:    s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX7-NEXT:    s_add_u32 s0, s0, 10
; GFX7-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: s_bitselect_i64_pat_2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT:    s_add_u32 s0, s0, 10
; GFX8-NEXT:    s_addc_u32 s1, s1, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: s_bitselect_i64_pat_2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-NEXT:    s_add_u32 s0, s0, 10
; GFX10-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX8-GISEL-LABEL: s_bitselect_i64_pat_2:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-GISEL-NEXT:    s_endpgm
;
; GFX10-GISEL-LABEL: s_bitselect_i64_pat_2:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_clause 0x1
; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-GISEL-NEXT:    s_endpgm
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}

; Scalar 64-bit majority-style pattern: (x & z) | (y & (x | z)).
; As with the bitselect tests, the result feeds a scalar add of 10 and is then
; stored. Every run configuration expects two s_and_b64 and two s_or_b64
; instructions (their order differs between targets) followed by the
; s_add_u32 / s_addc_u32 pair; no BFI instruction appears in the output.
; The basic-block label "entry" is echoed in the expected "; %entry" comment,
; so renaming it requires regenerating the assertions.
;
define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; GFX7-LABEL: s_bfi_sha256_ma_i64:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_and_b64 s[8:9], s[4:5], s[0:1]
; GFX7-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX7-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX7-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GFX7-NEXT:    s_add_u32 s0, s0, 10
; GFX7-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ma_i64:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX8-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT:    s_add_u32 s0, s0, 10
; GFX8-NEXT:    s_addc_u32 s1, s1, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ma_i64:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_or_b64 s[2:3], s[4:5], s[0:1]
; GFX10-NEXT:    s_and_b64 s[0:1], s[4:5], s[0:1]
; GFX10-NEXT:    s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT:    s_add_u32 s0, s0, 10
; GFX10-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ma_i64:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX8-GISEL-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-GISEL-NEXT:    s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ma_i64:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_clause 0x1
; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_or_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[4:5], s[0:1]
; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX10-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-GISEL-NEXT:    s_endpgm
entry:
  %and0 = and i64 %x, %z
  %or0 = or i64 %x, %z
  %and1 = and i64 %y, %or0
  %or1 = or i64 %and0, %and1

  %scalar.use = add i64 %or1, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}