; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-GISEL %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10-GISEL %s

; Overview: each function below feeds one bit-select style expression to llc
; and pins the assembly produced by the five run lines above (three default
; instruction-selector runs and two -global-isel runs). Function names that
; start with a v/s triple (for example v_s_s_...) spell out, in argument
; order, which arguments are plain ("v") and which are marked inreg ("s").

; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
; Kernel form: the three i32 kernel arguments are combined as
; (z & ~x) | (y & x) and the result is stored through %out. All five runs are
; expected to stay on scalar s_andn2_b32 / s_and_b32 / s_or_b32 here; no
; v_bfi_b32 appears in the expectations.
define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_def_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_andn2_b32 s2, s2, s0
; GFX7-NEXT: s_and_b32 s0, s1, s0
; GFX7-NEXT: s_or_b32 s0, s2, s0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_def_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_andn2_b32 s2, s2, s0
; GFX8-NEXT: s_and_b32 s0, s1, s0
; GFX8-NEXT: s_or_b32 s0, s2, s0
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_def_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_andn2_b32 s2, s2, s0
; GFX10-NEXT: s_and_b32 s0, s1, s0
; GFX10-NEXT: s_or_b32 s0, s2, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s0
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_def_i32:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-GISEL-NEXT: s_load_dword s4, s[4:5], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: s_andn2_b32 s4, s4, s2
; GFX8-GISEL-NEXT: s_and_b32 s2, s3, s2
; GFX8-GISEL-NEXT: s_or_b32 s2, s4, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_def_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-GISEL-NEXT: s_load_dword s4, s[4:5], 0x34
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_andn2_b32 s4, s4, s2
; GFX10-GISEL-NEXT: s_and_b32 s2, s3, s2
; GFX10-GISEL-NEXT: s_or_b32 s2, s4, s2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  store i32 %3, ptr addrspace(1) %out
  ret void
}

; Same (z & ~x) | (y & x) expression, but on the arguments of an ordinary
; function that returns the i32 result. Every run is expected to fold the
; whole expression into a single v_bfi_b32 v0, v0, v1, v2.
define i32 @v_bfi_def_i32(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_def_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_def_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_def_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_def_i32:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_def_i32:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  ret i32 %3
}

; SHA-256 Ch function
; z ^ (x & (y ^ z))
; Kernel form: the result is stored through %out. All five runs are expected
; to use scalar s_xor_b32 / s_and_b32 / s_xor_b32 rather than v_bfi_b32.
define amdgpu_kernel void @s_bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_xor_b32 s1, s1, s2
; GFX7-NEXT: s_and_b32 s0, s0, s1
; GFX7-NEXT: s_xor_b32 s0, s2, s0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_xor_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s0, s0, s1
; GFX8-NEXT: s_xor_b32 s0, s2, s0
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_xor_b32 s1, s1, s2
; GFX10-NEXT: s_and_b32 s0, s0, s1
; GFX10-NEXT: s_xor_b32 s0, s2, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s0
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-GISEL-NEXT: s_load_dword s4, s[4:5], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: s_xor_b32 s3, s3, s4
; GFX8-GISEL-NEXT: s_and_b32 s2, s2, s3
; GFX8-GISEL-NEXT: s_xor_b32 s2, s4, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-GISEL-NEXT: s_load_dword s4, s[4:5], 0x34
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_xor_b32 s3, s3, s4
; GFX10-GISEL-NEXT: s_and_b32 s2, s2, s3
; GFX10-GISEL-NEXT: s_xor_b32 s2, s4, s2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  store i32 %2, ptr addrspace(1) %out
  ret void
}

; SHA-256 Ch expression, z ^ (x & (y ^ z)), on ordinary function arguments
; with the i32 result returned. Every run is expected to emit a single
; v_bfi_b32 v0, v0, v1, v2.
define i32 @v_bfi_sha256_ch(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  ret i32 %2
}

; amdgpu_ps variant of the Ch expression with %y and %z marked inreg; the
; result is bitcast to float for the return. The tahiti and tonga
; expectations (both selectors) first copy s0 into v1 with v_mov_b32, while
; the gfx1031 expectations pass s0 and s1 straight to v_bfi_b32.
define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z) {
; GFX7-LABEL: v_s_s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, s1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v1, s0
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s0
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v1, s1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, s1
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; amdgpu_ps variant of the Ch expression with %x and %z marked inreg. The
; tahiti/tonga expectations copy one of the two scalar inputs into v1 first
; (s0 for the default selector, s1 for GlobalISel on tonga); the gfx1031
; expectations use both scalar registers directly.
define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z) {
; GFX7-LABEL: s_v_s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: v_bfi_b32 v0, v1, v0, s1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v1, s0
; GFX8-NEXT: v_bfi_b32 v0, v1, v0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, v0, s1
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; amdgpu_ps variant of the Ch expression with %x and %y marked inreg. The
; tahiti/tonga expectations copy s0 into v1 before the v_bfi_b32; the gfx1031
; expectations use s0 and s1 directly.
define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
; GFX7-LABEL: s_s_v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: v_bfi_b32 v0, v1, s1, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_s_v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v1, s0
; GFX8-NEXT: v_bfi_b32 v0, v1, s1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_s_v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, s0, s1, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s0
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v1, s1, v0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, s1, v0
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; amdgpu_ps variant of the Ch expression with only %x marked inreg. With a
; single scalar input, every run is expected to emit just
; v_bfi_b32 v0, s0, v0, v1 with no extra copy.
define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_v_v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; amdgpu_ps variant of the Ch expression with only %y marked inreg. Every run
; is expected to emit just v_bfi_b32 v0, v0, s0, v1.
define amdgpu_ps float @v_s_v_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 %z) {
; GFX7-LABEL: v_s_v_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_v_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_v_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v1
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; amdgpu_ps variant of the Ch expression with only %z marked inreg. Every run
; is expected to emit just v_bfi_b32 v0, v0, v1, s0.
define amdgpu_ps float @v_v_s_bfi_sha256_ch(i32 %x, i32 %y, i32 inreg %z) {
; GFX7-LABEL: v_v_s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_v_s_bfi_sha256_ch:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_v_s_bfi_sha256_ch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_v_s_bfi_sha256_ch:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_v_s_bfi_sha256_ch:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v1, s0
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
; Kernel form: the result is stored through %out. All five runs are expected
; to use two s_and_b32 and two s_or_b32 instructions; only their order
; differs between the tahiti/tonga and gfx1031 expectations.
define amdgpu_kernel void @s_bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ma:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_and_b32 s3, s0, s2
; GFX7-NEXT: s_or_b32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s0, s1, s0
; GFX7-NEXT: s_or_b32 s0, s3, s0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ma:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s3, s0, s2
; GFX8-NEXT: s_or_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s0, s1, s0
; GFX8-NEXT: s_or_b32 s0, s3, s0
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ma:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_or_b32 s3, s0, s2
; GFX10-NEXT: s_and_b32 s0, s0, s2
; GFX10-NEXT: s_and_b32 s1, s1, s3
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: v_mov_b32_e32 v1, s0
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ma:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-GISEL-NEXT: s_load_dword s4, s[4:5], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: s_and_b32 s5, s2, s4
; GFX8-GISEL-NEXT: s_or_b32 s2, s2, s4
; GFX8-GISEL-NEXT: s_and_b32 s2, s3, s2
; GFX8-GISEL-NEXT: s_or_b32 s2, s5, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ma:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-GISEL-NEXT: s_load_dword s4, s[4:5], 0x34
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_or_b32 s5, s2, s4
; GFX10-GISEL-NEXT: s_and_b32 s2, s2, s4
; GFX10-GISEL-NEXT: s_and_b32 s3, s3, s5
; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s3
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  store i32 %3, ptr addrspace(1) %out
  ret void
}

; SHA-256 Ma expression, (x & z) | (y & (x | z)), on ordinary function
; arguments with the i32 result returned. Every run is expected to emit a
; v_xor_b32 of v0 and v1 followed by v_bfi_b32 v0, v0, v2, v1.
define i32 @v_bfi_sha256_ma(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_sha256_ma:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_sha256_ma:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_sha256_ma:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_sha256_ma:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ma:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  ret i32 %3
}

; Vector form of the xor-based bit-select, ((a ^ mask) & b) ^ mask, on
; <2 x i32>. Every run is expected to emit one v_bfi_b32 per element, with
; the %b register as the first source operand.
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
; GFX7-LABEL: v_bitselect_v2i32_pat1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_v2i32_pat1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX8-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_v2i32_pat1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
  %xor.0 = xor <2 x i32> %a, %mask
  %and = and <2 x i32> %xor.0, %b
  %bitselect = xor <2 x i32> %and, %mask
  ret <2 x i32> %bitselect
}

; 64-bit and/or form of the bit-select: (a & b) | (~a & mask). Every run is
; expected to emit two v_bfi_b32 instructions, one for each 32-bit half; the
; tahiti/tonga default-selector expectations list the v1 (upper) half first.
define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX7-LABEL: v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX7-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX8-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX10-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  ret i64 %bitselect
}

; amdgpu_ps variant of the i64 and/or bit-select with %b and %mask marked
; inreg; the result is bitcast to <2 x float> for the return. All runs are
; expected to use v_bfi_b32 per half. The tahiti/tonga expectations add a
; v_mov_b32 copy of one scalar input before each v_bfi_b32, while the gfx1031
; expectations use the scalar registers directly.
define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 inreg %mask) {
; GFX7-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v2
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s2
; GFX10-NEXT: v_bfi_b32 v1, v1, s1, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v2, s3
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, s2
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, s1, s3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; amdgpu_ps variant of the i64 and/or bit-select with %a and %mask marked
; inreg. The default-selector expectations use v_bfi_b32 per half. The
; GlobalISel expectations contain no v_bfi_b32: they pair s_andn2_b64 with
; v_and/v_or on tonga and with v_and_or_b32 on gfx1031.
define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 inreg %mask) {
; GFX7-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v2
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s2
; GFX10-NEXT: v_bfi_b32 v1, s1, v1, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: v_and_or_b32 v0, s0, v0, s2
; GFX10-GISEL-NEXT: v_and_or_b32 v1, s1, v1, s3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; amdgpu_ps variant of the i64 and/or bit-select with %a and %b marked inreg.
; The default-selector expectations use v_bfi_b32 per half. The GlobalISel
; expectations contain no v_bfi_b32: they use s_and_b64 and s_not_b64
; followed by v_and/v_or on tonga or v_and_or_b32 on gfx1031.
define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_0(i64 inreg %a, i64 inreg %b, i64 %mask) {
; GFX7-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, s1, v2, v1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, s0, v2, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, s1, v2, v1
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, s0, v2, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, s0, s2, v0
; GFX10-NEXT: v_bfi_b32 v1, s1, s3, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; GFX8-GISEL-NEXT: s_not_b64 s[0:1], s[0:1]
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s2, v0
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s3, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: s_not_b64 s[0:1], s[0:1]
; GFX10-GISEL-NEXT: v_and_or_b32 v0, s0, v0, s2
; GFX10-GISEL-NEXT: v_and_or_b32 v1, s1, v1, s3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; amdgpu_ps variant of the i64 and/or bit-select with only %mask marked
; inreg. Every run is expected to emit two v_bfi_b32 instructions that take
; s0/s1 as the last source operand.
define amdgpu_ps <2 x float> @v_v_s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 inreg %mask) {
; GFX7-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX7-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX8-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX10-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; amdgpu_ps variant of the i64 and/or bit-select with only %b marked inreg.
; Every run is expected to emit two v_bfi_b32 instructions that take s0/s1 as
; the middle source operand.
define amdgpu_ps <2 x float> @v_s_v_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 %mask) {
; GFX7-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX10-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s0, v2
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, s1, v3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; amdgpu_ps variant of the i64 and/or bit-select with only %a marked inreg.
; The default-selector expectations use v_bfi_b32 per half. The GlobalISel
; expectations contain no v_bfi_b32: they use s_not_b64 with v_and/v_or on
; tonga and with v_and plus v_and_or_b32 on gfx1031.
define amdgpu_ps <2 x float> @s_v_v_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 %mask) {
; GFX7-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v3
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v3
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX10-NEXT: v_bfi_b32 v1, s1, v1, v3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT: s_not_b64 s[0:1], s[0:1]
; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v2
; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v3
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v3
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_not_b64 s[2:3], s[0:1]
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3
; GFX10-GISEL-NEXT: v_and_or_b32 v0, s0, v0, v2
; GFX10-GISEL-NEXT: v_and_or_b32 v1, s1, v1, v3
; GFX10-GISEL-NEXT: ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; 64-bit xor form of the bit-select: ((a ^ mask) & b) ^ mask. Every run is
; expected to emit two v_bfi_b32 instructions, one per 32-bit half, with the
; %b register as the first source operand.
define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; GFX7-LABEL: v_bitselect_i64_pat_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_i64_pat_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_i64_pat_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_i64_pat_1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4
; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask
  ret i64 %bitselect
}

define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i64 inreg %mask) {
; GFX7-LABEL: v_s_s_bitselect_i64_pat_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v2, s3
; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v2
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bitselect_i64_pat_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bitselect_i64_pat_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s2
; GFX10-NEXT: v_bfi_b32 v1, s1, v1, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, s2
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
; GFX8-GISEL-NEXT: v_bfi_b32 v1, v2, v1, s3
; GFX8-GISEL-NEXT: ; return to shader part
epilog 995; 996; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_1: 997; GFX10-GISEL: ; %bb.0: 998; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, v0, s2 999; GFX10-GISEL-NEXT: v_bfi_b32 v1, s1, v1, s3 1000; GFX10-GISEL-NEXT: ; return to shader part epilog 1001 %xor.0 = xor i64 %a, %mask 1002 %and = and i64 %xor.0, %b 1003 %bitselect = xor i64 %and, %mask 1004 %cast = bitcast i64 %bitselect to <2 x float> 1005 ret <2 x float> %cast 1006} 1007 1008define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg %b, i64 %mask) { 1009; GFX7-LABEL: s_s_v_bitselect_i64_pat_1: 1010; GFX7: ; %bb.0: 1011; GFX7-NEXT: v_mov_b32_e32 v2, s1 1012; GFX7-NEXT: v_bfi_b32 v1, s3, v2, v1 1013; GFX7-NEXT: v_mov_b32_e32 v2, s0 1014; GFX7-NEXT: v_bfi_b32 v0, s2, v2, v0 1015; GFX7-NEXT: ; return to shader part epilog 1016; 1017; GFX8-LABEL: s_s_v_bitselect_i64_pat_1: 1018; GFX8: ; %bb.0: 1019; GFX8-NEXT: v_mov_b32_e32 v2, s1 1020; GFX8-NEXT: v_bfi_b32 v1, s3, v2, v1 1021; GFX8-NEXT: v_mov_b32_e32 v2, s0 1022; GFX8-NEXT: v_bfi_b32 v0, s2, v2, v0 1023; GFX8-NEXT: ; return to shader part epilog 1024; 1025; GFX10-LABEL: s_s_v_bitselect_i64_pat_1: 1026; GFX10: ; %bb.0: 1027; GFX10-NEXT: v_bfi_b32 v0, s2, s0, v0 1028; GFX10-NEXT: v_bfi_b32 v1, s3, s1, v1 1029; GFX10-NEXT: ; return to shader part epilog 1030; 1031; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1: 1032; GFX8-GISEL: ; %bb.0: 1033; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2 1034; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, s0, v0 1035; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s3 1036; GFX8-GISEL-NEXT: v_bfi_b32 v1, v2, s1, v1 1037; GFX8-GISEL-NEXT: ; return to shader part epilog 1038; 1039; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_1: 1040; GFX10-GISEL: ; %bb.0: 1041; GFX10-GISEL-NEXT: v_bfi_b32 v0, s2, s0, v0 1042; GFX10-GISEL-NEXT: v_bfi_b32 v1, s3, s1, v1 1043; GFX10-GISEL-NEXT: ; return to shader part epilog 1044 %xor.0 = xor i64 %a, %mask 1045 %and = and i64 %xor.0, %b 1046 %bitselect = xor i64 %and, %mask 1047 %cast = bitcast i64 %bitselect to <2 
x float> 1048 ret <2 x float> %cast 1049} 1050 1051define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_1(i64 inreg %a, i64 %b, i64 inreg %mask) { 1052; GFX7-LABEL: s_v_s_bitselect_i64_pat_1: 1053; GFX7: ; %bb.0: 1054; GFX7-NEXT: v_mov_b32_e32 v2, s3 1055; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v2 1056; GFX7-NEXT: v_mov_b32_e32 v2, s2 1057; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2 1058; GFX7-NEXT: ; return to shader part epilog 1059; 1060; GFX8-LABEL: s_v_s_bitselect_i64_pat_1: 1061; GFX8: ; %bb.0: 1062; GFX8-NEXT: v_mov_b32_e32 v2, s3 1063; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v2 1064; GFX8-NEXT: v_mov_b32_e32 v2, s2 1065; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2 1066; GFX8-NEXT: ; return to shader part epilog 1067; 1068; GFX10-LABEL: s_v_s_bitselect_i64_pat_1: 1069; GFX10: ; %bb.0: 1070; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s2 1071; GFX10-NEXT: v_bfi_b32 v1, v1, s1, s3 1072; GFX10-NEXT: ; return to shader part epilog 1073; 1074; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_1: 1075; GFX8-GISEL: ; %bb.0: 1076; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 1077; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1078; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1079; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1080; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1081; GFX8-GISEL-NEXT: ; return to shader part epilog 1082; 1083; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_1: 1084; GFX10-GISEL: ; %bb.0: 1085; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 1086; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1087; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1088; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1089; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1090; GFX10-GISEL-NEXT: ; return to shader part epilog 1091 %xor.0 = xor i64 %a, %mask 1092 %and = and i64 %xor.0, %b 1093 %bitselect = xor i64 %and, %mask 1094 %cast = bitcast i64 %bitselect to <2 x float> 1095 ret <2 x float> %cast 1096} 1097 1098define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) { 1099; GFX7-LABEL: v_bitselect_i64_pat_2: 1100; 
GFX7: ; %bb.0: 1101; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1102; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5 1103; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4 1104; GFX7-NEXT: s_setpc_b64 s[30:31] 1105; 1106; GFX8-LABEL: v_bitselect_i64_pat_2: 1107; GFX8: ; %bb.0: 1108; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1109; GFX8-NEXT: v_bfi_b32 v1, v3, v1, v5 1110; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4 1111; GFX8-NEXT: s_setpc_b64 s[30:31] 1112; 1113; GFX10-LABEL: v_bitselect_i64_pat_2: 1114; GFX10: ; %bb.0: 1115; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1116; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 1117; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 1118; GFX10-NEXT: s_setpc_b64 s[30:31] 1119; 1120; GFX8-GISEL-LABEL: v_bitselect_i64_pat_2: 1121; GFX8-GISEL: ; %bb.0: 1122; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1123; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 1124; GFX8-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 1125; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1126; 1127; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2: 1128; GFX10-GISEL: ; %bb.0: 1129; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1130; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v4 1131; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, v5 1132; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1133 %xor.0 = xor i64 %a, %mask 1134 %and = and i64 %xor.0, %b 1135 %bitselect = xor i64 %and, %mask 1136 ret i64 %bitselect 1137} 1138 1139define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) { 1140; GFX7-LABEL: v_bfi_sha256_ma_i64: 1141; GFX7: ; %bb.0: ; %entry 1142; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1143; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3 1144; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2 1145; GFX7-NEXT: v_bfi_b32 v1, v1, v5, v3 1146; GFX7-NEXT: v_bfi_b32 v0, v0, v4, v2 1147; GFX7-NEXT: s_setpc_b64 s[30:31] 1148; 1149; GFX8-LABEL: v_bfi_sha256_ma_i64: 1150; GFX8: ; %bb.0: ; %entry 1151; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1152; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3 1153; GFX8-NEXT: 
v_xor_b32_e32 v0, v0, v2 1154; GFX8-NEXT: v_bfi_b32 v1, v1, v5, v3 1155; GFX8-NEXT: v_bfi_b32 v0, v0, v4, v2 1156; GFX8-NEXT: s_setpc_b64 s[30:31] 1157; 1158; GFX10-LABEL: v_bfi_sha256_ma_i64: 1159; GFX10: ; %bb.0: ; %entry 1160; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1161; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 1162; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 1163; GFX10-NEXT: v_bfi_b32 v0, v0, v4, v2 1164; GFX10-NEXT: v_bfi_b32 v1, v1, v5, v3 1165; GFX10-NEXT: s_setpc_b64 s[30:31] 1166; 1167; GFX8-GISEL-LABEL: v_bfi_sha256_ma_i64: 1168; GFX8-GISEL: ; %bb.0: ; %entry 1169; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1170; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 1171; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 1172; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v4, v2 1173; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v5, v3 1174; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1175; 1176; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64: 1177; GFX10-GISEL: ; %bb.0: ; %entry 1178; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1179; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 1180; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 1181; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v4, v2 1182; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v5, v3 1183; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1184entry: 1185 %and0 = and i64 %x, %z 1186 %or0 = or i64 %x, %z 1187 %and1 = and i64 %y, %or0 1188 %or1 = or i64 %and0, %and1 1189 ret i64 %or1 1190} 1191 1192define amdgpu_ps <2 x float> @v_s_s_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 inreg %z) { 1193; GFX7-LABEL: v_s_s_bfi_sha256_ma_i64: 1194; GFX7: ; %bb.0: ; %entry 1195; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1 1196; GFX7-NEXT: v_mov_b32_e32 v2, s1 1197; GFX7-NEXT: v_bfi_b32 v1, v1, s3, v2 1198; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 1199; GFX7-NEXT: v_mov_b32_e32 v2, s0 1200; GFX7-NEXT: v_bfi_b32 v0, v0, s2, v2 1201; GFX7-NEXT: ; return to shader part epilog 1202; 1203; GFX8-LABEL: v_s_s_bfi_sha256_ma_i64: 1204; GFX8: ; %bb.0: ; %entry 1205; GFX8-NEXT: v_xor_b32_e32 
v1, s1, v1 1206; GFX8-NEXT: v_mov_b32_e32 v2, s1 1207; GFX8-NEXT: v_bfi_b32 v1, v1, s3, v2 1208; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1209; GFX8-NEXT: v_mov_b32_e32 v2, s0 1210; GFX8-NEXT: v_bfi_b32 v0, v0, s2, v2 1211; GFX8-NEXT: ; return to shader part epilog 1212; 1213; GFX10-LABEL: v_s_s_bfi_sha256_ma_i64: 1214; GFX10: ; %bb.0: ; %entry 1215; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 1216; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 1217; GFX10-NEXT: v_bfi_b32 v0, v0, s2, s0 1218; GFX10-NEXT: v_bfi_b32 v1, v1, s3, s1 1219; GFX10-NEXT: ; return to shader part epilog 1220; 1221; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64: 1222; GFX8-GISEL: ; %bb.0: ; %entry 1223; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s2 1224; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 1225; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0 1226; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s3 1227; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1 1228; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v2, s1 1229; GFX8-GISEL-NEXT: ; return to shader part epilog 1230; 1231; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64: 1232; GFX10-GISEL: ; %bb.0: ; %entry 1233; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 1234; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1 1235; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, s2, s0 1236; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, s3, s1 1237; GFX10-GISEL-NEXT: ; return to shader part epilog 1238entry: 1239 %and0 = and i64 %x, %z 1240 %or0 = or i64 %x, %z 1241 %and1 = and i64 %y, %or0 1242 %or1 = or i64 %and0, %and1 1243 %cast = bitcast i64 %or1 to <2 x float> 1244 ret <2 x float> %cast 1245} 1246 1247define amdgpu_ps <2 x float> @s_v_s_bfi_sha256_ma_i64(i64 inreg %x, i64 %y, i64 inreg %z) { 1248; GFX7-LABEL: s_v_s_bfi_sha256_ma_i64: 1249; GFX7: ; %bb.0: ; %entry 1250; GFX7-NEXT: v_xor_b32_e32 v2, s1, v1 1251; GFX7-NEXT: v_bfi_b32 v1, v2, s3, v1 1252; GFX7-NEXT: v_xor_b32_e32 v2, s0, v0 1253; GFX7-NEXT: v_bfi_b32 v0, v2, s2, v0 1254; GFX7-NEXT: ; return to shader part epilog 1255; 1256; GFX8-LABEL: s_v_s_bfi_sha256_ma_i64: 1257; GFX8: ; %bb.0: ; 
%entry 1258; GFX8-NEXT: v_xor_b32_e32 v2, s1, v1 1259; GFX8-NEXT: v_bfi_b32 v1, v2, s3, v1 1260; GFX8-NEXT: v_xor_b32_e32 v2, s0, v0 1261; GFX8-NEXT: v_bfi_b32 v0, v2, s2, v0 1262; GFX8-NEXT: ; return to shader part epilog 1263; 1264; GFX10-LABEL: s_v_s_bfi_sha256_ma_i64: 1265; GFX10: ; %bb.0: ; %entry 1266; GFX10-NEXT: v_xor_b32_e32 v2, s0, v0 1267; GFX10-NEXT: v_xor_b32_e32 v3, s1, v1 1268; GFX10-NEXT: v_bfi_b32 v0, v2, s2, v0 1269; GFX10-NEXT: v_bfi_b32 v1, v3, s3, v1 1270; GFX10-NEXT: ; return to shader part epilog 1271; 1272; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64: 1273; GFX8-GISEL: ; %bb.0: ; %entry 1274; GFX8-GISEL-NEXT: s_and_b64 s[4:5], s[0:1], s[2:3] 1275; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1276; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1277; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1278; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s4, v0 1279; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s5, v1 1280; GFX8-GISEL-NEXT: ; return to shader part epilog 1281; 1282; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64: 1283; GFX10-GISEL: ; %bb.0: ; %entry 1284; GFX10-GISEL-NEXT: s_and_b64 s[4:5], s[0:1], s[2:3] 1285; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1286; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, s0, s4 1287; GFX10-GISEL-NEXT: v_and_or_b32 v1, v1, s1, s5 1288; GFX10-GISEL-NEXT: ; return to shader part epilog 1289entry: 1290 %and0 = and i64 %x, %z 1291 %or0 = or i64 %x, %z 1292 %and1 = and i64 %y, %or0 1293 %or1 = or i64 %and0, %and1 1294 %cast = bitcast i64 %or1 to <2 x float> 1295 ret <2 x float> %cast 1296} 1297 1298define amdgpu_ps <2 x float> @s_s_v_bfi_sha256_ma_i64(i64 inreg %x, i64 inreg %y, i64 %z) { 1299; GFX7-LABEL: s_s_v_bfi_sha256_ma_i64: 1300; GFX7: ; %bb.0: ; %entry 1301; GFX7-NEXT: v_mov_b32_e32 v2, s3 1302; GFX7-NEXT: v_xor_b32_e32 v2, s1, v2 1303; GFX7-NEXT: v_bfi_b32 v1, v2, v1, s3 1304; GFX7-NEXT: v_mov_b32_e32 v2, s2 1305; GFX7-NEXT: v_xor_b32_e32 v2, s0, v2 1306; GFX7-NEXT: v_bfi_b32 v0, v2, v0, s2 1307; GFX7-NEXT: ; return to shader part 
epilog 1308; 1309; GFX8-LABEL: s_s_v_bfi_sha256_ma_i64: 1310; GFX8: ; %bb.0: ; %entry 1311; GFX8-NEXT: v_mov_b32_e32 v2, s3 1312; GFX8-NEXT: v_xor_b32_e32 v2, s1, v2 1313; GFX8-NEXT: v_bfi_b32 v1, v2, v1, s3 1314; GFX8-NEXT: v_mov_b32_e32 v2, s2 1315; GFX8-NEXT: v_xor_b32_e32 v2, s0, v2 1316; GFX8-NEXT: v_bfi_b32 v0, v2, v0, s2 1317; GFX8-NEXT: ; return to shader part epilog 1318; 1319; GFX10-LABEL: s_s_v_bfi_sha256_ma_i64: 1320; GFX10: ; %bb.0: ; %entry 1321; GFX10-NEXT: v_xor_b32_e64 v2, s0, s2 1322; GFX10-NEXT: v_xor_b32_e64 v3, s1, s3 1323; GFX10-NEXT: v_bfi_b32 v0, v2, v0, s2 1324; GFX10-NEXT: v_bfi_b32 v1, v3, v1, s3 1325; GFX10-NEXT: ; return to shader part epilog 1326; 1327; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64: 1328; GFX8-GISEL: ; %bb.0: ; %entry 1329; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 1330; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s2, v2 1331; GFX8-GISEL-NEXT: v_bfi_b32 v0, v2, v0, s2 1332; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1 1333; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s3, v2 1334; GFX8-GISEL-NEXT: v_bfi_b32 v1, v2, v1, s3 1335; GFX8-GISEL-NEXT: ; return to shader part epilog 1336; 1337; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64: 1338; GFX10-GISEL: ; %bb.0: ; %entry 1339; GFX10-GISEL-NEXT: v_xor_b32_e64 v2, s0, s2 1340; GFX10-GISEL-NEXT: v_xor_b32_e64 v3, s1, s3 1341; GFX10-GISEL-NEXT: v_bfi_b32 v0, v2, v0, s2 1342; GFX10-GISEL-NEXT: v_bfi_b32 v1, v3, v1, s3 1343; GFX10-GISEL-NEXT: ; return to shader part epilog 1344entry: 1345 %and0 = and i64 %x, %z 1346 %or0 = or i64 %x, %z 1347 %and1 = and i64 %y, %or0 1348 %or1 = or i64 %and0, %and1 1349 %cast = bitcast i64 %or1 to <2 x float> 1350 ret <2 x float> %cast 1351} 1352 1353define amdgpu_ps <2 x float> @v_s_v_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 %z) { 1354; GFX7-LABEL: v_s_v_bfi_sha256_ma_i64: 1355; GFX7: ; %bb.0: ; %entry 1356; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1 1357; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 1358; GFX7-NEXT: v_bfi_b32 v1, v1, v3, s1 1359; GFX7-NEXT: v_bfi_b32 v0, v0, v2, s0 1360; 
GFX7-NEXT: ; return to shader part epilog 1361; 1362; GFX8-LABEL: v_s_v_bfi_sha256_ma_i64: 1363; GFX8: ; %bb.0: ; %entry 1364; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 1365; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1366; GFX8-NEXT: v_bfi_b32 v1, v1, v3, s1 1367; GFX8-NEXT: v_bfi_b32 v0, v0, v2, s0 1368; GFX8-NEXT: ; return to shader part epilog 1369; 1370; GFX10-LABEL: v_s_v_bfi_sha256_ma_i64: 1371; GFX10: ; %bb.0: ; %entry 1372; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 1373; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 1374; GFX10-NEXT: v_bfi_b32 v0, v0, v2, s0 1375; GFX10-NEXT: v_bfi_b32 v1, v1, v3, s1 1376; GFX10-NEXT: ; return to shader part epilog 1377; 1378; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64: 1379; GFX8-GISEL: ; %bb.0: ; %entry 1380; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 1381; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1 1382; GFX8-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0 1383; GFX8-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1 1384; GFX8-GISEL-NEXT: ; return to shader part epilog 1385; 1386; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64: 1387; GFX10-GISEL: ; %bb.0: ; %entry 1388; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 1389; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s1, v1 1390; GFX10-GISEL-NEXT: v_bfi_b32 v0, v0, v2, s0 1391; GFX10-GISEL-NEXT: v_bfi_b32 v1, v1, v3, s1 1392; GFX10-GISEL-NEXT: ; return to shader part epilog 1393entry: 1394 %and0 = and i64 %x, %z 1395 %or0 = or i64 %x, %z 1396 %and1 = and i64 %y, %or0 1397 %or1 = or i64 %and0, %and1 1398 %cast = bitcast i64 %or1 to <2 x float> 1399 ret <2 x float> %cast 1400} 1401 1402define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) { 1403; GFX7-LABEL: s_bitselect_i64_pat_0: 1404; GFX7: ; %bb.0: 1405; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1406; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd 1407; GFX7-NEXT: s_mov_b32 s7, 0xf000 1408; GFX7-NEXT: s_mov_b32 s6, -1 1409; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1410; GFX7-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3] 1411; GFX7-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1] 1412; 
GFX7-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1413; GFX7-NEXT: s_add_u32 s0, s0, 10 1414; GFX7-NEXT: s_addc_u32 s1, s1, 0 1415; GFX7-NEXT: v_mov_b32_e32 v0, s0 1416; GFX7-NEXT: v_mov_b32_e32 v1, s1 1417; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1418; GFX7-NEXT: s_endpgm 1419; 1420; GFX8-LABEL: s_bitselect_i64_pat_0: 1421; GFX8: ; %bb.0: 1422; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1423; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1424; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1425; GFX8-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3] 1426; GFX8-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1] 1427; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1428; GFX8-NEXT: s_add_u32 s0, s0, 10 1429; GFX8-NEXT: s_addc_u32 s1, s1, 0 1430; GFX8-NEXT: v_mov_b32_e32 v0, s0 1431; GFX8-NEXT: v_mov_b32_e32 v1, s1 1432; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1433; GFX8-NEXT: s_endpgm 1434; 1435; GFX10-LABEL: s_bitselect_i64_pat_0: 1436; GFX10: ; %bb.0: 1437; GFX10-NEXT: s_clause 0x1 1438; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1439; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1440; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1441; GFX10-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3] 1442; GFX10-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1] 1443; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1444; GFX10-NEXT: s_add_u32 s0, s0, 10 1445; GFX10-NEXT: s_addc_u32 s1, s1, 0 1446; GFX10-NEXT: v_mov_b32_e32 v0, s0 1447; GFX10-NEXT: v_mov_b32_e32 v1, s1 1448; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1449; GFX10-NEXT: s_endpgm 1450; 1451; GFX8-GISEL-LABEL: s_bitselect_i64_pat_0: 1452; GFX8-GISEL: ; %bb.0: 1453; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1454; GFX8-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1455; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1456; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3] 1457; GFX8-GISEL-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1] 1458; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1459; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1460; GFX8-GISEL-NEXT: 
s_addc_u32 s1, s1, 0 1461; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1462; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1463; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1464; GFX8-GISEL-NEXT: s_endpgm 1465; 1466; GFX10-GISEL-LABEL: s_bitselect_i64_pat_0: 1467; GFX10-GISEL: ; %bb.0: 1468; GFX10-GISEL-NEXT: s_clause 0x1 1469; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1470; GFX10-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1471; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1472; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3] 1473; GFX10-GISEL-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1] 1474; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1475; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1476; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0 1477; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1478; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 1479; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1480; GFX10-GISEL-NEXT: s_endpgm 1481 %and0 = and i64 %a, %b 1482 %not.a = xor i64 %a, -1 1483 %and1 = and i64 %not.a, %mask 1484 %bitselect = or i64 %and0, %and1 1485 %scalar.use = add i64 %bitselect, 10 1486 store i64 %scalar.use, ptr addrspace(1) undef 1487 ret void 1488} 1489 1490define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) { 1491; GFX7-LABEL: s_bitselect_i64_pat_1: 1492; GFX7: ; %bb.0: 1493; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1494; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd 1495; GFX7-NEXT: s_mov_b32 s7, 0xf000 1496; GFX7-NEXT: s_mov_b32 s6, -1 1497; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1498; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1499; GFX7-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1500; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1501; GFX7-NEXT: s_add_u32 s0, s0, 10 1502; GFX7-NEXT: s_addc_u32 s1, s1, 0 1503; GFX7-NEXT: v_mov_b32_e32 v0, s0 1504; GFX7-NEXT: v_mov_b32_e32 v1, s1 1505; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1506; GFX7-NEXT: s_endpgm 1507; 1508; GFX8-LABEL: s_bitselect_i64_pat_1: 1509; GFX8: ; %bb.0: 
1510; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1511; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1512; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1513; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1514; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1515; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1516; GFX8-NEXT: s_add_u32 s0, s0, 10 1517; GFX8-NEXT: s_addc_u32 s1, s1, 0 1518; GFX8-NEXT: v_mov_b32_e32 v0, s0 1519; GFX8-NEXT: v_mov_b32_e32 v1, s1 1520; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1521; GFX8-NEXT: s_endpgm 1522; 1523; GFX10-LABEL: s_bitselect_i64_pat_1: 1524; GFX10: ; %bb.0: 1525; GFX10-NEXT: s_clause 0x1 1526; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1527; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1528; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1529; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1530; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1531; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1532; GFX10-NEXT: s_add_u32 s0, s0, 10 1533; GFX10-NEXT: s_addc_u32 s1, s1, 0 1534; GFX10-NEXT: v_mov_b32_e32 v0, s0 1535; GFX10-NEXT: v_mov_b32_e32 v1, s1 1536; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1537; GFX10-NEXT: s_endpgm 1538; 1539; GFX8-GISEL-LABEL: s_bitselect_i64_pat_1: 1540; GFX8-GISEL: ; %bb.0: 1541; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1542; GFX8-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1543; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1544; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1545; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1546; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1547; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1548; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1549; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1550; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1551; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1552; GFX8-GISEL-NEXT: s_endpgm 1553; 1554; GFX10-GISEL-LABEL: s_bitselect_i64_pat_1: 1555; GFX10-GISEL: ; %bb.0: 1556; GFX10-GISEL-NEXT: s_clause 0x1 1557; GFX10-GISEL-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x24 1558; GFX10-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1559; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1560; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1561; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1562; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1563; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1564; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0 1565; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1566; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 1567; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1568; GFX10-GISEL-NEXT: s_endpgm 1569 %xor.0 = xor i64 %a, %mask 1570 %and = and i64 %xor.0, %b 1571 %bitselect = xor i64 %and, %mask 1572 1573 %scalar.use = add i64 %bitselect, 10 1574 store i64 %scalar.use, ptr addrspace(1) undef 1575 ret void 1576} 1577 1578define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) { 1579; GFX7-LABEL: s_bitselect_i64_pat_2: 1580; GFX7: ; %bb.0: 1581; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1582; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd 1583; GFX7-NEXT: s_mov_b32 s7, 0xf000 1584; GFX7-NEXT: s_mov_b32 s6, -1 1585; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1586; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1587; GFX7-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1588; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1589; GFX7-NEXT: s_add_u32 s0, s0, 10 1590; GFX7-NEXT: s_addc_u32 s1, s1, 0 1591; GFX7-NEXT: v_mov_b32_e32 v0, s0 1592; GFX7-NEXT: v_mov_b32_e32 v1, s1 1593; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1594; GFX7-NEXT: s_endpgm 1595; 1596; GFX8-LABEL: s_bitselect_i64_pat_2: 1597; GFX8: ; %bb.0: 1598; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1599; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1600; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1601; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1602; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1603; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1604; GFX8-NEXT: s_add_u32 s0, s0, 10 1605; GFX8-NEXT: s_addc_u32 s1, s1, 0 1606; 
GFX8-NEXT: v_mov_b32_e32 v0, s0 1607; GFX8-NEXT: v_mov_b32_e32 v1, s1 1608; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1609; GFX8-NEXT: s_endpgm 1610; 1611; GFX10-LABEL: s_bitselect_i64_pat_2: 1612; GFX10: ; %bb.0: 1613; GFX10-NEXT: s_clause 0x1 1614; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1615; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1616; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1617; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1618; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1619; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1620; GFX10-NEXT: s_add_u32 s0, s0, 10 1621; GFX10-NEXT: s_addc_u32 s1, s1, 0 1622; GFX10-NEXT: v_mov_b32_e32 v0, s0 1623; GFX10-NEXT: v_mov_b32_e32 v1, s1 1624; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1625; GFX10-NEXT: s_endpgm 1626; 1627; GFX8-GISEL-LABEL: s_bitselect_i64_pat_2: 1628; GFX8-GISEL: ; %bb.0: 1629; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1630; GFX8-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1631; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1632; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1633; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1634; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1635; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1636; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1637; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1638; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1639; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1640; GFX8-GISEL-NEXT: s_endpgm 1641; 1642; GFX10-GISEL-LABEL: s_bitselect_i64_pat_2: 1643; GFX10-GISEL: ; %bb.0: 1644; GFX10-GISEL-NEXT: s_clause 0x1 1645; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1646; GFX10-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1647; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1648; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1649; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1650; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] 1651; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1652; GFX10-GISEL-NEXT: s_addc_u32 
s1, s1, 0 1653; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1654; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 1655; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1656; GFX10-GISEL-NEXT: s_endpgm 1657 %xor.0 = xor i64 %a, %mask 1658 %and = and i64 %xor.0, %b 1659 %bitselect = xor i64 %and, %mask 1660 1661 %scalar.use = add i64 %bitselect, 10 1662 store i64 %scalar.use, ptr addrspace(1) undef 1663 ret void 1664} 1665 1666define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) { 1667; GFX7-LABEL: s_bfi_sha256_ma_i64: 1668; GFX7: ; %bb.0: ; %entry 1669; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1670; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd 1671; GFX7-NEXT: s_mov_b32 s7, 0xf000 1672; GFX7-NEXT: s_mov_b32 s6, -1 1673; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1674; GFX7-NEXT: s_and_b64 s[8:9], s[0:1], s[4:5] 1675; GFX7-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 1676; GFX7-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] 1677; GFX7-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] 1678; GFX7-NEXT: s_add_u32 s0, s0, 10 1679; GFX7-NEXT: s_addc_u32 s1, s1, 0 1680; GFX7-NEXT: v_mov_b32_e32 v0, s0 1681; GFX7-NEXT: v_mov_b32_e32 v1, s1 1682; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1683; GFX7-NEXT: s_endpgm 1684; 1685; GFX8-LABEL: s_bfi_sha256_ma_i64: 1686; GFX8: ; %bb.0: ; %entry 1687; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1688; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1689; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1690; GFX8-NEXT: s_and_b64 s[6:7], s[0:1], s[4:5] 1691; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 1692; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] 1693; GFX8-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] 1694; GFX8-NEXT: s_add_u32 s0, s0, 10 1695; GFX8-NEXT: s_addc_u32 s1, s1, 0 1696; GFX8-NEXT: v_mov_b32_e32 v0, s0 1697; GFX8-NEXT: v_mov_b32_e32 v1, s1 1698; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1699; GFX8-NEXT: s_endpgm 1700; 1701; GFX10-LABEL: s_bfi_sha256_ma_i64: 1702; GFX10: ; %bb.0: ; %entry 1703; GFX10-NEXT: s_clause 0x1 1704; GFX10-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 1705; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1706; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1707; GFX10-NEXT: s_or_b64 s[6:7], s[0:1], s[4:5] 1708; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] 1709; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1710; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1711; GFX10-NEXT: s_add_u32 s0, s0, 10 1712; GFX10-NEXT: s_addc_u32 s1, s1, 0 1713; GFX10-NEXT: v_mov_b32_e32 v0, s0 1714; GFX10-NEXT: v_mov_b32_e32 v1, s1 1715; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1716; GFX10-NEXT: s_endpgm 1717; 1718; GFX8-GISEL-LABEL: s_bfi_sha256_ma_i64: 1719; GFX8-GISEL: ; %bb.0: ; %entry 1720; GFX8-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1721; GFX8-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1722; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1723; GFX8-GISEL-NEXT: s_and_b64 s[6:7], s[0:1], s[4:5] 1724; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 1725; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] 1726; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] 1727; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1728; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1729; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1730; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1731; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1732; GFX8-GISEL-NEXT: s_endpgm 1733; 1734; GFX10-GISEL-LABEL: s_bfi_sha256_ma_i64: 1735; GFX10-GISEL: ; %bb.0: ; %entry 1736; GFX10-GISEL-NEXT: s_clause 0x1 1737; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1738; GFX10-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 1739; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1740; GFX10-GISEL-NEXT: s_or_b64 s[6:7], s[0:1], s[4:5] 1741; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] 1742; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1743; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1744; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1745; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0 1746; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1747; GFX10-GISEL-NEXT: v_mov_b32_e32 
v1, s1 1748; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1749; GFX10-GISEL-NEXT: s_endpgm 1750entry: 1751 %and0 = and i64 %x, %z 1752 %or0 = or i64 %x, %z 1753 %and1 = and i64 %y, %or0 1754 %or1 = or i64 %and0, %and1 1755 1756 %scalar.use = add i64 %or1, 10 1757 store i64 %scalar.use, ptr addrspace(1) undef 1758 ret void 1759} 1760