; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-GISEL %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10-GISEL %s

; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
; Kernel-argument form, result stored to %out. Every run line expects the
; scalar s_and_b32 / s_andn2_b32 / s_or_b32 sequence and no v_bfi_b32.
define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_def_i32:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dword s8, s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s0, s4
; GFX7-NEXT:    s_mov_b32 s1, s5
; GFX7-NEXT:    s_andn2_b32 s4, s8, s6
; GFX7-NEXT:    s_and_b32 s5, s7, s6
; GFX7-NEXT:    s_or_b32 s4, s4, s5
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: s_bfi_def_i32:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_and_b32 s1, s7, s6
; GFX8-NEXT:    s_andn2_b32 s0, s0, s6
; GFX8-NEXT:    s_or_b32 s0, s0, s1
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: s_bfi_def_i32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_and_b32 s1, s7, s6
; GFX10-NEXT:    s_andn2_b32 s0, s0, s6
; GFX10-NEXT:    s_or_b32 s0, s0, s1
; GFX10-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-NEXT:    s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_def_i32:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT:    s_and_b32 s1, s7, s6
; GFX8-GISEL-NEXT:    s_andn2_b32 s0, s0, s6
; GFX8-GISEL-NEXT:    s_or_b32 s0, s0, s1
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT:    s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_def_i32:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_clause 0x1
; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_and_b32 s1, s7, s6
; GFX10-GISEL-NEXT:    s_andn2_b32 s0, s0, s6
; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s1
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[4:5]
; GFX10-GISEL-NEXT:    s_endpgm
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; Same (y & x) | (z & ~x) pattern with plain function arguments. Every run
; line expects a single v_bfi_b32 v0, v0, v1, v2.
define i32 @v_bfi_def_i32(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_def_i32:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_def_i32:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_def_i32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_def_i32:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_def_i32:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  ret i32 %3
}

; SHA-256 Ch function
; z ^ (x & (y ^ z))
; Kernel-argument form, result stored to %out. Every run line expects the
; scalar s_xor_b32 / s_and_b32 / s_xor_b32 sequence and no v_bfi_b32.
define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dword s8, s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s0, s4
; GFX7-NEXT:    s_xor_b32 s4, s7, s8
; GFX7-NEXT:    s_and_b32 s4, s6, s4
; GFX7-NEXT:    s_xor_b32 s4, s8, s4
; GFX7-NEXT:    s_mov_b32 s1, s5
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    s_xor_b32 s1, s7, s0
; GFX8-NEXT:    s_and_b32 s1, s6, s1
; GFX8-NEXT:    s_xor_b32 s0, s0, s1
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_xor_b32 s1, s7, s0
; GFX10-NEXT:    s_and_b32 s1, s6, s1
; GFX10-NEXT:    s_xor_b32 s0, s0, s1
; GFX10-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-NEXT:    s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-GISEL-NEXT:    s_xor_b32 s1, s7, s0
; GFX8-GISEL-NEXT:    s_and_b32 s1, s6, s1
; GFX8-GISEL-NEXT:    s_xor_b32 s0, s0, s1
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT:    s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_clause 0x1
; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_xor_b32 s1, s7, s0
; GFX10-GISEL-NEXT:    s_and_b32 s1, s6, s1
; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, s1
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[4:5]
; GFX10-GISEL-NEXT:    s_endpgm
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Ch pattern with plain function arguments. Every run line expects a single
; v_bfi_b32 v0, v0, v1, v2.
define i32 @v_bfi_sha256_ch(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  ret i32 %2
}

; Ch pattern with %y and %z marked inreg. The default-selector run lines expect
; one v_bfi_b32 (tahiti/tonga first copy s1 into v1); the -global-isel run
; lines expect the separate xor/and/xor sequence.
define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z) {
; GFX7-LABEL: v_s_s_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_xor_b32 s0, s0, s1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, s1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
; GFX10-GISEL-NEXT:    ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch pattern with %x and %z marked inreg. The default-selector run lines expect
; one v_bfi_b32 (tahiti/tonga first copy s1 into v1); the -global-isel run
; lines expect v_xor_b32 / v_and_b32 / v_xor_b32.
define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z) {
; GFX7-LABEL: s_v_s_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_v_s_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_v_s_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
; GFX10-GISEL-NEXT:    ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch pattern with %x and %y marked inreg. Every run line expects a v_bfi_b32;
; the tahiti and tonga run lines (including tonga with -global-isel) first
; copy one s-register operand into v1.
define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
; GFX7-LABEL: s_s_v_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_bfi_b32 v0, s0, v1, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_s_v_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_bfi_b32 v0, s0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_s_v_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_bfi_b32 v0, s0, s1, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s0
; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v1, s1, v0
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    v_bfi_b32 v0, s0, s1, v0
; GFX10-GISEL-NEXT:    ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch pattern with only %x marked inreg. Every run line expects a single
; v_bfi_b32 v0, s0, v0, v1.
define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_v_v_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_v_v_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_v_v_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_v_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_v_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    v_bfi_b32 v0, s0, v0, v1
; GFX10-GISEL-NEXT:    ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch pattern with only %y marked inreg. Every run line expects a single
; v_bfi_b32 v0, v0, s0, v1.
define amdgpu_ps float @v_s_v_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 %z) {
; GFX7-LABEL: v_s_v_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_s_v_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_s_v_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, s0, v1
; GFX10-GISEL-NEXT:    ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; Ch pattern with only %z marked inreg. The default-selector run lines expect
; v_bfi_b32 v0, v0, v1, s0; the -global-isel run lines expect
; v_xor_b32 / v_and_b32 / v_xor_b32.
define amdgpu_ps float @v_v_s_bfi_sha256_ch(i32 %x, i32 %y, i32 inreg %z) {
; GFX7-LABEL: v_v_s_bfi_sha256_ch:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_v_s_bfi_sha256_ch:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_v_s_bfi_sha256_ch:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, s0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_v_s_bfi_sha256_ch:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s0, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_v_s_bfi_sha256_ch:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s0, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    ; return to shader part epilog
entry:
  %xor0 = xor i32 %y, %z
  %and = and i32 %x, %xor0
  %xor1 = xor i32 %z, %and
  %cast = bitcast i32 %xor1 to float
  ret float %cast
}

; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
; Kernel-argument form, result stored to %out. Every run line expects only
; scalar s_and_b32 / s_or_b32 instructions for the computation.
define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ma:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT:    s_load_dword s8, s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s1, s5
; GFX7-NEXT:    s_or_b32 s5, s6, s8
; GFX7-NEXT:    s_mov_b32 s0, s4
; GFX7-NEXT:    s_and_b32 s4, s6, s8
; GFX7-NEXT:    s_and_b32 s5, s7, s5
; GFX7-NEXT:    s_or_b32 s4, s4, s5
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ma:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    s_and_b32 s1, s6, s0
; GFX8-NEXT:    s_or_b32 s0, s6, s0
; GFX8-NEXT:    s_and_b32 s0, s7, s0
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ma:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_or_b32 s1, s6, s0
; GFX10-NEXT:    s_and_b32 s0, s6, s0
; GFX10-NEXT:    s_and_b32 s1, s7, s1
; GFX10-NEXT:    s_or_b32 s0, s0, s1
; GFX10-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX10-NEXT:    s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ma:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-GISEL-NEXT:    s_and_b32 s1, s6, s0
; GFX8-GISEL-NEXT:    s_or_b32 s0, s6, s0
; GFX8-GISEL-NEXT:    s_and_b32 s0, s7, s0
; GFX8-GISEL-NEXT:    s_or_b32 s0, s1, s0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX8-GISEL-NEXT:    s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ma:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_clause 0x1
; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_or_b32 s1, s6, s0
; GFX10-GISEL-NEXT:    s_and_b32 s0, s6, s0
; GFX10-GISEL-NEXT:    s_and_b32 s1, s7, s1
; GFX10-GISEL-NEXT:    s_or_b32 s0, s0, s1
; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[4:5]
; GFX10-GISEL-NEXT:    s_endpgm
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; (x & z) | (y & (x | z)) with plain function arguments. Every run line
; expects v_xor_b32 followed by a single v_bfi_b32.
define i32 @v_bfi_sha256_ma(i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: v_bfi_sha256_ma:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bfi_sha256_ma:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bfi_sha256_ma:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bfi_sha256_ma:
; GFX8-GISEL:       ; %bb.0: ; %entry
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v2, v1
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bfi_sha256_ma:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v2, v1
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  ret i32 %3
}

; ((a ^ mask) & b) ^ mask on <2 x i32>. The default-selector run lines expect
; one v_bfi_b32 per element; the -global-isel run lines expect
; v_xor_b32 / v_and_b32 / v_xor_b32 per element.
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
; GFX7-LABEL: v_bitselect_v2i32_pat1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_v2i32_pat1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_v2i32_pat1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %xor.0 = xor <2 x i32> %a, %mask
  %and = and <2 x i32> %xor.0, %b
  %bitselect = xor <2 x i32> %and, %mask
  ret <2 x i32> %bitselect
}

; (a & b) | (~a & mask) on i64 with plain function arguments. The
; default-selector run lines expect two v_bfi_b32 (writing v0 and v1); the
; -global-isel run lines expect and/xor/and/or instructions and no v_bfi_b32.
define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX7-LABEL: v_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, v5
; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, v4
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, v5
; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, v4
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v4
; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, v0, v2
; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v6, -1, v0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v7, -1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v6, v4
; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v7, v5
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  ret i64 %bitselect
}

; i64 bitselect with %b and %mask marked inreg. The default-selector run lines
; expect two v_bfi_b32 (tahiti/tonga copy s3 and s2 into v2 first); the
; -global-isel run lines expect no v_bfi_b32.
define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 inreg %mask) {
; GFX7-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v2, s3
; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v2
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v2, s3
; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v2
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v1
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, -1, v0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v3, -1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 bitselect with %a and %mask marked inreg. The default-selector run lines
; expect two v_bfi_b32 (tahiti/tonga copy s3 and s2 into v2 first); the
; -global-isel run lines expect v_and_b32 / s_andn2_b64 / v_or_b32.
define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 inreg %mask) {
; GFX7-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v2, s3
; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v2
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v2, s3
; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v2
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT:    s_andn2_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT:    s_andn2_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT:    ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 bitselect with %a and %b marked inreg. The default-selector run lines
; expect two v_bfi_b32 (tahiti/tonga copy s3 and s2 into v2 first); the
; -global-isel run lines expect s_and_b64 / s_not_b64 plus v_and_b32 / v_or_b32.
define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_0(i64 inreg %a, i64 inreg %b, i64 %mask) {
; GFX7-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v2, s3
; GFX7-NEXT:    v_bfi_b32 v1, s1, v2, v1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_bfi_b32 v0, s0, v2, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v2, s3
; GFX8-NEXT:    v_bfi_b32 v1, s1, v2, v1
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_bfi_b32 v0, s0, v2, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_bfi_b32 v0, s0, s2, v0
; GFX10-NEXT:    v_bfi_b32 v1, s1, s3, v1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; GFX8-GISEL-NEXT:    s_not_b64 s[0:1], s[0:1]
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s2, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s3, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_not_b64 s[4:5], s[0:1]
; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s5, v1
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT:    ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 bitselect with only %mask marked inreg. The default-selector run lines
; expect two v_bfi_b32 with no extra moves; the -global-isel run lines expect
; and/xor/and/or instructions and no v_bfi_b32.
define amdgpu_ps <2 x float> @v_v_s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 inreg %mask) {
; GFX7-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, s1
; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, s1
; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, v0, v2
; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, -1, v0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, -1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v4
; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v5
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 bitselect with only %b marked inreg. The default-selector run lines
; expect two v_bfi_b32 with no extra moves; the -global-isel run lines expect
; and/xor/and/or instructions and no v_bfi_b32.
define amdgpu_ps <2 x float> @v_s_v_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 %mask) {
; GFX7-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v3
; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v3
; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v2
; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, v3
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    v_and_b32_e32 v4, s0, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v5, s1, v1
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, -1, v0
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, -1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v4, v2
; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v5, v3
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

; i64 bitselect with only %a marked inreg. The default-selector run lines
; expect two v_bfi_b32 with no extra moves; the -global-isel run lines expect
; s_not_b64 plus v_and_b32 / v_or_b32 and no v_bfi_b32.
define amdgpu_ps <2 x float> @s_v_v_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 %mask) {
; GFX7-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v3
; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v3
; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v2
; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, v3
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX8-GISEL-NEXT:    s_not_b64 s[0:1], s[0:1]
; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v2
; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v3
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_not_b64 s[2:3], s[0:1]
; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    ; return to shader part epilog
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %cast = bitcast i64 %bitselect to <2 x float>
  ret <2 x float> %cast
}

define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; GFX7-LABEL: v_bitselect_i64_pat_1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bitselect_i64_pat_1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_bitselect_i64_pat_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1029; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 1030; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 1031; GFX10-NEXT: s_setpc_b64 s[30:31] 1032; 1033; GFX8-GISEL-LABEL: v_bitselect_i64_pat_1: 1034; GFX8-GISEL: ; %bb.0: 1035; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1036; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1037; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1038; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1039; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1040; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1041; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1042; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1043; 1044; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1: 1045; GFX10-GISEL: ; %bb.0: 1046; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1047; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1048; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1049; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1050; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1051; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1052; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1053; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1054; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1055 %xor.0 = xor i64 %a, %mask 1056 %and = and i64 %xor.0, %b 1057 %bitselect = xor i64 %and, %mask 1058 ret i64 %bitselect 1059} 1060 1061define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i64 inreg %mask) { 1062; GFX7-LABEL: v_s_s_bitselect_i64_pat_1: 1063; GFX7: ; %bb.0: 1064; GFX7-NEXT: v_mov_b32_e32 v2, s3 1065; GFX7-NEXT: v_bfi_b32 v1, s1, v1, v2 1066; GFX7-NEXT: v_mov_b32_e32 v2, s2 1067; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v2 1068; GFX7-NEXT: ; return to shader part epilog 1069; 1070; GFX8-LABEL: v_s_s_bitselect_i64_pat_1: 1071; GFX8: ; %bb.0: 1072; GFX8-NEXT: v_mov_b32_e32 v2, s3 1073; GFX8-NEXT: v_bfi_b32 v1, s1, v1, v2 1074; GFX8-NEXT: v_mov_b32_e32 v2, s2 1075; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v2 1076; GFX8-NEXT: ; return to shader part epilog 1077; 1078; GFX10-LABEL: v_s_s_bitselect_i64_pat_1: 1079; GFX10: ; %bb.0: 
1080; GFX10-NEXT: v_bfi_b32 v0, s0, v0, s2 1081; GFX10-NEXT: v_bfi_b32 v1, s1, v1, s3 1082; GFX10-NEXT: ; return to shader part epilog 1083; 1084; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_1: 1085; GFX8-GISEL: ; %bb.0: 1086; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1087; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1088; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1089; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1090; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1091; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1092; GFX8-GISEL-NEXT: ; return to shader part epilog 1093; 1094; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_1: 1095; GFX10-GISEL: ; %bb.0: 1096; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1097; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1098; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1099; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1100; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1101; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1102; GFX10-GISEL-NEXT: ; return to shader part epilog 1103 %xor.0 = xor i64 %a, %mask 1104 %and = and i64 %xor.0, %b 1105 %bitselect = xor i64 %and, %mask 1106 %cast = bitcast i64 %bitselect to <2 x float> 1107 ret <2 x float> %cast 1108} 1109 1110define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg %b, i64 %mask) { 1111; GFX7-LABEL: s_s_v_bitselect_i64_pat_1: 1112; GFX7: ; %bb.0: 1113; GFX7-NEXT: v_mov_b32_e32 v2, s1 1114; GFX7-NEXT: v_bfi_b32 v1, s3, v2, v1 1115; GFX7-NEXT: v_mov_b32_e32 v2, s0 1116; GFX7-NEXT: v_bfi_b32 v0, s2, v2, v0 1117; GFX7-NEXT: ; return to shader part epilog 1118; 1119; GFX8-LABEL: s_s_v_bitselect_i64_pat_1: 1120; GFX8: ; %bb.0: 1121; GFX8-NEXT: v_mov_b32_e32 v2, s1 1122; GFX8-NEXT: v_bfi_b32 v1, s3, v2, v1 1123; GFX8-NEXT: v_mov_b32_e32 v2, s0 1124; GFX8-NEXT: v_bfi_b32 v0, s2, v2, v0 1125; GFX8-NEXT: ; return to shader part epilog 1126; 1127; GFX10-LABEL: s_s_v_bitselect_i64_pat_1: 1128; GFX10: ; %bb.0: 1129; GFX10-NEXT: v_bfi_b32 v0, s2, s0, v0 1130; GFX10-NEXT: v_bfi_b32 v1, s3, s1, 
v1 1131; GFX10-NEXT: ; return to shader part epilog 1132; 1133; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1: 1134; GFX8-GISEL: ; %bb.0: 1135; GFX8-GISEL-NEXT: v_xor_b32_e32 v2, s0, v0 1136; GFX8-GISEL-NEXT: v_xor_b32_e32 v3, s1, v1 1137; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1138; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 1139; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v2, v0 1140; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1 1141; GFX8-GISEL-NEXT: ; return to shader part epilog 1142; 1143; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_1: 1144; GFX10-GISEL: ; %bb.0: 1145; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, s0, v0 1146; GFX10-GISEL-NEXT: v_xor_b32_e32 v3, s1, v1 1147; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1148; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s3, v3 1149; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v2, v0 1150; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v3, v1 1151; GFX10-GISEL-NEXT: ; return to shader part epilog 1152 %xor.0 = xor i64 %a, %mask 1153 %and = and i64 %xor.0, %b 1154 %bitselect = xor i64 %and, %mask 1155 %cast = bitcast i64 %bitselect to <2 x float> 1156 ret <2 x float> %cast 1157} 1158 1159define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_1(i64 inreg %a, i64 %b, i64 inreg %mask) { 1160; GFX7-LABEL: s_v_s_bitselect_i64_pat_1: 1161; GFX7: ; %bb.0: 1162; GFX7-NEXT: v_mov_b32_e32 v2, s3 1163; GFX7-NEXT: v_bfi_b32 v1, v1, s1, v2 1164; GFX7-NEXT: v_mov_b32_e32 v2, s2 1165; GFX7-NEXT: v_bfi_b32 v0, v0, s0, v2 1166; GFX7-NEXT: ; return to shader part epilog 1167; 1168; GFX8-LABEL: s_v_s_bitselect_i64_pat_1: 1169; GFX8: ; %bb.0: 1170; GFX8-NEXT: v_mov_b32_e32 v2, s3 1171; GFX8-NEXT: v_bfi_b32 v1, v1, s1, v2 1172; GFX8-NEXT: v_mov_b32_e32 v2, s2 1173; GFX8-NEXT: v_bfi_b32 v0, v0, s0, v2 1174; GFX8-NEXT: ; return to shader part epilog 1175; 1176; GFX10-LABEL: s_v_s_bitselect_i64_pat_1: 1177; GFX10: ; %bb.0: 1178; GFX10-NEXT: v_bfi_b32 v0, v0, s0, s2 1179; GFX10-NEXT: v_bfi_b32 v1, v1, s1, s3 1180; GFX10-NEXT: ; return to shader part epilog 1181; 1182; GFX8-GISEL-LABEL: 
s_v_s_bitselect_i64_pat_1: 1183; GFX8-GISEL: ; %bb.0: 1184; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 1185; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1186; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1187; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1188; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1189; GFX8-GISEL-NEXT: ; return to shader part epilog 1190; 1191; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_1: 1192; GFX10-GISEL: ; %bb.0: 1193; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 1194; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1195; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1196; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 1197; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, s3, v1 1198; GFX10-GISEL-NEXT: ; return to shader part epilog 1199 %xor.0 = xor i64 %a, %mask 1200 %and = and i64 %xor.0, %b 1201 %bitselect = xor i64 %and, %mask 1202 %cast = bitcast i64 %bitselect to <2 x float> 1203 ret <2 x float> %cast 1204} 1205 1206define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) { 1207; GFX7-LABEL: v_bitselect_i64_pat_2: 1208; GFX7: ; %bb.0: 1209; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1210; GFX7-NEXT: v_bfi_b32 v1, v3, v1, v5 1211; GFX7-NEXT: v_bfi_b32 v0, v2, v0, v4 1212; GFX7-NEXT: s_setpc_b64 s[30:31] 1213; 1214; GFX8-LABEL: v_bitselect_i64_pat_2: 1215; GFX8: ; %bb.0: 1216; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1217; GFX8-NEXT: v_bfi_b32 v1, v3, v1, v5 1218; GFX8-NEXT: v_bfi_b32 v0, v2, v0, v4 1219; GFX8-NEXT: s_setpc_b64 s[30:31] 1220; 1221; GFX10-LABEL: v_bitselect_i64_pat_2: 1222; GFX10: ; %bb.0: 1223; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1224; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1225; GFX10-NEXT: v_bfi_b32 v0, v2, v0, v4 1226; GFX10-NEXT: v_bfi_b32 v1, v3, v1, v5 1227; GFX10-NEXT: s_setpc_b64 s[30:31] 1228; 1229; GFX8-GISEL-LABEL: v_bitselect_i64_pat_2: 1230; GFX8-GISEL: ; %bb.0: 1231; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1232; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1233; 
GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1234; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1235; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1236; GFX8-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1237; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1238; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1239; 1240; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2: 1241; GFX10-GISEL: ; %bb.0: 1242; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1243; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1244; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1245; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1246; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1247; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1248; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 1249; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 1250; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1251 %xor.0 = xor i64 %a, %mask 1252 %and = and i64 %xor.0, %b 1253 %bitselect = xor i64 %and, %mask 1254 ret i64 %bitselect 1255} 1256 1257define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) { 1258; GFX7-LABEL: v_bfi_sha256_ma_i64: 1259; GFX7: ; %bb.0: ; %entry 1260; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1261; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3 1262; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2 1263; GFX7-NEXT: v_bfi_b32 v1, v1, v5, v3 1264; GFX7-NEXT: v_bfi_b32 v0, v0, v4, v2 1265; GFX7-NEXT: s_setpc_b64 s[30:31] 1266; 1267; GFX8-LABEL: v_bfi_sha256_ma_i64: 1268; GFX8: ; %bb.0: ; %entry 1269; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1270; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3 1271; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2 1272; GFX8-NEXT: v_bfi_b32 v1, v1, v5, v3 1273; GFX8-NEXT: v_bfi_b32 v0, v0, v4, v2 1274; GFX8-NEXT: s_setpc_b64 s[30:31] 1275; 1276; GFX10-LABEL: v_bfi_sha256_ma_i64: 1277; GFX10: ; %bb.0: ; %entry 1278; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1279; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1280; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 1281; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 1282; GFX10-NEXT: v_bfi_b32 v0, v0, v4, v2 1283; 
GFX10-NEXT: v_bfi_b32 v1, v1, v5, v3 1284; GFX10-NEXT: s_setpc_b64 s[30:31] 1285; 1286; GFX8-GISEL-LABEL: v_bfi_sha256_ma_i64: 1287; GFX8-GISEL: ; %bb.0: ; %entry 1288; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1289; GFX8-GISEL-NEXT: v_and_b32_e32 v6, v0, v4 1290; GFX8-GISEL-NEXT: v_and_b32_e32 v7, v1, v5 1291; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 1292; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v5 1293; GFX8-GISEL-NEXT: v_and_b32_e32 v0, v2, v0 1294; GFX8-GISEL-NEXT: v_and_b32_e32 v1, v3, v1 1295; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0 1296; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v7, v1 1297; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] 1298; 1299; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64: 1300; GFX10-GISEL: ; %bb.0: ; %entry 1301; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1302; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1303; GFX10-GISEL-NEXT: v_or_b32_e32 v6, v0, v4 1304; GFX10-GISEL-NEXT: v_or_b32_e32 v7, v1, v5 1305; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v4 1306; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v5 1307; GFX10-GISEL-NEXT: v_and_b32_e32 v2, v2, v6 1308; GFX10-GISEL-NEXT: v_and_b32_e32 v3, v3, v7 1309; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1310; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1311; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1312entry: 1313 %and0 = and i64 %x, %z 1314 %or0 = or i64 %x, %z 1315 %and1 = and i64 %y, %or0 1316 %or1 = or i64 %and0, %and1 1317 ret i64 %or1 1318} 1319 1320define amdgpu_ps <2 x float> @v_s_s_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 inreg %z) { 1321; GFX7-LABEL: v_s_s_bfi_sha256_ma_i64: 1322; GFX7: ; %bb.0: ; %entry 1323; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1 1324; GFX7-NEXT: v_mov_b32_e32 v2, s1 1325; GFX7-NEXT: v_bfi_b32 v1, v1, s3, v2 1326; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 1327; GFX7-NEXT: v_mov_b32_e32 v2, s0 1328; GFX7-NEXT: v_bfi_b32 v0, v0, s2, v2 1329; GFX7-NEXT: ; return to shader part epilog 1330; 1331; GFX8-LABEL: v_s_s_bfi_sha256_ma_i64: 1332; GFX8: ; %bb.0: ; %entry 1333; GFX8-NEXT: 
v_xor_b32_e32 v1, s1, v1 1334; GFX8-NEXT: v_mov_b32_e32 v2, s1 1335; GFX8-NEXT: v_bfi_b32 v1, v1, s3, v2 1336; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1337; GFX8-NEXT: v_mov_b32_e32 v2, s0 1338; GFX8-NEXT: v_bfi_b32 v0, v0, s2, v2 1339; GFX8-NEXT: ; return to shader part epilog 1340; 1341; GFX10-LABEL: v_s_s_bfi_sha256_ma_i64: 1342; GFX10: ; %bb.0: ; %entry 1343; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 1344; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 1345; GFX10-NEXT: v_bfi_b32 v0, v0, s2, s0 1346; GFX10-NEXT: v_bfi_b32 v1, v1, s3, s1 1347; GFX10-NEXT: ; return to shader part epilog 1348; 1349; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64: 1350; GFX8-GISEL: ; %bb.0: ; %entry 1351; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s2, v0 1352; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s3, v1 1353; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s2, v0 1354; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s3, v1 1355; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1356; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1357; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 1358; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 1359; GFX8-GISEL-NEXT: ; return to shader part epilog 1360; 1361; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64: 1362; GFX10-GISEL: ; %bb.0: ; %entry 1363; GFX10-GISEL-NEXT: v_or_b32_e32 v2, s2, v0 1364; GFX10-GISEL-NEXT: v_or_b32_e32 v3, s3, v1 1365; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 1366; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s3, v1 1367; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v2 1368; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v3 1369; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1370; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1371; GFX10-GISEL-NEXT: ; return to shader part epilog 1372entry: 1373 %and0 = and i64 %x, %z 1374 %or0 = or i64 %x, %z 1375 %and1 = and i64 %y, %or0 1376 %or1 = or i64 %and0, %and1 1377 %cast = bitcast i64 %or1 to <2 x float> 1378 ret <2 x float> %cast 1379} 1380 1381define amdgpu_ps <2 x float> @s_v_s_bfi_sha256_ma_i64(i64 inreg %x, i64 %y, i64 inreg %z) { 1382; GFX7-LABEL: s_v_s_bfi_sha256_ma_i64: 1383; GFX7: 
; %bb.0: ; %entry 1384; GFX7-NEXT: v_xor_b32_e32 v2, s1, v1 1385; GFX7-NEXT: v_bfi_b32 v1, v2, s3, v1 1386; GFX7-NEXT: v_xor_b32_e32 v2, s0, v0 1387; GFX7-NEXT: v_bfi_b32 v0, v2, s2, v0 1388; GFX7-NEXT: ; return to shader part epilog 1389; 1390; GFX8-LABEL: s_v_s_bfi_sha256_ma_i64: 1391; GFX8: ; %bb.0: ; %entry 1392; GFX8-NEXT: v_xor_b32_e32 v2, s1, v1 1393; GFX8-NEXT: v_bfi_b32 v1, v2, s3, v1 1394; GFX8-NEXT: v_xor_b32_e32 v2, s0, v0 1395; GFX8-NEXT: v_bfi_b32 v0, v2, s2, v0 1396; GFX8-NEXT: ; return to shader part epilog 1397; 1398; GFX10-LABEL: s_v_s_bfi_sha256_ma_i64: 1399; GFX10: ; %bb.0: ; %entry 1400; GFX10-NEXT: v_xor_b32_e32 v2, s0, v0 1401; GFX10-NEXT: v_xor_b32_e32 v3, s1, v1 1402; GFX10-NEXT: v_bfi_b32 v0, v2, s2, v0 1403; GFX10-NEXT: v_bfi_b32 v1, v3, s3, v1 1404; GFX10-NEXT: ; return to shader part epilog 1405; 1406; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64: 1407; GFX8-GISEL: ; %bb.0: ; %entry 1408; GFX8-GISEL-NEXT: s_and_b64 s[4:5], s[0:1], s[2:3] 1409; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1410; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1411; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1412; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s4, v0 1413; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s5, v1 1414; GFX8-GISEL-NEXT: ; return to shader part epilog 1415; 1416; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64: 1417; GFX10-GISEL: ; %bb.0: ; %entry 1418; GFX10-GISEL-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3] 1419; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] 1420; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s4, v0 1421; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s5, v1 1422; GFX10-GISEL-NEXT: v_or_b32_e32 v0, s0, v0 1423; GFX10-GISEL-NEXT: v_or_b32_e32 v1, s1, v1 1424; GFX10-GISEL-NEXT: ; return to shader part epilog 1425entry: 1426 %and0 = and i64 %x, %z 1427 %or0 = or i64 %x, %z 1428 %and1 = and i64 %y, %or0 1429 %or1 = or i64 %and0, %and1 1430 %cast = bitcast i64 %or1 to <2 x float> 1431 ret <2 x float> %cast 1432} 1433 1434define amdgpu_ps <2 x float> 
@s_s_v_bfi_sha256_ma_i64(i64 inreg %x, i64 inreg %y, i64 %z) { 1435; GFX7-LABEL: s_s_v_bfi_sha256_ma_i64: 1436; GFX7: ; %bb.0: ; %entry 1437; GFX7-NEXT: v_mov_b32_e32 v2, s3 1438; GFX7-NEXT: v_xor_b32_e32 v2, s1, v2 1439; GFX7-NEXT: v_bfi_b32 v1, v2, v1, s3 1440; GFX7-NEXT: v_mov_b32_e32 v2, s2 1441; GFX7-NEXT: v_xor_b32_e32 v2, s0, v2 1442; GFX7-NEXT: v_bfi_b32 v0, v2, v0, s2 1443; GFX7-NEXT: ; return to shader part epilog 1444; 1445; GFX8-LABEL: s_s_v_bfi_sha256_ma_i64: 1446; GFX8: ; %bb.0: ; %entry 1447; GFX8-NEXT: v_mov_b32_e32 v2, s3 1448; GFX8-NEXT: v_xor_b32_e32 v2, s1, v2 1449; GFX8-NEXT: v_bfi_b32 v1, v2, v1, s3 1450; GFX8-NEXT: v_mov_b32_e32 v2, s2 1451; GFX8-NEXT: v_xor_b32_e32 v2, s0, v2 1452; GFX8-NEXT: v_bfi_b32 v0, v2, v0, s2 1453; GFX8-NEXT: ; return to shader part epilog 1454; 1455; GFX10-LABEL: s_s_v_bfi_sha256_ma_i64: 1456; GFX10: ; %bb.0: ; %entry 1457; GFX10-NEXT: v_xor_b32_e64 v2, s0, s2 1458; GFX10-NEXT: v_xor_b32_e64 v3, s1, s3 1459; GFX10-NEXT: v_bfi_b32 v0, v2, v0, s2 1460; GFX10-NEXT: v_bfi_b32 v1, v3, v1, s3 1461; GFX10-NEXT: ; return to shader part epilog 1462; 1463; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64: 1464; GFX8-GISEL: ; %bb.0: ; %entry 1465; GFX8-GISEL-NEXT: v_and_b32_e32 v2, s0, v0 1466; GFX8-GISEL-NEXT: v_and_b32_e32 v3, s1, v1 1467; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s0, v0 1468; GFX8-GISEL-NEXT: v_or_b32_e32 v1, s1, v1 1469; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 1470; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s3, v1 1471; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 1472; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 1473; GFX8-GISEL-NEXT: ; return to shader part epilog 1474; 1475; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64: 1476; GFX10-GISEL: ; %bb.0: ; %entry 1477; GFX10-GISEL-NEXT: v_or_b32_e32 v2, s0, v0 1478; GFX10-GISEL-NEXT: v_or_b32_e32 v3, s1, v1 1479; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1480; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1481; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2 1482; GFX10-GISEL-NEXT: v_and_b32_e32 
v3, s3, v3 1483; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1484; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1485; GFX10-GISEL-NEXT: ; return to shader part epilog 1486entry: 1487 %and0 = and i64 %x, %z 1488 %or0 = or i64 %x, %z 1489 %and1 = and i64 %y, %or0 1490 %or1 = or i64 %and0, %and1 1491 %cast = bitcast i64 %or1 to <2 x float> 1492 ret <2 x float> %cast 1493} 1494 1495define amdgpu_ps <2 x float> @v_s_v_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 %z) { 1496; GFX7-LABEL: v_s_v_bfi_sha256_ma_i64: 1497; GFX7: ; %bb.0: ; %entry 1498; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1 1499; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 1500; GFX7-NEXT: v_bfi_b32 v1, v1, v3, s1 1501; GFX7-NEXT: v_bfi_b32 v0, v0, v2, s0 1502; GFX7-NEXT: ; return to shader part epilog 1503; 1504; GFX8-LABEL: v_s_v_bfi_sha256_ma_i64: 1505; GFX8: ; %bb.0: ; %entry 1506; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 1507; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1508; GFX8-NEXT: v_bfi_b32 v1, v1, v3, s1 1509; GFX8-NEXT: v_bfi_b32 v0, v0, v2, s0 1510; GFX8-NEXT: ; return to shader part epilog 1511; 1512; GFX10-LABEL: v_s_v_bfi_sha256_ma_i64: 1513; GFX10: ; %bb.0: ; %entry 1514; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 1515; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 1516; GFX10-NEXT: v_bfi_b32 v0, v0, v2, s0 1517; GFX10-NEXT: v_bfi_b32 v1, v1, v3, s1 1518; GFX10-NEXT: ; return to shader part epilog 1519; 1520; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64: 1521; GFX8-GISEL: ; %bb.0: ; %entry 1522; GFX8-GISEL-NEXT: v_and_b32_e32 v4, v0, v2 1523; GFX8-GISEL-NEXT: v_and_b32_e32 v5, v1, v3 1524; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1525; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1526; GFX8-GISEL-NEXT: v_and_b32_e32 v0, s0, v0 1527; GFX8-GISEL-NEXT: v_and_b32_e32 v1, s1, v1 1528; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 1529; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1 1530; GFX8-GISEL-NEXT: ; return to shader part epilog 1531; 1532; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64: 1533; GFX10-GISEL: ; %bb.0: ; %entry 1534; GFX10-GISEL-NEXT: 
v_or_b32_e32 v4, v0, v2 1535; GFX10-GISEL-NEXT: v_or_b32_e32 v5, v1, v3 1536; GFX10-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 1537; GFX10-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 1538; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s0, v4 1539; GFX10-GISEL-NEXT: v_and_b32_e32 v3, s1, v5 1540; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 1541; GFX10-GISEL-NEXT: v_or_b32_e32 v1, v1, v3 1542; GFX10-GISEL-NEXT: ; return to shader part epilog 1543entry: 1544 %and0 = and i64 %x, %z 1545 %or0 = or i64 %x, %z 1546 %and1 = and i64 %y, %or0 1547 %or1 = or i64 %and0, %and1 1548 %cast = bitcast i64 %or1 to <2 x float> 1549 ret <2 x float> %cast 1550} 1551 1552define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) { 1553; GFX7-LABEL: s_bitselect_i64_pat_0: 1554; GFX7: ; %bb.0: 1555; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1556; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 1557; GFX7-NEXT: s_mov_b32 s3, 0xf000 1558; GFX7-NEXT: s_mov_b32 s2, -1 1559; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1560; GFX7-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 1561; GFX7-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1562; GFX7-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] 1563; GFX7-NEXT: s_add_u32 s0, s0, 10 1564; GFX7-NEXT: s_addc_u32 s1, s1, 0 1565; GFX7-NEXT: v_mov_b32_e32 v0, s0 1566; GFX7-NEXT: v_mov_b32_e32 v1, s1 1567; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1568; GFX7-NEXT: s_endpgm 1569; 1570; GFX8-LABEL: s_bitselect_i64_pat_0: 1571; GFX8: ; %bb.0: 1572; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1573; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1574; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX8-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1576; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1577; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1578; GFX8-NEXT: s_add_u32 s0, s0, 10 1579; GFX8-NEXT: s_addc_u32 s1, s1, 0 1580; GFX8-NEXT: v_mov_b32_e32 v0, s0 1581; GFX8-NEXT: v_mov_b32_e32 v1, s1 1582; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1583; GFX8-NEXT: s_endpgm 1584; 1585; GFX10-LABEL: 
s_bitselect_i64_pat_0: 1586; GFX10: ; %bb.0: 1587; GFX10-NEXT: s_clause 0x1 1588; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1589; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1590; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1591; GFX10-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1592; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1593; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1594; GFX10-NEXT: s_add_u32 s0, s0, 10 1595; GFX10-NEXT: s_addc_u32 s1, s1, 0 1596; GFX10-NEXT: v_mov_b32_e32 v0, s0 1597; GFX10-NEXT: v_mov_b32_e32 v1, s1 1598; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1599; GFX10-NEXT: s_endpgm 1600; 1601; GFX8-GISEL-LABEL: s_bitselect_i64_pat_0: 1602; GFX8-GISEL: ; %bb.0: 1603; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1604; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1605; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1606; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1607; GFX8-GISEL-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1608; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1609; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1610; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1611; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1612; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1613; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1614; GFX8-GISEL-NEXT: s_endpgm 1615; 1616; GFX10-GISEL-LABEL: s_bitselect_i64_pat_0: 1617; GFX10-GISEL: ; %bb.0: 1618; GFX10-GISEL-NEXT: s_clause 0x1 1619; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1620; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1621; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1622; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] 1623; GFX10-GISEL-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5] 1624; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1625; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1626; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0 1627; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1628; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 1629; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 
1630; GFX10-GISEL-NEXT: s_endpgm 1631 %and0 = and i64 %a, %b 1632 %not.a = xor i64 %a, -1 1633 %and1 = and i64 %not.a, %mask 1634 %bitselect = or i64 %and0, %and1 1635 %scalar.use = add i64 %bitselect, 10 1636 store i64 %scalar.use, i64 addrspace(1)* undef 1637 ret void 1638} 1639 1640define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) { 1641; GFX7-LABEL: s_bitselect_i64_pat_1: 1642; GFX7: ; %bb.0: 1643; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1644; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 1645; GFX7-NEXT: s_mov_b32 s3, 0xf000 1646; GFX7-NEXT: s_mov_b32 s2, -1 1647; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1648; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1] 1649; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 1650; GFX7-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] 1651; GFX7-NEXT: s_add_u32 s0, s0, 10 1652; GFX7-NEXT: s_addc_u32 s1, s1, 0 1653; GFX7-NEXT: v_mov_b32_e32 v0, s0 1654; GFX7-NEXT: v_mov_b32_e32 v1, s1 1655; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1656; GFX7-NEXT: s_endpgm 1657; 1658; GFX8-LABEL: s_bitselect_i64_pat_1: 1659; GFX8: ; %bb.0: 1660; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1661; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1662; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1663; GFX8-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1664; GFX8-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1665; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1666; GFX8-NEXT: s_add_u32 s0, s0, 10 1667; GFX8-NEXT: s_addc_u32 s1, s1, 0 1668; GFX8-NEXT: v_mov_b32_e32 v0, s0 1669; GFX8-NEXT: v_mov_b32_e32 v1, s1 1670; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1671; GFX8-NEXT: s_endpgm 1672; 1673; GFX10-LABEL: s_bitselect_i64_pat_1: 1674; GFX10: ; %bb.0: 1675; GFX10-NEXT: s_clause 0x1 1676; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1677; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1678; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1679; GFX10-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1680; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1681; GFX10-NEXT: s_xor_b64 
s[0:1], s[2:3], s[0:1] 1682; GFX10-NEXT: s_add_u32 s0, s0, 10 1683; GFX10-NEXT: s_addc_u32 s1, s1, 0 1684; GFX10-NEXT: v_mov_b32_e32 v0, s0 1685; GFX10-NEXT: v_mov_b32_e32 v1, s1 1686; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1687; GFX10-NEXT: s_endpgm 1688; 1689; GFX8-GISEL-LABEL: s_bitselect_i64_pat_1: 1690; GFX8-GISEL: ; %bb.0: 1691; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1692; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1693; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1694; GFX8-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1695; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1696; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1697; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10 1698; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0 1699; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 1700; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 1701; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1702; GFX8-GISEL-NEXT: s_endpgm 1703; 1704; GFX10-GISEL-LABEL: s_bitselect_i64_pat_1: 1705; GFX10-GISEL: ; %bb.0: 1706; GFX10-GISEL-NEXT: s_clause 0x1 1707; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1708; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1709; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1710; GFX10-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1711; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1712; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1713; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10 1714; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0 1715; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 1716; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 1717; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1718; GFX10-GISEL-NEXT: s_endpgm 1719 %xor.0 = xor i64 %a, %mask 1720 %and = and i64 %xor.0, %b 1721 %bitselect = xor i64 %and, %mask 1722 1723 %scalar.use = add i64 %bitselect, 10 1724 store i64 %scalar.use, i64 addrspace(1)* undef 1725 ret void 1726} 1727 1728define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) { 1729; GFX7-LABEL: 
s_bitselect_i64_pat_2: 1730; GFX7: ; %bb.0: 1731; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1732; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 1733; GFX7-NEXT: s_mov_b32 s3, 0xf000 1734; GFX7-NEXT: s_mov_b32 s2, -1 1735; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1736; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1] 1737; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 1738; GFX7-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] 1739; GFX7-NEXT: s_add_u32 s0, s0, 10 1740; GFX7-NEXT: s_addc_u32 s1, s1, 0 1741; GFX7-NEXT: v_mov_b32_e32 v0, s0 1742; GFX7-NEXT: v_mov_b32_e32 v1, s1 1743; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1744; GFX7-NEXT: s_endpgm 1745; 1746; GFX8-LABEL: s_bitselect_i64_pat_2: 1747; GFX8: ; %bb.0: 1748; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1749; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1750; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1751; GFX8-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1752; GFX8-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1753; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1754; GFX8-NEXT: s_add_u32 s0, s0, 10 1755; GFX8-NEXT: s_addc_u32 s1, s1, 0 1756; GFX8-NEXT: v_mov_b32_e32 v0, s0 1757; GFX8-NEXT: v_mov_b32_e32 v1, s1 1758; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1759; GFX8-NEXT: s_endpgm 1760; 1761; GFX10-LABEL: s_bitselect_i64_pat_2: 1762; GFX10: ; %bb.0: 1763; GFX10-NEXT: s_clause 0x1 1764; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1765; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 1766; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1767; GFX10-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1] 1768; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7] 1769; GFX10-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] 1770; GFX10-NEXT: s_add_u32 s0, s0, 10 1771; GFX10-NEXT: s_addc_u32 s1, s1, 0 1772; GFX10-NEXT: v_mov_b32_e32 v0, s0 1773; GFX10-NEXT: v_mov_b32_e32 v1, s1 1774; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1775; GFX10-NEXT: s_endpgm 1776; 1777; GFX8-GISEL-LABEL: s_bitselect_i64_pat_2: 1778; GFX8-GISEL: ; %bb.0: 1779; GFX8-GISEL-NEXT: s_load_dwordx4 
s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bitselect_i64_pat_2:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-GISEL-NEXT: s_endpgm
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}

; 64-bit kernel-argument form of the SHA-256 "Ma" (bitwise majority) pattern
; (x & z) | (y & (x | z))
; The result is consumed by an i64 add before being stored. The expected
; output below keeps the whole and/or sequence in 64-bit scalar instructions
; followed by an s_add_u32/s_addc_u32 pair on every tested configuration.
define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; GFX7-LABEL: s_bfi_sha256_ma_i64:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_and_b64 s[8:9], s[4:5], s[0:1]
; GFX7-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX7-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX7-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GFX7-NEXT: s_add_u32 s0, s0, 10
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_bfi_sha256_ma_i64:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b64 s[2:3], s[4:5], s[0:1]
; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX8-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, 10
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: s_bfi_sha256_ma_i64:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_or_b64 s[2:3], s[4:5], s[0:1]
; GFX10-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
; GFX10-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_add_u32 s0, s0, 10
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX8-GISEL-LABEL: s_bfi_sha256_ma_i64:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX8-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: s_bfi_sha256_ma_i64:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_or_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-GISEL-NEXT: s_endpgm
entry:
  ; Ma(x, y, z) = (x & z) | (y & (x | z))
  %x_and_z = and i64 %x, %z
  %x_or_z = or i64 %x, %z
  %y_masked = and i64 %y, %x_or_z
  %maj = or i64 %x_and_z, %y_masked

  ; Extra integer user of the result; the store address is undef, so only the
  ; value computation is of interest here.
  %scalar.use = add i64 %maj, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}