; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s

; Checks that the shift/subtract idioms below are selected to a single
; bfm instruction on both llc invocations above:
;   ((1 << x) - 1) << y   ->  bfm x, y
;   (1 << x) - 1          ->  bfm x, 0
; The amdgpu_kernel variants load their arguments with scalar loads and are
; expected to use s_bfm_b32; the non-kernel variants receive their arguments
; in vector registers (v2/v3 in the checks) and are expected to use v_bfm_b32.

; Kernel, full pattern: ((1 << %x) - 1) << %y with both operands scalar.
define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: s_bfm_pattern:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfm_b32 s2, s2, s3
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_bfm_pattern:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfm_b32 s2, s2, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %a = shl i32 1, %x
  %b = sub i32 %a, 1
  %c = shl i32 %b, %y
  store i32 %c, ptr addrspace(1) %out
  ret void
}

; Kernel, mask-only pattern: (1 << %x) - 1, expected as bfm with offset 0.
define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: s_bfm_pattern_simple:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfm_b32 s4, s2, 0
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: s_bfm_pattern_simple:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfm_b32 s2, s2, 0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
  %a = shl i32 1, %x
  %b = sub i32 %a, 1
  store i32 %b, ptr addrspace(1) %out
  ret void
}

; Non-kernel function, full pattern: same IR as @s_bfm_pattern, but the
; operands are in vector registers, so the vector form of bfm is expected.
define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: v_bfm_pattern:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_bfm_b32_e32 v2, v2, v3
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_bfm_pattern:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_bfm_b32 v2, v2, v3
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %a = shl i32 1, %x
  %b = sub i32 %a, 1
  %c = shl i32 %b, %y
  store i32 %c, ptr addrspace(1) %out
  ret void
}

; Non-kernel function, mask-only pattern: (1 << %x) - 1 on a vector operand.
define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: v_bfm_pattern_simple:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_bfm_b32_e64 v2, v2, 0
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_bfm_pattern_simple:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_bfm_b32 v2, v2, 0
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
  %a = shl i32 1, %x
  %b = sub i32 %a, 1
  store i32 %b, ptr addrspace(1) %out
  ret void
}

attributes #0 = { nounwind }