; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s

; Unsigned bitfield extract with source, offset and width all passed as
; ordinary function arguments. The expected output is a single three-operand
; v_bfe_u32 on the incoming registers v0, v1, v2.
define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %extracted = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %extracted
}

; Same extract with all three operands marked inreg in an amdgpu_ps function.
; The expected output masks the offset with 63, shifts the width left by 16,
; ORs the two together and feeds the packed value to s_bfe_u32.
define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s1, s1, 63
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
  %extracted = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %extracted
}

; TODO: Need to expand this.
; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
;   %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
;   ret i64 %bfe_i64
; }

; 64-bit scalar extract: the offset is masked with 63 and the width shifted
; left by 16, exactly as in the 32-bit scalar case, then fed to s_bfe_u64.
define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s2, s2, 63
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s2, s2, s3
; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
  ret i64 %bfe_i64
}

; ---------------------------------------------------------------------------
; Kernel variants mixing register and immediate operands.
; ---------------------------------------------------------------------------

; NOTE(review): the call below passes %src1 as BOTH the offset and the width;
; %src2 is never used. The autogenerated checks encode that (s3 feeds both the
; and-with-63 and the shift-by-16). If %src2 was intended as the width, change
; the call and regenerate the checks with utils/update_llc_test_checks.py.
define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s4, s3, 63
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s3, s4, s3
; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Immediate width 123: the shift is folded into the constant 0x7b0000.
define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_arg_imm:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s3, s3, 63
; GFX6-NEXT:    s_or_b32 s3, s3, 0x7b0000
; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Immediate offset 123: the mask with 63 is folded, leaving the constant 59.
define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_arg_imm_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s3, 59, s3
; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Immediate source 123 (0x7b) with register offset and width.
define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_imm_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s4, s2, 63
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s3, s4, s3
; GFX6-NEXT:    s_bfe_u32 s3, 0x7b, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Zero width with a register offset: the extract is still emitted.
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s3, s3, 63
; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Zero width with immediate offset 8: the extract is still emitted. The %src1
; parameter is not referenced by the body.
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 8
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Extracts applied to values that are already zero-extended.
; ---------------------------------------------------------------------------

; The i8 load is selected as buffer_load_ubyte; the 8-bit extract at offset 0
; that follows is still emitted (as v_bfe_u32).
define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zextload_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; The add is selected as s_add_i32. Both the and with 0xff and the following
; 8-bit extract at offset 0 are still emitted.
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s3, s3, 1
; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; 16-bit variant of the test above (mask 0xffff, width 16).
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s3, s3, 1
; GFX6-NEXT:    s_and_b32 s3, s3, 0xffff
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; The next four tests extract 8 bits at a non-zero offset from a masked value.
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s3, s3, 1
; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80001
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s3, s3, 1
; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80003
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s3, s3, 1
; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80007
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s3, s3, 1
; GFX6-NEXT:    s_and_b32 s3, s3, 0xffff
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80008
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Extracts with constant offset/width on loaded values, with and without a
; preceding shift. In the expected output the constant second operand of
; s_bfe_u32 is (width << 16) | offset.
; ---------------------------------------------------------------------------

define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; The shl 31 / lshr 31 pair is selected as a 1-bit s_bfe_u32 at offset 0; the
; intrinsic then becomes a second s_bfe_u32 (offset 31, width 1).
define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; The shl 31 / ashr 31 pair is selected as a signed 1-bit extract (s_bfe_i32),
; followed by the unsigned extract requested by the intrinsic.
define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x10000
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0000
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x180008
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80018
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; Arithmetic shift right by 31 followed by an extract of bit 31. The expected
; output keeps both operations: s_ashr_i32, then s_bfe_u32.
define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_ashr_i32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %ashr = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ashr, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; Logical shift right by 31 followed by an extract of bit 31. The expected
; output keeps both operations: s_lshr_b32, then s_bfe_u32.
define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: bfe_u32_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s3, s3, 31
; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %lshr = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %lshr, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; ---------------------------------------------------------------------------
; All-constant operands. Despite the names, the expected output below shows
; that none of these calls is folded to a constant: each still emits an
; s_bfe_u32. Where both the source and the packed offset/width operand are
; printed as hex literals, the packed operand is first moved into s2.
; ---------------------------------------------------------------------------

define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x10007
; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x80006
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x80010
; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x40004
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_15:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_17:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_18:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to s_bfe_u32 could also just directly be the load register.
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0,
; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x4
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x0
; GFX6-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s8, 63
; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
                                            ptr addrspace(1) %out1,
                                            ptr addrspace(1) %in) #0 {
  %src = load i32, ptr addrspace(1) %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, ptr addrspace(1) %out0, align 4
  store i32 %and, ptr addrspace(1) %out1, align 4
  ret void
}

define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
; GFX6-LABEL: lshr_and:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
;
GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 955; GFX6-NEXT: v_mov_b32_e32 v0, s3 956; GFX6-NEXT: s_mov_b32 s3, 0xf000 957; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 958; GFX6-NEXT: s_endpgm 959 %b = lshr i32 %a, 6 960 %c = and i32 %b, 7 961 store i32 %c, ptr addrspace(1) %out, align 8 962 ret void 963} 964 965define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { 966; GFX6-LABEL: v_lshr_and: 967; GFX6: ; %bb.0: 968; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 969; GFX6-NEXT: s_waitcnt lgkmcnt(0) 970; GFX6-NEXT: s_lshr_b32 s3, s2, s3 971; GFX6-NEXT: s_and_b32 s3, s3, 7 972; GFX6-NEXT: s_mov_b32 s2, -1 973; GFX6-NEXT: v_mov_b32_e32 v0, s3 974; GFX6-NEXT: s_mov_b32 s3, 0xf000 975; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 976; GFX6-NEXT: s_endpgm 977 %c = lshr i32 %a, %b 978 %d = and i32 %c, 7 979 store i32 %d, ptr addrspace(1) %out, align 8 980 ret void 981} 982 983define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 { 984; GFX6-LABEL: and_lshr: 985; GFX6: ; %bb.0: 986; GFX6-NEXT: s_load_dword s3, s[4:5], 0x2 987; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 988; GFX6-NEXT: s_mov_b32 s2, -1 989; GFX6-NEXT: s_waitcnt lgkmcnt(0) 990; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 991; GFX6-NEXT: v_mov_b32_e32 v0, s3 992; GFX6-NEXT: s_mov_b32 s3, 0xf000 993; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 994; GFX6-NEXT: s_endpgm 995 %b = and i32 %a, 448 996 %c = lshr i32 %b, 6 997 store i32 %c, ptr addrspace(1) %out, align 8 998 ret void 999} 1000 1001define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 { 1002; GFX6-LABEL: and_lshr2: 1003; GFX6: ; %bb.0: 1004; GFX6-NEXT: s_load_dword s3, s[4:5], 0x2 1005; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1006; GFX6-NEXT: s_mov_b32 s2, -1 1007; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1008; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 1009; GFX6-NEXT: v_mov_b32_e32 v0, s3 1010; GFX6-NEXT: s_mov_b32 s3, 0xf000 1011; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 1012; 
GFX6-NEXT: s_endpgm 1013 %b = and i32 %a, 511 1014 %c = lshr i32 %b, 6 1015 store i32 %c, ptr addrspace(1) %out, align 8 1016 ret void 1017} 1018 1019define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 { 1020; GFX6-LABEL: shl_lshr: 1021; GFX6: ; %bb.0: 1022; GFX6-NEXT: s_load_dword s3, s[4:5], 0x2 1023; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1024; GFX6-NEXT: s_mov_b32 s2, -1 1025; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1026; GFX6-NEXT: s_bfe_u32 s3, s3, 0x150002 1027; GFX6-NEXT: v_mov_b32_e32 v0, s3 1028; GFX6-NEXT: s_mov_b32 s3, 0xf000 1029; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 1030; GFX6-NEXT: s_endpgm 1031 %b = shl i32 %a, 9 1032 %c = lshr i32 %b, 11 1033 store i32 %c, ptr addrspace(1) %out, align 8 1034 ret void 1035} 1036 1037declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1 1038declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1 1039 1040attributes #0 = { nounwind } 1041attributes #1 = { nounwind readnone } 1042