; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s

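; llvm.amdgcn.ubfe.i32(%src, %offset, %width) zero-extends a bitfield of
; %width bits taken from %src starting at bit %offset. As a rough C model of
; the extract (a sketch only; it does not capture every constant-folding
; quirk checked below), with both control operands read modulo 32 and a zero
; width yielding zero:
;
;   uint32_t ubfe(uint32_t src, uint32_t off, uint32_t w) {
;     off &= 31;
;     w &= 31;
;     return w == 0 ? 0 : (src >> off) & (0xffffffffu >> (32 - w));
;   }
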
%bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2) 98 store i32 %bfe_u32, ptr addrspace(1) %out, align 4 99 ret void 100} 101 102define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 { 103; SI-LABEL: bfe_u32_imm_arg_arg: 104; SI: ; %bb.0: 105; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 106; SI-NEXT: s_mov_b32 s7, 0xf000 107; SI-NEXT: s_mov_b32 s6, -1 108; SI-NEXT: s_movk_i32 s8, 0x7b 109; SI-NEXT: s_waitcnt lgkmcnt(0) 110; SI-NEXT: s_mov_b32 s4, s0 111; SI-NEXT: s_mov_b32 s5, s1 112; SI-NEXT: v_mov_b32_e32 v0, s2 113; SI-NEXT: v_mov_b32_e32 v1, s3 114; SI-NEXT: v_bfe_u32 v0, s8, v0, v1 115; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 116; SI-NEXT: s_endpgm 117; 118; VI-LABEL: bfe_u32_imm_arg_arg: 119; VI: ; %bb.0: 120; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 121; VI-NEXT: s_movk_i32 s8, 0x7b 122; VI-NEXT: s_mov_b32 s7, 0xf000 123; VI-NEXT: s_mov_b32 s6, -1 124; VI-NEXT: s_waitcnt lgkmcnt(0) 125; VI-NEXT: v_mov_b32_e32 v0, s2 126; VI-NEXT: v_mov_b32_e32 v1, s3 127; VI-NEXT: s_mov_b32 s4, s0 128; VI-NEXT: s_mov_b32 s5, s1 129; VI-NEXT: v_bfe_u32 v0, s8, v0, v1 130; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 131; VI-NEXT: s_endpgm 132 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2) 133 store i32 %bfe_u32, ptr addrspace(1) %out, align 4 134 ret void 135} 136 137define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { 138; SI-LABEL: bfe_u32_arg_0_width_reg_offset: 139; SI: ; %bb.0: 140; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 141; SI-NEXT: s_mov_b32 s3, 0xf000 142; SI-NEXT: s_mov_b32 s2, -1 143; SI-NEXT: v_mov_b32_e32 v0, 0 144; SI-NEXT: s_waitcnt lgkmcnt(0) 145; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 146; SI-NEXT: s_endpgm 147; 148; VI-LABEL: bfe_u32_arg_0_width_reg_offset: 149; VI: ; %bb.0: 150; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 151; VI-NEXT: s_mov_b32 s3, 0xf000 152; VI-NEXT: s_mov_b32 s2, -1 153; VI-NEXT: v_mov_b32_e32 v0, 0 154; VI-NEXT: s_waitcnt lgkmcnt(0) 155; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 156; VI-NEXT: s_endpgm 157 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0) 158 store i32 %bfe_u32, ptr addrspace(1) %out, align 4 159 ret void 160} 161 162define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { 163; SI-LABEL: bfe_u32_arg_0_width_imm_offset: 164; SI: ; %bb.0: 165; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 166; SI-NEXT: s_mov_b32 s3, 0xf000 167; SI-NEXT: s_mov_b32 s2, -1 168; SI-NEXT: v_mov_b32_e32 v0, 0 169; SI-NEXT: s_waitcnt lgkmcnt(0) 170; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 171; SI-NEXT: s_endpgm 172; 173; VI-LABEL: bfe_u32_arg_0_width_imm_offset: 174; VI: ; %bb.0: 175; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 176; VI-NEXT: s_mov_b32 s3, 0xf000 177; VI-NEXT: s_mov_b32 s2, -1 178; VI-NEXT: v_mov_b32_e32 v0, 0 179; VI-NEXT: s_waitcnt lgkmcnt(0) 180; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 181; VI-NEXT: s_endpgm 182 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0) 183 store i32 %bfe_u32, ptr addrspace(1) %out, align 4 184 ret void 185} 186 187define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 188; SI-LABEL: bfe_u32_zextload_i8: 189; SI: ; %bb.0: 190; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 191; SI-NEXT: s_mov_b32 s7, 0xf000 192; SI-NEXT: s_mov_b32 s6, -1 193; SI-NEXT: s_mov_b32 s10, s6 194; SI-NEXT: s_mov_b32 s11, s7 195; SI-NEXT: 
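; A zero-width extract is folded to the constant 0 before instruction
; selection, so both 0-width tests above compile to a plain store of 0 with
; no bfe instruction.
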
define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zextload_i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zextload_i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i8, ptr addrspace(1) %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; FIXME: Should be using s_add_i32
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xfe, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 1, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xfe, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 1, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

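; The zext mask in the offset_1/offset_3/offset_7 variants cannot be removed,
; but SimplifyDemandedBits shrinks it to just the bits the bfe actually
; reads: 0xff becomes 0xfe, 0xf8 and 0x80 for offsets 1, 3 and 7
; respectively.
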
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xf8, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 3, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0xf8, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 3, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_and_b32_e32 v0, 0x80, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 7, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_and_b32_e32 v0, 0x80, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 7, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; SI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %load = load i32, ptr addrspace(1) %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

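; The bfe_u32_test_N functions below feed the bfe sources with known bits;
; most of them should simplify to a single mask or shift instruction, or fold
; away entirely.
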
define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_4:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_4:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_5:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_bfe_i32 v0, v0, 0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_5:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_6:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, 30, v0
; SI-NEXT:    v_and_b32_e32 v0, 2.0, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_6:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, 30, v0
; VI-NEXT:    v_and_b32_e32 v0, 2.0, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_7:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_7:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

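; When offset + width == 32 the extract keeps every bit at or above the
; offset, so tests 9-12 reduce to a single lshr by the offset, e.g.
; ubfe(x, 8, 24) == x >> 8.
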
define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_9:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_9:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_10:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_10:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_11:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_11:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_12:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_12:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_13:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_13:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: bfe_u32_test_14:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_test_14:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load i32, ptr addrspace(1) %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
}

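; The constant_fold tests below have only constant operands, so no bfe
; instruction should survive; each stored immediate can be checked by hand,
; e.g. ubfe(160, 4, 4) = (0xa0 >> 4) & 0xf = 10 and ubfe(128, 7, 1) = bit 7
; of 0x80 = 1.
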
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_4:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_4:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_5:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_5:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_6:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x80
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_6:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x80
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_7:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_7:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_8:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_9:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_9:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_10:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_10:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_11:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_11:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_12:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_12:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_13:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_13:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

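; As with the register cases, offset + width >= 32 folds to a plain shift:
; ubfe(160, 2, 30) = 160 >> 2 = 40 and ubfe(160, 4, 28) = 160 >> 4 = 10.
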
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_14:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 40
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_14:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 40
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_15:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_15:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_17:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_17:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_18:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bfe_u32_constant_fold_test_18:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0,
; SI-LABEL: simplify_bfe_u32_multi_use_arg:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; SI-NEXT:    s_mov_b32 s4, s8
; SI-NEXT:    s_mov_b32 s5, s9
; SI-NEXT:    s_mov_b32 s0, s10
; SI-NEXT:    s_mov_b32 s1, s11
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 63, v0
; SI-NEXT:    v_bfe_u32 v1, v0, 2, 2
; SI-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: simplify_bfe_u32_multi_use_arg:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x34
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s2, s6
; VI-NEXT:    s_mov_b32 s3, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_and_b32_e32 v0, 63, v0
; VI-NEXT:    v_bfe_u32 v1, v0, 2, 2
; VI-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; VI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; VI-NEXT:    s_endpgm
                                           ptr addrspace(1) %out1,
                                           ptr addrspace(1) %in) #0 {
  %src = load i32, ptr addrspace(1) %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, ptr addrspace(1) %out0, align 4
  store i32 %and, ptr addrspace(1) %out1, align 4
  ret void
}

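; The remaining tests match shift-and-mask patterns on uniform values to
; s_bfe_u32, whose second operand packs the width into bits [22:16] and the
; offset into the low bits: 0x30006 is a 3-bit field at offset 6, and
; 0x150002 a 21-bit field at offset 2.
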
define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: lshr_and:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: lshr_and:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s6, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}

define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; SI-LABEL: v_lshr_and:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_lshr_b32 s2, s2, s3
; SI-NEXT:    s_and_b32 s2, s2, 7
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: v_lshr_and:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_lshr_b32 s0, s2, s3
; VI-NEXT:    s_and_b32 s0, s0, 7
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, ptr addrspace(1) %out, align 8
  ret void
}

define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: and_lshr:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: and_lshr:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s6, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}

define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: and_lshr2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x30006
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: and_lshr2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s6, 0x30006
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}

define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
; SI-LABEL: shl_lshr:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_bfe_u32 s4, s2, 0x150002
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_lshr:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_u32 s4, s6, 0x150002
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, ptr addrspace(1) %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }