1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6-NOHSA %s 3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7-HSA %s 4; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-NOHSA %s 5; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s 7 8; TODO: NOT AND 9define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 10; GFX6-NOHSA-LABEL: constant_load_i8: 11; GFX6-NOHSA: ; %bb.0: ; %entry 12; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 13; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 14; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 15; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 16; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 17; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 18; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 19; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 20; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 21; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 22; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 23; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 24; GFX6-NOHSA-NEXT: buffer_store_byte v0, off, s[4:7], 0 25; GFX6-NOHSA-NEXT: s_endpgm 26; 27; GFX7-HSA-LABEL: constant_load_i8: 28; GFX7-HSA: ; %bb.0: ; %entry 29; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 30; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 31; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 32; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 33; GFX7-HSA-NEXT: flat_load_ubyte v2, v[0:1] 34; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 35; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 36; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 37; GFX7-HSA-NEXT: flat_store_byte v[0:1], v2 38; GFX7-HSA-NEXT: s_endpgm 39; 40; GFX8-NOHSA-LABEL: constant_load_i8: 41; GFX8-NOHSA: ; %bb.0: ; %entry 42; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 43; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 44; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 45; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 46; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1] 47; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 48; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 49; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 50; GFX8-NOHSA-NEXT: flat_store_byte v[0:1], v2 51; GFX8-NOHSA-NEXT: s_endpgm 52; 53; EG-LABEL: constant_load_i8: 54; EG: ; %bb.0: ; %entry 55; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 56; EG-NEXT: TEX 0 @6 57; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 58; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 59; EG-NEXT: CF_END 60; EG-NEXT: PAD 61; EG-NEXT: Fetch clause starting at 6: 62; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 63; EG-NEXT: ALU clause starting at 8: 64; EG-NEXT: MOV * T0.X, KC0[2].Z, 65; EG-NEXT: ALU clause starting at 9: 66; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 67; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 68; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43) 69; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 70; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 71; EG-NEXT: LSHL T0.X, T1.W, PV.W, 72; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 73; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 74; EG-NEXT: MOV T0.Y, 0.0, 75; EG-NEXT: MOV * T0.Z, 0.0, 76; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 77; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 78; 79; GFX12-LABEL: constant_load_i8: 80; GFX12: ; %bb.0: ; %entry 81; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 82; GFX12-NEXT: v_mov_b32_e32 v0, 0 83; GFX12-NEXT: s_wait_kmcnt 0x0 84; GFX12-NEXT: global_load_u8 v1, v0, s[2:3] 85; GFX12-NEXT: s_wait_loadcnt 0x0 86; GFX12-NEXT: global_store_b8 v0, v1, s[0:1] 87; GFX12-NEXT: s_endpgm 88entry: 89 %ld = load i8, ptr addrspace(4) %in 90 store i8 %ld, ptr addrspace(1) %out 91 ret void 92} 93 94define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 95; GFX6-NOHSA-LABEL: constant_load_v2i8: 96; GFX6-NOHSA: ; %bb.0: ; %entry 97; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 98; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 99; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 100; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 101; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 102; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 103; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 104; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 105; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 106; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 107; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 108; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 109; GFX6-NOHSA-NEXT: buffer_store_short v0, off, s[4:7], 0 110; GFX6-NOHSA-NEXT: s_endpgm 111; 112; GFX7-HSA-LABEL: constant_load_v2i8: 113; GFX7-HSA: ; %bb.0: ; %entry 114; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 115; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 116; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 117; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 118; GFX7-HSA-NEXT: flat_load_ushort v2, v[0:1] 119; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 120; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 121; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 122; GFX7-HSA-NEXT: flat_store_short v[0:1], v2 123; GFX7-HSA-NEXT: s_endpgm 124; 125; GFX8-NOHSA-LABEL: constant_load_v2i8: 126; GFX8-NOHSA: ; %bb.0: ; %entry 127; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 128; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 129; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 130; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 131; GFX8-NOHSA-NEXT: flat_load_ushort v2, v[0:1] 132; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 133; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 134; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 135; GFX8-NOHSA-NEXT: flat_store_short v[0:1], v2 136; GFX8-NOHSA-NEXT: s_endpgm 137; 138; EG-LABEL: constant_load_v2i8: 139; EG: ; %bb.0: ; %entry 140; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 141; EG-NEXT: TEX 0 @6 142; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 143; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 144; EG-NEXT: CF_END 145; EG-NEXT: PAD 146; EG-NEXT: Fetch clause starting at 6: 147; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 148; EG-NEXT: ALU clause starting at 8: 149; EG-NEXT: MOV * T0.X, KC0[2].Z, 150; EG-NEXT: ALU clause starting at 9: 151; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 152; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 153; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 154; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 155; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 156; EG-NEXT: LSHL T0.X, T1.W, PV.W, 157; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 158; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 159; EG-NEXT: MOV T0.Y, 0.0, 160; EG-NEXT: MOV * T0.Z, 0.0, 161; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 162; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 163; 164; GFX12-LABEL: constant_load_v2i8: 165; GFX12: ; %bb.0: ; %entry 166; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 167; GFX12-NEXT: v_mov_b32_e32 v0, 0 168; GFX12-NEXT: s_wait_kmcnt 0x0 169; GFX12-NEXT: global_load_u16 v1, v0, s[2:3] 170; GFX12-NEXT: s_wait_loadcnt 0x0 171; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] 172; GFX12-NEXT: s_endpgm 173entry: 174 %ld = load <2 x i8>, ptr addrspace(4) %in 175 store <2 x i8> %ld, ptr addrspace(1) %out 176 ret void 177} 178 179define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 180; GFX6-NOHSA-LABEL: constant_load_v3i8: 181; GFX6-NOHSA: ; %bb.0: ; %entry 182; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 183; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 184; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 185; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 186; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 187; GFX6-NOHSA-NEXT: s_lshr_b32 s5, s4, 16 188; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 189; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 190; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 191; GFX6-NOHSA-NEXT: buffer_store_byte v1, off, s[0:3], 0 offset:2 192; GFX6-NOHSA-NEXT: buffer_store_short v0, off, s[0:3], 0 193; GFX6-NOHSA-NEXT: s_endpgm 194; 195; GFX7-HSA-LABEL: constant_load_v3i8: 196; GFX7-HSA: ; %bb.0: ; %entry 197; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 198; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 199; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 200; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 201; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 202; GFX7-HSA-NEXT: s_add_u32 s0, s0, 2 203; GFX7-HSA-NEXT: s_addc_u32 s1, s1, 0 204; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1 205; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 206; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 207; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 208; GFX7-HSA-NEXT: s_lshr_b32 s0, s2, 16 209; GFX7-HSA-NEXT: flat_store_short v[0:1], v4 210; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 211; GFX7-HSA-NEXT: flat_store_byte v[2:3], v0 212; GFX7-HSA-NEXT: s_endpgm 213; 214; GFX8-NOHSA-LABEL: constant_load_v3i8: 215; GFX8-NOHSA: ; %bb.0: ; %entry 216; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 217; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 218; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 219; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 220; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 221; GFX8-NOHSA-NEXT: s_add_u32 s0, s0, 2 222; GFX8-NOHSA-NEXT: s_addc_u32 s1, s1, 0 223; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s1 224; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 225; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 226; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 227; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s2, 16 228; GFX8-NOHSA-NEXT: flat_store_short v[0:1], v4 229; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 230; GFX8-NOHSA-NEXT: flat_store_byte v[2:3], v0 231; GFX8-NOHSA-NEXT: s_endpgm 232; 233; EG-LABEL: constant_load_v3i8: 234; EG: ; %bb.0: ; %entry 235; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 236; EG-NEXT: TEX 0 @6 237; EG-NEXT: ALU 27, @9, KC0[CB0:0-32], KC1[] 238; EG-NEXT: MEM_RAT MSKOR T6.XW, T8.X 239; EG-NEXT: MEM_RAT MSKOR T5.XW, T7.X 240; EG-NEXT: CF_END 241; EG-NEXT: Fetch clause starting at 6: 242; EG-NEXT: VTX_READ_32 T5.X, T5.X, 0, #1 243; EG-NEXT: ALU clause starting at 8: 244; EG-NEXT: MOV * T5.X, KC0[2].Z, 245; EG-NEXT: ALU clause starting at 9: 246; EG-NEXT: MOV * T2.X, T5.X, 247; EG-NEXT: MOV T0.Y, PV.X, 248; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 249; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 250; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 251; EG-NEXT: MOV * T2.W, literal.y, 252; EG-NEXT: 3(4.203895e-45), 8(1.121039e-44) 253; EG-NEXT: BFE_UINT T2.W, T0.Y, literal.x, PS, 254; EG-NEXT: LSHL * T1.W, PV.W, literal.y, 255; EG-NEXT: 16(2.242078e-44), 3(4.203895e-45) 256; EG-NEXT: LSHL T6.X, PV.W, PS, 257; EG-NEXT: LSHL * T6.W, literal.x, PS, 258; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 259; EG-NEXT: MOV T6.Y, 0.0, 260; EG-NEXT: AND_INT T1.W, KC0[2].Y, literal.x, 261; EG-NEXT: AND_INT * T2.W, T5.X, literal.y, 262; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 263; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 264; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 265; EG-NEXT: LSHL T5.X, T2.W, PV.W, 266; EG-NEXT: LSHL * T5.W, literal.x, PV.W, 267; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 268; EG-NEXT: MOV T5.Y, 0.0, 269; EG-NEXT: MOV T6.Z, 0.0, 270; EG-NEXT: MOV * T5.Z, 0.0, 271; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 272; EG-NEXT: LSHR * T8.X, T0.W, literal.x, 273; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 274; 275; GFX12-LABEL: constant_load_v3i8: 276; GFX12: ; %bb.0: ; %entry 277; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 278; GFX12-NEXT: s_wait_kmcnt 0x0 279; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 280; GFX12-NEXT: s_wait_kmcnt 0x0 281; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 282; GFX12-NEXT: s_clause 0x1 283; GFX12-NEXT: global_store_d16_hi_b8 v0, v1, s[0:1] offset:2 284; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] 285; GFX12-NEXT: s_endpgm 286entry: 287 %ld = load <3 x i8>, ptr addrspace(4) %in 288 store <3 x i8> %ld, ptr addrspace(1) %out 289 ret void 290} 291 292define amdgpu_kernel void @constant_load_v4i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 293; GFX6-NOHSA-LABEL: constant_load_v4i8: 294; GFX6-NOHSA: ; %bb.0: ; %entry 295; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 296; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 297; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 298; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 299; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 300; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 301; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 302; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 303; GFX6-NOHSA-NEXT: s_endpgm 304; 305; GFX7-HSA-LABEL: constant_load_v4i8: 306; GFX7-HSA: ; %bb.0: ; %entry 307; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 308; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 309; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 310; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 311; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 312; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 313; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 314; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 315; GFX7-HSA-NEXT: s_endpgm 316; 317; GFX8-NOHSA-LABEL: constant_load_v4i8: 318; GFX8-NOHSA: ; %bb.0: ; %entry 319; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 320; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 321; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 322; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 323; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 324; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 325; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 326; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 327; GFX8-NOHSA-NEXT: s_endpgm 328; 329; EG-LABEL: constant_load_v4i8: 330; EG: ; %bb.0: ; %entry 331; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 332; EG-NEXT: TEX 0 @6 333; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 334; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 335; EG-NEXT: CF_END 336; EG-NEXT: PAD 337; EG-NEXT: Fetch clause starting at 6: 338; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 339; EG-NEXT: ALU clause starting at 8: 340; EG-NEXT: MOV * T0.X, KC0[2].Z, 341; EG-NEXT: ALU clause starting at 9: 342; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 343; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 344; 345; GFX12-LABEL: constant_load_v4i8: 346; GFX12: ; %bb.0: ; %entry 347; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 348; GFX12-NEXT: s_wait_kmcnt 0x0 349; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 350; GFX12-NEXT: s_wait_kmcnt 0x0 351; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 352; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 353; GFX12-NEXT: s_endpgm 354entry: 355 %ld = load <4 x i8>, ptr addrspace(4) %in 356 store <4 x i8> %ld, ptr addrspace(1) %out 357 ret void 358} 359 360define amdgpu_kernel void @constant_load_v8i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 361; GFX6-NOHSA-LABEL: constant_load_v8i8: 362; GFX6-NOHSA: ; %bb.0: ; %entry 363; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 364; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 365; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 366; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 367; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 368; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 369; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 370; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 371; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 372; GFX6-NOHSA-NEXT: s_endpgm 373; 374; GFX7-HSA-LABEL: constant_load_v8i8: 375; GFX7-HSA: ; %bb.0: ; %entry 376; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 377; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 378; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 379; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 380; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 381; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 382; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 383; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s3 384; GFX7-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 385; GFX7-HSA-NEXT: s_endpgm 386; 387; GFX8-NOHSA-LABEL: constant_load_v8i8: 388; GFX8-NOHSA: ; %bb.0: ; %entry 389; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 390; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 391; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 392; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 393; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 394; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 395; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 396; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s3 397; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 398; GFX8-NOHSA-NEXT: s_endpgm 399; 400; EG-LABEL: constant_load_v8i8: 401; EG: ; %bb.0: ; %entry 402; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 403; EG-NEXT: TEX 0 @6 404; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 405; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 406; EG-NEXT: CF_END 407; EG-NEXT: PAD 408; EG-NEXT: Fetch clause starting at 6: 409; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 410; EG-NEXT: ALU clause starting at 8: 411; EG-NEXT: MOV * T0.X, KC0[2].Z, 412; EG-NEXT: ALU clause starting at 9: 413; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 414; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 415; 416; GFX12-LABEL: constant_load_v8i8: 417; GFX12: ; %bb.0: ; %entry 418; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 419; GFX12-NEXT: s_wait_kmcnt 0x0 420; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 421; GFX12-NEXT: v_mov_b32_e32 v2, 0 422; GFX12-NEXT: s_wait_kmcnt 0x0 423; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 424; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 425; GFX12-NEXT: s_endpgm 426entry: 427 %ld = load <8 x i8>, ptr addrspace(4) %in 428 store <8 x i8> %ld, ptr addrspace(1) %out 429 ret void 430} 431 432define amdgpu_kernel void @constant_load_v16i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 433; GFX6-NOHSA-LABEL: constant_load_v16i8: 434; GFX6-NOHSA: ; %bb.0: ; %entry 435; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 436; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 437; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 438; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 439; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 440; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 441; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 442; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 443; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 444; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s7 445; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 446; GFX6-NOHSA-NEXT: s_endpgm 447; 448; GFX7-HSA-LABEL: constant_load_v16i8: 449; GFX7-HSA: ; %bb.0: ; %entry 450; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 451; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 452; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 453; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 454; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 455; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 456; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 457; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5 458; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 459; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s7 460; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 461; GFX7-HSA-NEXT: s_endpgm 462; 463; GFX8-NOHSA-LABEL: constant_load_v16i8: 464; GFX8-NOHSA: ; %bb.0: ; %entry 465; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 466; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 467; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 468; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 469; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 470; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 471; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 472; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5 473; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 474; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s7 475; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 476; GFX8-NOHSA-NEXT: s_endpgm 477; 478; EG-LABEL: constant_load_v16i8: 479; EG: ; %bb.0: ; %entry 480; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 481; EG-NEXT: TEX 0 @6 482; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 483; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 484; EG-NEXT: CF_END 485; EG-NEXT: PAD 486; EG-NEXT: Fetch clause starting at 6: 487; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 488; EG-NEXT: ALU clause starting at 8: 489; EG-NEXT: MOV * T0.X, KC0[2].Z, 490; EG-NEXT: ALU clause starting at 9: 491; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 492; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 493; 494; GFX12-LABEL: constant_load_v16i8: 495; GFX12: ; %bb.0: ; %entry 496; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 497; GFX12-NEXT: s_wait_kmcnt 0x0 498; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 499; GFX12-NEXT: v_mov_b32_e32 v4, 0 500; GFX12-NEXT: s_wait_kmcnt 0x0 501; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7 502; GFX12-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6 503; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 504; GFX12-NEXT: s_endpgm 505entry: 506 %ld = load <16 x i8>, ptr addrspace(4) %in 507 store <16 x i8> %ld, ptr addrspace(1) %out 508 ret void 509} 510 511define amdgpu_kernel void @constant_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 512; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i32: 513; GFX6-NOHSA: ; %bb.0: 514; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 515; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 516; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 517; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 518; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 519; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 520; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 521; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 522; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 523; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 524; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 525; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 526; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 527; GFX6-NOHSA-NEXT: s_endpgm 528; 529; GFX7-HSA-LABEL: constant_zextload_i8_to_i32: 530; GFX7-HSA: ; %bb.0: 531; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 532; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 533; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 534; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 535; GFX7-HSA-NEXT: flat_load_ubyte v2, v[0:1] 536; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 537; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 538; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 539; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 540; GFX7-HSA-NEXT: s_endpgm 541; 542; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i32: 543; GFX8-NOHSA: ; %bb.0: 544; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 545; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 546; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 547; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 548; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1] 549; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 550; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 551; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 552; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 553; GFX8-NOHSA-NEXT: s_endpgm 554; 555; EG-LABEL: constant_zextload_i8_to_i32: 556; EG: ; %bb.0: 557; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 558; EG-NEXT: TEX 0 @6 559; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 560; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 561; EG-NEXT: CF_END 562; EG-NEXT: PAD 563; EG-NEXT: Fetch clause starting at 6: 564; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 565; EG-NEXT: ALU clause starting at 8: 566; EG-NEXT: MOV * T0.X, KC0[2].Z, 567; EG-NEXT: ALU clause starting at 9: 568; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 569; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 570; 571; GFX12-LABEL: constant_zextload_i8_to_i32: 572; GFX12: ; %bb.0: 573; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 574; GFX12-NEXT: s_wait_kmcnt 0x0 575; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 576; GFX12-NEXT: s_wait_kmcnt 0x0 577; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 578; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 579; GFX12-NEXT: s_endpgm 580 %a = load i8, ptr addrspace(4) %in 581 %ext = zext i8 %a to i32 582 store i32 %ext, ptr addrspace(1) %out 583 ret void 584} 585 586define amdgpu_kernel void @constant_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 587; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i32: 588; GFX6-NOHSA: ; %bb.0: 589; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 590; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 591; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 592; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 593; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 594; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 595; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 596; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 597; GFX6-NOHSA-NEXT: buffer_load_sbyte v0, off, s[8:11], 0 598; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 599; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 600; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 601; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 602; GFX6-NOHSA-NEXT: s_endpgm 603; 604; GFX7-HSA-LABEL: constant_sextload_i8_to_i32: 605; GFX7-HSA: ; %bb.0: 606; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 607; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 608; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 609; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 610; GFX7-HSA-NEXT: flat_load_sbyte v2, v[0:1] 611; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 612; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 613; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 614; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 615; GFX7-HSA-NEXT: s_endpgm 616; 617; GFX8-NOHSA-LABEL: constant_sextload_i8_to_i32: 618; GFX8-NOHSA: ; %bb.0: 619; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 620; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 621; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 622; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 623; GFX8-NOHSA-NEXT: flat_load_sbyte v2, v[0:1] 624; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 625; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 626; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 627; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 628; GFX8-NOHSA-NEXT: s_endpgm 629; 630; EG-LABEL: constant_sextload_i8_to_i32: 631; EG: ; %bb.0: 632; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 633; EG-NEXT: TEX 0 @6 634; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 635; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 636; EG-NEXT: CF_END 637; EG-NEXT: PAD 638; EG-NEXT: Fetch clause starting at 6: 639; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 640; EG-NEXT: ALU clause starting at 8: 641; EG-NEXT: MOV * T0.X, KC0[2].Z, 642; EG-NEXT: ALU clause starting at 9: 643; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 644; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 645; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45) 646; 647; GFX12-LABEL: constant_sextload_i8_to_i32: 648; GFX12: ; %bb.0: 649; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 650; GFX12-NEXT: s_wait_kmcnt 0x0 651; GFX12-NEXT: s_load_i8 s2, s[2:3], 0x0 652; GFX12-NEXT: s_wait_kmcnt 0x0 653; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 654; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 655; GFX12-NEXT: s_endpgm 656 %ld = load i8, ptr addrspace(4) %in 657 %ext = sext i8 %ld to i32 658 store i32 %ext, ptr addrspace(1) %out 659 ret void 660} 661 662define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 663; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i32: 664; GFX6-NOHSA: ; %bb.0: 665; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 666; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 667; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 668; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 669; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 670; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 671; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 672; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 673; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 674; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 675; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 676; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 677; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 678; GFX6-NOHSA-NEXT: s_endpgm 679; 680; GFX7-HSA-LABEL: constant_zextload_v1i8_to_v1i32: 681; GFX7-HSA: ; %bb.0: 682; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 683; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 684; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 685; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 686; GFX7-HSA-NEXT: flat_load_ubyte v2, v[0:1] 687; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 688; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 689; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 690; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 691; GFX7-HSA-NEXT: s_endpgm 692; 693; GFX8-NOHSA-LABEL: constant_zextload_v1i8_to_v1i32: 694; GFX8-NOHSA: ; %bb.0: 695; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 696; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 697; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 698; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 699; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1] 700; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 701; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 702; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 703; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 704; GFX8-NOHSA-NEXT: s_endpgm 705; 706; EG-LABEL: constant_zextload_v1i8_to_v1i32: 707; EG: ; %bb.0: 708; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 709; EG-NEXT: TEX 0 @6 710; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 711; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 712; EG-NEXT: CF_END 713; EG-NEXT: PAD 714; EG-NEXT: Fetch clause starting at 6: 715; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 716; EG-NEXT: ALU clause starting at 8: 717; EG-NEXT: MOV * T0.X, KC0[2].Z, 718; EG-NEXT: ALU clause starting at 9: 719; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 720; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 721; 722; GFX12-LABEL: constant_zextload_v1i8_to_v1i32: 723; GFX12: ; %bb.0: 724; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 725; GFX12-NEXT: s_wait_kmcnt 0x0 726; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 727; GFX12-NEXT: s_wait_kmcnt 0x0 728; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 729; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 730; GFX12-NEXT: s_endpgm 731 %load = load <1 x i8>, ptr addrspace(4) %in 732 %ext = zext <1 x i8> %load to <1 x i32> 733 store <1 x i32> %ext, ptr addrspace(1) %out 734 ret void 735} 736 737define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 738; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i32: 739; GFX6-NOHSA: ; %bb.0: 740; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 741; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 742; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 743; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 744; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 745; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 746; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 747; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 748; GFX6-NOHSA-NEXT: buffer_load_sbyte v0, off, s[8:11], 0 749; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 750; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 751; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 752; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 753; GFX6-NOHSA-NEXT: s_endpgm 754; 755; GFX7-HSA-LABEL: constant_sextload_v1i8_to_v1i32: 756; GFX7-HSA: ; %bb.0: 757; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 758; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 759; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 760; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 761; GFX7-HSA-NEXT: flat_load_sbyte v2, v[0:1] 762; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 763; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 764; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 765; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 766; GFX7-HSA-NEXT: s_endpgm 767; 768; GFX8-NOHSA-LABEL: constant_sextload_v1i8_to_v1i32: 769; GFX8-NOHSA: ; %bb.0: 770; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 771; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 772; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 773; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 774; GFX8-NOHSA-NEXT: flat_load_sbyte v2, v[0:1] 775; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 776; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 777; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 778; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 779; GFX8-NOHSA-NEXT: s_endpgm 780; 781; EG-LABEL: constant_sextload_v1i8_to_v1i32: 782; EG: ; %bb.0: 783; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 784; EG-NEXT: TEX 0 @6 785; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 786; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 787; EG-NEXT: CF_END 788; EG-NEXT: PAD 789; EG-NEXT: Fetch clause starting at 6: 790; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 791; EG-NEXT: ALU clause starting at 8: 792; EG-NEXT: MOV * T0.X, KC0[2].Z, 793; EG-NEXT: ALU clause starting at 9: 794; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 795; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 796; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45) 797; 798; GFX12-LABEL: constant_sextload_v1i8_to_v1i32: 799; GFX12: ; %bb.0: 800; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 801; GFX12-NEXT: s_wait_kmcnt 0x0 802; GFX12-NEXT: s_load_i8 s2, s[2:3], 0x0 803; GFX12-NEXT: s_wait_kmcnt 0x0 804; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 805; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 806; GFX12-NEXT: s_endpgm 807 %load = load <1 x i8>, ptr addrspace(4) %in 808 %ext = sext <1 x i8> %load to <1 x i32> 809 store <1 x i32> %ext, ptr addrspace(1) %out 810 ret void 811} 812 813; TODO: This should use DST, but for some there are redundant MOVs 814define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 815; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i32: 816; GFX6-NOHSA: ; %bb.0: 817; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 818; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 819; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 820; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 821; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 822; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 823; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 824; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 825; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 826; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 827; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 828; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 829; GFX6-NOHSA-NEXT: v_lshrrev_b32_e32 v1, 8, v0 830; GFX6-NOHSA-NEXT: v_and_b32_e32 v0, 0xff, v0 831; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 832; GFX6-NOHSA-NEXT: s_endpgm 833; 834; GFX7-HSA-LABEL: constant_zextload_v2i8_to_v2i32: 835; GFX7-HSA: ; %bb.0: 836; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 837; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 838; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 839; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 840; GFX7-HSA-NEXT: flat_load_ushort v2, v[0:1] 841; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 842; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 843; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 844; GFX7-HSA-NEXT: v_lshrrev_b32_e32 v3, 8, v2 845; GFX7-HSA-NEXT: v_and_b32_e32 v2, 0xff, v2 846; GFX7-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 847; GFX7-HSA-NEXT: s_endpgm 848; 849; GFX8-NOHSA-LABEL: constant_zextload_v2i8_to_v2i32: 850; GFX8-NOHSA: ; %bb.0: 851; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 852; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, 8 853; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 854; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 855; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 856; GFX8-NOHSA-NEXT: flat_load_ushort v2, v[0:1] 857; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 858; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 859; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 860; GFX8-NOHSA-NEXT: v_lshrrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 861; GFX8-NOHSA-NEXT: v_and_b32_e32 v2, 0xff, v2 862; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 863; GFX8-NOHSA-NEXT: s_endpgm 864; 865; EG-LABEL: constant_zextload_v2i8_to_v2i32: 866; EG: ; %bb.0: 867; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 868; EG-NEXT: TEX 0 @6 869; EG-NEXT: ALU 12, @10, KC0[CB0:0-32], KC1[] 870; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 871; EG-NEXT: CF_END 872; EG-NEXT: PAD 873; EG-NEXT: Fetch clause starting at 6: 874; EG-NEXT: VTX_READ_16 T4.X, T4.X, 0, #1 875; EG-NEXT: ALU clause starting at 8: 876; EG-NEXT: MOV * T0.Y, T2.X, 877; EG-NEXT: MOV * T4.X, KC0[2].Z, 878; EG-NEXT: ALU clause starting at 10: 879; EG-NEXT: AND_INT T0.W, T4.X, literal.x, 880; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 881; EG-NEXT: 65535(9.183409e-41), -65536(nan) 882; EG-NEXT: OR_INT * T0.W, PS, PV.W, 883; EG-NEXT: MOV * T2.X, PV.W, 884; EG-NEXT: MOV T0.Y, PV.X, 885; EG-NEXT: MOV * T1.W, literal.x, 886; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 887; EG-NEXT: BFE_UINT * T4.Y, PV.Y, literal.x, PV.W, 888; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 889; EG-NEXT: AND_INT T4.X, T0.W, literal.x, 890; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 891; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45) 892; 893; GFX12-LABEL: constant_zextload_v2i8_to_v2i32: 894; GFX12: ; %bb.0: 895; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 896; GFX12-NEXT: v_mov_b32_e32 v2, 0 897; GFX12-NEXT: s_wait_kmcnt 0x0 898; GFX12-NEXT: global_load_u16 v0, v2, s[2:3] 899; GFX12-NEXT: s_wait_loadcnt 0x0 900; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0 901; GFX12-NEXT: v_and_b32_e32 v0, 0xff, v0 902; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 903; GFX12-NEXT: v_lshrrev_b32_e32 v1, 8, v1 904; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 905; GFX12-NEXT: s_endpgm 906 %load = load <2 x i8>, ptr addrspace(4) %in 907 %ext = zext <2 x i8> %load to <2 x i32> 908 store <2 x i32> %ext, ptr addrspace(1) %out 909 ret void 910} 911 912; TODO: These should use DST, but for some there are redundant MOVs 913define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 914; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i32: 915; GFX6-NOHSA: ; %bb.0: 916; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 917; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 918; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 919; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 920; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 921; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 922; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 923; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 924; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 925; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 926; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 927; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 928; GFX6-NOHSA-NEXT: v_bfe_i32 v1, v0, 8, 8 929; GFX6-NOHSA-NEXT: v_bfe_i32 v0, v0, 0, 8 930; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 931; GFX6-NOHSA-NEXT: s_endpgm 932; 933; GFX7-HSA-LABEL: constant_sextload_v2i8_to_v2i32: 934; GFX7-HSA: ; %bb.0: 935; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 936; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 937; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 938; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 939; GFX7-HSA-NEXT: flat_load_ushort v2, v[0:1] 940; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 941; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 942; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 943; GFX7-HSA-NEXT: v_bfe_i32 v3, v2, 8, 8 944; GFX7-HSA-NEXT: v_bfe_i32 v2, v2, 0, 8 945; GFX7-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 946; GFX7-HSA-NEXT: s_endpgm 947; 948; GFX8-NOHSA-LABEL: constant_sextload_v2i8_to_v2i32: 949; GFX8-NOHSA: ; %bb.0: 950; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 951; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 952; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 953; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 954; GFX8-NOHSA-NEXT: flat_load_ushort v2, v[0:1] 955; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 956; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 957; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 958; GFX8-NOHSA-NEXT: v_bfe_i32 v3, v2, 8, 8 959; GFX8-NOHSA-NEXT: v_bfe_i32 v2, v2, 0, 8 960; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 961; GFX8-NOHSA-NEXT: s_endpgm 962; 963; EG-LABEL: constant_sextload_v2i8_to_v2i32: 964; EG: ; %bb.0: 965; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 966; EG-NEXT: TEX 0 @6 967; EG-NEXT: ALU 11, @10, KC0[CB0:0-32], KC1[] 968; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 969; EG-NEXT: CF_END 970; EG-NEXT: PAD 971; EG-NEXT: Fetch clause starting at 6: 972; EG-NEXT: VTX_READ_16 T4.X, T4.X, 0, #1 973; EG-NEXT: ALU clause starting at 8: 974; EG-NEXT: MOV * T0.Y, T2.X, 975; EG-NEXT: MOV * T4.X, KC0[2].Z, 976; EG-NEXT: ALU clause starting at 10: 977; EG-NEXT: AND_INT T0.W, T4.X, literal.x, 978; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 979; EG-NEXT: 65535(9.183409e-41), -65536(nan) 980; EG-NEXT: OR_INT * T0.W, PS, PV.W, 981; EG-NEXT: MOV * T2.X, PV.W, 982; EG-NEXT: MOV * T0.Y, PV.X, 983; EG-NEXT: BFE_INT T4.X, T0.W, 0.0, literal.x, 984; EG-NEXT: LSHR T0.W, PV.Y, literal.x, 985; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 986; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45) 987; EG-NEXT: BFE_INT * T4.Y, PV.W, 0.0, literal.x, 988; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 989; 990; GFX12-LABEL: constant_sextload_v2i8_to_v2i32: 991; GFX12: ; %bb.0: 992; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 993; GFX12-NEXT: s_wait_kmcnt 0x0 994; GFX12-NEXT: s_load_u16 s2, s[2:3], 0x0 995; GFX12-NEXT: s_wait_kmcnt 0x0 996; GFX12-NEXT: s_sext_i32_i8 s3, s2 997; GFX12-NEXT: s_bfe_i32 s2, s2, 0x80008 998; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 999; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2 1000; GFX12-NEXT: v_mov_b32_e32 v0, s3 1001; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1002; GFX12-NEXT: s_endpgm 1003 %load = load <2 x i8>, ptr addrspace(4) %in 1004 %ext = sext <2 x i8> %load to <2 x i32> 1005 store <2 x i32> %ext, ptr addrspace(1) %out 1006 ret void 1007} 1008 1009; TODO: These should use DST, but for some there are redundant MOVs 1010define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1011; GFX6-NOHSA-LABEL: constant_zextload_v3i8_to_v3i32: 1012; GFX6-NOHSA: ; %bb.0: ; %entry 1013; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1014; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1015; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 1016; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1017; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1018; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1019; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s4, 0x80008 1020; GFX6-NOHSA-NEXT: s_bfe_u32 s6, s4, 0x80010 1021; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xff 1022; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 1023; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1024; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1025; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 1026; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 1027; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1028; GFX6-NOHSA-NEXT: s_endpgm 1029; 1030; GFX7-HSA-LABEL: constant_zextload_v3i8_to_v3i32: 1031; GFX7-HSA: ; %bb.0: ; %entry 1032; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1033; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1034; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 1035; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 1036; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s1 1037; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1038; GFX7-HSA-NEXT: s_bfe_u32 s0, s2, 0x80008 1039; GFX7-HSA-NEXT: s_and_b32 s1, s2, 0xff 1040; GFX7-HSA-NEXT: s_bfe_u32 s2, s2, 0x80010 1041; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s1 1042; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s0 1043; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 1044; GFX7-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1045; GFX7-HSA-NEXT: s_endpgm 1046; 1047; GFX8-NOHSA-LABEL: constant_zextload_v3i8_to_v3i32: 1048; GFX8-NOHSA: ; %bb.0: ; %entry 1049; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1050; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1051; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 1052; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s0 1053; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s1 1054; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1055; GFX8-NOHSA-NEXT: s_bfe_u32 s0, s2, 0x80008 1056; GFX8-NOHSA-NEXT: s_and_b32 s1, s2, 0xff 1057; GFX8-NOHSA-NEXT: s_bfe_u32 s2, s2, 0x80010 1058; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s1 1059; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s0 1060; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 1061; GFX8-NOHSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1062; GFX8-NOHSA-NEXT: s_endpgm 1063; 1064; EG-LABEL: constant_zextload_v3i8_to_v3i32: 1065; EG: ; %bb.0: ; %entry 1066; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1067; EG-NEXT: TEX 0 @6 1068; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 1069; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T7.X, 0 1070; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T6.X, 1 1071; EG-NEXT: CF_END 1072; EG-NEXT: Fetch clause starting at 6: 1073; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1074; EG-NEXT: ALU clause starting at 8: 1075; EG-NEXT: MOV * T4.X, KC0[2].Z, 1076; EG-NEXT: ALU clause starting at 9: 1077; EG-NEXT: MOV * T0.W, literal.x, 1078; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1079; EG-NEXT: BFE_UINT * T5.Y, T4.X, literal.x, PV.W, 1080; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1081; EG-NEXT: AND_INT T5.X, T4.X, literal.x, 1082; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1083; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45) 1084; EG-NEXT: BFE_UINT T4.X, T4.X, literal.x, T0.W, 1085; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1086; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1087; EG-NEXT: LSHR * T7.X, PV.W, literal.x, 1088; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1089; 1090; GFX12-LABEL: constant_zextload_v3i8_to_v3i32: 1091; GFX12: ; %bb.0: ; %entry 1092; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1093; GFX12-NEXT: s_wait_kmcnt 0x0 1094; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 1095; GFX12-NEXT: s_wait_kmcnt 0x0 1096; GFX12-NEXT: s_bfe_u32 s3, s2, 0x80008 1097; GFX12-NEXT: s_and_b32 s4, s2, 0xff 1098; GFX12-NEXT: s_bfe_u32 s2, s2, 0x80010 1099; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4 1100; GFX12-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s2 1101; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1] 1102; GFX12-NEXT: s_endpgm 1103entry: 1104 %ld = load <3 x i8>, ptr addrspace(4) %in 1105 %ext = zext <3 x i8> %ld to <3 x i32> 1106 store <3 x i32> %ext, ptr addrspace(1) %out 1107 ret void 1108} 1109 1110; TODO: These should use DST, but for some there are redundant MOVs 1111define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1112; GFX6-NOHSA-LABEL: constant_sextload_v3i8_to_v3i32: 1113; GFX6-NOHSA: ; %bb.0: ; %entry 1114; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1115; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1116; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 1117; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1118; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1119; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1120; GFX6-NOHSA-NEXT: s_bfe_i32 s5, s4, 0x80008 1121; GFX6-NOHSA-NEXT: s_bfe_i32 s6, s4, 0x80010 1122; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 1123; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 1124; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1125; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1126; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 1127; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 1128; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1129; GFX6-NOHSA-NEXT: s_endpgm 1130; 1131; GFX7-HSA-LABEL: constant_sextload_v3i8_to_v3i32: 1132; GFX7-HSA: ; %bb.0: ; %entry 1133; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1134; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1135; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 1136; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 1137; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s1 1138; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1139; GFX7-HSA-NEXT: s_bfe_i32 s0, s2, 0x80010 1140; GFX7-HSA-NEXT: s_bfe_i32 s1, s2, 0x80008 1141; GFX7-HSA-NEXT: s_sext_i32_i8 s2, s2 1142; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 1143; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 1144; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 1145; GFX7-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1146; GFX7-HSA-NEXT: s_endpgm 1147; 1148; GFX8-NOHSA-LABEL: constant_sextload_v3i8_to_v3i32: 1149; GFX8-NOHSA: ; %bb.0: ; %entry 1150; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1151; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1152; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 1153; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s0 1154; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s1 1155; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1156; GFX8-NOHSA-NEXT: s_bfe_i32 s0, s2, 0x80010 1157; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s2, 0x80008 1158; GFX8-NOHSA-NEXT: s_sext_i32_i8 s2, s2 1159; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 1160; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 1161; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 1162; GFX8-NOHSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1163; GFX8-NOHSA-NEXT: s_endpgm 1164; 1165; EG-LABEL: constant_sextload_v3i8_to_v3i32: 1166; EG: ; %bb.0: ; %entry 1167; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1168; EG-NEXT: TEX 0 @6 1169; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 1170; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T4.X, 0 1171; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XY, T5.X, 1 1172; EG-NEXT: CF_END 1173; EG-NEXT: Fetch clause starting at 6: 1174; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1175; EG-NEXT: ALU clause starting at 8: 1176; EG-NEXT: MOV * T4.X, KC0[2].Z, 1177; EG-NEXT: ALU clause starting at 9: 1178; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 1179; EG-NEXT: LSHR * T0.W, T4.X, literal.y, 1180; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1181; EG-NEXT: BFE_INT * T6.X, PV.W, 0.0, literal.x, 1182; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1183; EG-NEXT: BFE_INT T7.X, T4.X, 0.0, literal.x, 1184; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1185; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 1186; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1187; EG-NEXT: LSHR T4.X, PS, literal.x, 1188; EG-NEXT: BFE_INT * T7.Y, PV.W, 0.0, literal.y, 1189; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 1190; 1191; GFX12-LABEL: constant_sextload_v3i8_to_v3i32: 1192; GFX12: ; %bb.0: ; %entry 1193; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1194; GFX12-NEXT: s_wait_kmcnt 0x0 1195; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 1196; GFX12-NEXT: s_wait_kmcnt 0x0 1197; GFX12-NEXT: s_bfe_i32 s3, s2, 0x80010 1198; GFX12-NEXT: s_sext_i32_i8 s4, s2 1199; GFX12-NEXT: s_bfe_i32 s2, s2, 0x80008 1200; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4 1201; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3 1202; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1] 1203; GFX12-NEXT: s_endpgm 1204entry: 1205 %ld = load <3 x i8>, ptr addrspace(4) %in 1206 %ext = sext <3 x i8> %ld to <3 x i32> 1207 store <3 x i32> %ext, ptr addrspace(1) %out 1208 ret void 1209} 1210 1211; TODO: These should use DST, but for some there are redundant MOVs 1212define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1213; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i32: 1214; GFX6-NOHSA: ; %bb.0: 1215; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1216; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1217; GFX6-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 1218; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1219; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1220; GFX6-NOHSA-NEXT: s_lshr_b32 s4, s2, 24 1221; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s2, 0x80008 1222; GFX6-NOHSA-NEXT: s_and_b32 s6, s2, 0xff 1223; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s2, 0x80010 1224; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1225; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 1226; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 1227; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 1228; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s4 1229; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1230; GFX6-NOHSA-NEXT: s_endpgm 1231; 1232; GFX7-HSA-LABEL: constant_zextload_v4i8_to_v4i32: 1233; GFX7-HSA: ; %bb.0: 1234; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1235; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1236; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 1237; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 1238; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 1239; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1240; GFX7-HSA-NEXT: s_lshr_b32 s0, s2, 24 1241; GFX7-HSA-NEXT: s_bfe_u32 s1, s2, 0x80008 1242; GFX7-HSA-NEXT: s_and_b32 s3, s2, 0xff 1243; GFX7-HSA-NEXT: s_bfe_u32 s2, s2, 0x80010 1244; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 1245; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 1246; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 1247; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 1248; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1249; GFX7-HSA-NEXT: s_endpgm 1250; 1251; GFX8-NOHSA-LABEL: constant_zextload_v4i8_to_v4i32: 1252; GFX8-NOHSA: ; %bb.0: 1253; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1254; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1255; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 1256; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 1257; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 1258; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1259; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s2, 24 1260; GFX8-NOHSA-NEXT: s_bfe_u32 s1, s2, 0x80008 1261; GFX8-NOHSA-NEXT: s_and_b32 s3, s2, 0xff 1262; GFX8-NOHSA-NEXT: s_bfe_u32 s2, s2, 0x80010 1263; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s3 1264; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 1265; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 1266; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s0 1267; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1268; GFX8-NOHSA-NEXT: s_endpgm 1269; 1270; EG-LABEL: constant_zextload_v4i8_to_v4i32: 1271; EG: ; %bb.0: 1272; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1273; EG-NEXT: TEX 0 @6 1274; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[] 1275; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 1276; EG-NEXT: CF_END 1277; EG-NEXT: PAD 1278; EG-NEXT: Fetch clause starting at 6: 1279; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1280; EG-NEXT: ALU clause starting at 8: 1281; EG-NEXT: MOV * T4.X, KC0[2].Z, 1282; EG-NEXT: ALU clause starting at 9: 1283; EG-NEXT: MOV * T0.W, literal.x, 1284; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1285; EG-NEXT: BFE_UINT * T4.Z, T4.X, literal.x, PV.W, 1286; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1287; EG-NEXT: BFE_UINT T4.Y, T4.X, literal.x, T0.W, 1288; EG-NEXT: LSHR * T4.W, T4.X, literal.y, 1289; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 1290; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 1291; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 1292; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45) 1293; 1294; GFX12-LABEL: constant_zextload_v4i8_to_v4i32: 1295; GFX12: ; %bb.0: 1296; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1297; GFX12-NEXT: s_wait_kmcnt 0x0 1298; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 1299; GFX12-NEXT: s_wait_kmcnt 0x0 1300; GFX12-NEXT: s_lshr_b32 s3, s2, 24 1301; GFX12-NEXT: s_bfe_u32 s4, s2, 0x80008 1302; GFX12-NEXT: s_and_b32 s5, s2, 0xff 1303; GFX12-NEXT: s_bfe_u32 s2, s2, 0x80010 1304; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s4 1305; GFX12-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3 1306; GFX12-NEXT: v_mov_b32_e32 v2, s2 1307; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 1308; GFX12-NEXT: s_endpgm 1309 %load = load <4 x i8>, ptr addrspace(4) %in 1310 %ext = zext <4 x i8> %load to <4 x i32> 1311 store <4 x i32> %ext, ptr addrspace(1) %out 1312 ret void 1313} 1314 1315; TODO: These should use DST, but for some there are redundant MOVs 1316define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1317; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i32: 1318; GFX6-NOHSA: ; %bb.0: 1319; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1320; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1321; GFX6-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 1322; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1323; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1324; GFX6-NOHSA-NEXT: s_ashr_i32 s4, s2, 24 1325; GFX6-NOHSA-NEXT: s_bfe_i32 s5, s2, 0x80010 1326; GFX6-NOHSA-NEXT: s_bfe_i32 s6, s2, 0x80008 1327; GFX6-NOHSA-NEXT: s_sext_i32_i8 s7, s2 1328; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1329; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 1330; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s6 1331; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 1332; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s4 1333; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1334; GFX6-NOHSA-NEXT: s_endpgm 1335; 1336; GFX7-HSA-LABEL: constant_sextload_v4i8_to_v4i32: 1337; GFX7-HSA: ; %bb.0: 1338; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1339; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1340; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 1341; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 1342; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 1343; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1344; GFX7-HSA-NEXT: s_ashr_i32 s0, s2, 24 1345; GFX7-HSA-NEXT: s_bfe_i32 s1, s2, 0x80010 1346; GFX7-HSA-NEXT: s_bfe_i32 s3, s2, 0x80008 1347; GFX7-HSA-NEXT: s_sext_i32_i8 s2, s2 1348; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 1349; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 1350; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s1 1351; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 1352; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1353; GFX7-HSA-NEXT: s_endpgm 1354; 1355; GFX8-NOHSA-LABEL: constant_sextload_v4i8_to_v4i32: 1356; GFX8-NOHSA: ; %bb.0: 1357; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1358; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1359; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 1360; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 1361; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 1362; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1363; GFX8-NOHSA-NEXT: s_ashr_i32 s0, s2, 24 1364; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s2, 0x80010 1365; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s2, 0x80008 1366; GFX8-NOHSA-NEXT: s_sext_i32_i8 s2, s2 1367; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 1368; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 1369; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s1 1370; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s0 1371; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1372; GFX8-NOHSA-NEXT: s_endpgm 1373; 1374; EG-LABEL: constant_sextload_v4i8_to_v4i32: 1375; EG: ; %bb.0: 1376; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1377; EG-NEXT: TEX 0 @6 1378; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 1379; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T4.X, 1 1380; EG-NEXT: CF_END 1381; EG-NEXT: PAD 1382; EG-NEXT: Fetch clause starting at 6: 1383; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1384; EG-NEXT: ALU clause starting at 8: 1385; EG-NEXT: MOV * T4.X, KC0[2].Z, 1386; EG-NEXT: ALU clause starting at 9: 1387; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1388; EG-NEXT: LSHR * T0.W, T4.X, literal.y, 1389; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 1390; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x, 1391; EG-NEXT: LSHR * T0.W, T4.X, literal.y, 1392; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 1393; EG-NEXT: BFE_INT T5.Z, PS, 0.0, literal.x, 1394; EG-NEXT: LSHR * T0.W, T4.X, literal.x, 1395; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1396; EG-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1397; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y, 1398; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 1399; 1400; GFX12-LABEL: constant_sextload_v4i8_to_v4i32: 1401; GFX12: ; %bb.0: 1402; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1403; GFX12-NEXT: s_wait_kmcnt 0x0 1404; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 1405; GFX12-NEXT: s_wait_kmcnt 0x0 1406; GFX12-NEXT: s_ashr_i32 s3, s2, 24 1407; GFX12-NEXT: s_bfe_i32 s4, s2, 0x80010 1408; GFX12-NEXT: s_sext_i32_i8 s5, s2 1409; GFX12-NEXT: s_bfe_i32 s2, s2, 0x80008 1410; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1411; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2 1412; GFX12-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3 1413; GFX12-NEXT: v_mov_b32_e32 v2, s4 1414; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 1415; GFX12-NEXT: s_endpgm 1416 %load = load <4 x i8>, ptr addrspace(4) %in 1417 %ext = sext <4 x i8> %load to <4 x i32> 1418 store <4 x i32> %ext, ptr addrspace(1) %out 1419 ret void 1420} 1421 1422; TODO: These should use DST, but for some there are redundant MOVs 1423define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1424; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i32: 1425; GFX6-NOHSA: ; %bb.0: 1426; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1427; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1428; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1429; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1430; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1431; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1432; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s4, 24 1433; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s4, 0x80008 1434; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s5, 24 1435; GFX6-NOHSA-NEXT: s_bfe_u32 s9, s5, 0x80008 1436; GFX6-NOHSA-NEXT: s_and_b32 s10, s4, 0xff 1437; GFX6-NOHSA-NEXT: s_and_b32 s11, s5, 0xff 1438; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 1439; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 1440; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s11 1441; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s9 1442; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 1443; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s8 1444; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1445; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1446; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s10 1447; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s7 1448; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4 1449; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s6 1450; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1451; GFX6-NOHSA-NEXT: s_endpgm 1452; 1453; GFX7-HSA-LABEL: constant_zextload_v8i8_to_v8i32: 1454; GFX7-HSA: ; %bb.0: 1455; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1456; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1457; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1458; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1459; GFX7-HSA-NEXT: s_lshr_b32 s4, s2, 24 1460; GFX7-HSA-NEXT: s_bfe_u32 s5, s2, 0x80008 1461; GFX7-HSA-NEXT: s_lshr_b32 s6, s3, 24 1462; GFX7-HSA-NEXT: s_bfe_u32 s7, s3, 0x80008 1463; GFX7-HSA-NEXT: s_and_b32 s8, s2, 0xff 1464; GFX7-HSA-NEXT: s_bfe_u32 s9, s2, 0x80010 1465; GFX7-HSA-NEXT: s_and_b32 s2, s3, 0xff 1466; GFX7-HSA-NEXT: s_bfe_u32 s3, s3, 0x80010 1467; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 1468; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 1469; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 1470; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 1471; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 1472; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s7 1473; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s6 1474; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 1475; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1476; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 1477; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8 1478; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5 1479; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s9 1480; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s4 1481; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 1482; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1483; GFX7-HSA-NEXT: s_endpgm 1484; 1485; GFX8-NOHSA-LABEL: constant_zextload_v8i8_to_v8i32: 1486; GFX8-NOHSA: ; %bb.0: 1487; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1488; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1489; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1490; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1491; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s2, 24 1492; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s2, 0x80008 1493; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s3, 24 1494; GFX8-NOHSA-NEXT: s_bfe_u32 s7, s3, 0x80008 1495; GFX8-NOHSA-NEXT: s_and_b32 s8, s2, 0xff 1496; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s2, 0x80010 1497; GFX8-NOHSA-NEXT: s_and_b32 s2, s3, 0xff 1498; GFX8-NOHSA-NEXT: s_bfe_u32 s3, s3, 0x80010 1499; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 1500; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 1501; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s3 1502; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 1503; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 1504; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s7 1505; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s6 1506; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 1507; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1508; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 1509; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 1510; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5 1511; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s9 1512; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s4 1513; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 1514; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1515; GFX8-NOHSA-NEXT: s_endpgm 1516; 1517; EG-LABEL: constant_zextload_v8i8_to_v8i32: 1518; EG: ; %bb.0: 1519; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1520; EG-NEXT: TEX 0 @6 1521; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 1522; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 1523; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1 1524; EG-NEXT: CF_END 1525; EG-NEXT: Fetch clause starting at 6: 1526; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1527; EG-NEXT: ALU clause starting at 8: 1528; EG-NEXT: MOV * T5.X, KC0[2].Z, 1529; EG-NEXT: ALU clause starting at 9: 1530; EG-NEXT: MOV * T0.W, literal.x, 1531; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1532; EG-NEXT: BFE_UINT * T6.Z, T5.X, literal.x, PV.W, 1533; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1534; EG-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W, 1535; EG-NEXT: BFE_UINT T7.Z, T5.Y, literal.y, T0.W, 1536; EG-NEXT: LSHR * T6.W, T5.X, literal.z, 1537; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 1538; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 1539; EG-NEXT: AND_INT T6.X, T5.X, literal.x, 1540; EG-NEXT: BFE_UINT T7.Y, T5.Y, literal.y, T0.W, 1541; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.z, 1542; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 1543; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1544; EG-NEXT: LSHR * T7.W, T5.Y, literal.x, 1545; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 1546; EG-NEXT: AND_INT T7.X, T5.Y, literal.x, 1547; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1548; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44) 1549; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 1550; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1551; 1552; GFX12-LABEL: constant_zextload_v8i8_to_v8i32: 1553; GFX12: ; %bb.0: 1554; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1555; GFX12-NEXT: s_wait_kmcnt 0x0 1556; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 1557; GFX12-NEXT: s_wait_kmcnt 0x0 1558; GFX12-NEXT: s_lshr_b32 s6, s3, 24 1559; GFX12-NEXT: s_bfe_u32 s7, s3, 0x80008 1560; GFX12-NEXT: s_and_b32 s9, s3, 0xff 1561; GFX12-NEXT: s_bfe_u32 s3, s3, 0x80010 1562; GFX12-NEXT: s_lshr_b32 s4, s2, 24 1563; GFX12-NEXT: s_bfe_u32 s5, s2, 0x80008 1564; GFX12-NEXT: s_and_b32 s8, s2, 0xff 1565; GFX12-NEXT: s_bfe_u32 s2, s2, 0x80010 1566; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s7 1567; GFX12-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s6 1568; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s5 1569; GFX12-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s4 1570; GFX12-NEXT: v_mov_b32_e32 v6, s2 1571; GFX12-NEXT: s_clause 0x1 1572; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 1573; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 1574; GFX12-NEXT: s_endpgm 1575 %load = load <8 x i8>, ptr addrspace(4) %in 1576 %ext = zext <8 x i8> %load to <8 x i32> 1577 store <8 x i32> %ext, ptr addrspace(1) %out 1578 ret void 1579} 1580 1581; TODO: These should use DST, but for some there are redundant MOVs 1582define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1583; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i32: 1584; GFX6-NOHSA: ; %bb.0: 1585; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1586; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1587; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1588; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1589; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1590; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1591; GFX6-NOHSA-NEXT: s_ashr_i32 s6, s4, 24 1592; GFX6-NOHSA-NEXT: s_bfe_i32 s7, s4, 0x80010 1593; GFX6-NOHSA-NEXT: s_bfe_i32 s8, s4, 0x80008 1594; GFX6-NOHSA-NEXT: s_ashr_i32 s9, s5, 24 1595; GFX6-NOHSA-NEXT: s_bfe_i32 s10, s5, 0x80010 1596; GFX6-NOHSA-NEXT: s_bfe_i32 s11, s5, 0x80008 1597; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 1598; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 1599; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 1600; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s11 1601; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10 1602; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s9 1603; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1604; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1605; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 1606; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s8 1607; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 1608; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s6 1609; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1610; GFX6-NOHSA-NEXT: s_endpgm 1611; 1612; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i32: 1613; GFX7-HSA: ; %bb.0: 1614; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1615; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1616; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1617; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1618; GFX7-HSA-NEXT: s_ashr_i32 s4, s2, 24 1619; GFX7-HSA-NEXT: s_bfe_i32 s5, s2, 0x80010 1620; GFX7-HSA-NEXT: s_bfe_i32 s6, s2, 0x80008 1621; GFX7-HSA-NEXT: s_sext_i32_i8 s7, s2 1622; GFX7-HSA-NEXT: s_ashr_i32 s2, s3, 24 1623; GFX7-HSA-NEXT: s_bfe_i32 s8, s3, 0x80010 1624; GFX7-HSA-NEXT: s_bfe_i32 s9, s3, 0x80008 1625; GFX7-HSA-NEXT: s_sext_i32_i8 s3, s3 1626; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s2 1627; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 1628; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 1629; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 1630; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 1631; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9 1632; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s8 1633; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 1634; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1635; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 1636; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s7 1637; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s6 1638; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 1639; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s4 1640; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 1641; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1642; GFX7-HSA-NEXT: s_endpgm 1643; 1644; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i32: 1645; GFX8-NOHSA: ; %bb.0: 1646; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1647; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1648; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1649; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1650; GFX8-NOHSA-NEXT: s_ashr_i32 s4, s2, 24 1651; GFX8-NOHSA-NEXT: s_bfe_i32 s5, s2, 0x80010 1652; GFX8-NOHSA-NEXT: s_bfe_i32 s6, s2, 0x80008 1653; GFX8-NOHSA-NEXT: s_sext_i32_i8 s7, s2 1654; GFX8-NOHSA-NEXT: s_ashr_i32 s2, s3, 24 1655; GFX8-NOHSA-NEXT: s_bfe_i32 s8, s3, 0x80010 1656; GFX8-NOHSA-NEXT: s_bfe_i32 s9, s3, 0x80008 1657; GFX8-NOHSA-NEXT: s_sext_i32_i8 s3, s3 1658; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s2 1659; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 1660; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s3 1661; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 1662; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 1663; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s9 1664; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8 1665; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 1666; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1667; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 1668; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s7 1669; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s6 1670; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s5 1671; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s4 1672; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 1673; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1674; GFX8-NOHSA-NEXT: s_endpgm 1675; 1676; EG-LABEL: constant_sextload_v8i8_to_v8i32: 1677; EG: ; %bb.0: 1678; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1679; EG-NEXT: TEX 0 @6 1680; EG-NEXT: ALU 23, @9, KC0[CB0:0-32], KC1[] 1681; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 1682; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1 1683; EG-NEXT: CF_END 1684; EG-NEXT: Fetch clause starting at 6: 1685; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1686; EG-NEXT: ALU clause starting at 8: 1687; EG-NEXT: MOV * T5.X, KC0[2].Z, 1688; EG-NEXT: ALU clause starting at 9: 1689; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x, 1690; EG-NEXT: LSHR * T0.W, T5.X, literal.y, 1691; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 1692; EG-NEXT: BFE_INT T7.X, T5.Y, 0.0, literal.x, 1693; EG-NEXT: LSHR T0.Z, T5.Y, literal.y, 1694; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x, 1695; EG-NEXT: LSHR * T0.W, T5.X, literal.z, 1696; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 1697; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1698; EG-NEXT: LSHR T0.Y, T5.Y, literal.x, 1699; EG-NEXT: BFE_INT T6.Z, PS, 0.0, literal.y, 1700; EG-NEXT: BFE_INT T7.W, PV.Z, 0.0, literal.y, 1701; EG-NEXT: LSHR * T0.W, T5.X, literal.y, 1702; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1703; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 1704; EG-NEXT: BFE_INT T6.Y, PS, 0.0, literal.y, 1705; EG-NEXT: BFE_INT T7.Z, PV.Y, 0.0, literal.y, 1706; EG-NEXT: LSHR T0.W, T5.Y, literal.y, 1707; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 1708; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 1709; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1710; EG-NEXT: LSHR T8.X, PS, literal.x, 1711; EG-NEXT: BFE_INT * T7.Y, PV.W, 0.0, literal.y, 1712; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 1713; 1714; GFX12-LABEL: constant_sextload_v8i8_to_v8i32: 1715; GFX12: ; %bb.0: 1716; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1717; GFX12-NEXT: s_wait_kmcnt 0x0 1718; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 1719; GFX12-NEXT: s_wait_kmcnt 0x0 1720; GFX12-NEXT: s_ashr_i32 s7, s3, 24 1721; GFX12-NEXT: s_bfe_i32 s8, s3, 0x80010 1722; GFX12-NEXT: s_sext_i32_i8 s9, s3 1723; GFX12-NEXT: s_bfe_i32 s3, s3, 0x80008 1724; GFX12-NEXT: s_ashr_i32 s4, s2, 24 1725; GFX12-NEXT: s_bfe_i32 s5, s2, 0x80010 1726; GFX12-NEXT: s_bfe_i32 s6, s2, 0x80008 1727; GFX12-NEXT: s_sext_i32_i8 s2, s2 1728; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s3 1729; GFX12-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s7 1730; GFX12-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v5, s6 1731; GFX12-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v7, s4 1732; GFX12-NEXT: v_mov_b32_e32 v6, s5 1733; GFX12-NEXT: s_clause 0x1 1734; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 1735; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 1736; GFX12-NEXT: s_endpgm 1737 %load = load <8 x i8>, ptr addrspace(4) %in 1738 %ext = sext <8 x i8> %load to <8 x i32> 1739 store <8 x i32> %ext, ptr addrspace(1) %out 1740 ret void 1741} 1742 1743; TODO: These should use DST, but for some there are redundant MOVs 1744define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1745; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i32: 1746; GFX6-NOHSA: ; %bb.0: 1747; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1748; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1749; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1750; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 1751; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 1752; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1753; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s4, 24 1754; GFX6-NOHSA-NEXT: s_bfe_u32 s9, s4, 0x80008 1755; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s5, 24 1756; GFX6-NOHSA-NEXT: s_bfe_u32 s11, s5, 0x80008 1757; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s6, 24 1758; GFX6-NOHSA-NEXT: s_bfe_u32 s13, s6, 0x80008 1759; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s7, 24 1760; GFX6-NOHSA-NEXT: s_bfe_u32 s15, s7, 0x80008 1761; GFX6-NOHSA-NEXT: s_and_b32 s16, s4, 0xff 1762; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 1763; GFX6-NOHSA-NEXT: s_and_b32 s17, s5, 0xff 1764; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 1765; GFX6-NOHSA-NEXT: s_and_b32 s18, s6, 0xff 1766; GFX6-NOHSA-NEXT: s_and_b32 s19, s7, 0xff 1767; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 1768; GFX6-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 1769; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s19 1770; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s15 1771; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 1772; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s14 1773; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1774; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1775; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s18 1776; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s13 1777; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 1778; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s12 1779; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1780; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1781; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s17 1782; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s11 1783; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 1784; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s10 1785; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1786; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 1787; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s16 1788; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s9 1789; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4 1790; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s8 1791; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1792; GFX6-NOHSA-NEXT: s_endpgm 1793; 1794; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i32: 1795; GFX7-HSA: ; %bb.0: 1796; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1797; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1798; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1799; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 1800; GFX7-HSA-NEXT: s_lshr_b32 s8, s4, 24 1801; GFX7-HSA-NEXT: s_bfe_u32 s9, s4, 0x80008 1802; GFX7-HSA-NEXT: s_lshr_b32 s10, s5, 24 1803; GFX7-HSA-NEXT: s_bfe_u32 s11, s5, 0x80008 1804; GFX7-HSA-NEXT: s_lshr_b32 s12, s6, 24 1805; GFX7-HSA-NEXT: s_bfe_u32 s13, s6, 0x80008 1806; GFX7-HSA-NEXT: s_lshr_b32 s2, s7, 24 1807; GFX7-HSA-NEXT: s_bfe_u32 s3, s7, 0x80008 1808; GFX7-HSA-NEXT: s_and_b32 s14, s4, 0xff 1809; GFX7-HSA-NEXT: s_bfe_u32 s4, s4, 0x80010 1810; GFX7-HSA-NEXT: s_and_b32 s15, s5, 0xff 1811; GFX7-HSA-NEXT: s_bfe_u32 s5, s5, 0x80010 1812; GFX7-HSA-NEXT: s_and_b32 s16, s6, 0xff 1813; GFX7-HSA-NEXT: s_bfe_u32 s6, s6, 0x80010 1814; GFX7-HSA-NEXT: s_and_b32 s17, s7, 0xff 1815; GFX7-HSA-NEXT: s_bfe_u32 s7, s7, 0x80010 1816; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s2 1817; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 1818; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 1819; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 1820; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 1821; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 1822; GFX7-HSA-NEXT: s_add_u32 s2, s0, 32 1823; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s17 1824; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s7 1825; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 1826; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1827; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 1828; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 1829; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 1830; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s16 1831; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s13 1832; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 1833; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s12 1834; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 1835; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1836; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 1837; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s15 1838; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s11 1839; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 1840; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s10 1841; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 1842; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1843; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 1844; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s14 1845; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9 1846; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 1847; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s8 1848; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 1849; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1850; GFX7-HSA-NEXT: s_endpgm 1851; 1852; GFX8-NOHSA-LABEL: constant_zextload_v16i8_to_v16i32: 1853; GFX8-NOHSA: ; %bb.0: 1854; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1855; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1856; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1857; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 1858; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s4, 24 1859; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s4, 0x80008 1860; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s5, 24 1861; GFX8-NOHSA-NEXT: s_bfe_u32 s11, s5, 0x80008 1862; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s6, 24 1863; GFX8-NOHSA-NEXT: s_bfe_u32 s13, s6, 0x80008 1864; GFX8-NOHSA-NEXT: s_lshr_b32 s2, s7, 24 1865; GFX8-NOHSA-NEXT: s_bfe_u32 s3, s7, 0x80008 1866; GFX8-NOHSA-NEXT: s_and_b32 s14, s4, 0xff 1867; GFX8-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 1868; GFX8-NOHSA-NEXT: s_and_b32 s15, s5, 0xff 1869; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 1870; GFX8-NOHSA-NEXT: s_and_b32 s16, s6, 0xff 1871; GFX8-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 1872; GFX8-NOHSA-NEXT: s_and_b32 s17, s7, 0xff 1873; GFX8-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 1874; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s2 1875; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 1876; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 1877; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 1878; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 1879; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 1880; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 1881; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s17 1882; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s7 1883; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 1884; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1885; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 1886; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 1887; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 1888; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s16 1889; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s13 1890; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 1891; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s12 1892; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 1893; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1894; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 1895; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s15 1896; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s11 1897; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s5 1898; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s10 1899; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 1900; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1901; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 1902; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s14 1903; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s9 1904; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 1905; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s8 1906; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 1907; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1908; GFX8-NOHSA-NEXT: s_endpgm 1909; 1910; EG-LABEL: constant_zextload_v16i8_to_v16i32: 1911; EG: ; %bb.0: 1912; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 1913; EG-NEXT: TEX 0 @8 1914; EG-NEXT: ALU 39, @11, KC0[CB0:0-32], KC1[] 1915; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0 1916; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T13.X, 0 1917; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T11.X, 0 1918; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1919; EG-NEXT: CF_END 1920; EG-NEXT: Fetch clause starting at 8: 1921; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1922; EG-NEXT: ALU clause starting at 10: 1923; EG-NEXT: MOV * T7.X, KC0[2].Z, 1924; EG-NEXT: ALU clause starting at 11: 1925; EG-NEXT: MOV * T0.W, literal.x, 1926; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1927; EG-NEXT: BFE_UINT * T8.Z, T7.X, literal.x, PV.W, 1928; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1929; EG-NEXT: BFE_UINT T8.Y, T7.X, literal.x, T0.W, 1930; EG-NEXT: BFE_UINT T9.Z, T7.Y, literal.y, T0.W, 1931; EG-NEXT: LSHR * T8.W, T7.X, literal.z, 1932; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 1933; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 1934; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1935; EG-NEXT: BFE_UINT T9.Y, T7.Y, literal.y, T0.W, 1936; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.z, 1937; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 1938; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1939; EG-NEXT: BFE_UINT T10.Z, T7.Z, literal.x, T0.W, 1940; EG-NEXT: LSHR * T9.W, T7.Y, literal.y, 1941; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 1942; EG-NEXT: AND_INT T9.X, T7.Y, literal.x, 1943; EG-NEXT: BFE_UINT T10.Y, T7.Z, literal.y, T0.W, 1944; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 1945; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 1946; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1947; EG-NEXT: LSHR T11.X, PV.W, literal.x, 1948; EG-NEXT: BFE_UINT T12.Z, T7.W, literal.y, T0.W, 1949; EG-NEXT: LSHR T10.W, T7.Z, literal.z, 1950; EG-NEXT: AND_INT * T10.X, T7.Z, literal.w, 1951; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1952; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 1953; EG-NEXT: BFE_UINT T12.Y, T7.W, literal.x, T0.W, 1954; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1955; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44) 1956; EG-NEXT: LSHR T13.X, PV.W, literal.x, 1957; EG-NEXT: LSHR T12.W, T7.W, literal.y, 1958; EG-NEXT: AND_INT * T12.X, T7.W, literal.z, 1959; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 1960; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 1961; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1962; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 1963; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 1964; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1965; 1966; GFX12-LABEL: constant_zextload_v16i8_to_v16i32: 1967; GFX12: ; %bb.0: 1968; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1969; GFX12-NEXT: s_wait_kmcnt 0x0 1970; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 1971; GFX12-NEXT: s_wait_kmcnt 0x0 1972; GFX12-NEXT: s_lshr_b32 s12, s7, 24 1973; GFX12-NEXT: s_bfe_u32 s13, s7, 0x80008 1974; GFX12-NEXT: s_and_b32 s17, s7, 0xff 1975; GFX12-NEXT: s_bfe_u32 s7, s7, 0x80010 1976; GFX12-NEXT: s_lshr_b32 s10, s6, 24 1977; GFX12-NEXT: s_bfe_u32 s11, s6, 0x80008 1978; GFX12-NEXT: s_and_b32 s16, s6, 0xff 1979; GFX12-NEXT: s_bfe_u32 s6, s6, 0x80010 1980; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s13 1981; GFX12-NEXT: s_lshr_b32 s8, s5, 24 1982; GFX12-NEXT: s_bfe_u32 s9, s5, 0x80008 1983; GFX12-NEXT: s_and_b32 s15, s5, 0xff 1984; GFX12-NEXT: s_bfe_u32 s5, s5, 0x80010 1985; GFX12-NEXT: v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s12 1986; GFX12-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s11 1987; GFX12-NEXT: s_lshr_b32 s2, s4, 24 1988; GFX12-NEXT: s_bfe_u32 s3, s4, 0x80008 1989; GFX12-NEXT: s_and_b32 s14, s4, 0xff 1990; GFX12-NEXT: s_bfe_u32 s4, s4, 0x80010 1991; GFX12-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v7, s10 1992; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v9, s9 1993; GFX12-NEXT: v_dual_mov_b32 v8, s15 :: v_dual_mov_b32 v11, s8 1994; GFX12-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v13, s3 1995; GFX12-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v15, s2 1996; GFX12-NEXT: v_mov_b32_e32 v14, s4 1997; GFX12-NEXT: s_clause 0x3 1998; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 1999; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 2000; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 2001; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 2002; GFX12-NEXT: s_endpgm 2003 %load = load <16 x i8>, ptr addrspace(4) %in 2004 %ext = zext <16 x i8> %load to <16 x i32> 2005 store <16 x i32> %ext, ptr addrspace(1) %out 2006 ret void 2007} 2008 2009; TODO: These should use DST, but for some there are redundant MOVs 2010define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 2011; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i32: 2012; GFX6-NOHSA: ; %bb.0: 2013; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2014; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2015; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 2016; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 2017; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 2018; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2019; GFX6-NOHSA-NEXT: s_ashr_i32 s8, s4, 24 2020; GFX6-NOHSA-NEXT: s_bfe_i32 s9, s4, 0x80010 2021; GFX6-NOHSA-NEXT: s_bfe_i32 s10, s4, 0x80008 2022; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 2023; GFX6-NOHSA-NEXT: s_ashr_i32 s11, s5, 24 2024; GFX6-NOHSA-NEXT: s_bfe_i32 s12, s5, 0x80010 2025; GFX6-NOHSA-NEXT: s_bfe_i32 s13, s5, 0x80008 2026; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 2027; GFX6-NOHSA-NEXT: s_ashr_i32 s14, s6, 24 2028; GFX6-NOHSA-NEXT: s_bfe_i32 s15, s6, 0x80010 2029; GFX6-NOHSA-NEXT: s_bfe_i32 s16, s6, 0x80008 2030; GFX6-NOHSA-NEXT: s_ashr_i32 s17, s7, 24 2031; GFX6-NOHSA-NEXT: s_bfe_i32 s18, s7, 0x80010 2032; GFX6-NOHSA-NEXT: s_bfe_i32 s19, s7, 0x80008 2033; GFX6-NOHSA-NEXT: s_sext_i32_i8 s7, s7 2034; GFX6-NOHSA-NEXT: s_sext_i32_i8 s6, s6 2035; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 2036; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s19 2037; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s18 2038; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s17 2039; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2040; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2041; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 2042; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s16 2043; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s15 2044; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s14 2045; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2046; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2047; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 2048; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s13 2049; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s12 2050; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s11 2051; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2052; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2053; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 2054; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s10 2055; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s9 2056; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s8 2057; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2058; GFX6-NOHSA-NEXT: s_endpgm 2059; 2060; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i32: 2061; GFX7-HSA: ; %bb.0: 2062; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2063; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 2064; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 2065; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 2066; GFX7-HSA-NEXT: s_ashr_i32 s8, s4, 24 2067; GFX7-HSA-NEXT: s_bfe_i32 s9, s4, 0x80010 2068; GFX7-HSA-NEXT: s_bfe_i32 s10, s4, 0x80008 2069; GFX7-HSA-NEXT: s_ashr_i32 s11, s5, 24 2070; GFX7-HSA-NEXT: s_bfe_i32 s12, s5, 0x80010 2071; GFX7-HSA-NEXT: s_bfe_i32 s13, s5, 0x80008 2072; GFX7-HSA-NEXT: s_ashr_i32 s14, s6, 24 2073; GFX7-HSA-NEXT: s_bfe_i32 s15, s6, 0x80010 2074; GFX7-HSA-NEXT: s_bfe_i32 s16, s6, 0x80008 2075; GFX7-HSA-NEXT: s_ashr_i32 s2, s7, 24 2076; GFX7-HSA-NEXT: s_bfe_i32 s3, s7, 0x80010 2077; GFX7-HSA-NEXT: s_bfe_i32 s17, s7, 0x80008 2078; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s2 2079; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 2080; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 2081; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2082; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2083; GFX7-HSA-NEXT: s_sext_i32_i8 s7, s7 2084; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2085; GFX7-HSA-NEXT: s_add_u32 s2, s0, 32 2086; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s7 2087; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17 2088; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2089; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2090; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2091; GFX7-HSA-NEXT: s_sext_i32_i8 s6, s6 2092; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2093; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 2094; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 2095; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s16 2096; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s15 2097; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s14 2098; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2099; GFX7-HSA-NEXT: s_sext_i32_i8 s5, s5 2100; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2101; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2102; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 2103; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s13 2104; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s12 2105; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s11 2106; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2107; GFX7-HSA-NEXT: s_sext_i32_i8 s4, s4 2108; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2109; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 2110; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 2111; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s10 2112; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s9 2113; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s8 2114; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 2115; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2116; GFX7-HSA-NEXT: s_endpgm 2117; 2118; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i32: 2119; GFX8-NOHSA: ; %bb.0: 2120; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2121; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2122; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 2123; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2124; GFX8-NOHSA-NEXT: s_ashr_i32 s8, s4, 24 2125; GFX8-NOHSA-NEXT: s_bfe_i32 s9, s4, 0x80010 2126; GFX8-NOHSA-NEXT: s_bfe_i32 s10, s4, 0x80008 2127; GFX8-NOHSA-NEXT: s_ashr_i32 s11, s5, 24 2128; GFX8-NOHSA-NEXT: s_bfe_i32 s12, s5, 0x80010 2129; GFX8-NOHSA-NEXT: s_bfe_i32 s13, s5, 0x80008 2130; GFX8-NOHSA-NEXT: s_ashr_i32 s14, s6, 24 2131; GFX8-NOHSA-NEXT: s_bfe_i32 s15, s6, 0x80010 2132; GFX8-NOHSA-NEXT: s_bfe_i32 s16, s6, 0x80008 2133; GFX8-NOHSA-NEXT: s_ashr_i32 s2, s7, 24 2134; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s7, 0x80010 2135; GFX8-NOHSA-NEXT: s_bfe_i32 s17, s7, 0x80008 2136; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s2 2137; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 2138; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s3 2139; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2140; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2141; GFX8-NOHSA-NEXT: s_sext_i32_i8 s7, s7 2142; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2143; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 2144; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s7 2145; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17 2146; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2147; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2148; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2149; GFX8-NOHSA-NEXT: s_sext_i32_i8 s6, s6 2150; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2151; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 2152; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 2153; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s16 2154; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s15 2155; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s14 2156; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2157; GFX8-NOHSA-NEXT: s_sext_i32_i8 s5, s5 2158; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2159; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2160; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s5 2161; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s13 2162; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 2163; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s11 2164; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2165; GFX8-NOHSA-NEXT: s_sext_i32_i8 s4, s4 2166; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2167; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 2168; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 2169; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s10 2170; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s9 2171; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s8 2172; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 2173; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2174; GFX8-NOHSA-NEXT: s_endpgm 2175; 2176; EG-LABEL: constant_sextload_v16i8_to_v16i32: 2177; EG: ; %bb.0: 2178; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 2179; EG-NEXT: TEX 0 @8 2180; EG-NEXT: ALU 47, @11, KC0[CB0:0-32], KC1[] 2181; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0 2182; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 0 2183; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T7.X, 0 2184; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T8.X, 1 2185; EG-NEXT: CF_END 2186; EG-NEXT: Fetch clause starting at 8: 2187; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 2188; EG-NEXT: ALU clause starting at 10: 2189; EG-NEXT: MOV * T7.X, KC0[2].Z, 2190; EG-NEXT: ALU clause starting at 11: 2191; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 2192; EG-NEXT: LSHR T0.W, T7.W, literal.y, 2193; EG-NEXT: LSHR * T1.W, T7.Z, literal.z, 2194; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2195; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 2196; EG-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x, 2197; EG-NEXT: LSHR T0.Y, T7.W, literal.y, 2198; EG-NEXT: LSHR T0.Z, T7.Z, literal.z, 2199; EG-NEXT: LSHR T2.W, T7.Y, literal.x, 2200; EG-NEXT: LSHR * T3.W, T7.X, literal.y, 2201; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 2202; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2203; EG-NEXT: BFE_INT T10.X, T7.Y, 0.0, literal.x, 2204; EG-NEXT: LSHR T1.Y, T7.Z, literal.y, 2205; EG-NEXT: LSHR T1.Z, T7.Y, literal.y, 2206; EG-NEXT: BFE_INT T9.W, PS, 0.0, literal.x, 2207; EG-NEXT: LSHR * T3.W, T7.X, literal.z, 2208; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 2209; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2210; EG-NEXT: BFE_INT T11.X, T7.Z, 0.0, literal.x, 2211; EG-NEXT: LSHR T2.Y, T7.Y, literal.y, 2212; EG-NEXT: BFE_INT T9.Z, PS, 0.0, literal.x, 2213; EG-NEXT: BFE_INT T10.W, PV.Z, 0.0, literal.x, 2214; EG-NEXT: LSHR * T3.W, T7.X, literal.x, 2215; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 2216; EG-NEXT: BFE_INT T12.X, T7.W, 0.0, literal.x, 2217; EG-NEXT: BFE_INT T9.Y, PS, 0.0, literal.x, 2218; EG-NEXT: BFE_INT T10.Z, PV.Y, 0.0, literal.x, 2219; EG-NEXT: BFE_INT T11.W, T1.Y, 0.0, literal.x, 2220; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, 2221; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 2222; EG-NEXT: LSHR T7.X, PS, literal.x, 2223; EG-NEXT: BFE_INT T10.Y, T2.W, 0.0, literal.y, 2224; EG-NEXT: BFE_INT T11.Z, T0.Z, 0.0, literal.y, 2225; EG-NEXT: BFE_INT T12.W, T0.Y, 0.0, literal.y, 2226; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 2227; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 2228; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 2229; EG-NEXT: LSHR T13.X, PS, literal.x, 2230; EG-NEXT: BFE_INT T11.Y, T1.W, 0.0, literal.y, 2231; EG-NEXT: BFE_INT T12.Z, T0.W, 0.0, literal.y, BS:VEC_120/SCL_212 2232; EG-NEXT: LSHR T0.W, T7.W, literal.y, BS:VEC_201 2233; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2234; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 2235; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2236; EG-NEXT: LSHR T14.X, PS, literal.x, 2237; EG-NEXT: BFE_INT * T12.Y, PV.W, 0.0, literal.y, 2238; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 2239; 2240; GFX12-LABEL: constant_sextload_v16i8_to_v16i32: 2241; GFX12: ; %bb.0: 2242; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2243; GFX12-NEXT: s_wait_kmcnt 0x0 2244; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 2245; GFX12-NEXT: s_wait_kmcnt 0x0 2246; GFX12-NEXT: s_ashr_i32 s15, s7, 24 2247; GFX12-NEXT: s_bfe_i32 s16, s7, 0x80010 2248; GFX12-NEXT: s_sext_i32_i8 s17, s7 2249; GFX12-NEXT: s_bfe_i32 s7, s7, 0x80008 2250; GFX12-NEXT: s_ashr_i32 s12, s6, 24 2251; GFX12-NEXT: s_bfe_i32 s13, s6, 0x80010 2252; GFX12-NEXT: s_bfe_i32 s14, s6, 0x80008 2253; GFX12-NEXT: s_sext_i32_i8 s6, s6 2254; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s7 2255; GFX12-NEXT: s_ashr_i32 s9, s5, 24 2256; GFX12-NEXT: s_bfe_i32 s10, s5, 0x80010 2257; GFX12-NEXT: s_bfe_i32 s11, s5, 0x80008 2258; GFX12-NEXT: s_sext_i32_i8 s5, s5 2259; GFX12-NEXT: v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s15 2260; GFX12-NEXT: v_dual_mov_b32 v2, s16 :: v_dual_mov_b32 v5, s14 2261; GFX12-NEXT: s_ashr_i32 s2, s4, 24 2262; GFX12-NEXT: s_bfe_i32 s3, s4, 0x80010 2263; GFX12-NEXT: s_bfe_i32 s8, s4, 0x80008 2264; GFX12-NEXT: s_sext_i32_i8 s4, s4 2265; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s12 2266; GFX12-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s11 2267; GFX12-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v11, s9 2268; GFX12-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v13, s8 2269; GFX12-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s2 2270; GFX12-NEXT: v_mov_b32_e32 v14, s3 2271; GFX12-NEXT: s_clause 0x3 2272; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 2273; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 2274; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 2275; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 2276; GFX12-NEXT: s_endpgm 2277 %load = load <16 x i8>, ptr addrspace(4) %in 2278 %ext = sext <16 x i8> %load to <16 x i32> 2279 store <16 x i32> %ext, ptr addrspace(1) %out 2280 ret void 2281} 2282 2283; TODO: These should use DST, but for some there are redundant MOVs 2284define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 2285; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i32: 2286; GFX6-NOHSA: ; %bb.0: 2287; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2288; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2289; GFX6-NOHSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2290; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 2291; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 2292; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2293; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s4, 24 2294; GFX6-NOHSA-NEXT: s_bfe_u32 s13, s4, 0x80008 2295; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s5, 24 2296; GFX6-NOHSA-NEXT: s_bfe_u32 s15, s5, 0x80008 2297; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s6, 24 2298; GFX6-NOHSA-NEXT: s_bfe_u32 s17, s6, 0x80008 2299; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s7, 24 2300; GFX6-NOHSA-NEXT: s_bfe_u32 s19, s7, 0x80008 2301; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s8, 24 2302; GFX6-NOHSA-NEXT: s_bfe_u32 s21, s8, 0x80008 2303; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s9, 24 2304; GFX6-NOHSA-NEXT: s_bfe_u32 s23, s9, 0x80008 2305; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s10, 24 2306; GFX6-NOHSA-NEXT: s_bfe_u32 s25, s10, 0x80008 2307; GFX6-NOHSA-NEXT: s_lshr_b32 s26, s11, 24 2308; GFX6-NOHSA-NEXT: s_bfe_u32 s27, s11, 0x80008 2309; GFX6-NOHSA-NEXT: s_and_b32 s28, s4, 0xff 2310; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 2311; GFX6-NOHSA-NEXT: s_and_b32 s29, s5, 0xff 2312; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 2313; GFX6-NOHSA-NEXT: s_and_b32 s30, s6, 0xff 2314; GFX6-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 2315; GFX6-NOHSA-NEXT: s_and_b32 s31, s7, 0xff 2316; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 2317; GFX6-NOHSA-NEXT: s_and_b32 s33, s8, 0xff 2318; GFX6-NOHSA-NEXT: s_bfe_u32 s8, s8, 0x80010 2319; GFX6-NOHSA-NEXT: s_and_b32 s34, s9, 0xff 2320; GFX6-NOHSA-NEXT: s_bfe_u32 s9, s9, 0x80010 2321; GFX6-NOHSA-NEXT: s_and_b32 s35, s10, 0xff 2322; GFX6-NOHSA-NEXT: s_and_b32 s36, s11, 0xff 2323; GFX6-NOHSA-NEXT: s_bfe_u32 s11, s11, 0x80010 2324; GFX6-NOHSA-NEXT: s_bfe_u32 s10, s10, 0x80010 2325; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s36 2326; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s27 2327; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s11 2328; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s26 2329; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2330; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2331; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s35 2332; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s25 2333; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10 2334; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s24 2335; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2336; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2337; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s34 2338; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s23 2339; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s9 2340; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s22 2341; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2342; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2343; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s33 2344; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s21 2345; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s8 2346; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s20 2347; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2348; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2349; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s31 2350; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s19 2351; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 2352; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s18 2353; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2354; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2355; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s30 2356; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s17 2357; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 2358; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s16 2359; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2360; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2361; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s29 2362; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s15 2363; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 2364; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s14 2365; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2366; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2367; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s28 2368; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s13 2369; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4 2370; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s12 2371; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2372; GFX6-NOHSA-NEXT: s_endpgm 2373; 2374; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i32: 2375; GFX7-HSA: ; %bb.0: 2376; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2377; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 2378; GFX7-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2379; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 2380; GFX7-HSA-NEXT: s_lshr_b32 s12, s4, 24 2381; GFX7-HSA-NEXT: s_bfe_u32 s13, s4, 0x80008 2382; GFX7-HSA-NEXT: s_lshr_b32 s14, s5, 24 2383; GFX7-HSA-NEXT: s_bfe_u32 s15, s5, 0x80008 2384; GFX7-HSA-NEXT: s_lshr_b32 s16, s6, 24 2385; GFX7-HSA-NEXT: s_bfe_u32 s17, s6, 0x80008 2386; GFX7-HSA-NEXT: s_lshr_b32 s18, s7, 24 2387; GFX7-HSA-NEXT: s_bfe_u32 s19, s7, 0x80008 2388; GFX7-HSA-NEXT: s_lshr_b32 s20, s8, 24 2389; GFX7-HSA-NEXT: s_bfe_u32 s21, s8, 0x80008 2390; GFX7-HSA-NEXT: s_lshr_b32 s22, s9, 24 2391; GFX7-HSA-NEXT: s_bfe_u32 s23, s9, 0x80008 2392; GFX7-HSA-NEXT: s_lshr_b32 s24, s10, 24 2393; GFX7-HSA-NEXT: s_bfe_u32 s25, s10, 0x80008 2394; GFX7-HSA-NEXT: s_lshr_b32 s26, s11, 24 2395; GFX7-HSA-NEXT: s_bfe_u32 s27, s11, 0x80008 2396; GFX7-HSA-NEXT: s_and_b32 s28, s4, 0xff 2397; GFX7-HSA-NEXT: s_bfe_u32 s4, s4, 0x80010 2398; GFX7-HSA-NEXT: s_and_b32 s29, s5, 0xff 2399; GFX7-HSA-NEXT: s_bfe_u32 s5, s5, 0x80010 2400; GFX7-HSA-NEXT: s_and_b32 s30, s6, 0xff 2401; GFX7-HSA-NEXT: s_bfe_u32 s6, s6, 0x80010 2402; GFX7-HSA-NEXT: s_and_b32 s31, s7, 0xff 2403; GFX7-HSA-NEXT: s_bfe_u32 s7, s7, 0x80010 2404; GFX7-HSA-NEXT: s_and_b32 s33, s8, 0xff 2405; GFX7-HSA-NEXT: s_bfe_u32 s8, s8, 0x80010 2406; GFX7-HSA-NEXT: s_and_b32 s34, s9, 0xff 2407; GFX7-HSA-NEXT: s_bfe_u32 s9, s9, 0x80010 2408; GFX7-HSA-NEXT: s_and_b32 s35, s10, 0xff 2409; GFX7-HSA-NEXT: s_bfe_u32 s10, s10, 0x80010 2410; GFX7-HSA-NEXT: s_and_b32 s36, s11, 0xff 2411; GFX7-HSA-NEXT: s_bfe_u32 s11, s11, 0x80010 2412; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x70 2413; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2414; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s3 2415; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s2 2416; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x60 2417; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2418; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s3 2419; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s2 2420; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x50 2421; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s36 2422; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s27 2423; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s11 2424; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s26 2425; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s35 2426; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s25 2427; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2428; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s10 2429; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s24 2430; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2431; GFX7-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2432; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s34 2433; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2434; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2435; GFX7-HSA-NEXT: s_add_u32 s2, s0, 64 2436; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s23 2437; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s9 2438; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s22 2439; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2440; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2441; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2442; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2443; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 2444; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s33 2445; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s21 2446; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s8 2447; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s20 2448; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2449; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2450; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2451; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2452; GFX7-HSA-NEXT: s_add_u32 s2, s0, 32 2453; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s31 2454; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s19 2455; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s7 2456; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s18 2457; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2458; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2459; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2460; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2461; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 2462; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s30 2463; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17 2464; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 2465; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s16 2466; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2467; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2468; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2469; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s29 2470; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s15 2471; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 2472; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s14 2473; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2474; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2475; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 2476; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s28 2477; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s13 2478; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 2479; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s12 2480; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 2481; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2482; GFX7-HSA-NEXT: s_endpgm 2483; 2484; GFX8-NOHSA-LABEL: constant_zextload_v32i8_to_v32i32: 2485; GFX8-NOHSA: ; %bb.0: 2486; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2487; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2488; GFX8-NOHSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2489; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2490; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s4, 24 2491; GFX8-NOHSA-NEXT: s_bfe_u32 s13, s4, 0x80008 2492; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s5, 24 2493; GFX8-NOHSA-NEXT: s_bfe_u32 s15, s5, 0x80008 2494; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s6, 24 2495; GFX8-NOHSA-NEXT: s_bfe_u32 s17, s6, 0x80008 2496; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s7, 24 2497; GFX8-NOHSA-NEXT: s_bfe_u32 s19, s7, 0x80008 2498; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s8, 24 2499; GFX8-NOHSA-NEXT: s_bfe_u32 s21, s8, 0x80008 2500; GFX8-NOHSA-NEXT: s_lshr_b32 s22, s9, 24 2501; GFX8-NOHSA-NEXT: s_bfe_u32 s23, s9, 0x80008 2502; GFX8-NOHSA-NEXT: s_lshr_b32 s24, s10, 24 2503; GFX8-NOHSA-NEXT: s_bfe_u32 s25, s10, 0x80008 2504; GFX8-NOHSA-NEXT: s_lshr_b32 s2, s11, 24 2505; GFX8-NOHSA-NEXT: s_bfe_u32 s3, s11, 0x80008 2506; GFX8-NOHSA-NEXT: s_and_b32 s26, s4, 0xff 2507; GFX8-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 2508; GFX8-NOHSA-NEXT: s_and_b32 s27, s5, 0xff 2509; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 2510; GFX8-NOHSA-NEXT: s_and_b32 s28, s6, 0xff 2511; GFX8-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 2512; GFX8-NOHSA-NEXT: s_and_b32 s29, s7, 0xff 2513; GFX8-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 2514; GFX8-NOHSA-NEXT: s_and_b32 s30, s8, 0xff 2515; GFX8-NOHSA-NEXT: s_bfe_u32 s8, s8, 0x80010 2516; GFX8-NOHSA-NEXT: s_and_b32 s31, s9, 0xff 2517; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s9, 0x80010 2518; GFX8-NOHSA-NEXT: s_and_b32 s33, s10, 0xff 2519; GFX8-NOHSA-NEXT: s_bfe_u32 s10, s10, 0x80010 2520; GFX8-NOHSA-NEXT: s_and_b32 s34, s11, 0xff 2521; GFX8-NOHSA-NEXT: s_bfe_u32 s11, s11, 0x80010 2522; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s2 2523; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x70 2524; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 2525; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2526; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2527; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2528; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x60 2529; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s34 2530; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s11 2531; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2532; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2533; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2534; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2535; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x50 2536; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s33 2537; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s25 2538; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s10 2539; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s24 2540; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2541; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2542; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2543; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2544; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 64 2545; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s31 2546; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23 2547; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s9 2548; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s22 2549; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2550; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2551; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2552; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2553; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 2554; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s30 2555; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s21 2556; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8 2557; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s20 2558; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2559; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2560; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2561; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2562; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 2563; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s29 2564; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s19 2565; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s7 2566; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s18 2567; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2568; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2569; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2570; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2571; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 2572; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s28 2573; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17 2574; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 2575; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s16 2576; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 2577; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2578; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 2579; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s27 2580; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s15 2581; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s5 2582; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s14 2583; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 2584; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2585; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 2586; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s26 2587; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s13 2588; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 2589; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s12 2590; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 2591; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2592; GFX8-NOHSA-NEXT: s_endpgm 2593; 2594; EG-LABEL: constant_zextload_v32i8_to_v32i32: 2595; EG: ; %bb.0: 2596; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 2597; EG-NEXT: TEX 1 @12 2598; EG-NEXT: ALU 75, @17, KC0[CB0:0-32], KC1[] 2599; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0 2600; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T25.X, 0 2601; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T23.X, 0 2602; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T12.X, 0 2603; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T20.X, 0 2604; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0 2605; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 2606; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1 2607; EG-NEXT: CF_END 2608; EG-NEXT: Fetch clause starting at 12: 2609; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2610; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2611; EG-NEXT: ALU clause starting at 16: 2612; EG-NEXT: MOV * T11.X, KC0[2].Z, 2613; EG-NEXT: ALU clause starting at 17: 2614; EG-NEXT: MOV * T0.W, literal.x, 2615; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 2616; EG-NEXT: BFE_UINT * T13.Z, T11.X, literal.x, PV.W, 2617; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2618; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, T0.W, 2619; EG-NEXT: BFE_UINT T14.Z, T11.Y, literal.y, T0.W, 2620; EG-NEXT: LSHR * T13.W, T11.X, literal.z, 2621; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 2622; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 2623; EG-NEXT: AND_INT T13.X, T11.X, literal.x, 2624; EG-NEXT: BFE_UINT T14.Y, T11.Y, literal.y, T0.W, 2625; EG-NEXT: LSHR * T11.X, KC0[2].Y, literal.z, 2626; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 2627; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2628; EG-NEXT: BFE_UINT T15.Z, T11.Z, literal.x, T0.W, 2629; EG-NEXT: LSHR * T14.W, T11.Y, literal.y, 2630; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 2631; EG-NEXT: AND_INT T14.X, T11.Y, literal.x, 2632; EG-NEXT: BFE_UINT T15.Y, T11.Z, literal.y, T0.W, 2633; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2634; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 2635; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2636; EG-NEXT: LSHR T16.X, PV.W, literal.x, 2637; EG-NEXT: BFE_UINT T17.Z, T11.W, literal.y, T0.W, 2638; EG-NEXT: LSHR T15.W, T11.Z, literal.z, 2639; EG-NEXT: AND_INT * T15.X, T11.Z, literal.w, 2640; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2641; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 2642; EG-NEXT: BFE_UINT T17.Y, T11.W, literal.x, T0.W, 2643; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2644; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44) 2645; EG-NEXT: LSHR T18.X, PV.W, literal.x, 2646; EG-NEXT: BFE_UINT T19.Z, T12.X, literal.y, T0.W, BS:VEC_021/SCL_122 2647; EG-NEXT: LSHR T17.W, T11.W, literal.z, 2648; EG-NEXT: AND_INT * T17.X, T11.W, literal.w, 2649; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2650; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 2651; EG-NEXT: BFE_UINT T19.Y, T12.X, literal.x, T0.W, 2652; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2653; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44) 2654; EG-NEXT: LSHR T20.X, PV.W, literal.x, 2655; EG-NEXT: BFE_UINT T21.Z, T12.Y, literal.y, T0.W, 2656; EG-NEXT: LSHR T19.W, T12.X, literal.z, 2657; EG-NEXT: AND_INT * T19.X, T12.X, literal.w, 2658; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2659; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 2660; EG-NEXT: BFE_UINT T21.Y, T12.Y, literal.x, T0.W, 2661; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2662; EG-NEXT: 8(1.121039e-44), 64(8.968310e-44) 2663; EG-NEXT: LSHR T12.X, PV.W, literal.x, 2664; EG-NEXT: BFE_UINT T22.Z, T12.Z, literal.y, T0.W, 2665; EG-NEXT: LSHR T21.W, T12.Y, literal.z, 2666; EG-NEXT: AND_INT * T21.X, T12.Y, literal.w, 2667; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2668; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 2669; EG-NEXT: BFE_UINT T22.Y, T12.Z, literal.x, T0.W, 2670; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2671; EG-NEXT: 8(1.121039e-44), 80(1.121039e-43) 2672; EG-NEXT: LSHR T23.X, PV.W, literal.x, 2673; EG-NEXT: BFE_UINT T24.Z, T12.W, literal.y, T0.W, 2674; EG-NEXT: LSHR T22.W, T12.Z, literal.z, 2675; EG-NEXT: AND_INT * T22.X, T12.Z, literal.w, 2676; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2677; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 2678; EG-NEXT: BFE_UINT T24.Y, T12.W, literal.x, T0.W, 2679; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2680; EG-NEXT: 8(1.121039e-44), 96(1.345247e-43) 2681; EG-NEXT: LSHR T25.X, PV.W, literal.x, 2682; EG-NEXT: LSHR T24.W, T12.W, literal.y, 2683; EG-NEXT: AND_INT * T24.X, T12.W, literal.z, 2684; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 2685; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 2686; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2687; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2688; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 2689; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2690; 2691; GFX12-LABEL: constant_zextload_v32i8_to_v32i32: 2692; GFX12: ; %bb.0: 2693; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2694; GFX12-NEXT: s_wait_kmcnt 0x0 2695; GFX12-NEXT: s_load_b256 s[4:11], s[2:3], 0x0 2696; GFX12-NEXT: s_wait_kmcnt 0x0 2697; GFX12-NEXT: s_lshr_b32 s24, s11, 24 2698; GFX12-NEXT: s_bfe_u32 s25, s11, 0x80008 2699; GFX12-NEXT: s_and_b32 s34, s11, 0xff 2700; GFX12-NEXT: s_bfe_u32 s11, s11, 0x80010 2701; GFX12-NEXT: s_lshr_b32 s22, s10, 24 2702; GFX12-NEXT: s_bfe_u32 s23, s10, 0x80008 2703; GFX12-NEXT: s_and_b32 s33, s10, 0xff 2704; GFX12-NEXT: s_bfe_u32 s10, s10, 0x80010 2705; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s25 2706; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s24 2707; GFX12-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v5, s23 2708; GFX12-NEXT: s_bfe_u32 s21, s9, 0x80008 2709; GFX12-NEXT: v_dual_mov_b32 v4, s33 :: v_dual_mov_b32 v7, s22 2710; GFX12-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v9, s21 2711; GFX12-NEXT: s_lshr_b32 s20, s9, 24 2712; GFX12-NEXT: s_and_b32 s31, s9, 0xff 2713; GFX12-NEXT: s_bfe_u32 s9, s9, 0x80010 2714; GFX12-NEXT: s_lshr_b32 s18, s8, 24 2715; GFX12-NEXT: s_bfe_u32 s19, s8, 0x80008 2716; GFX12-NEXT: s_and_b32 s30, s8, 0xff 2717; GFX12-NEXT: s_bfe_u32 s8, s8, 0x80010 2718; GFX12-NEXT: s_lshr_b32 s16, s7, 24 2719; GFX12-NEXT: s_bfe_u32 s17, s7, 0x80008 2720; GFX12-NEXT: s_and_b32 s29, s7, 0xff 2721; GFX12-NEXT: s_bfe_u32 s7, s7, 0x80010 2722; GFX12-NEXT: s_wait_alu 0xfffe 2723; GFX12-NEXT: v_dual_mov_b32 v8, s31 :: v_dual_mov_b32 v11, s20 2724; GFX12-NEXT: v_mov_b32_e32 v10, s9 2725; GFX12-NEXT: s_lshr_b32 s14, s6, 24 2726; GFX12-NEXT: s_bfe_u32 s15, s6, 0x80008 2727; GFX12-NEXT: s_and_b32 s28, s6, 0xff 2728; GFX12-NEXT: s_bfe_u32 s6, s6, 0x80010 2729; GFX12-NEXT: s_clause 0x1 2730; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112 2731; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96 2732; GFX12-NEXT: v_dual_mov_b32 v1, s19 :: v_dual_mov_b32 v0, s30 2733; GFX12-NEXT: v_dual_mov_b32 v3, s18 :: v_dual_mov_b32 v2, s8 2734; GFX12-NEXT: v_mov_b32_e32 v5, s17 2735; GFX12-NEXT: s_lshr_b32 s12, s5, 24 2736; GFX12-NEXT: s_bfe_u32 s13, s5, 0x80008 2737; GFX12-NEXT: s_and_b32 s27, s5, 0xff 2738; GFX12-NEXT: s_bfe_u32 s5, s5, 0x80010 2739; GFX12-NEXT: v_dual_mov_b32 v4, s29 :: v_dual_mov_b32 v7, s16 2740; GFX12-NEXT: v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v13, s15 2741; GFX12-NEXT: s_lshr_b32 s2, s4, 24 2742; GFX12-NEXT: s_bfe_u32 s3, s4, 0x80008 2743; GFX12-NEXT: s_and_b32 s26, s4, 0xff 2744; GFX12-NEXT: s_bfe_u32 s4, s4, 0x80010 2745; GFX12-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v15, s14 2746; GFX12-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v17, s13 2747; GFX12-NEXT: v_dual_mov_b32 v16, s27 :: v_dual_mov_b32 v19, s12 2748; GFX12-NEXT: v_dual_mov_b32 v18, s5 :: v_dual_mov_b32 v21, s3 2749; GFX12-NEXT: v_dual_mov_b32 v20, s26 :: v_dual_mov_b32 v23, s2 2750; GFX12-NEXT: v_mov_b32_e32 v22, s4 2751; GFX12-NEXT: s_clause 0x5 2752; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80 2753; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64 2754; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48 2755; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 2756; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 2757; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 2758; GFX12-NEXT: s_endpgm 2759 %load = load <32 x i8>, ptr addrspace(4) %in 2760 %ext = zext <32 x i8> %load to <32 x i32> 2761 store <32 x i32> %ext, ptr addrspace(1) %out 2762 ret void 2763} 2764 2765; TODO: These should use DST, but for some there are redundant MOVs 2766define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 2767; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i32: 2768; GFX6-NOHSA: ; %bb.0: 2769; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2770; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2771; GFX6-NOHSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2772; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 2773; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 2774; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2775; GFX6-NOHSA-NEXT: s_ashr_i32 s12, s4, 24 2776; GFX6-NOHSA-NEXT: s_bfe_i32 s13, s4, 0x80010 2777; GFX6-NOHSA-NEXT: s_bfe_i32 s14, s4, 0x80008 2778; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 2779; GFX6-NOHSA-NEXT: s_ashr_i32 s15, s5, 24 2780; GFX6-NOHSA-NEXT: s_bfe_i32 s16, s5, 0x80010 2781; GFX6-NOHSA-NEXT: s_bfe_i32 s17, s5, 0x80008 2782; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 2783; GFX6-NOHSA-NEXT: s_ashr_i32 s18, s6, 24 2784; GFX6-NOHSA-NEXT: s_bfe_i32 s19, s6, 0x80010 2785; GFX6-NOHSA-NEXT: s_bfe_i32 s20, s6, 0x80008 2786; GFX6-NOHSA-NEXT: s_sext_i32_i8 s6, s6 2787; GFX6-NOHSA-NEXT: s_ashr_i32 s21, s7, 24 2788; GFX6-NOHSA-NEXT: s_bfe_i32 s22, s7, 0x80010 2789; GFX6-NOHSA-NEXT: s_bfe_i32 s23, s7, 0x80008 2790; GFX6-NOHSA-NEXT: s_sext_i32_i8 s7, s7 2791; GFX6-NOHSA-NEXT: s_ashr_i32 s24, s8, 24 2792; GFX6-NOHSA-NEXT: s_bfe_i32 s25, s8, 0x80010 2793; GFX6-NOHSA-NEXT: s_bfe_i32 s26, s8, 0x80008 2794; GFX6-NOHSA-NEXT: s_sext_i32_i8 s8, s8 2795; GFX6-NOHSA-NEXT: s_ashr_i32 s27, s9, 24 2796; GFX6-NOHSA-NEXT: s_bfe_i32 s28, s9, 0x80010 2797; GFX6-NOHSA-NEXT: s_bfe_i32 s29, s9, 0x80008 2798; GFX6-NOHSA-NEXT: s_sext_i32_i8 s9, s9 2799; GFX6-NOHSA-NEXT: s_ashr_i32 s30, s10, 24 2800; GFX6-NOHSA-NEXT: s_bfe_i32 s31, s10, 0x80010 2801; GFX6-NOHSA-NEXT: s_bfe_i32 s33, s10, 0x80008 2802; GFX6-NOHSA-NEXT: s_ashr_i32 s34, s11, 24 2803; GFX6-NOHSA-NEXT: s_bfe_i32 s35, s11, 0x80010 2804; GFX6-NOHSA-NEXT: s_bfe_i32 s36, s11, 0x80008 2805; GFX6-NOHSA-NEXT: s_sext_i32_i8 s11, s11 2806; GFX6-NOHSA-NEXT: s_sext_i32_i8 s10, s10 2807; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s11 2808; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s36 2809; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s35 2810; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s34 2811; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2812; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2813; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s10 2814; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s33 2815; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s31 2816; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s30 2817; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2818; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2819; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s9 2820; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s29 2821; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s28 2822; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s27 2823; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2824; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2825; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s8 2826; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s26 2827; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s25 2828; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s24 2829; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2830; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2831; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 2832; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s23 2833; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s22 2834; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s21 2835; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2836; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2837; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 2838; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s20 2839; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s19 2840; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s18 2841; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2842; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2843; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 2844; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s17 2845; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s16 2846; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s15 2847; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2848; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 2849; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 2850; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s14 2851; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s13 2852; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s12 2853; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2854; GFX6-NOHSA-NEXT: s_endpgm 2855; 2856; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i32: 2857; GFX7-HSA: ; %bb.0: 2858; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2859; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 2860; GFX7-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2861; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 2862; GFX7-HSA-NEXT: s_ashr_i32 s12, s4, 24 2863; GFX7-HSA-NEXT: s_bfe_i32 s13, s4, 0x80010 2864; GFX7-HSA-NEXT: s_bfe_i32 s14, s4, 0x80008 2865; GFX7-HSA-NEXT: s_ashr_i32 s15, s5, 24 2866; GFX7-HSA-NEXT: s_bfe_i32 s16, s5, 0x80010 2867; GFX7-HSA-NEXT: s_bfe_i32 s17, s5, 0x80008 2868; GFX7-HSA-NEXT: s_ashr_i32 s18, s6, 24 2869; GFX7-HSA-NEXT: s_bfe_i32 s19, s6, 0x80010 2870; GFX7-HSA-NEXT: s_bfe_i32 s20, s6, 0x80008 2871; GFX7-HSA-NEXT: s_ashr_i32 s21, s7, 24 2872; GFX7-HSA-NEXT: s_bfe_i32 s22, s7, 0x80010 2873; GFX7-HSA-NEXT: s_bfe_i32 s23, s7, 0x80008 2874; GFX7-HSA-NEXT: s_ashr_i32 s24, s8, 24 2875; GFX7-HSA-NEXT: s_bfe_i32 s25, s8, 0x80010 2876; GFX7-HSA-NEXT: s_bfe_i32 s26, s8, 0x80008 2877; GFX7-HSA-NEXT: s_ashr_i32 s27, s9, 24 2878; GFX7-HSA-NEXT: s_bfe_i32 s28, s9, 0x80010 2879; GFX7-HSA-NEXT: s_bfe_i32 s29, s9, 0x80008 2880; GFX7-HSA-NEXT: s_ashr_i32 s30, s10, 24 2881; GFX7-HSA-NEXT: s_bfe_i32 s31, s10, 0x80010 2882; GFX7-HSA-NEXT: s_bfe_i32 s33, s10, 0x80008 2883; GFX7-HSA-NEXT: s_ashr_i32 s34, s11, 24 2884; GFX7-HSA-NEXT: s_bfe_i32 s35, s11, 0x80010 2885; GFX7-HSA-NEXT: s_bfe_i32 s36, s11, 0x80008 2886; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x70 2887; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2888; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s3 2889; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s2 2890; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x60 2891; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2892; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s3 2893; GFX7-HSA-NEXT: s_sext_i32_i8 s10, s10 2894; GFX7-HSA-NEXT: s_sext_i32_i8 s11, s11 2895; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s2 2896; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x50 2897; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s11 2898; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s36 2899; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s35 2900; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s34 2901; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s10 2902; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s33 2903; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2904; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s31 2905; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s30 2906; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2907; GFX7-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2908; GFX7-HSA-NEXT: s_sext_i32_i8 s9, s9 2909; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2910; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2911; GFX7-HSA-NEXT: s_add_u32 s2, s0, 64 2912; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s9 2913; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s29 2914; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s28 2915; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s27 2916; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2917; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2918; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2919; GFX7-HSA-NEXT: s_sext_i32_i8 s8, s8 2920; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2921; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 2922; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8 2923; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s26 2924; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s25 2925; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s24 2926; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2927; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2928; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2929; GFX7-HSA-NEXT: s_sext_i32_i8 s7, s7 2930; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2931; GFX7-HSA-NEXT: s_add_u32 s2, s0, 32 2932; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s7 2933; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s23 2934; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s22 2935; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s21 2936; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2937; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2938; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2939; GFX7-HSA-NEXT: s_sext_i32_i8 s6, s6 2940; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2941; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 2942; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 2943; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s20 2944; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s19 2945; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s18 2946; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 2947; GFX7-HSA-NEXT: s_sext_i32_i8 s5, s5 2948; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2949; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 2950; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 2951; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17 2952; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s16 2953; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s15 2954; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 2955; GFX7-HSA-NEXT: s_sext_i32_i8 s4, s4 2956; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2957; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 2958; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 2959; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s14 2960; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s13 2961; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s12 2962; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 2963; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2964; GFX7-HSA-NEXT: s_endpgm 2965; 2966; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i32: 2967; GFX8-NOHSA: ; %bb.0: 2968; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2969; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2970; GFX8-NOHSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2971; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 2972; GFX8-NOHSA-NEXT: s_ashr_i32 s12, s4, 24 2973; GFX8-NOHSA-NEXT: s_bfe_i32 s13, s4, 0x80010 2974; GFX8-NOHSA-NEXT: s_bfe_i32 s14, s4, 0x80008 2975; GFX8-NOHSA-NEXT: s_ashr_i32 s15, s5, 24 2976; GFX8-NOHSA-NEXT: s_bfe_i32 s16, s5, 0x80010 2977; GFX8-NOHSA-NEXT: s_bfe_i32 s17, s5, 0x80008 2978; GFX8-NOHSA-NEXT: s_ashr_i32 s18, s6, 24 2979; GFX8-NOHSA-NEXT: s_bfe_i32 s19, s6, 0x80010 2980; GFX8-NOHSA-NEXT: s_bfe_i32 s20, s6, 0x80008 2981; GFX8-NOHSA-NEXT: s_ashr_i32 s21, s7, 24 2982; GFX8-NOHSA-NEXT: s_bfe_i32 s22, s7, 0x80010 2983; GFX8-NOHSA-NEXT: s_bfe_i32 s23, s7, 0x80008 2984; GFX8-NOHSA-NEXT: s_ashr_i32 s24, s8, 24 2985; GFX8-NOHSA-NEXT: s_bfe_i32 s25, s8, 0x80010 2986; GFX8-NOHSA-NEXT: s_bfe_i32 s26, s8, 0x80008 2987; GFX8-NOHSA-NEXT: s_ashr_i32 s27, s9, 24 2988; GFX8-NOHSA-NEXT: s_bfe_i32 s28, s9, 0x80010 2989; GFX8-NOHSA-NEXT: s_bfe_i32 s29, s9, 0x80008 2990; GFX8-NOHSA-NEXT: s_ashr_i32 s30, s10, 24 2991; GFX8-NOHSA-NEXT: s_bfe_i32 s31, s10, 0x80010 2992; GFX8-NOHSA-NEXT: s_bfe_i32 s33, s10, 0x80008 2993; GFX8-NOHSA-NEXT: s_ashr_i32 s2, s11, 24 2994; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s11, 0x80010 2995; GFX8-NOHSA-NEXT: s_bfe_i32 s34, s11, 0x80008 2996; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s2 2997; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x70 2998; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s3 2999; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3000; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3001; GFX8-NOHSA-NEXT: s_sext_i32_i8 s11, s11 3002; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3003; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x60 3004; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s11 3005; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s34 3006; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3007; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3008; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3009; GFX8-NOHSA-NEXT: s_sext_i32_i8 s10, s10 3010; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3011; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x50 3012; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 3013; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s33 3014; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s31 3015; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s30 3016; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3017; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3018; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3019; GFX8-NOHSA-NEXT: s_sext_i32_i8 s9, s9 3020; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3021; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 64 3022; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s9 3023; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s29 3024; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s28 3025; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s27 3026; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3027; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3028; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3029; GFX8-NOHSA-NEXT: s_sext_i32_i8 s8, s8 3030; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3031; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 3032; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 3033; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s26 3034; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s25 3035; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s24 3036; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3037; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3038; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3039; GFX8-NOHSA-NEXT: s_sext_i32_i8 s7, s7 3040; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3041; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 3042; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s7 3043; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23 3044; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s22 3045; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s21 3046; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3047; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3048; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3049; GFX8-NOHSA-NEXT: s_sext_i32_i8 s6, s6 3050; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3051; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 3052; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 3053; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s20 3054; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s19 3055; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s18 3056; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 3057; GFX8-NOHSA-NEXT: s_sext_i32_i8 s5, s5 3058; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3059; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3060; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s5 3061; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17 3062; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s16 3063; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s15 3064; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3065; GFX8-NOHSA-NEXT: s_sext_i32_i8 s4, s4 3066; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3067; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 3068; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 3069; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s14 3070; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s13 3071; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s12 3072; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 3073; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3074; GFX8-NOHSA-NEXT: s_endpgm 3075; 3076; EG-LABEL: constant_sextload_v32i8_to_v32i32: 3077; EG: ; %bb.0: 3078; EG-NEXT: ALU 0, @18, KC0[CB0:0-32], KC1[] 3079; EG-NEXT: TEX 0 @14 3080; EG-NEXT: ALU 18, @19, KC0[CB0:0-32], KC1[] 3081; EG-NEXT: TEX 0 @16 3082; EG-NEXT: ALU 75, @38, KC0[CB0:0-32], KC1[] 3083; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0 3084; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 0 3085; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T12.X, 0 3086; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T17.X, 0 3087; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T16.X, 0 3088; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T15.X, 0 3089; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T14.X, 0 3090; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T13.X, 1 3091; EG-NEXT: CF_END 3092; EG-NEXT: Fetch clause starting at 14: 3093; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 3094; EG-NEXT: Fetch clause starting at 16: 3095; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 3096; EG-NEXT: ALU clause starting at 18: 3097; EG-NEXT: MOV * T11.X, KC0[2].Z, 3098; EG-NEXT: ALU clause starting at 19: 3099; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 3100; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3101; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3102; EG-NEXT: LSHR T14.X, PV.W, literal.x, 3103; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3104; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 3105; EG-NEXT: LSHR T15.X, PV.W, literal.x, 3106; EG-NEXT: LSHR T0.Z, T12.W, literal.y, 3107; EG-NEXT: LSHR T0.W, T12.Z, literal.z, 3108; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 3109; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3110; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44) 3111; EG-NEXT: LSHR T16.X, PS, literal.x, 3112; EG-NEXT: LSHR T0.Y, T12.W, literal.y, 3113; EG-NEXT: LSHR T1.Z, T12.Z, literal.z, 3114; EG-NEXT: LSHR T1.W, T12.Y, literal.w, 3115; EG-NEXT: LSHR * T2.W, T12.Z, literal.y, 3116; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 3117; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 3118; EG-NEXT: ALU clause starting at 38: 3119; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.x, 3120; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3121; EG-NEXT: LSHR T17.X, PV.W, literal.x, 3122; EG-NEXT: LSHR T1.Y, T12.Y, literal.y, 3123; EG-NEXT: LSHR T2.Z, T12.Y, literal.z, 3124; EG-NEXT: LSHR T3.W, T12.X, literal.y, 3125; EG-NEXT: LSHR * T4.W, T12.X, literal.z, 3126; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3127; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 3128; EG-NEXT: BFE_INT T18.X, T11.X, 0.0, literal.x, 3129; EG-NEXT: LSHR T2.Y, T11.W, literal.y, 3130; EG-NEXT: LSHR T3.Z, T11.W, literal.z, 3131; EG-NEXT: LSHR T5.W, T11.Z, literal.y, 3132; EG-NEXT: LSHR * T6.W, T11.X, literal.z, 3133; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 3134; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 3135; EG-NEXT: BFE_INT T19.X, T11.Y, 0.0, literal.x, 3136; EG-NEXT: LSHR T3.Y, T11.Z, literal.y, 3137; EG-NEXT: LSHR T4.Z, T11.Y, literal.y, 3138; EG-NEXT: BFE_INT T18.W, PS, 0.0, literal.x, 3139; EG-NEXT: LSHR * T6.W, T11.X, literal.z, 3140; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 3141; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3142; EG-NEXT: BFE_INT T20.X, T11.Z, 0.0, literal.x, 3143; EG-NEXT: LSHR T4.Y, T11.Y, literal.y, 3144; EG-NEXT: BFE_INT T18.Z, PS, 0.0, literal.x, 3145; EG-NEXT: BFE_INT T19.W, PV.Z, 0.0, literal.x, 3146; EG-NEXT: LSHR * T6.W, T11.X, literal.x, 3147; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 3148; EG-NEXT: BFE_INT T21.X, T11.W, 0.0, literal.x, 3149; EG-NEXT: BFE_INT T18.Y, PS, 0.0, literal.x, 3150; EG-NEXT: BFE_INT T19.Z, PV.Y, 0.0, literal.x, 3151; EG-NEXT: BFE_INT T20.W, T3.Y, 0.0, literal.x, 3152; EG-NEXT: LSHR * T6.W, T11.Y, literal.x, 3153; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3154; EG-NEXT: BFE_INT T22.X, T12.X, 0.0, literal.x, 3155; EG-NEXT: BFE_INT T19.Y, PS, 0.0, literal.x, 3156; EG-NEXT: BFE_INT T20.Z, T5.W, 0.0, literal.x, 3157; EG-NEXT: BFE_INT T21.W, T3.Z, 0.0, literal.x, 3158; EG-NEXT: LSHR * T5.W, T11.Z, literal.x, 3159; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3160; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 3161; EG-NEXT: BFE_INT T20.Y, PS, 0.0, literal.x, 3162; EG-NEXT: BFE_INT T21.Z, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3163; EG-NEXT: BFE_INT T22.W, T4.W, 0.0, literal.x, 3164; EG-NEXT: LSHR * T4.W, T11.W, literal.x, 3165; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3166; EG-NEXT: BFE_INT T23.X, T12.Z, 0.0, literal.x, 3167; EG-NEXT: BFE_INT T21.Y, PS, 0.0, literal.x, 3168; EG-NEXT: BFE_INT T22.Z, T3.W, 0.0, literal.x, 3169; EG-NEXT: BFE_INT T11.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3170; EG-NEXT: LSHR * T3.W, T12.X, literal.x, 3171; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3172; EG-NEXT: BFE_INT T24.X, T12.W, 0.0, literal.x, 3173; EG-NEXT: BFE_INT T22.Y, PS, 0.0, literal.x, 3174; EG-NEXT: BFE_INT T11.Z, T1.Y, 0.0, literal.x, 3175; EG-NEXT: BFE_INT T23.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 3176; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, 3177; EG-NEXT: 8(1.121039e-44), 80(1.121039e-43) 3178; EG-NEXT: LSHR T12.X, PS, literal.x, 3179; EG-NEXT: BFE_INT T11.Y, T1.W, 0.0, literal.y, 3180; EG-NEXT: BFE_INT T23.Z, T1.Z, 0.0, literal.y, 3181; EG-NEXT: BFE_INT T24.W, T0.Y, 0.0, literal.y, 3182; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3183; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 3184; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3185; EG-NEXT: LSHR T25.X, PS, literal.x, 3186; EG-NEXT: BFE_INT T23.Y, T0.W, 0.0, literal.y, 3187; EG-NEXT: BFE_INT T24.Z, T0.Z, 0.0, literal.y, 3188; EG-NEXT: LSHR T0.W, T12.W, literal.y, BS:VEC_120/SCL_212 3189; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3190; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 3191; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 3192; EG-NEXT: LSHR T26.X, PS, literal.x, 3193; EG-NEXT: BFE_INT * T24.Y, PV.W, 0.0, literal.y, 3194; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 3195; 3196; GFX12-LABEL: constant_sextload_v32i8_to_v32i32: 3197; GFX12: ; %bb.0: 3198; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3199; GFX12-NEXT: s_wait_kmcnt 0x0 3200; GFX12-NEXT: s_load_b256 s[4:11], s[2:3], 0x0 3201; GFX12-NEXT: s_wait_kmcnt 0x0 3202; GFX12-NEXT: s_ashr_i32 s31, s11, 24 3203; GFX12-NEXT: s_bfe_i32 s33, s11, 0x80010 3204; GFX12-NEXT: s_sext_i32_i8 s34, s11 3205; GFX12-NEXT: s_bfe_i32 s11, s11, 0x80008 3206; GFX12-NEXT: s_ashr_i32 s28, s10, 24 3207; GFX12-NEXT: s_bfe_i32 s29, s10, 0x80010 3208; GFX12-NEXT: s_bfe_i32 s30, s10, 0x80008 3209; GFX12-NEXT: s_sext_i32_i8 s10, s10 3210; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s11 3211; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s31 3212; GFX12-NEXT: v_dual_mov_b32 v2, s33 :: v_dual_mov_b32 v5, s30 3213; GFX12-NEXT: s_bfe_i32 s27, s9, 0x80008 3214; GFX12-NEXT: v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v7, s28 3215; GFX12-NEXT: v_dual_mov_b32 v6, s29 :: v_dual_mov_b32 v9, s27 3216; GFX12-NEXT: s_ashr_i32 s25, s9, 24 3217; GFX12-NEXT: s_bfe_i32 s26, s9, 0x80010 3218; GFX12-NEXT: s_sext_i32_i8 s9, s9 3219; GFX12-NEXT: s_ashr_i32 s22, s8, 24 3220; GFX12-NEXT: s_bfe_i32 s23, s8, 0x80010 3221; GFX12-NEXT: s_bfe_i32 s24, s8, 0x80008 3222; GFX12-NEXT: s_sext_i32_i8 s8, s8 3223; GFX12-NEXT: s_ashr_i32 s19, s7, 24 3224; GFX12-NEXT: s_bfe_i32 s20, s7, 0x80010 3225; GFX12-NEXT: s_bfe_i32 s21, s7, 0x80008 3226; GFX12-NEXT: s_sext_i32_i8 s7, s7 3227; GFX12-NEXT: v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v11, s25 3228; GFX12-NEXT: s_wait_alu 0xfffe 3229; GFX12-NEXT: v_mov_b32_e32 v10, s26 3230; GFX12-NEXT: s_ashr_i32 s16, s6, 24 3231; GFX12-NEXT: s_bfe_i32 s17, s6, 0x80010 3232; GFX12-NEXT: s_bfe_i32 s18, s6, 0x80008 3233; GFX12-NEXT: s_sext_i32_i8 s6, s6 3234; GFX12-NEXT: s_clause 0x1 3235; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112 3236; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96 3237; GFX12-NEXT: v_dual_mov_b32 v1, s24 :: v_dual_mov_b32 v0, s8 3238; GFX12-NEXT: v_dual_mov_b32 v3, s22 :: v_dual_mov_b32 v2, s23 3239; GFX12-NEXT: v_mov_b32_e32 v5, s21 3240; GFX12-NEXT: s_ashr_i32 s13, s5, 24 3241; GFX12-NEXT: s_bfe_i32 s14, s5, 0x80010 3242; GFX12-NEXT: s_bfe_i32 s15, s5, 0x80008 3243; GFX12-NEXT: s_sext_i32_i8 s5, s5 3244; GFX12-NEXT: v_dual_mov_b32 v4, s7 :: v_dual_mov_b32 v7, s19 3245; GFX12-NEXT: v_dual_mov_b32 v6, s20 :: v_dual_mov_b32 v13, s18 3246; GFX12-NEXT: s_ashr_i32 s2, s4, 24 3247; GFX12-NEXT: s_bfe_i32 s3, s4, 0x80010 3248; GFX12-NEXT: s_bfe_i32 s12, s4, 0x80008 3249; GFX12-NEXT: s_sext_i32_i8 s4, s4 3250; GFX12-NEXT: v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s16 3251; GFX12-NEXT: v_dual_mov_b32 v14, s17 :: v_dual_mov_b32 v17, s15 3252; GFX12-NEXT: v_dual_mov_b32 v16, s5 :: v_dual_mov_b32 v19, s13 3253; GFX12-NEXT: v_dual_mov_b32 v18, s14 :: v_dual_mov_b32 v21, s12 3254; GFX12-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v23, s2 3255; GFX12-NEXT: v_mov_b32_e32 v22, s3 3256; GFX12-NEXT: s_clause 0x5 3257; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80 3258; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64 3259; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48 3260; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 3261; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 3262; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 3263; GFX12-NEXT: s_endpgm 3264 %load = load <32 x i8>, ptr addrspace(4) %in 3265 %ext = sext <32 x i8> %load to <32 x i32> 3266 store <32 x i32> %ext, ptr addrspace(1) %out 3267 ret void 3268} 3269 3270define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 3271; GFX6-NOHSA-LABEL: constant_zextload_v64i8_to_v64i32: 3272; GFX6-NOHSA: ; %bb.0: 3273; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9 3274; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 3275; GFX6-NOHSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3276; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 3277; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s0, 24 3278; GFX6-NOHSA-NEXT: s_bfe_u32 s19, s0, 0x80008 3279; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s1, 24 3280; GFX6-NOHSA-NEXT: s_bfe_u32 s21, s1, 0x80008 3281; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s2, 24 3282; GFX6-NOHSA-NEXT: s_bfe_u32 s23, s2, 0x80008 3283; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s3, 24 3284; GFX6-NOHSA-NEXT: s_bfe_u32 s27, s3, 0x80008 3285; GFX6-NOHSA-NEXT: s_lshr_b32 s28, s4, 24 3286; GFX6-NOHSA-NEXT: s_bfe_u32 s29, s4, 0x80008 3287; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s5, 24 3288; GFX6-NOHSA-NEXT: s_bfe_u32 s31, s5, 0x80008 3289; GFX6-NOHSA-NEXT: s_lshr_b32 s33, s6, 24 3290; GFX6-NOHSA-NEXT: s_bfe_u32 s34, s6, 0x80008 3291; GFX6-NOHSA-NEXT: s_lshr_b32 s35, s7, 24 3292; GFX6-NOHSA-NEXT: s_bfe_u32 s36, s7, 0x80008 3293; GFX6-NOHSA-NEXT: s_lshr_b32 s37, s8, 24 3294; GFX6-NOHSA-NEXT: s_bfe_u32 s38, s8, 0x80008 3295; GFX6-NOHSA-NEXT: s_lshr_b32 s39, s9, 24 3296; GFX6-NOHSA-NEXT: s_bfe_u32 s40, s9, 0x80008 3297; GFX6-NOHSA-NEXT: s_lshr_b32 s41, s10, 24 3298; GFX6-NOHSA-NEXT: s_bfe_u32 s42, s10, 0x80008 3299; GFX6-NOHSA-NEXT: s_lshr_b32 s43, s11, 24 3300; GFX6-NOHSA-NEXT: s_bfe_u32 s44, s11, 0x80008 3301; GFX6-NOHSA-NEXT: s_lshr_b32 s45, s12, 24 3302; GFX6-NOHSA-NEXT: s_bfe_u32 s46, s12, 0x80008 3303; GFX6-NOHSA-NEXT: s_lshr_b32 s47, s13, 24 3304; GFX6-NOHSA-NEXT: s_bfe_u32 s48, s13, 0x80008 3305; GFX6-NOHSA-NEXT: s_lshr_b32 s49, s14, 24 3306; GFX6-NOHSA-NEXT: s_bfe_u32 s50, s14, 0x80008 3307; GFX6-NOHSA-NEXT: s_lshr_b32 s51, s15, 24 3308; GFX6-NOHSA-NEXT: s_bfe_u32 s52, s15, 0x80008 3309; GFX6-NOHSA-NEXT: s_and_b32 s26, s0, 0xff 3310; GFX6-NOHSA-NEXT: s_bfe_u32 s25, s0, 0x80010 3311; GFX6-NOHSA-NEXT: s_and_b32 s53, s1, 0xff 3312; GFX6-NOHSA-NEXT: s_bfe_u32 s54, s1, 0x80010 3313; GFX6-NOHSA-NEXT: s_and_b32 s55, s2, 0xff 3314; GFX6-NOHSA-NEXT: s_bfe_u32 s56, s2, 0x80010 3315; GFX6-NOHSA-NEXT: s_and_b32 s57, s3, 0xff 3316; GFX6-NOHSA-NEXT: s_bfe_u32 s58, s3, 0x80010 3317; GFX6-NOHSA-NEXT: s_and_b32 s59, s4, 0xff 3318; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 3319; GFX6-NOHSA-NEXT: s_and_b32 s60, s5, 0xff 3320; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 3321; GFX6-NOHSA-NEXT: s_and_b32 s61, s6, 0xff 3322; GFX6-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 3323; GFX6-NOHSA-NEXT: s_and_b32 s62, s7, 0xff 3324; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 3325; GFX6-NOHSA-NEXT: s_and_b32 s63, s8, 0xff 3326; GFX6-NOHSA-NEXT: s_bfe_u32 s8, s8, 0x80010 3327; GFX6-NOHSA-NEXT: s_and_b32 s64, s9, 0xff 3328; GFX6-NOHSA-NEXT: s_bfe_u32 s9, s9, 0x80010 3329; GFX6-NOHSA-NEXT: s_and_b32 s65, s10, 0xff 3330; GFX6-NOHSA-NEXT: s_and_b32 s66, s11, 0xff 3331; GFX6-NOHSA-NEXT: s_bfe_u32 s11, s11, 0x80010 3332; GFX6-NOHSA-NEXT: s_and_b32 s67, s12, 0xff 3333; GFX6-NOHSA-NEXT: s_bfe_u32 s12, s12, 0x80010 3334; GFX6-NOHSA-NEXT: s_and_b32 s68, s13, 0xff 3335; GFX6-NOHSA-NEXT: s_bfe_u32 s13, s13, 0x80010 3336; GFX6-NOHSA-NEXT: s_and_b32 s69, s14, 0xff 3337; GFX6-NOHSA-NEXT: s_bfe_u32 s14, s14, 0x80010 3338; GFX6-NOHSA-NEXT: s_and_b32 s70, s15, 0xff 3339; GFX6-NOHSA-NEXT: s_bfe_u32 s15, s15, 0x80010 3340; GFX6-NOHSA-NEXT: s_bfe_u32 s10, s10, 0x80010 3341; GFX6-NOHSA-NEXT: s_mov_b32 s0, s16 3342; GFX6-NOHSA-NEXT: s_mov_b32 s1, s17 3343; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 3344; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 3345; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s70 3346; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s52 3347; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s15 3348; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s51 3349; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s69 3350; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s50 3351; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s14 3352; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s49 3353; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s68 3354; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s48 3355; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s13 3356; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s47 3357; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s67 3358; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s46 3359; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s12 3360; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s45 3361; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s66 3362; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s44 3363; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s11 3364; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3365; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3366; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s65 3367; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s43 3368; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s42 3369; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10 3370; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s41 3371; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 3372; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 3373; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 3374; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3375; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 3376; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3377; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s64 3378; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s40 3379; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s9 3380; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s39 3381; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3382; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3383; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s63 3384; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s38 3385; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s8 3386; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s37 3387; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3388; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3389; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s62 3390; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s36 3391; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 3392; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s35 3393; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3394; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3395; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s61 3396; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s34 3397; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 3398; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s33 3399; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3400; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3401; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s60 3402; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s31 3403; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 3404; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s30 3405; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3406; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3407; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s59 3408; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s29 3409; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4 3410; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s28 3411; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3412; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3413; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s57 3414; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s27 3415; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s58 3416; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s24 3417; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3418; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3419; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s55 3420; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s23 3421; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s56 3422; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s22 3423; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3424; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3425; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s53 3426; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s21 3427; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s54 3428; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s20 3429; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3430; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 3431; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s26 3432; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s19 3433; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s25 3434; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s18 3435; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3436; GFX6-NOHSA-NEXT: s_endpgm 3437; 3438; GFX7-HSA-LABEL: constant_zextload_v64i8_to_v64i32: 3439; GFX7-HSA: ; %bb.0: 3440; GFX7-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 3441; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 3442; GFX7-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3443; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 3444; GFX7-HSA-NEXT: s_lshr_b32 s18, s0, 24 3445; GFX7-HSA-NEXT: s_bfe_u32 s19, s0, 0x80008 3446; GFX7-HSA-NEXT: s_lshr_b32 s20, s1, 24 3447; GFX7-HSA-NEXT: s_bfe_u32 s21, s1, 0x80008 3448; GFX7-HSA-NEXT: s_lshr_b32 s22, s2, 24 3449; GFX7-HSA-NEXT: s_bfe_u32 s23, s2, 0x80008 3450; GFX7-HSA-NEXT: s_lshr_b32 s25, s3, 24 3451; GFX7-HSA-NEXT: s_bfe_u32 s26, s3, 0x80008 3452; GFX7-HSA-NEXT: s_lshr_b32 s28, s4, 24 3453; GFX7-HSA-NEXT: s_bfe_u32 s29, s4, 0x80008 3454; GFX7-HSA-NEXT: s_lshr_b32 s31, s5, 24 3455; GFX7-HSA-NEXT: s_bfe_u32 s33, s5, 0x80008 3456; GFX7-HSA-NEXT: s_lshr_b32 s35, s6, 24 3457; GFX7-HSA-NEXT: s_bfe_u32 s37, s6, 0x80008 3458; GFX7-HSA-NEXT: s_lshr_b32 s38, s7, 24 3459; GFX7-HSA-NEXT: s_bfe_u32 s39, s7, 0x80008 3460; GFX7-HSA-NEXT: s_lshr_b32 s41, s8, 24 3461; GFX7-HSA-NEXT: s_bfe_u32 s43, s8, 0x80008 3462; GFX7-HSA-NEXT: s_lshr_b32 s44, s9, 24 3463; GFX7-HSA-NEXT: s_bfe_u32 s46, s9, 0x80008 3464; GFX7-HSA-NEXT: s_lshr_b32 s47, s10, 24 3465; GFX7-HSA-NEXT: s_bfe_u32 s48, s10, 0x80008 3466; GFX7-HSA-NEXT: s_lshr_b32 s49, s11, 24 3467; GFX7-HSA-NEXT: s_bfe_u32 s50, s11, 0x80008 3468; GFX7-HSA-NEXT: s_lshr_b32 s51, s12, 24 3469; GFX7-HSA-NEXT: s_bfe_u32 s52, s12, 0x80008 3470; GFX7-HSA-NEXT: s_lshr_b32 s53, s13, 24 3471; GFX7-HSA-NEXT: s_bfe_u32 s54, s13, 0x80008 3472; GFX7-HSA-NEXT: s_lshr_b32 s55, s14, 24 3473; GFX7-HSA-NEXT: s_bfe_u32 s56, s14, 0x80008 3474; GFX7-HSA-NEXT: s_lshr_b32 s57, s15, 24 3475; GFX7-HSA-NEXT: s_bfe_u32 s58, s15, 0x80008 3476; GFX7-HSA-NEXT: s_and_b32 s24, s0, 0xff 3477; GFX7-HSA-NEXT: s_bfe_u32 s0, s0, 0x80010 3478; GFX7-HSA-NEXT: s_and_b32 s27, s1, 0xff 3479; GFX7-HSA-NEXT: s_bfe_u32 s1, s1, 0x80010 3480; GFX7-HSA-NEXT: s_and_b32 s30, s2, 0xff 3481; GFX7-HSA-NEXT: s_bfe_u32 s2, s2, 0x80010 3482; GFX7-HSA-NEXT: s_and_b32 s34, s3, 0xff 3483; GFX7-HSA-NEXT: s_bfe_u32 s3, s3, 0x80010 3484; GFX7-HSA-NEXT: s_and_b32 s36, s4, 0xff 3485; GFX7-HSA-NEXT: s_bfe_u32 s4, s4, 0x80010 3486; GFX7-HSA-NEXT: s_and_b32 s40, s5, 0xff 3487; GFX7-HSA-NEXT: s_bfe_u32 s5, s5, 0x80010 3488; GFX7-HSA-NEXT: s_and_b32 s42, s6, 0xff 3489; GFX7-HSA-NEXT: s_bfe_u32 s6, s6, 0x80010 3490; GFX7-HSA-NEXT: s_and_b32 s45, s7, 0xff 3491; GFX7-HSA-NEXT: s_bfe_u32 s7, s7, 0x80010 3492; GFX7-HSA-NEXT: s_and_b32 s59, s8, 0xff 3493; GFX7-HSA-NEXT: s_bfe_u32 s60, s8, 0x80010 3494; GFX7-HSA-NEXT: s_and_b32 s61, s9, 0xff 3495; GFX7-HSA-NEXT: s_bfe_u32 s62, s9, 0x80010 3496; GFX7-HSA-NEXT: s_and_b32 s63, s10, 0xff 3497; GFX7-HSA-NEXT: s_bfe_u32 s10, s10, 0x80010 3498; GFX7-HSA-NEXT: s_and_b32 s64, s11, 0xff 3499; GFX7-HSA-NEXT: s_bfe_u32 s11, s11, 0x80010 3500; GFX7-HSA-NEXT: s_and_b32 s65, s12, 0xff 3501; GFX7-HSA-NEXT: s_bfe_u32 s12, s12, 0x80010 3502; GFX7-HSA-NEXT: s_and_b32 s66, s13, 0xff 3503; GFX7-HSA-NEXT: s_bfe_u32 s13, s13, 0x80010 3504; GFX7-HSA-NEXT: s_and_b32 s67, s14, 0xff 3505; GFX7-HSA-NEXT: s_bfe_u32 s14, s14, 0x80010 3506; GFX7-HSA-NEXT: s_and_b32 s68, s15, 0xff 3507; GFX7-HSA-NEXT: s_bfe_u32 s15, s15, 0x80010 3508; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xf0 3509; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3510; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s9 3511; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s8 3512; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xe0 3513; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3514; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s9 3515; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s8 3516; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xd0 3517; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3518; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s9 3519; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s8 3520; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xc0 3521; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3522; GFX7-HSA-NEXT: v_mov_b32_e32 v26, s9 3523; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s8 3524; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xb0 3525; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3526; GFX7-HSA-NEXT: v_mov_b32_e32 v28, s9 3527; GFX7-HSA-NEXT: v_mov_b32_e32 v27, s8 3528; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xa0 3529; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s66 3530; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s54 3531; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s13 3532; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s53 3533; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3534; GFX7-HSA-NEXT: flat_store_dwordx4 v[23:24], v[8:11] 3535; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s65 3536; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s9 3537; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s8 3538; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0x90 3539; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s52 3540; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s12 3541; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s51 3542; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3543; GFX7-HSA-NEXT: flat_store_dwordx4 v[25:26], v[12:15] 3544; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s68 3545; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s9 3546; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s8 3547; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0x80 3548; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3549; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s9 3550; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s58 3551; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s15 3552; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s57 3553; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s8 3554; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0x70 3555; GFX7-HSA-NEXT: flat_store_dwordx4 v[19:20], v[0:3] 3556; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 3557; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s63 3558; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s48 3559; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s10 3560; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s47 3561; GFX7-HSA-NEXT: flat_store_dwordx4 v[9:10], v[0:3] 3562; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s64 3563; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 3564; GFX7-HSA-NEXT: s_add_u32 s6, s16, 0x60 3565; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s50 3566; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s11 3567; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s49 3568; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s7 3569; GFX7-HSA-NEXT: s_addc_u32 s7, s17, 0 3570; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s67 3571; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s56 3572; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s14 3573; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s55 3574; GFX7-HSA-NEXT: flat_store_dwordx4 v[27:28], v[16:19] 3575; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s61 3576; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s7 3577; GFX7-HSA-NEXT: flat_store_dwordx4 v[21:22], v[4:7] 3578; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s46 3579; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s62 3580; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s59 3581; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s44 3582; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s43 3583; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s9 3584; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s6 3585; GFX7-HSA-NEXT: s_add_u32 s6, s16, 0x50 3586; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s60 3587; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s45 3588; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s41 3589; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s42 3590; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s39 3591; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s38 3592; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s37 3593; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s8 3594; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s35 3595; GFX7-HSA-NEXT: flat_store_dwordx4 v[12:13], v[20:23] 3596; GFX7-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 3597; GFX7-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3598; GFX7-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 3599; GFX7-HSA-NEXT: s_addc_u32 s7, s17, 0 3600; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 3601; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s40 3602; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s33 3603; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 3604; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s31 3605; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 3606; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3607; GFX7-HSA-NEXT: s_nop 0 3608; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 3609; GFX7-HSA-NEXT: s_add_u32 s4, s16, 64 3610; GFX7-HSA-NEXT: s_addc_u32 s5, s17, 0 3611; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4 3612; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s36 3613; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s29 3614; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s28 3615; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5 3616; GFX7-HSA-NEXT: s_add_u32 s4, s16, 48 3617; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3618; GFX7-HSA-NEXT: s_addc_u32 s5, s17, 0 3619; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4 3620; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s34 3621; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s26 3622; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 3623; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s25 3624; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5 3625; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3626; GFX7-HSA-NEXT: s_nop 0 3627; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 3628; GFX7-HSA-NEXT: s_add_u32 s2, s16, 32 3629; GFX7-HSA-NEXT: s_addc_u32 s3, s17, 0 3630; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 3631; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 3632; GFX7-HSA-NEXT: s_add_u32 s2, s16, 16 3633; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s30 3634; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s23 3635; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s22 3636; GFX7-HSA-NEXT: s_addc_u32 s3, s17, 0 3637; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3638; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 3639; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s27 3640; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s21 3641; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s1 3642; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s20 3643; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 3644; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3645; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s16 3646; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s24 3647; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s19 3648; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 3649; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s18 3650; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s17 3651; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3652; GFX7-HSA-NEXT: s_endpgm 3653; 3654; GFX8-NOHSA-LABEL: constant_zextload_v64i8_to_v64i32: 3655; GFX8-NOHSA: ; %bb.0: 3656; GFX8-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24 3657; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 3658; GFX8-NOHSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3659; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 3660; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s0, 24 3661; GFX8-NOHSA-NEXT: s_bfe_u32 s19, s0, 0x80008 3662; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s1, 24 3663; GFX8-NOHSA-NEXT: s_bfe_u32 s21, s1, 0x80008 3664; GFX8-NOHSA-NEXT: s_lshr_b32 s22, s2, 24 3665; GFX8-NOHSA-NEXT: s_bfe_u32 s23, s2, 0x80008 3666; GFX8-NOHSA-NEXT: s_lshr_b32 s25, s3, 24 3667; GFX8-NOHSA-NEXT: s_bfe_u32 s26, s3, 0x80008 3668; GFX8-NOHSA-NEXT: s_lshr_b32 s28, s4, 24 3669; GFX8-NOHSA-NEXT: s_bfe_u32 s29, s4, 0x80008 3670; GFX8-NOHSA-NEXT: s_lshr_b32 s31, s5, 24 3671; GFX8-NOHSA-NEXT: s_bfe_u32 s33, s5, 0x80008 3672; GFX8-NOHSA-NEXT: s_lshr_b32 s35, s6, 24 3673; GFX8-NOHSA-NEXT: s_bfe_u32 s36, s6, 0x80008 3674; GFX8-NOHSA-NEXT: s_lshr_b32 s37, s7, 24 3675; GFX8-NOHSA-NEXT: s_bfe_u32 s38, s7, 0x80008 3676; GFX8-NOHSA-NEXT: s_lshr_b32 s39, s8, 24 3677; GFX8-NOHSA-NEXT: s_bfe_u32 s40, s8, 0x80008 3678; GFX8-NOHSA-NEXT: s_lshr_b32 s41, s9, 24 3679; GFX8-NOHSA-NEXT: s_bfe_u32 s42, s9, 0x80008 3680; GFX8-NOHSA-NEXT: s_lshr_b32 s43, s10, 24 3681; GFX8-NOHSA-NEXT: s_bfe_u32 s44, s10, 0x80008 3682; GFX8-NOHSA-NEXT: s_lshr_b32 s45, s11, 24 3683; GFX8-NOHSA-NEXT: s_bfe_u32 s46, s11, 0x80008 3684; GFX8-NOHSA-NEXT: s_lshr_b32 s47, s12, 24 3685; GFX8-NOHSA-NEXT: s_bfe_u32 s48, s12, 0x80008 3686; GFX8-NOHSA-NEXT: s_lshr_b32 s49, s13, 24 3687; GFX8-NOHSA-NEXT: s_bfe_u32 s50, s13, 0x80008 3688; GFX8-NOHSA-NEXT: s_lshr_b32 s51, s14, 24 3689; GFX8-NOHSA-NEXT: s_bfe_u32 s52, s14, 0x80008 3690; GFX8-NOHSA-NEXT: s_lshr_b32 s53, s15, 24 3691; GFX8-NOHSA-NEXT: s_bfe_u32 s54, s15, 0x80008 3692; GFX8-NOHSA-NEXT: s_and_b32 s24, s0, 0xff 3693; GFX8-NOHSA-NEXT: s_bfe_u32 s0, s0, 0x80010 3694; GFX8-NOHSA-NEXT: s_and_b32 s27, s1, 0xff 3695; GFX8-NOHSA-NEXT: s_bfe_u32 s1, s1, 0x80010 3696; GFX8-NOHSA-NEXT: s_and_b32 s30, s2, 0xff 3697; GFX8-NOHSA-NEXT: s_bfe_u32 s2, s2, 0x80010 3698; GFX8-NOHSA-NEXT: s_and_b32 s34, s3, 0xff 3699; GFX8-NOHSA-NEXT: s_bfe_u32 s3, s3, 0x80010 3700; GFX8-NOHSA-NEXT: s_and_b32 s55, s4, 0xff 3701; GFX8-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 3702; GFX8-NOHSA-NEXT: s_and_b32 s56, s5, 0xff 3703; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 3704; GFX8-NOHSA-NEXT: s_and_b32 s57, s6, 0xff 3705; GFX8-NOHSA-NEXT: s_bfe_u32 s58, s6, 0x80010 3706; GFX8-NOHSA-NEXT: s_and_b32 s59, s7, 0xff 3707; GFX8-NOHSA-NEXT: s_bfe_u32 s60, s7, 0x80010 3708; GFX8-NOHSA-NEXT: s_and_b32 s61, s8, 0xff 3709; GFX8-NOHSA-NEXT: s_bfe_u32 s8, s8, 0x80010 3710; GFX8-NOHSA-NEXT: s_and_b32 s62, s9, 0xff 3711; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s9, 0x80010 3712; GFX8-NOHSA-NEXT: s_and_b32 s63, s10, 0xff 3713; GFX8-NOHSA-NEXT: s_bfe_u32 s10, s10, 0x80010 3714; GFX8-NOHSA-NEXT: s_and_b32 s64, s11, 0xff 3715; GFX8-NOHSA-NEXT: s_bfe_u32 s11, s11, 0x80010 3716; GFX8-NOHSA-NEXT: s_and_b32 s65, s12, 0xff 3717; GFX8-NOHSA-NEXT: s_bfe_u32 s12, s12, 0x80010 3718; GFX8-NOHSA-NEXT: s_and_b32 s66, s13, 0xff 3719; GFX8-NOHSA-NEXT: s_bfe_u32 s13, s13, 0x80010 3720; GFX8-NOHSA-NEXT: s_and_b32 s67, s14, 0xff 3721; GFX8-NOHSA-NEXT: s_bfe_u32 s14, s14, 0x80010 3722; GFX8-NOHSA-NEXT: s_and_b32 s6, s15, 0xff 3723; GFX8-NOHSA-NEXT: s_bfe_u32 s7, s15, 0x80010 3724; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 3725; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xf0 3726; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s7 3727; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3728; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3729; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s54 3730; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s53 3731; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3732; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xe0 3733; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3734; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3735; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3736; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s67 3737; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s52 3738; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14 3739; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s51 3740; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3741; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xd0 3742; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3743; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3744; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3745; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s66 3746; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s50 3747; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s13 3748; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s49 3749; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3750; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xc0 3751; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3752; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3753; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3754; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s65 3755; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s48 3756; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 3757; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s47 3758; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3759; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xb0 3760; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3761; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3762; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3763; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s64 3764; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s46 3765; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s11 3766; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s45 3767; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3768; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xa0 3769; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3770; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3771; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3772; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s63 3773; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s44 3774; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s10 3775; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s43 3776; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3777; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x90 3778; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3779; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3780; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3781; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s62 3782; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s42 3783; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s9 3784; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s41 3785; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3786; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x80 3787; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3788; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3789; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3790; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s61 3791; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s40 3792; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8 3793; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s39 3794; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3795; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x70 3796; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3797; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3798; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3799; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s59 3800; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s38 3801; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s60 3802; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s37 3803; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3804; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x60 3805; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3806; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3807; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3808; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s57 3809; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s36 3810; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s58 3811; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s35 3812; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3813; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x50 3814; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3815; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 3816; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 3817; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s56 3818; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s33 3819; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s5 3820; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s31 3821; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 3822; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3823; GFX8-NOHSA-NEXT: s_nop 0 3824; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 3825; GFX8-NOHSA-NEXT: s_add_u32 s4, s16, 64 3826; GFX8-NOHSA-NEXT: s_addc_u32 s5, s17, 0 3827; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 3828; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s55 3829; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s29 3830; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s28 3831; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5 3832; GFX8-NOHSA-NEXT: s_add_u32 s4, s16, 48 3833; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3834; GFX8-NOHSA-NEXT: s_addc_u32 s5, s17, 0 3835; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 3836; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s34 3837; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s26 3838; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s3 3839; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s25 3840; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5 3841; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3842; GFX8-NOHSA-NEXT: s_nop 0 3843; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 3844; GFX8-NOHSA-NEXT: s_add_u32 s2, s16, 32 3845; GFX8-NOHSA-NEXT: s_addc_u32 s3, s17, 0 3846; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3847; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3848; GFX8-NOHSA-NEXT: s_add_u32 s2, s16, 16 3849; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s30 3850; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23 3851; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s22 3852; GFX8-NOHSA-NEXT: s_addc_u32 s3, s17, 0 3853; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3854; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 3855; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s27 3856; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s21 3857; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s1 3858; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s20 3859; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 3860; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3861; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s16 3862; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s24 3863; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s19 3864; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 3865; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s18 3866; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s17 3867; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3868; GFX8-NOHSA-NEXT: s_endpgm 3869; 3870; EG-LABEL: constant_zextload_v64i8_to_v64i32: 3871; EG: ; %bb.0: 3872; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 3873; EG-NEXT: TEX 1 @22 3874; EG-NEXT: ALU 59, @31, KC0[CB0:0-32], KC1[] 3875; EG-NEXT: TEX 1 @26 3876; EG-NEXT: ALU 88, @91, KC0[CB0:0-32], KC1[] 3877; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0 3878; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T49.X, 0 3879; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0 3880; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T32.X, 0 3881; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T44.X, 0 3882; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T42.X, 0 3883; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0 3884; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T33.X, 0 3885; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T37.X, 0 3886; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T35.X, 0 3887; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T31.X, 0 3888; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0 3889; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T29.X, 0 3890; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T27.X, 0 3891; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0 3892; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 1 3893; EG-NEXT: CF_END 3894; EG-NEXT: Fetch clause starting at 22: 3895; EG-NEXT: VTX_READ_128 T22.XYZW, T21.X, 16, #1 3896; EG-NEXT: VTX_READ_128 T23.XYZW, T21.X, 0, #1 3897; EG-NEXT: Fetch clause starting at 26: 3898; EG-NEXT: VTX_READ_128 T32.XYZW, T21.X, 48, #1 3899; EG-NEXT: VTX_READ_128 T33.XYZW, T21.X, 32, #1 3900; EG-NEXT: ALU clause starting at 30: 3901; EG-NEXT: MOV * T21.X, KC0[2].Z, 3902; EG-NEXT: ALU clause starting at 31: 3903; EG-NEXT: MOV * T0.W, literal.x, 3904; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3905; EG-NEXT: BFE_UINT * T19.Z, T23.X, literal.x, PV.W, 3906; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3907; EG-NEXT: BFE_UINT T19.Y, T23.X, literal.x, T0.W, 3908; EG-NEXT: BFE_UINT T20.Z, T23.Y, literal.y, T0.W, 3909; EG-NEXT: LSHR * T19.W, T23.X, literal.z, 3910; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 3911; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 3912; EG-NEXT: AND_INT T19.X, T23.X, literal.x, 3913; EG-NEXT: BFE_UINT T20.Y, T23.Y, literal.y, T0.W, 3914; EG-NEXT: LSHR * T23.X, KC0[2].Y, literal.z, 3915; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 3916; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3917; EG-NEXT: BFE_UINT T24.Z, T23.Z, literal.x, T0.W, 3918; EG-NEXT: LSHR * T20.W, T23.Y, literal.y, 3919; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 3920; EG-NEXT: AND_INT T20.X, T23.Y, literal.x, 3921; EG-NEXT: BFE_UINT T24.Y, T23.Z, literal.y, T0.W, 3922; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3923; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 3924; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3925; EG-NEXT: LSHR T25.X, PV.W, literal.x, 3926; EG-NEXT: BFE_UINT T26.Z, T23.W, literal.y, T0.W, 3927; EG-NEXT: LSHR T24.W, T23.Z, literal.z, 3928; EG-NEXT: AND_INT * T24.X, T23.Z, literal.w, 3929; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3930; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 3931; EG-NEXT: BFE_UINT T26.Y, T23.W, literal.x, T0.W, 3932; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3933; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44) 3934; EG-NEXT: LSHR T27.X, PV.W, literal.x, 3935; EG-NEXT: BFE_UINT T28.Z, T22.X, literal.y, T0.W, BS:VEC_021/SCL_122 3936; EG-NEXT: LSHR T26.W, T23.W, literal.z, 3937; EG-NEXT: AND_INT * T26.X, T23.W, literal.w, 3938; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3939; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 3940; EG-NEXT: BFE_UINT T28.Y, T22.X, literal.x, T0.W, 3941; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3942; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44) 3943; EG-NEXT: LSHR T29.X, PV.W, literal.x, 3944; EG-NEXT: BFE_UINT T30.Z, T22.Y, literal.y, T0.W, 3945; EG-NEXT: LSHR T28.W, T22.X, literal.z, 3946; EG-NEXT: AND_INT * T28.X, T22.X, literal.w, 3947; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3948; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 3949; EG-NEXT: BFE_UINT T30.Y, T22.Y, literal.x, T0.W, 3950; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3951; EG-NEXT: 8(1.121039e-44), 64(8.968310e-44) 3952; EG-NEXT: LSHR T22.X, PV.W, literal.x, 3953; EG-NEXT: LSHR T30.W, T22.Y, literal.y, 3954; EG-NEXT: AND_INT * T30.X, T22.Y, literal.z, 3955; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 3956; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 3957; EG-NEXT: BFE_UINT T21.Z, T22.Z, literal.x, T0.W, 3958; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3959; EG-NEXT: 16(2.242078e-44), 80(1.121039e-43) 3960; EG-NEXT: LSHR T31.X, PV.W, literal.x, 3961; EG-NEXT: BFE_UINT * T21.Y, T22.Z, literal.y, T0.W, 3962; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 3963; EG-NEXT: ALU clause starting at 91: 3964; EG-NEXT: BFE_UINT T34.Z, T22.W, literal.x, T0.W, 3965; EG-NEXT: LSHR * T21.W, T22.Z, literal.y, 3966; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 3967; EG-NEXT: AND_INT T21.X, T22.Z, literal.x, 3968; EG-NEXT: BFE_UINT T34.Y, T22.W, literal.y, T0.W, 3969; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3970; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44) 3971; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3972; EG-NEXT: LSHR T35.X, PV.W, literal.x, 3973; EG-NEXT: BFE_UINT T36.Z, T33.X, literal.y, T0.W, BS:VEC_021/SCL_122 3974; EG-NEXT: LSHR T34.W, T22.W, literal.z, 3975; EG-NEXT: AND_INT * T34.X, T22.W, literal.w, 3976; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3977; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 3978; EG-NEXT: BFE_UINT T36.Y, T33.X, literal.x, T0.W, 3979; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3980; EG-NEXT: 8(1.121039e-44), 112(1.569454e-43) 3981; EG-NEXT: LSHR T37.X, PV.W, literal.x, 3982; EG-NEXT: BFE_UINT T38.Z, T33.Y, literal.y, T0.W, 3983; EG-NEXT: LSHR T36.W, T33.X, literal.z, 3984; EG-NEXT: AND_INT * T36.X, T33.X, literal.w, 3985; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3986; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 3987; EG-NEXT: BFE_UINT T38.Y, T33.Y, literal.x, T0.W, 3988; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3989; EG-NEXT: 8(1.121039e-44), 128(1.793662e-43) 3990; EG-NEXT: LSHR T33.X, PV.W, literal.x, 3991; EG-NEXT: BFE_UINT T39.Z, T33.Z, literal.y, T0.W, 3992; EG-NEXT: LSHR T38.W, T33.Y, literal.z, 3993; EG-NEXT: AND_INT * T38.X, T33.Y, literal.w, 3994; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3995; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 3996; EG-NEXT: BFE_UINT T39.Y, T33.Z, literal.x, T0.W, 3997; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3998; EG-NEXT: 8(1.121039e-44), 144(2.017870e-43) 3999; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4000; EG-NEXT: BFE_UINT T41.Z, T33.W, literal.y, T0.W, 4001; EG-NEXT: LSHR T39.W, T33.Z, literal.z, 4002; EG-NEXT: AND_INT * T39.X, T33.Z, literal.w, 4003; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4004; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 4005; EG-NEXT: BFE_UINT T41.Y, T33.W, literal.x, T0.W, 4006; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4007; EG-NEXT: 8(1.121039e-44), 160(2.242078e-43) 4008; EG-NEXT: LSHR T42.X, PV.W, literal.x, 4009; EG-NEXT: BFE_UINT T43.Z, T32.X, literal.y, T0.W, BS:VEC_021/SCL_122 4010; EG-NEXT: LSHR T41.W, T33.W, literal.z, 4011; EG-NEXT: AND_INT * T41.X, T33.W, literal.w, 4012; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4013; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 4014; EG-NEXT: BFE_UINT T43.Y, T32.X, literal.x, T0.W, 4015; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4016; EG-NEXT: 8(1.121039e-44), 176(2.466285e-43) 4017; EG-NEXT: LSHR T44.X, PV.W, literal.x, 4018; EG-NEXT: BFE_UINT T45.Z, T32.Y, literal.y, T0.W, 4019; EG-NEXT: LSHR T43.W, T32.X, literal.z, 4020; EG-NEXT: AND_INT * T43.X, T32.X, literal.w, 4021; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4022; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 4023; EG-NEXT: BFE_UINT T45.Y, T32.Y, literal.x, T0.W, 4024; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4025; EG-NEXT: 8(1.121039e-44), 192(2.690493e-43) 4026; EG-NEXT: LSHR T32.X, PV.W, literal.x, 4027; EG-NEXT: BFE_UINT T46.Z, T32.Z, literal.y, T0.W, 4028; EG-NEXT: LSHR T45.W, T32.Y, literal.z, 4029; EG-NEXT: AND_INT * T45.X, T32.Y, literal.w, 4030; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4031; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 4032; EG-NEXT: BFE_UINT T46.Y, T32.Z, literal.x, T0.W, 4033; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4034; EG-NEXT: 8(1.121039e-44), 208(2.914701e-43) 4035; EG-NEXT: LSHR T47.X, PV.W, literal.x, 4036; EG-NEXT: BFE_UINT T48.Z, T32.W, literal.y, T0.W, 4037; EG-NEXT: LSHR T46.W, T32.Z, literal.z, 4038; EG-NEXT: AND_INT * T46.X, T32.Z, literal.w, 4039; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4040; EG-NEXT: 24(3.363116e-44), 255(3.573311e-43) 4041; EG-NEXT: BFE_UINT T48.Y, T32.W, literal.x, T0.W, 4042; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4043; EG-NEXT: 8(1.121039e-44), 224(3.138909e-43) 4044; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4045; EG-NEXT: LSHR T48.W, T32.W, literal.y, 4046; EG-NEXT: AND_INT * T48.X, T32.W, literal.z, 4047; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 4048; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 4049; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4050; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 4051; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 4052; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4053; 4054; GFX12-LABEL: constant_zextload_v64i8_to_v64i32: 4055; GFX12: ; %bb.0: 4056; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24 4057; GFX12-NEXT: s_wait_kmcnt 0x0 4058; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 4059; GFX12-NEXT: s_wait_kmcnt 0x0 4060; GFX12-NEXT: s_lshr_b32 s49, s15, 24 4061; GFX12-NEXT: s_bfe_u32 s50, s15, 0x80008 4062; GFX12-NEXT: s_and_b32 s66, s15, 0xff 4063; GFX12-NEXT: s_bfe_u32 s15, s15, 0x80010 4064; GFX12-NEXT: s_lshr_b32 s47, s14, 24 4065; GFX12-NEXT: s_bfe_u32 s48, s14, 0x80008 4066; GFX12-NEXT: s_and_b32 s65, s14, 0xff 4067; GFX12-NEXT: s_bfe_u32 s14, s14, 0x80010 4068; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s50 4069; GFX12-NEXT: s_lshr_b32 s45, s13, 24 4070; GFX12-NEXT: s_bfe_u32 s46, s13, 0x80008 4071; GFX12-NEXT: s_and_b32 s64, s13, 0xff 4072; GFX12-NEXT: s_bfe_u32 s13, s13, 0x80010 4073; GFX12-NEXT: v_dual_mov_b32 v0, s66 :: v_dual_mov_b32 v3, s49 4074; GFX12-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s48 4075; GFX12-NEXT: s_lshr_b32 s43, s12, 24 4076; GFX12-NEXT: s_bfe_u32 s44, s12, 0x80008 4077; GFX12-NEXT: s_and_b32 s63, s12, 0xff 4078; GFX12-NEXT: s_bfe_u32 s12, s12, 0x80010 4079; GFX12-NEXT: v_dual_mov_b32 v4, s65 :: v_dual_mov_b32 v7, s47 4080; GFX12-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v9, s46 4081; GFX12-NEXT: v_dual_mov_b32 v8, s64 :: v_dual_mov_b32 v11, s45 4082; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v13, s44 4083; GFX12-NEXT: s_lshr_b32 s41, s11, 24 4084; GFX12-NEXT: s_bfe_u32 s42, s11, 0x80008 4085; GFX12-NEXT: s_and_b32 s62, s11, 0xff 4086; GFX12-NEXT: v_dual_mov_b32 v12, s63 :: v_dual_mov_b32 v15, s43 4087; GFX12-NEXT: v_mov_b32_e32 v14, s12 4088; GFX12-NEXT: s_bfe_u32 s11, s11, 0x80010 4089; GFX12-NEXT: s_lshr_b32 s39, s10, 24 4090; GFX12-NEXT: s_bfe_u32 s40, s10, 0x80008 4091; GFX12-NEXT: s_and_b32 s61, s10, 0xff 4092; GFX12-NEXT: s_bfe_u32 s10, s10, 0x80010 4093; GFX12-NEXT: s_lshr_b32 s37, s9, 24 4094; GFX12-NEXT: s_bfe_u32 s38, s9, 0x80008 4095; GFX12-NEXT: s_and_b32 s60, s9, 0xff 4096; GFX12-NEXT: s_bfe_u32 s9, s9, 0x80010 4097; GFX12-NEXT: s_clause 0x3 4098; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:240 4099; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:224 4100; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:208 4101; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:192 4102; GFX12-NEXT: v_dual_mov_b32 v1, s42 :: v_dual_mov_b32 v0, s62 4103; GFX12-NEXT: v_dual_mov_b32 v3, s41 :: v_dual_mov_b32 v2, s11 4104; GFX12-NEXT: v_mov_b32_e32 v5, s40 4105; GFX12-NEXT: s_lshr_b32 s35, s8, 24 4106; GFX12-NEXT: s_bfe_u32 s36, s8, 0x80008 4107; GFX12-NEXT: s_and_b32 s59, s8, 0xff 4108; GFX12-NEXT: s_bfe_u32 s8, s8, 0x80010 4109; GFX12-NEXT: v_dual_mov_b32 v4, s61 :: v_dual_mov_b32 v7, s39 4110; GFX12-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v9, s38 4111; GFX12-NEXT: s_lshr_b32 s33, s7, 24 4112; GFX12-NEXT: s_bfe_u32 s34, s7, 0x80008 4113; GFX12-NEXT: s_and_b32 s58, s7, 0xff 4114; GFX12-NEXT: s_bfe_u32 s7, s7, 0x80010 4115; GFX12-NEXT: v_dual_mov_b32 v8, s60 :: v_dual_mov_b32 v11, s37 4116; GFX12-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v13, s36 4117; GFX12-NEXT: s_lshr_b32 s28, s5, 24 4118; GFX12-NEXT: s_bfe_u32 s29, s5, 0x80008 4119; GFX12-NEXT: s_lshr_b32 s30, s6, 24 4120; GFX12-NEXT: s_bfe_u32 s31, s6, 0x80008 4121; GFX12-NEXT: s_and_b32 s56, s5, 0xff 4122; GFX12-NEXT: s_bfe_u32 s5, s5, 0x80010 4123; GFX12-NEXT: s_and_b32 s57, s6, 0xff 4124; GFX12-NEXT: s_bfe_u32 s6, s6, 0x80010 4125; GFX12-NEXT: v_dual_mov_b32 v12, s59 :: v_dual_mov_b32 v15, s35 4126; GFX12-NEXT: v_dual_mov_b32 v14, s8 :: v_dual_mov_b32 v17, s34 4127; GFX12-NEXT: s_lshr_b32 s26, s4, 24 4128; GFX12-NEXT: s_bfe_u32 s27, s4, 0x80008 4129; GFX12-NEXT: s_and_b32 s55, s4, 0xff 4130; GFX12-NEXT: s_bfe_u32 s4, s4, 0x80010 4131; GFX12-NEXT: v_dual_mov_b32 v16, s58 :: v_dual_mov_b32 v19, s33 4132; GFX12-NEXT: v_dual_mov_b32 v18, s7 :: v_dual_mov_b32 v21, s31 4133; GFX12-NEXT: s_lshr_b32 s24, s3, 24 4134; GFX12-NEXT: s_bfe_u32 s25, s3, 0x80008 4135; GFX12-NEXT: s_and_b32 s54, s3, 0xff 4136; GFX12-NEXT: s_bfe_u32 s3, s3, 0x80010 4137; GFX12-NEXT: v_dual_mov_b32 v20, s57 :: v_dual_mov_b32 v23, s30 4138; GFX12-NEXT: v_mov_b32_e32 v22, s6 4139; GFX12-NEXT: s_clause 0x5 4140; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:176 4141; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:160 4142; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:144 4143; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:128 4144; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:112 4145; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] offset:96 4146; GFX12-NEXT: v_dual_mov_b32 v1, s29 :: v_dual_mov_b32 v0, s56 4147; GFX12-NEXT: v_dual_mov_b32 v3, s28 :: v_dual_mov_b32 v2, s5 4148; GFX12-NEXT: v_mov_b32_e32 v5, s27 4149; GFX12-NEXT: s_lshr_b32 s22, s2, 24 4150; GFX12-NEXT: s_bfe_u32 s23, s2, 0x80008 4151; GFX12-NEXT: s_and_b32 s53, s2, 0xff 4152; GFX12-NEXT: s_bfe_u32 s2, s2, 0x80010 4153; GFX12-NEXT: v_dual_mov_b32 v4, s55 :: v_dual_mov_b32 v7, s26 4154; GFX12-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v9, s25 4155; GFX12-NEXT: s_lshr_b32 s20, s1, 24 4156; GFX12-NEXT: s_bfe_u32 s21, s1, 0x80008 4157; GFX12-NEXT: s_and_b32 s52, s1, 0xff 4158; GFX12-NEXT: s_bfe_u32 s1, s1, 0x80010 4159; GFX12-NEXT: v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s24 4160; GFX12-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s23 4161; GFX12-NEXT: s_lshr_b32 s18, s0, 24 4162; GFX12-NEXT: s_bfe_u32 s19, s0, 0x80008 4163; GFX12-NEXT: s_and_b32 s51, s0, 0xff 4164; GFX12-NEXT: s_bfe_u32 s0, s0, 0x80010 4165; GFX12-NEXT: v_dual_mov_b32 v12, s53 :: v_dual_mov_b32 v15, s22 4166; GFX12-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v17, s21 4167; GFX12-NEXT: v_dual_mov_b32 v16, s52 :: v_dual_mov_b32 v19, s20 4168; GFX12-NEXT: v_dual_mov_b32 v18, s1 :: v_dual_mov_b32 v21, s19 4169; GFX12-NEXT: s_wait_alu 0xfffe 4170; GFX12-NEXT: v_dual_mov_b32 v20, s51 :: v_dual_mov_b32 v23, s18 4171; GFX12-NEXT: v_mov_b32_e32 v22, s0 4172; GFX12-NEXT: s_clause 0x5 4173; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:80 4174; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:64 4175; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:48 4176; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:32 4177; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:16 4178; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] 4179; GFX12-NEXT: s_endpgm 4180 %load = load <64 x i8>, ptr addrspace(4) %in 4181 %ext = zext <64 x i8> %load to <64 x i32> 4182 store <64 x i32> %ext, ptr addrspace(1) %out 4183 ret void 4184} 4185 4186define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 4187; GFX6-NOHSA-LABEL: constant_sextload_v64i8_to_v64i32: 4188; GFX6-NOHSA: ; %bb.0: 4189; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9 4190; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 4191; GFX6-NOHSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 4192; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 4193; GFX6-NOHSA-NEXT: s_ashr_i32 s18, s0, 24 4194; GFX6-NOHSA-NEXT: s_bfe_i32 s19, s0, 0x80010 4195; GFX6-NOHSA-NEXT: s_bfe_i32 s20, s0, 0x80008 4196; GFX6-NOHSA-NEXT: s_sext_i32_i8 s21, s0 4197; GFX6-NOHSA-NEXT: s_ashr_i32 s22, s1, 24 4198; GFX6-NOHSA-NEXT: s_bfe_i32 s23, s1, 0x80010 4199; GFX6-NOHSA-NEXT: s_bfe_i32 s24, s1, 0x80008 4200; GFX6-NOHSA-NEXT: s_sext_i32_i8 s25, s1 4201; GFX6-NOHSA-NEXT: s_ashr_i32 s26, s2, 24 4202; GFX6-NOHSA-NEXT: s_bfe_i32 s27, s2, 0x80010 4203; GFX6-NOHSA-NEXT: s_bfe_i32 s28, s2, 0x80008 4204; GFX6-NOHSA-NEXT: s_sext_i32_i8 s29, s2 4205; GFX6-NOHSA-NEXT: s_ashr_i32 s30, s3, 24 4206; GFX6-NOHSA-NEXT: s_bfe_i32 s31, s3, 0x80010 4207; GFX6-NOHSA-NEXT: s_bfe_i32 s33, s3, 0x80008 4208; GFX6-NOHSA-NEXT: s_sext_i32_i8 s34, s3 4209; GFX6-NOHSA-NEXT: s_ashr_i32 s35, s4, 24 4210; GFX6-NOHSA-NEXT: s_bfe_i32 s36, s4, 0x80010 4211; GFX6-NOHSA-NEXT: s_bfe_i32 s37, s4, 0x80008 4212; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 4213; GFX6-NOHSA-NEXT: s_ashr_i32 s38, s5, 24 4214; GFX6-NOHSA-NEXT: s_bfe_i32 s39, s5, 0x80010 4215; GFX6-NOHSA-NEXT: s_bfe_i32 s40, s5, 0x80008 4216; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 4217; GFX6-NOHSA-NEXT: s_ashr_i32 s41, s6, 24 4218; GFX6-NOHSA-NEXT: s_bfe_i32 s42, s6, 0x80010 4219; GFX6-NOHSA-NEXT: s_bfe_i32 s43, s6, 0x80008 4220; GFX6-NOHSA-NEXT: s_sext_i32_i8 s6, s6 4221; GFX6-NOHSA-NEXT: s_ashr_i32 s44, s7, 24 4222; GFX6-NOHSA-NEXT: s_bfe_i32 s45, s7, 0x80010 4223; GFX6-NOHSA-NEXT: s_bfe_i32 s46, s7, 0x80008 4224; GFX6-NOHSA-NEXT: s_sext_i32_i8 s7, s7 4225; GFX6-NOHSA-NEXT: s_ashr_i32 s47, s8, 24 4226; GFX6-NOHSA-NEXT: s_bfe_i32 s48, s8, 0x80010 4227; GFX6-NOHSA-NEXT: s_bfe_i32 s49, s8, 0x80008 4228; GFX6-NOHSA-NEXT: s_sext_i32_i8 s8, s8 4229; GFX6-NOHSA-NEXT: s_ashr_i32 s50, s9, 24 4230; GFX6-NOHSA-NEXT: s_bfe_i32 s51, s9, 0x80010 4231; GFX6-NOHSA-NEXT: s_bfe_i32 s52, s9, 0x80008 4232; GFX6-NOHSA-NEXT: s_sext_i32_i8 s9, s9 4233; GFX6-NOHSA-NEXT: s_ashr_i32 s53, s10, 24 4234; GFX6-NOHSA-NEXT: s_bfe_i32 s54, s10, 0x80010 4235; GFX6-NOHSA-NEXT: s_bfe_i32 s55, s10, 0x80008 4236; GFX6-NOHSA-NEXT: s_sext_i32_i8 s10, s10 4237; GFX6-NOHSA-NEXT: s_bfe_i32 s56, s11, 0x80010 4238; GFX6-NOHSA-NEXT: s_bfe_i32 s57, s11, 0x80008 4239; GFX6-NOHSA-NEXT: s_sext_i32_i8 s58, s11 4240; GFX6-NOHSA-NEXT: s_ashr_i32 s59, s12, 24 4241; GFX6-NOHSA-NEXT: s_bfe_i32 s60, s12, 0x80010 4242; GFX6-NOHSA-NEXT: s_bfe_i32 s61, s12, 0x80008 4243; GFX6-NOHSA-NEXT: s_sext_i32_i8 s12, s12 4244; GFX6-NOHSA-NEXT: s_ashr_i32 s62, s13, 24 4245; GFX6-NOHSA-NEXT: s_bfe_i32 s63, s13, 0x80010 4246; GFX6-NOHSA-NEXT: s_bfe_i32 s64, s13, 0x80008 4247; GFX6-NOHSA-NEXT: s_sext_i32_i8 s13, s13 4248; GFX6-NOHSA-NEXT: s_ashr_i32 s65, s14, 24 4249; GFX6-NOHSA-NEXT: s_bfe_i32 s66, s14, 0x80010 4250; GFX6-NOHSA-NEXT: s_bfe_i32 s67, s14, 0x80008 4251; GFX6-NOHSA-NEXT: s_sext_i32_i8 s14, s14 4252; GFX6-NOHSA-NEXT: s_ashr_i32 s68, s15, 24 4253; GFX6-NOHSA-NEXT: s_bfe_i32 s69, s15, 0x80010 4254; GFX6-NOHSA-NEXT: s_bfe_i32 s70, s15, 0x80008 4255; GFX6-NOHSA-NEXT: s_sext_i32_i8 s15, s15 4256; GFX6-NOHSA-NEXT: s_ashr_i32 s11, s11, 24 4257; GFX6-NOHSA-NEXT: s_mov_b32 s0, s16 4258; GFX6-NOHSA-NEXT: s_mov_b32 s1, s17 4259; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 4260; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 4261; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s15 4262; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s70 4263; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s69 4264; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s68 4265; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s14 4266; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s67 4267; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s66 4268; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s65 4269; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s13 4270; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s64 4271; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s63 4272; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s62 4273; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s12 4274; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s61 4275; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s60 4276; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s59 4277; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s58 4278; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s57 4279; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s56 4280; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 4281; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4282; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s10 4283; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s11 4284; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s55 4285; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s54 4286; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s53 4287; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 4288; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 4289; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 4290; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 4291; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 4292; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4293; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s9 4294; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s52 4295; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s51 4296; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s50 4297; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 4298; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4299; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s8 4300; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s49 4301; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s48 4302; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s47 4303; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 4304; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4305; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 4306; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s46 4307; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s45 4308; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s44 4309; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 4310; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4311; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 4312; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s43 4313; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s42 4314; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s41 4315; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 4316; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4317; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 4318; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s40 4319; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s39 4320; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s38 4321; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 4322; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4323; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 4324; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s37 4325; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s36 4326; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s35 4327; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 4328; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4329; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s34 4330; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s33 4331; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s31 4332; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s30 4333; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 4334; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4335; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s29 4336; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s28 4337; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s27 4338; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s26 4339; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 4340; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4341; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s25 4342; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s24 4343; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s23 4344; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s22 4345; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4346; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 4347; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s21 4348; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s20 4349; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s19 4350; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s18 4351; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4352; GFX6-NOHSA-NEXT: s_endpgm 4353; 4354; GFX7-HSA-LABEL: constant_sextload_v64i8_to_v64i32: 4355; GFX7-HSA: ; %bb.0: 4356; GFX7-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 4357; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 4358; GFX7-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 4359; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 4360; GFX7-HSA-NEXT: s_ashr_i32 s18, s0, 24 4361; GFX7-HSA-NEXT: s_bfe_i32 s19, s0, 0x80010 4362; GFX7-HSA-NEXT: s_bfe_i32 s20, s0, 0x80008 4363; GFX7-HSA-NEXT: s_ashr_i32 s21, s1, 24 4364; GFX7-HSA-NEXT: s_bfe_i32 s22, s1, 0x80010 4365; GFX7-HSA-NEXT: s_bfe_i32 s23, s1, 0x80008 4366; GFX7-HSA-NEXT: s_ashr_i32 s24, s2, 24 4367; GFX7-HSA-NEXT: s_bfe_i32 s25, s2, 0x80010 4368; GFX7-HSA-NEXT: s_bfe_i32 s26, s2, 0x80008 4369; GFX7-HSA-NEXT: s_ashr_i32 s27, s3, 24 4370; GFX7-HSA-NEXT: s_bfe_i32 s28, s3, 0x80010 4371; GFX7-HSA-NEXT: s_bfe_i32 s29, s3, 0x80008 4372; GFX7-HSA-NEXT: s_ashr_i32 s30, s4, 24 4373; GFX7-HSA-NEXT: s_bfe_i32 s31, s4, 0x80010 4374; GFX7-HSA-NEXT: s_bfe_i32 s33, s4, 0x80008 4375; GFX7-HSA-NEXT: s_ashr_i32 s34, s5, 24 4376; GFX7-HSA-NEXT: s_bfe_i32 s35, s5, 0x80010 4377; GFX7-HSA-NEXT: s_bfe_i32 s36, s5, 0x80008 4378; GFX7-HSA-NEXT: s_ashr_i32 s37, s6, 24 4379; GFX7-HSA-NEXT: s_bfe_i32 s38, s6, 0x80010 4380; GFX7-HSA-NEXT: s_bfe_i32 s39, s6, 0x80008 4381; GFX7-HSA-NEXT: s_sext_i32_i8 s40, s6 4382; GFX7-HSA-NEXT: s_ashr_i32 s6, s7, 24 4383; GFX7-HSA-NEXT: s_bfe_i32 s41, s7, 0x80010 4384; GFX7-HSA-NEXT: s_bfe_i32 s42, s7, 0x80008 4385; GFX7-HSA-NEXT: s_ashr_i32 s43, s8, 24 4386; GFX7-HSA-NEXT: s_bfe_i32 s44, s8, 0x80010 4387; GFX7-HSA-NEXT: s_bfe_i32 s45, s8, 0x80008 4388; GFX7-HSA-NEXT: s_ashr_i32 s47, s9, 24 4389; GFX7-HSA-NEXT: s_bfe_i32 s48, s9, 0x80010 4390; GFX7-HSA-NEXT: s_bfe_i32 s49, s9, 0x80008 4391; GFX7-HSA-NEXT: s_ashr_i32 s51, s10, 24 4392; GFX7-HSA-NEXT: s_bfe_i32 s52, s10, 0x80010 4393; GFX7-HSA-NEXT: s_bfe_i32 s53, s10, 0x80008 4394; GFX7-HSA-NEXT: s_ashr_i32 s54, s11, 24 4395; GFX7-HSA-NEXT: s_bfe_i32 s55, s11, 0x80010 4396; GFX7-HSA-NEXT: s_bfe_i32 s56, s11, 0x80008 4397; GFX7-HSA-NEXT: s_ashr_i32 s57, s12, 24 4398; GFX7-HSA-NEXT: s_bfe_i32 s58, s12, 0x80010 4399; GFX7-HSA-NEXT: s_bfe_i32 s59, s12, 0x80008 4400; GFX7-HSA-NEXT: s_ashr_i32 s60, s13, 24 4401; GFX7-HSA-NEXT: s_bfe_i32 s61, s13, 0x80010 4402; GFX7-HSA-NEXT: s_bfe_i32 s62, s13, 0x80008 4403; GFX7-HSA-NEXT: s_ashr_i32 s63, s14, 24 4404; GFX7-HSA-NEXT: s_bfe_i32 s64, s14, 0x80010 4405; GFX7-HSA-NEXT: s_bfe_i32 s65, s14, 0x80008 4406; GFX7-HSA-NEXT: s_ashr_i32 s66, s15, 24 4407; GFX7-HSA-NEXT: s_bfe_i32 s67, s15, 0x80010 4408; GFX7-HSA-NEXT: s_bfe_i32 s68, s15, 0x80008 4409; GFX7-HSA-NEXT: s_sext_i32_i8 s46, s8 4410; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xf0 4411; GFX7-HSA-NEXT: s_sext_i32_i8 s50, s9 4412; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4413; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s9 4414; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s8 4415; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xe0 4416; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4417; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s9 4418; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s8 4419; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xd0 4420; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4421; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s9 4422; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s8 4423; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xc0 4424; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4425; GFX7-HSA-NEXT: v_mov_b32_e32 v26, s9 4426; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s8 4427; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xb0 4428; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4429; GFX7-HSA-NEXT: v_mov_b32_e32 v28, s9 4430; GFX7-HSA-NEXT: s_sext_i32_i8 s13, s13 4431; GFX7-HSA-NEXT: v_mov_b32_e32 v27, s8 4432; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0xa0 4433; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s13 4434; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s62 4435; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s61 4436; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s60 4437; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4438; GFX7-HSA-NEXT: flat_store_dwordx4 v[23:24], v[8:11] 4439; GFX7-HSA-NEXT: s_sext_i32_i8 s12, s12 4440; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s9 4441; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s8 4442; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0x90 4443; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s12 4444; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s59 4445; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s58 4446; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s57 4447; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4448; GFX7-HSA-NEXT: flat_store_dwordx4 v[25:26], v[12:15] 4449; GFX7-HSA-NEXT: s_sext_i32_i8 s15, s15 4450; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s9 4451; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s8 4452; GFX7-HSA-NEXT: s_add_u32 s8, s16, 0x80 4453; GFX7-HSA-NEXT: s_addc_u32 s9, s17, 0 4454; GFX7-HSA-NEXT: s_sext_i32_i8 s7, s7 4455; GFX7-HSA-NEXT: s_sext_i32_i8 s11, s11 4456; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s15 4457; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s68 4458; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s67 4459; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s66 4460; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s6 4461; GFX7-HSA-NEXT: s_add_u32 s6, s16, 0x70 4462; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s11 4463; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s56 4464; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s55 4465; GFX7-HSA-NEXT: flat_store_dwordx4 v[19:20], v[0:3] 4466; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s54 4467; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s7 4468; GFX7-HSA-NEXT: s_addc_u32 s7, s17, 0 4469; GFX7-HSA-NEXT: flat_store_dwordx4 v[27:28], v[16:19] 4470; GFX7-HSA-NEXT: s_sext_i32_i8 s14, s14 4471; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s7 4472; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s6 4473; GFX7-HSA-NEXT: s_add_u32 s6, s16, 0x60 4474; GFX7-HSA-NEXT: s_addc_u32 s7, s17, 0 4475; GFX7-HSA-NEXT: s_sext_i32_i8 s10, s10 4476; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s14 4477; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s65 4478; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s64 4479; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s63 4480; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s7 4481; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s10 4482; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s50 4483; GFX7-HSA-NEXT: flat_store_dwordx4 v[21:22], v[4:7] 4484; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s53 4485; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s52 4486; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s51 4487; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s49 4488; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s48 4489; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s46 4490; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s47 4491; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s45 4492; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s9 4493; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s6 4494; GFX7-HSA-NEXT: s_add_u32 s6, s16, 0x50 4495; GFX7-HSA-NEXT: s_sext_i32_i8 s5, s5 4496; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s8 4497; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s44 4498; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s43 4499; GFX7-HSA-NEXT: flat_store_dwordx4 v[9:10], v[0:3] 4500; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s42 4501; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s40 4502; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s41 4503; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s39 4504; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s38 4505; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s37 4506; GFX7-HSA-NEXT: flat_store_dwordx4 v[12:13], v[20:23] 4507; GFX7-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 4508; GFX7-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 4509; GFX7-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 4510; GFX7-HSA-NEXT: s_addc_u32 s7, s17, 0 4511; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 4512; GFX7-HSA-NEXT: s_sext_i32_i8 s4, s4 4513; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 4514; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s36 4515; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s35 4516; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s34 4517; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 4518; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4519; GFX7-HSA-NEXT: s_sext_i32_i8 s3, s3 4520; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 4521; GFX7-HSA-NEXT: s_add_u32 s4, s16, 64 4522; GFX7-HSA-NEXT: s_addc_u32 s5, s17, 0 4523; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4 4524; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s33 4525; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s31 4526; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s30 4527; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5 4528; GFX7-HSA-NEXT: s_add_u32 s4, s16, 48 4529; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4530; GFX7-HSA-NEXT: s_addc_u32 s5, s17, 0 4531; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4 4532; GFX7-HSA-NEXT: s_sext_i32_i8 s2, s2 4533; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 4534; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s29 4535; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s28 4536; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s27 4537; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5 4538; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4539; GFX7-HSA-NEXT: s_sext_i32_i8 s1, s1 4540; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 4541; GFX7-HSA-NEXT: s_add_u32 s2, s16, 32 4542; GFX7-HSA-NEXT: s_addc_u32 s3, s17, 0 4543; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 4544; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 4545; GFX7-HSA-NEXT: s_add_u32 s2, s16, 16 4546; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s26 4547; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s25 4548; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s24 4549; GFX7-HSA-NEXT: s_addc_u32 s3, s17, 0 4550; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4551; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 4552; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s1 4553; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s23 4554; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s22 4555; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s21 4556; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 4557; GFX7-HSA-NEXT: s_sext_i32_i8 s0, s0 4558; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4559; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s16 4560; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 4561; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s20 4562; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s19 4563; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s18 4564; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s17 4565; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4566; GFX7-HSA-NEXT: s_endpgm 4567; 4568; GFX8-NOHSA-LABEL: constant_sextload_v64i8_to_v64i32: 4569; GFX8-NOHSA: ; %bb.0: 4570; GFX8-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24 4571; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 4572; GFX8-NOHSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 4573; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 4574; GFX8-NOHSA-NEXT: s_ashr_i32 s18, s0, 24 4575; GFX8-NOHSA-NEXT: s_bfe_i32 s19, s0, 0x80010 4576; GFX8-NOHSA-NEXT: s_bfe_i32 s20, s0, 0x80008 4577; GFX8-NOHSA-NEXT: s_ashr_i32 s21, s1, 24 4578; GFX8-NOHSA-NEXT: s_bfe_i32 s22, s1, 0x80010 4579; GFX8-NOHSA-NEXT: s_bfe_i32 s23, s1, 0x80008 4580; GFX8-NOHSA-NEXT: s_ashr_i32 s24, s2, 24 4581; GFX8-NOHSA-NEXT: s_bfe_i32 s25, s2, 0x80010 4582; GFX8-NOHSA-NEXT: s_bfe_i32 s26, s2, 0x80008 4583; GFX8-NOHSA-NEXT: s_ashr_i32 s27, s3, 24 4584; GFX8-NOHSA-NEXT: s_bfe_i32 s28, s3, 0x80010 4585; GFX8-NOHSA-NEXT: s_bfe_i32 s29, s3, 0x80008 4586; GFX8-NOHSA-NEXT: s_ashr_i32 s30, s4, 24 4587; GFX8-NOHSA-NEXT: s_bfe_i32 s31, s4, 0x80010 4588; GFX8-NOHSA-NEXT: s_bfe_i32 s33, s4, 0x80008 4589; GFX8-NOHSA-NEXT: s_ashr_i32 s34, s5, 24 4590; GFX8-NOHSA-NEXT: s_bfe_i32 s35, s5, 0x80010 4591; GFX8-NOHSA-NEXT: s_bfe_i32 s36, s5, 0x80008 4592; GFX8-NOHSA-NEXT: s_ashr_i32 s37, s6, 24 4593; GFX8-NOHSA-NEXT: s_bfe_i32 s38, s6, 0x80010 4594; GFX8-NOHSA-NEXT: s_bfe_i32 s39, s6, 0x80008 4595; GFX8-NOHSA-NEXT: s_sext_i32_i8 s40, s6 4596; GFX8-NOHSA-NEXT: s_ashr_i32 s41, s7, 24 4597; GFX8-NOHSA-NEXT: s_bfe_i32 s42, s7, 0x80010 4598; GFX8-NOHSA-NEXT: s_bfe_i32 s43, s7, 0x80008 4599; GFX8-NOHSA-NEXT: s_sext_i32_i8 s44, s7 4600; GFX8-NOHSA-NEXT: s_ashr_i32 s45, s8, 24 4601; GFX8-NOHSA-NEXT: s_bfe_i32 s46, s8, 0x80010 4602; GFX8-NOHSA-NEXT: s_bfe_i32 s47, s8, 0x80008 4603; GFX8-NOHSA-NEXT: s_ashr_i32 s48, s9, 24 4604; GFX8-NOHSA-NEXT: s_bfe_i32 s49, s9, 0x80010 4605; GFX8-NOHSA-NEXT: s_bfe_i32 s50, s9, 0x80008 4606; GFX8-NOHSA-NEXT: s_ashr_i32 s51, s10, 24 4607; GFX8-NOHSA-NEXT: s_bfe_i32 s52, s10, 0x80010 4608; GFX8-NOHSA-NEXT: s_bfe_i32 s53, s10, 0x80008 4609; GFX8-NOHSA-NEXT: s_ashr_i32 s54, s11, 24 4610; GFX8-NOHSA-NEXT: s_bfe_i32 s55, s11, 0x80010 4611; GFX8-NOHSA-NEXT: s_bfe_i32 s56, s11, 0x80008 4612; GFX8-NOHSA-NEXT: s_ashr_i32 s57, s12, 24 4613; GFX8-NOHSA-NEXT: s_bfe_i32 s58, s12, 0x80010 4614; GFX8-NOHSA-NEXT: s_bfe_i32 s59, s12, 0x80008 4615; GFX8-NOHSA-NEXT: s_ashr_i32 s60, s13, 24 4616; GFX8-NOHSA-NEXT: s_bfe_i32 s61, s13, 0x80010 4617; GFX8-NOHSA-NEXT: s_bfe_i32 s62, s13, 0x80008 4618; GFX8-NOHSA-NEXT: s_ashr_i32 s63, s14, 24 4619; GFX8-NOHSA-NEXT: s_bfe_i32 s64, s14, 0x80010 4620; GFX8-NOHSA-NEXT: s_bfe_i32 s65, s14, 0x80008 4621; GFX8-NOHSA-NEXT: s_ashr_i32 s6, s15, 24 4622; GFX8-NOHSA-NEXT: s_bfe_i32 s7, s15, 0x80010 4623; GFX8-NOHSA-NEXT: s_bfe_i32 s66, s15, 0x80008 4624; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s6 4625; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xf0 4626; GFX8-NOHSA-NEXT: s_sext_i32_i8 s15, s15 4627; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s7 4628; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4629; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4630; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s15 4631; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s66 4632; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4633; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xe0 4634; GFX8-NOHSA-NEXT: s_sext_i32_i8 s14, s14 4635; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4636; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4637; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4638; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s14 4639; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s65 4640; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s64 4641; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s63 4642; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4643; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xd0 4644; GFX8-NOHSA-NEXT: s_sext_i32_i8 s13, s13 4645; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4646; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4647; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4648; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s13 4649; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s62 4650; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s61 4651; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s60 4652; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4653; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xc0 4654; GFX8-NOHSA-NEXT: s_sext_i32_i8 s12, s12 4655; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4656; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4657; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4658; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s12 4659; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s59 4660; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s58 4661; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s57 4662; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4663; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xb0 4664; GFX8-NOHSA-NEXT: s_sext_i32_i8 s11, s11 4665; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4666; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4667; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4668; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s11 4669; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s56 4670; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s55 4671; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s54 4672; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4673; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0xa0 4674; GFX8-NOHSA-NEXT: s_sext_i32_i8 s10, s10 4675; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4676; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4677; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4678; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 4679; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s53 4680; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s52 4681; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s51 4682; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4683; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x90 4684; GFX8-NOHSA-NEXT: s_sext_i32_i8 s9, s9 4685; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4686; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4687; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4688; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s9 4689; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s50 4690; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s49 4691; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s48 4692; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4693; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x80 4694; GFX8-NOHSA-NEXT: s_sext_i32_i8 s8, s8 4695; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4696; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4697; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4698; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 4699; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s47 4700; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s46 4701; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s45 4702; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4703; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x70 4704; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4705; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4706; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4707; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s44 4708; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s43 4709; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s42 4710; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s41 4711; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4712; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x60 4713; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4714; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4715; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4716; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s40 4717; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s39 4718; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s38 4719; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s37 4720; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4721; GFX8-NOHSA-NEXT: s_add_u32 s6, s16, 0x50 4722; GFX8-NOHSA-NEXT: s_sext_i32_i8 s5, s5 4723; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4724; GFX8-NOHSA-NEXT: s_addc_u32 s7, s17, 0 4725; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 4726; GFX8-NOHSA-NEXT: s_sext_i32_i8 s4, s4 4727; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s5 4728; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s36 4729; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s35 4730; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s34 4731; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 4732; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4733; GFX8-NOHSA-NEXT: s_sext_i32_i8 s3, s3 4734; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 4735; GFX8-NOHSA-NEXT: s_add_u32 s4, s16, 64 4736; GFX8-NOHSA-NEXT: s_addc_u32 s5, s17, 0 4737; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 4738; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s33 4739; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s31 4740; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s30 4741; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5 4742; GFX8-NOHSA-NEXT: s_add_u32 s4, s16, 48 4743; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4744; GFX8-NOHSA-NEXT: s_addc_u32 s5, s17, 0 4745; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 4746; GFX8-NOHSA-NEXT: s_sext_i32_i8 s2, s2 4747; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s3 4748; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s29 4749; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s28 4750; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s27 4751; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5 4752; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4753; GFX8-NOHSA-NEXT: s_sext_i32_i8 s1, s1 4754; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 4755; GFX8-NOHSA-NEXT: s_add_u32 s2, s16, 32 4756; GFX8-NOHSA-NEXT: s_addc_u32 s3, s17, 0 4757; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 4758; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 4759; GFX8-NOHSA-NEXT: s_add_u32 s2, s16, 16 4760; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s26 4761; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s25 4762; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s24 4763; GFX8-NOHSA-NEXT: s_addc_u32 s3, s17, 0 4764; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4765; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 4766; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s1 4767; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23 4768; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s22 4769; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s21 4770; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 4771; GFX8-NOHSA-NEXT: s_sext_i32_i8 s0, s0 4772; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4773; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s16 4774; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 4775; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s20 4776; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s19 4777; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s18 4778; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s17 4779; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4780; GFX8-NOHSA-NEXT: s_endpgm 4781; 4782; EG-LABEL: constant_sextload_v64i8_to_v64i32: 4783; EG: ; %bb.0: 4784; EG-NEXT: ALU 0, @32, KC0[CB0:0-32], KC1[] 4785; EG-NEXT: TEX 1 @24 4786; EG-NEXT: ALU 40, @33, KC0[CB0:0-32], KC1[] 4787; EG-NEXT: TEX 1 @28 4788; EG-NEXT: ALU 76, @74, KC0[CB0:0-32], KC1[] 4789; EG-NEXT: ALU 72, @151, KC0[CB0:0-32], KC1[] 4790; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0 4791; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T49.X, 0 4792; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T19.X, 0 4793; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T35.X, 0 4794; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T34.X, 0 4795; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T33.X, 0 4796; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T32.X, 0 4797; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T30.X, 0 4798; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T29.X, 0 4799; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T28.X, 0 4800; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T27.X, 0 4801; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T26.X, 0 4802; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T25.X, 0 4803; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T24.X, 0 4804; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T23.X, 0 4805; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T22.X, 1 4806; EG-NEXT: CF_END 4807; EG-NEXT: PAD 4808; EG-NEXT: Fetch clause starting at 24: 4809; EG-NEXT: VTX_READ_128 T20.XYZW, T21.X, 32, #1 4810; EG-NEXT: VTX_READ_128 T19.XYZW, T21.X, 48, #1 4811; EG-NEXT: Fetch clause starting at 28: 4812; EG-NEXT: VTX_READ_128 T31.XYZW, T21.X, 0, #1 4813; EG-NEXT: VTX_READ_128 T21.XYZW, T21.X, 16, #1 4814; EG-NEXT: ALU clause starting at 32: 4815; EG-NEXT: MOV * T21.X, KC0[2].Z, 4816; EG-NEXT: ALU clause starting at 33: 4817; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x, 4818; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4819; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4820; EG-NEXT: LSHR T23.X, PV.W, literal.x, 4821; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4822; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4823; EG-NEXT: LSHR T24.X, PV.W, literal.x, 4824; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4825; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 4826; EG-NEXT: LSHR T25.X, PV.W, literal.x, 4827; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4828; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4829; EG-NEXT: LSHR T26.X, PV.W, literal.x, 4830; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4831; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4832; EG-NEXT: LSHR T27.X, PV.W, literal.x, 4833; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4834; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 4835; EG-NEXT: LSHR T28.X, PV.W, literal.x, 4836; EG-NEXT: LSHR T0.Y, T19.W, literal.y, 4837; EG-NEXT: LSHR T0.Z, T19.Z, literal.z, 4838; EG-NEXT: LSHR * T0.W, T19.W, literal.w, 4839; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4840; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 4841; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 4842; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 4843; EG-NEXT: LSHR T29.X, PV.W, literal.x, 4844; EG-NEXT: LSHR T1.Y, T19.Z, literal.y, 4845; EG-NEXT: LSHR T1.Z, T19.Y, literal.z, 4846; EG-NEXT: LSHR * T1.W, T19.Z, literal.w, 4847; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4848; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 4849; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, 4850; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00) 4851; EG-NEXT: LSHR T30.X, PV.W, literal.x, 4852; EG-NEXT: LSHR T2.Y, T19.Y, literal.y, 4853; EG-NEXT: LSHR T2.Z, T19.Y, literal.z, 4854; EG-NEXT: LSHR T2.W, T19.X, literal.y, 4855; EG-NEXT: LSHR * T3.W, T19.X, literal.z, 4856; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4857; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 4858; EG-NEXT: ALU clause starting at 74: 4859; EG-NEXT: LSHR T3.Y, T20.W, literal.x, 4860; EG-NEXT: LSHR T3.Z, T20.W, literal.y, 4861; EG-NEXT: LSHR T4.W, T20.Z, literal.x, 4862; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.z, 4863; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 4864; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00) 4865; EG-NEXT: LSHR T32.X, PS, literal.x, 4866; EG-NEXT: LSHR T4.Y, T20.Z, literal.y, 4867; EG-NEXT: LSHR T4.Z, T20.Y, literal.z, 4868; EG-NEXT: LSHR T5.W, T20.Y, literal.y, 4869; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.w, 4870; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 4871; EG-NEXT: 16(2.242078e-44), 160(2.242078e-43) 4872; EG-NEXT: LSHR T33.X, PS, literal.x, 4873; EG-NEXT: LSHR T5.Y, T20.X, literal.y, 4874; EG-NEXT: LSHR T5.Z, T20.X, literal.z, 4875; EG-NEXT: LSHR T6.W, T21.W, literal.y, 4876; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.w, 4877; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4878; EG-NEXT: 24(3.363116e-44), 176(2.466285e-43) 4879; EG-NEXT: LSHR T34.X, PS, literal.x, 4880; EG-NEXT: LSHR T6.Y, T21.W, literal.y, 4881; EG-NEXT: LSHR T6.Z, T21.Z, literal.z, 4882; EG-NEXT: LSHR T7.W, T21.Z, literal.y, 4883; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.w, 4884; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 4885; EG-NEXT: 16(2.242078e-44), 192(2.690493e-43) 4886; EG-NEXT: LSHR T35.X, PS, literal.x, 4887; EG-NEXT: LSHR T7.Y, T21.Y, literal.y, 4888; EG-NEXT: LSHR T7.Z, T21.Y, literal.z, 4889; EG-NEXT: LSHR T8.W, T21.X, literal.y, 4890; EG-NEXT: LSHR * T9.W, T21.X, literal.z, 4891; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4892; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 4893; EG-NEXT: BFE_INT T36.X, T31.X, 0.0, literal.x, 4894; EG-NEXT: LSHR T8.Y, T31.W, literal.y, 4895; EG-NEXT: LSHR T8.Z, T31.W, literal.z, 4896; EG-NEXT: LSHR T10.W, T31.Z, literal.y, 4897; EG-NEXT: LSHR * T11.W, T31.X, literal.z, 4898; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 4899; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 4900; EG-NEXT: BFE_INT T37.X, T31.Y, 0.0, literal.x, 4901; EG-NEXT: LSHR T9.Y, T31.Z, literal.y, 4902; EG-NEXT: LSHR T9.Z, T31.Y, literal.y, 4903; EG-NEXT: BFE_INT T36.W, PS, 0.0, literal.x, 4904; EG-NEXT: LSHR * T11.W, T31.X, literal.z, 4905; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 4906; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4907; EG-NEXT: BFE_INT T38.X, T31.Z, 0.0, literal.x, 4908; EG-NEXT: LSHR T10.Y, T31.Y, literal.y, 4909; EG-NEXT: BFE_INT T36.Z, PS, 0.0, literal.x, 4910; EG-NEXT: BFE_INT T37.W, PV.Z, 0.0, literal.x, 4911; EG-NEXT: LSHR * T11.W, T31.X, literal.x, 4912; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 4913; EG-NEXT: BFE_INT T39.X, T31.W, 0.0, literal.x, 4914; EG-NEXT: BFE_INT T36.Y, PS, 0.0, literal.x, 4915; EG-NEXT: BFE_INT T37.Z, PV.Y, 0.0, literal.x, 4916; EG-NEXT: BFE_INT T38.W, T9.Y, 0.0, literal.x, 4917; EG-NEXT: LSHR * T11.W, T31.Y, literal.x, 4918; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4919; EG-NEXT: BFE_INT T40.X, T21.X, 0.0, literal.x, 4920; EG-NEXT: BFE_INT T37.Y, PS, 0.0, literal.x, 4921; EG-NEXT: BFE_INT T38.Z, T10.W, 0.0, literal.x, 4922; EG-NEXT: BFE_INT T39.W, T8.Z, 0.0, literal.x, 4923; EG-NEXT: LSHR * T10.W, T31.Z, literal.x, 4924; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4925; EG-NEXT: BFE_INT T31.X, T21.Y, 0.0, literal.x, 4926; EG-NEXT: BFE_INT T38.Y, PS, 0.0, literal.x, 4927; EG-NEXT: BFE_INT T39.Z, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4928; EG-NEXT: BFE_INT T40.W, T9.W, 0.0, literal.x, 4929; EG-NEXT: LSHR * T9.W, T31.W, literal.x, 4930; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4931; EG-NEXT: BFE_INT T41.X, T21.Z, 0.0, literal.x, 4932; EG-NEXT: BFE_INT T39.Y, PS, 0.0, literal.x, 4933; EG-NEXT: BFE_INT T40.Z, T8.W, 0.0, literal.x, 4934; EG-NEXT: BFE_INT * T31.W, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4935; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4936; EG-NEXT: ALU clause starting at 151: 4937; EG-NEXT: LSHR * T8.W, T21.X, literal.x, 4938; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4939; EG-NEXT: BFE_INT T42.X, T21.W, 0.0, literal.x, 4940; EG-NEXT: BFE_INT T40.Y, PV.W, 0.0, literal.x, 4941; EG-NEXT: BFE_INT T31.Z, T7.Y, 0.0, literal.x, 4942; EG-NEXT: BFE_INT T41.W, T7.W, 0.0, literal.x, BS:VEC_120/SCL_212 4943; EG-NEXT: LSHR * T7.W, T21.Y, literal.x, 4944; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4945; EG-NEXT: BFE_INT T43.X, T20.X, 0.0, literal.x, 4946; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 4947; EG-NEXT: BFE_INT T41.Z, T6.Z, 0.0, literal.x, 4948; EG-NEXT: BFE_INT T42.W, T6.Y, 0.0, literal.x, 4949; EG-NEXT: LSHR * T7.W, T21.Z, literal.x, 4950; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4951; EG-NEXT: BFE_INT T21.X, T20.Y, 0.0, literal.x, 4952; EG-NEXT: BFE_INT T41.Y, PS, 0.0, literal.x, 4953; EG-NEXT: BFE_INT T42.Z, T6.W, 0.0, literal.x, 4954; EG-NEXT: BFE_INT T43.W, T5.Z, 0.0, literal.x, 4955; EG-NEXT: LSHR * T6.W, T21.W, literal.x, 4956; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4957; EG-NEXT: BFE_INT T44.X, T20.Z, 0.0, literal.x, 4958; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4959; EG-NEXT: BFE_INT T43.Z, T5.Y, 0.0, literal.x, 4960; EG-NEXT: BFE_INT T21.W, T5.W, 0.0, literal.x, 4961; EG-NEXT: LSHR * T5.W, T20.X, literal.x, 4962; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4963; EG-NEXT: BFE_INT T45.X, T20.W, 0.0, literal.x, 4964; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4965; EG-NEXT: BFE_INT T21.Z, T4.Z, 0.0, literal.x, 4966; EG-NEXT: BFE_INT T44.W, T4.Y, 0.0, literal.x, 4967; EG-NEXT: LSHR * T5.W, T20.Y, literal.x, 4968; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4969; EG-NEXT: BFE_INT T46.X, T19.X, 0.0, literal.x, 4970; EG-NEXT: BFE_INT T21.Y, PS, 0.0, literal.x, 4971; EG-NEXT: BFE_INT T44.Z, T4.W, 0.0, literal.x, 4972; EG-NEXT: BFE_INT T45.W, T3.Z, 0.0, literal.x, 4973; EG-NEXT: LSHR * T4.W, T20.Z, literal.x, 4974; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4975; EG-NEXT: BFE_INT T20.X, T19.Y, 0.0, literal.x, 4976; EG-NEXT: BFE_INT T44.Y, PS, 0.0, literal.x, 4977; EG-NEXT: BFE_INT T45.Z, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4978; EG-NEXT: BFE_INT T46.W, T3.W, 0.0, literal.x, 4979; EG-NEXT: LSHR * T3.W, T20.W, literal.x, 4980; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4981; EG-NEXT: BFE_INT T47.X, T19.Z, 0.0, literal.x, 4982; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4983; EG-NEXT: BFE_INT T46.Z, T2.W, 0.0, literal.x, 4984; EG-NEXT: BFE_INT T20.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4985; EG-NEXT: LSHR * T2.W, T19.X, literal.x, 4986; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 4987; EG-NEXT: BFE_INT T48.X, T19.W, 0.0, literal.x, 4988; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 4989; EG-NEXT: BFE_INT T20.Z, T2.Y, 0.0, literal.x, 4990; EG-NEXT: BFE_INT T47.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 4991; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4992; EG-NEXT: 8(1.121039e-44), 208(2.914701e-43) 4993; EG-NEXT: LSHR T19.X, PS, literal.x, 4994; EG-NEXT: BFE_INT T20.Y, T1.Z, 0.0, literal.y, 4995; EG-NEXT: BFE_INT T47.Z, T1.Y, 0.0, literal.y, 4996; EG-NEXT: BFE_INT T48.W, T0.W, 0.0, literal.y, 4997; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4998; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 4999; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 5000; EG-NEXT: LSHR T49.X, PS, literal.x, 5001; EG-NEXT: BFE_INT T47.Y, T0.Z, 0.0, literal.y, 5002; EG-NEXT: BFE_INT T48.Z, T0.Y, 0.0, literal.y, 5003; EG-NEXT: LSHR T0.W, T19.W, literal.y, 5004; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5005; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 5006; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 5007; EG-NEXT: LSHR T50.X, PS, literal.x, 5008; EG-NEXT: BFE_INT * T48.Y, PV.W, 0.0, literal.y, 5009; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 5010; 5011; GFX12-LABEL: constant_sextload_v64i8_to_v64i32: 5012; GFX12: ; %bb.0: 5013; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24 5014; GFX12-NEXT: s_wait_kmcnt 0x0 5015; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 5016; GFX12-NEXT: s_wait_kmcnt 0x0 5017; GFX12-NEXT: s_ashr_i32 s64, s15, 24 5018; GFX12-NEXT: s_bfe_i32 s65, s15, 0x80010 5019; GFX12-NEXT: s_sext_i32_i8 s66, s15 5020; GFX12-NEXT: s_bfe_i32 s15, s15, 0x80008 5021; GFX12-NEXT: s_ashr_i32 s61, s14, 24 5022; GFX12-NEXT: s_bfe_i32 s62, s14, 0x80010 5023; GFX12-NEXT: s_bfe_i32 s63, s14, 0x80008 5024; GFX12-NEXT: s_sext_i32_i8 s14, s14 5025; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s15 5026; GFX12-NEXT: s_ashr_i32 s58, s13, 24 5027; GFX12-NEXT: s_bfe_i32 s59, s13, 0x80010 5028; GFX12-NEXT: s_bfe_i32 s60, s13, 0x80008 5029; GFX12-NEXT: s_sext_i32_i8 s13, s13 5030; GFX12-NEXT: v_dual_mov_b32 v0, s66 :: v_dual_mov_b32 v3, s64 5031; GFX12-NEXT: v_dual_mov_b32 v2, s65 :: v_dual_mov_b32 v5, s63 5032; GFX12-NEXT: s_ashr_i32 s55, s12, 24 5033; GFX12-NEXT: s_bfe_i32 s56, s12, 0x80010 5034; GFX12-NEXT: s_bfe_i32 s57, s12, 0x80008 5035; GFX12-NEXT: s_sext_i32_i8 s12, s12 5036; GFX12-NEXT: v_dual_mov_b32 v4, s14 :: v_dual_mov_b32 v7, s61 5037; GFX12-NEXT: v_dual_mov_b32 v6, s62 :: v_dual_mov_b32 v9, s60 5038; GFX12-NEXT: v_dual_mov_b32 v8, s13 :: v_dual_mov_b32 v11, s58 5039; GFX12-NEXT: v_dual_mov_b32 v10, s59 :: v_dual_mov_b32 v13, s57 5040; GFX12-NEXT: s_ashr_i32 s52, s11, 24 5041; GFX12-NEXT: s_bfe_i32 s53, s11, 0x80010 5042; GFX12-NEXT: s_bfe_i32 s54, s11, 0x80008 5043; GFX12-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s55 5044; GFX12-NEXT: v_mov_b32_e32 v14, s56 5045; GFX12-NEXT: s_sext_i32_i8 s11, s11 5046; GFX12-NEXT: s_ashr_i32 s49, s10, 24 5047; GFX12-NEXT: s_bfe_i32 s50, s10, 0x80010 5048; GFX12-NEXT: s_bfe_i32 s51, s10, 0x80008 5049; GFX12-NEXT: s_sext_i32_i8 s10, s10 5050; GFX12-NEXT: s_ashr_i32 s46, s9, 24 5051; GFX12-NEXT: s_bfe_i32 s47, s9, 0x80010 5052; GFX12-NEXT: s_bfe_i32 s48, s9, 0x80008 5053; GFX12-NEXT: s_sext_i32_i8 s9, s9 5054; GFX12-NEXT: s_clause 0x3 5055; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:240 5056; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:224 5057; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:208 5058; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:192 5059; GFX12-NEXT: v_dual_mov_b32 v1, s54 :: v_dual_mov_b32 v0, s11 5060; GFX12-NEXT: v_dual_mov_b32 v3, s52 :: v_dual_mov_b32 v2, s53 5061; GFX12-NEXT: v_mov_b32_e32 v5, s51 5062; GFX12-NEXT: s_ashr_i32 s43, s8, 24 5063; GFX12-NEXT: s_bfe_i32 s44, s8, 0x80010 5064; GFX12-NEXT: s_bfe_i32 s45, s8, 0x80008 5065; GFX12-NEXT: s_sext_i32_i8 s8, s8 5066; GFX12-NEXT: v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v7, s49 5067; GFX12-NEXT: v_dual_mov_b32 v6, s50 :: v_dual_mov_b32 v9, s48 5068; GFX12-NEXT: s_ashr_i32 s40, s7, 24 5069; GFX12-NEXT: s_bfe_i32 s41, s7, 0x80010 5070; GFX12-NEXT: s_bfe_i32 s42, s7, 0x80008 5071; GFX12-NEXT: s_sext_i32_i8 s7, s7 5072; GFX12-NEXT: v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v11, s46 5073; GFX12-NEXT: v_dual_mov_b32 v10, s47 :: v_dual_mov_b32 v13, s45 5074; GFX12-NEXT: s_ashr_i32 s34, s5, 24 5075; GFX12-NEXT: s_bfe_i32 s35, s5, 0x80010 5076; GFX12-NEXT: s_bfe_i32 s36, s5, 0x80008 5077; GFX12-NEXT: s_sext_i32_i8 s5, s5 5078; GFX12-NEXT: s_ashr_i32 s37, s6, 24 5079; GFX12-NEXT: s_bfe_i32 s38, s6, 0x80010 5080; GFX12-NEXT: s_bfe_i32 s39, s6, 0x80008 5081; GFX12-NEXT: s_sext_i32_i8 s6, s6 5082; GFX12-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s43 5083; GFX12-NEXT: v_dual_mov_b32 v14, s44 :: v_dual_mov_b32 v17, s42 5084; GFX12-NEXT: s_ashr_i32 s30, s4, 24 5085; GFX12-NEXT: s_bfe_i32 s31, s4, 0x80010 5086; GFX12-NEXT: s_bfe_i32 s33, s4, 0x80008 5087; GFX12-NEXT: s_sext_i32_i8 s4, s4 5088; GFX12-NEXT: v_dual_mov_b32 v16, s7 :: v_dual_mov_b32 v19, s40 5089; GFX12-NEXT: v_dual_mov_b32 v18, s41 :: v_dual_mov_b32 v21, s39 5090; GFX12-NEXT: s_ashr_i32 s27, s3, 24 5091; GFX12-NEXT: s_bfe_i32 s28, s3, 0x80010 5092; GFX12-NEXT: s_bfe_i32 s29, s3, 0x80008 5093; GFX12-NEXT: s_sext_i32_i8 s3, s3 5094; GFX12-NEXT: v_dual_mov_b32 v20, s6 :: v_dual_mov_b32 v23, s37 5095; GFX12-NEXT: v_mov_b32_e32 v22, s38 5096; GFX12-NEXT: s_clause 0x5 5097; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:176 5098; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:160 5099; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:144 5100; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:128 5101; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:112 5102; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] offset:96 5103; GFX12-NEXT: v_dual_mov_b32 v1, s36 :: v_dual_mov_b32 v0, s5 5104; GFX12-NEXT: v_dual_mov_b32 v3, s34 :: v_dual_mov_b32 v2, s35 5105; GFX12-NEXT: v_mov_b32_e32 v5, s33 5106; GFX12-NEXT: s_ashr_i32 s24, s2, 24 5107; GFX12-NEXT: s_bfe_i32 s25, s2, 0x80010 5108; GFX12-NEXT: s_bfe_i32 s26, s2, 0x80008 5109; GFX12-NEXT: s_sext_i32_i8 s2, s2 5110; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s30 5111; GFX12-NEXT: v_dual_mov_b32 v6, s31 :: v_dual_mov_b32 v9, s29 5112; GFX12-NEXT: s_ashr_i32 s21, s1, 24 5113; GFX12-NEXT: s_bfe_i32 s22, s1, 0x80010 5114; GFX12-NEXT: s_bfe_i32 s23, s1, 0x80008 5115; GFX12-NEXT: s_sext_i32_i8 s1, s1 5116; GFX12-NEXT: v_dual_mov_b32 v8, s3 :: v_dual_mov_b32 v11, s27 5117; GFX12-NEXT: v_dual_mov_b32 v10, s28 :: v_dual_mov_b32 v13, s26 5118; GFX12-NEXT: s_ashr_i32 s18, s0, 24 5119; GFX12-NEXT: s_bfe_i32 s19, s0, 0x80010 5120; GFX12-NEXT: s_bfe_i32 s20, s0, 0x80008 5121; GFX12-NEXT: s_sext_i32_i8 s0, s0 5122; GFX12-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v15, s24 5123; GFX12-NEXT: v_dual_mov_b32 v14, s25 :: v_dual_mov_b32 v17, s23 5124; GFX12-NEXT: v_dual_mov_b32 v16, s1 :: v_dual_mov_b32 v19, s21 5125; GFX12-NEXT: v_dual_mov_b32 v18, s22 :: v_dual_mov_b32 v21, s20 5126; GFX12-NEXT: v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v23, s18 5127; GFX12-NEXT: v_mov_b32_e32 v22, s19 5128; GFX12-NEXT: s_clause 0x5 5129; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:80 5130; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:64 5131; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:48 5132; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:32 5133; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:16 5134; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] 5135; GFX12-NEXT: s_endpgm 5136 %load = load <64 x i8>, ptr addrspace(4) %in 5137 %ext = sext <64 x i8> %load to <64 x i32> 5138 store <64 x i32> %ext, ptr addrspace(1) %out 5139 ret void 5140} 5141 5142define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5143; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i64: 5144; GFX6-NOHSA: ; %bb.0: 5145; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5146; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 5147; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 5148; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 5149; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 5150; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5151; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 5152; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 5153; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 5154; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 5155; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 5156; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5157; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 5158; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5159; GFX6-NOHSA-NEXT: s_endpgm 5160; 5161; GFX7-HSA-LABEL: constant_zextload_i8_to_i64: 5162; GFX7-HSA: ; %bb.0: 5163; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5164; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5165; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5166; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5167; GFX7-HSA-NEXT: flat_load_ubyte v0, v[0:1] 5168; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 5169; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1 5170; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 5171; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 5172; GFX7-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5173; GFX7-HSA-NEXT: s_endpgm 5174; 5175; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i64: 5176; GFX8-NOHSA: ; %bb.0: 5177; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5178; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, 0 5179; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5180; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5181; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5182; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1] 5183; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 5184; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 5185; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 5186; GFX8-NOHSA-NEXT: v_and_b32_e32 v2, 0xffff, v2 5187; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5188; GFX8-NOHSA-NEXT: s_endpgm 5189; 5190; EG-LABEL: constant_zextload_i8_to_i64: 5191; EG: ; %bb.0: 5192; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5193; EG-NEXT: TEX 0 @6 5194; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5195; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5196; EG-NEXT: CF_END 5197; EG-NEXT: PAD 5198; EG-NEXT: Fetch clause starting at 6: 5199; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 5200; EG-NEXT: ALU clause starting at 8: 5201; EG-NEXT: MOV * T0.X, KC0[2].Z, 5202; EG-NEXT: ALU clause starting at 9: 5203; EG-NEXT: MOV * T0.Y, 0.0, 5204; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5205; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5206; 5207; GFX12-LABEL: constant_zextload_i8_to_i64: 5208; GFX12: ; %bb.0: 5209; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5210; GFX12-NEXT: v_mov_b32_e32 v1, 0 5211; GFX12-NEXT: s_wait_kmcnt 0x0 5212; GFX12-NEXT: global_load_u8 v0, v1, s[2:3] 5213; GFX12-NEXT: s_wait_loadcnt 0x0 5214; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 5215; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1] 5216; GFX12-NEXT: s_endpgm 5217 %a = load i8, ptr addrspace(4) %in 5218 %ext = zext i8 %a to i64 5219 store i64 %ext, ptr addrspace(1) %out 5220 ret void 5221} 5222 5223; TODO: Why not 7 ? 5224define amdgpu_kernel void @constant_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5225; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i64: 5226; GFX6-NOHSA: ; %bb.0: 5227; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5228; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 5229; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 5230; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 5231; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 5232; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5233; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 5234; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 5235; GFX6-NOHSA-NEXT: buffer_load_sbyte v0, off, s[8:11], 0 5236; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 5237; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 5238; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 5239; GFX6-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5240; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5241; GFX6-NOHSA-NEXT: s_endpgm 5242; 5243; GFX7-HSA-LABEL: constant_sextload_i8_to_i64: 5244; GFX7-HSA: ; %bb.0: 5245; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5246; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5247; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5248; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5249; GFX7-HSA-NEXT: flat_load_sbyte v0, v[0:1] 5250; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 5251; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1 5252; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 5253; GFX7-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5254; GFX7-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5255; GFX7-HSA-NEXT: s_endpgm 5256; 5257; GFX8-NOHSA-LABEL: constant_sextload_i8_to_i64: 5258; GFX8-NOHSA: ; %bb.0: 5259; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5260; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5261; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5262; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5263; GFX8-NOHSA-NEXT: flat_load_sbyte v2, v[0:1] 5264; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 5265; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 5266; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 5267; GFX8-NOHSA-NEXT: v_bfe_i32 v2, v2, 0, 16 5268; GFX8-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5269; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5270; GFX8-NOHSA-NEXT: s_endpgm 5271; 5272; EG-LABEL: constant_sextload_i8_to_i64: 5273; EG: ; %bb.0: 5274; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5275; EG-NEXT: TEX 0 @6 5276; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5277; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5278; EG-NEXT: CF_END 5279; EG-NEXT: PAD 5280; EG-NEXT: Fetch clause starting at 6: 5281; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 5282; EG-NEXT: ALU clause starting at 8: 5283; EG-NEXT: MOV * T0.X, KC0[2].Z, 5284; EG-NEXT: ALU clause starting at 9: 5285; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5286; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5287; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45) 5288; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5289; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5290; 5291; GFX12-LABEL: constant_sextload_i8_to_i64: 5292; GFX12: ; %bb.0: 5293; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5294; GFX12-NEXT: v_mov_b32_e32 v2, 0 5295; GFX12-NEXT: s_wait_kmcnt 0x0 5296; GFX12-NEXT: global_load_i8 v0, v2, s[2:3] 5297; GFX12-NEXT: s_wait_loadcnt 0x0 5298; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16 5299; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 5300; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5301; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 5302; GFX12-NEXT: s_endpgm 5303 %a = load i8, ptr addrspace(4) %in 5304 %ext = sext i8 %a to i64 5305 store i64 %ext, ptr addrspace(1) %out 5306 ret void 5307} 5308 5309define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5310; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i64: 5311; GFX6-NOHSA: ; %bb.0: 5312; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5313; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 5314; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 5315; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 5316; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 5317; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5318; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 5319; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 5320; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 5321; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 5322; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 5323; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5324; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 5325; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5326; GFX6-NOHSA-NEXT: s_endpgm 5327; 5328; GFX7-HSA-LABEL: constant_zextload_v1i8_to_v1i64: 5329; GFX7-HSA: ; %bb.0: 5330; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5331; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5332; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5333; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5334; GFX7-HSA-NEXT: flat_load_ubyte v0, v[0:1] 5335; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 5336; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1 5337; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 5338; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 5339; GFX7-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5340; GFX7-HSA-NEXT: s_endpgm 5341; 5342; GFX8-NOHSA-LABEL: constant_zextload_v1i8_to_v1i64: 5343; GFX8-NOHSA: ; %bb.0: 5344; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5345; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5346; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5347; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5348; GFX8-NOHSA-NEXT: flat_load_ubyte v0, v[0:1] 5349; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 5350; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s1 5351; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5352; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 5353; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5354; GFX8-NOHSA-NEXT: s_endpgm 5355; 5356; EG-LABEL: constant_zextload_v1i8_to_v1i64: 5357; EG: ; %bb.0: 5358; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5359; EG-NEXT: TEX 0 @6 5360; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5361; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5362; EG-NEXT: CF_END 5363; EG-NEXT: PAD 5364; EG-NEXT: Fetch clause starting at 6: 5365; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 5366; EG-NEXT: ALU clause starting at 8: 5367; EG-NEXT: MOV * T0.X, KC0[2].Z, 5368; EG-NEXT: ALU clause starting at 9: 5369; EG-NEXT: MOV * T0.Y, 0.0, 5370; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5371; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5372; 5373; GFX12-LABEL: constant_zextload_v1i8_to_v1i64: 5374; GFX12: ; %bb.0: 5375; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5376; GFX12-NEXT: s_wait_kmcnt 0x0 5377; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 5378; GFX12-NEXT: s_wait_kmcnt 0x0 5379; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 5380; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1] 5381; GFX12-NEXT: s_endpgm 5382 %load = load <1 x i8>, ptr addrspace(4) %in 5383 %ext = zext <1 x i8> %load to <1 x i64> 5384 store <1 x i64> %ext, ptr addrspace(1) %out 5385 ret void 5386} 5387 5388; TODO: Why not 7 ? 5389define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5390; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i64: 5391; GFX6-NOHSA: ; %bb.0: 5392; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5393; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 5394; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 5395; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 5396; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 5397; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5398; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 5399; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 5400; GFX6-NOHSA-NEXT: buffer_load_sbyte v0, off, s[8:11], 0 5401; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 5402; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 5403; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 5404; GFX6-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5405; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5406; GFX6-NOHSA-NEXT: s_endpgm 5407; 5408; GFX7-HSA-LABEL: constant_sextload_v1i8_to_v1i64: 5409; GFX7-HSA: ; %bb.0: 5410; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5411; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5412; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5413; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5414; GFX7-HSA-NEXT: flat_load_sbyte v0, v[0:1] 5415; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 5416; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1 5417; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 5418; GFX7-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5419; GFX7-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5420; GFX7-HSA-NEXT: s_endpgm 5421; 5422; GFX8-NOHSA-LABEL: constant_sextload_v1i8_to_v1i64: 5423; GFX8-NOHSA: ; %bb.0: 5424; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5425; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5426; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5427; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5428; GFX8-NOHSA-NEXT: flat_load_sbyte v2, v[0:1] 5429; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 5430; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 5431; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 5432; GFX8-NOHSA-NEXT: v_bfe_i32 v2, v2, 0, 16 5433; GFX8-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5434; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5435; GFX8-NOHSA-NEXT: s_endpgm 5436; 5437; EG-LABEL: constant_sextload_v1i8_to_v1i64: 5438; EG: ; %bb.0: 5439; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5440; EG-NEXT: TEX 0 @6 5441; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5442; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5443; EG-NEXT: CF_END 5444; EG-NEXT: PAD 5445; EG-NEXT: Fetch clause starting at 6: 5446; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 5447; EG-NEXT: ALU clause starting at 8: 5448; EG-NEXT: MOV * T0.X, KC0[2].Z, 5449; EG-NEXT: ALU clause starting at 9: 5450; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5451; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5452; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45) 5453; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5454; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5455; 5456; GFX12-LABEL: constant_sextload_v1i8_to_v1i64: 5457; GFX12: ; %bb.0: 5458; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5459; GFX12-NEXT: v_mov_b32_e32 v2, 0 5460; GFX12-NEXT: s_wait_kmcnt 0x0 5461; GFX12-NEXT: global_load_i8 v0, v2, s[2:3] 5462; GFX12-NEXT: s_wait_loadcnt 0x0 5463; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16 5464; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 5465; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5466; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 5467; GFX12-NEXT: s_endpgm 5468 %load = load <1 x i8>, ptr addrspace(4) %in 5469 %ext = sext <1 x i8> %load to <1 x i64> 5470 store <1 x i64> %ext, ptr addrspace(1) %out 5471 ret void 5472} 5473 5474define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5475; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i64: 5476; GFX6-NOHSA: ; %bb.0: 5477; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5478; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 5479; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 5480; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 5481; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 5482; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5483; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 5484; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 5485; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5486; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5487; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 5488; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 5489; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 5490; GFX6-NOHSA-NEXT: v_lshrrev_b32_e32 v2, 8, v0 5491; GFX6-NOHSA-NEXT: v_and_b32_e32 v0, 0xff, v0 5492; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, v1 5493; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5494; GFX6-NOHSA-NEXT: s_endpgm 5495; 5496; GFX7-HSA-LABEL: constant_zextload_v2i8_to_v2i64: 5497; GFX7-HSA: ; %bb.0: 5498; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5499; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5500; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5501; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5502; GFX7-HSA-NEXT: flat_load_ushort v0, v[0:1] 5503; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 5504; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 5505; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 5506; GFX7-HSA-NEXT: v_mov_b32_e32 v3, v1 5507; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 5508; GFX7-HSA-NEXT: v_lshrrev_b32_e32 v2, 8, v0 5509; GFX7-HSA-NEXT: v_and_b32_e32 v0, 0xff, v0 5510; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5511; GFX7-HSA-NEXT: s_endpgm 5512; 5513; GFX8-NOHSA-LABEL: constant_zextload_v2i8_to_v2i64: 5514; GFX8-NOHSA: ; %bb.0: 5515; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5516; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, 8 5517; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5518; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5519; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5520; GFX8-NOHSA-NEXT: flat_load_ushort v0, v[0:1] 5521; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5522; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 5523; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 5524; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, v1 5525; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 5526; GFX8-NOHSA-NEXT: v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 5527; GFX8-NOHSA-NEXT: v_and_b32_e32 v0, 0xff, v0 5528; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5529; GFX8-NOHSA-NEXT: s_endpgm 5530; 5531; EG-LABEL: constant_zextload_v2i8_to_v2i64: 5532; EG: ; %bb.0: 5533; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 5534; EG-NEXT: TEX 0 @6 5535; EG-NEXT: ALU 14, @10, KC0[CB0:0-32], KC1[] 5536; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5537; EG-NEXT: CF_END 5538; EG-NEXT: PAD 5539; EG-NEXT: Fetch clause starting at 6: 5540; EG-NEXT: VTX_READ_16 T4.X, T4.X, 0, #1 5541; EG-NEXT: ALU clause starting at 8: 5542; EG-NEXT: MOV * T0.Y, T2.X, 5543; EG-NEXT: MOV * T4.X, KC0[2].Z, 5544; EG-NEXT: ALU clause starting at 10: 5545; EG-NEXT: AND_INT T0.W, T4.X, literal.x, 5546; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 5547; EG-NEXT: 65535(9.183409e-41), -65536(nan) 5548; EG-NEXT: OR_INT * T0.W, PS, PV.W, 5549; EG-NEXT: MOV * T2.X, PV.W, 5550; EG-NEXT: MOV T0.Y, PV.X, 5551; EG-NEXT: MOV * T1.W, literal.x, 5552; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 5553; EG-NEXT: BFE_UINT * T4.Z, PV.Y, literal.x, PV.W, 5554; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 5555; EG-NEXT: AND_INT T4.X, T0.W, literal.x, 5556; EG-NEXT: MOV T4.Y, 0.0, 5557; EG-NEXT: MOV T4.W, 0.0, 5558; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5559; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45) 5560; 5561; GFX12-LABEL: constant_zextload_v2i8_to_v2i64: 5562; GFX12: ; %bb.0: 5563; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5564; GFX12-NEXT: v_mov_b32_e32 v1, 0 5565; GFX12-NEXT: s_wait_kmcnt 0x0 5566; GFX12-NEXT: global_load_u16 v0, v1, s[2:3] 5567; GFX12-NEXT: s_wait_loadcnt 0x0 5568; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v0 5569; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xff, v0 5570; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 5571; GFX12-NEXT: v_lshrrev_b32_e32 v2, 8, v2 5572; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 5573; GFX12-NEXT: s_endpgm 5574 %load = load <2 x i8>, ptr addrspace(4) %in 5575 %ext = zext <2 x i8> %load to <2 x i64> 5576 store <2 x i64> %ext, ptr addrspace(1) %out 5577 ret void 5578} 5579 5580define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5581; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i64: 5582; GFX6-NOHSA: ; %bb.0: 5583; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5584; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 5585; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 5586; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 5587; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 5588; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5589; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 5590; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 5591; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5592; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 5593; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 5594; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 5595; GFX6-NOHSA-NEXT: v_lshrrev_b32_e32 v2, 8, v0 5596; GFX6-NOHSA-NEXT: v_bfe_i32 v0, v0, 0, 8 5597; GFX6-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5598; GFX6-NOHSA-NEXT: v_bfe_i32 v2, v2, 0, 8 5599; GFX6-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5600; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5601; GFX6-NOHSA-NEXT: s_endpgm 5602; 5603; GFX7-HSA-LABEL: constant_sextload_v2i8_to_v2i64: 5604; GFX7-HSA: ; %bb.0: 5605; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5606; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5607; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5608; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5609; GFX7-HSA-NEXT: flat_load_ushort v0, v[0:1] 5610; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 5611; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 5612; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 5613; GFX7-HSA-NEXT: v_lshrrev_b32_e32 v2, 8, v0 5614; GFX7-HSA-NEXT: v_bfe_i32 v0, v0, 0, 8 5615; GFX7-HSA-NEXT: v_bfe_i32 v2, v2, 0, 8 5616; GFX7-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5617; GFX7-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5618; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5619; GFX7-HSA-NEXT: s_endpgm 5620; 5621; GFX8-NOHSA-LABEL: constant_sextload_v2i8_to_v2i64: 5622; GFX8-NOHSA: ; %bb.0: 5623; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5624; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5625; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5626; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5627; GFX8-NOHSA-NEXT: flat_load_ushort v0, v[0:1] 5628; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 5629; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 5630; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 5631; GFX8-NOHSA-NEXT: v_lshrrev_b32_e32 v2, 8, v0 5632; GFX8-NOHSA-NEXT: v_bfe_i32 v0, v0, 0, 8 5633; GFX8-NOHSA-NEXT: v_bfe_i32 v2, v2, 0, 8 5634; GFX8-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5635; GFX8-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5636; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5637; GFX8-NOHSA-NEXT: s_endpgm 5638; 5639; EG-LABEL: constant_sextload_v2i8_to_v2i64: 5640; EG: ; %bb.0: 5641; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 5642; EG-NEXT: TEX 0 @6 5643; EG-NEXT: ALU 15, @10, KC0[CB0:0-32], KC1[] 5644; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5645; EG-NEXT: CF_END 5646; EG-NEXT: PAD 5647; EG-NEXT: Fetch clause starting at 6: 5648; EG-NEXT: VTX_READ_16 T4.X, T4.X, 0, #1 5649; EG-NEXT: ALU clause starting at 8: 5650; EG-NEXT: MOV * T0.Y, T2.X, 5651; EG-NEXT: MOV * T4.X, KC0[2].Z, 5652; EG-NEXT: ALU clause starting at 10: 5653; EG-NEXT: AND_INT T0.W, T4.X, literal.x, 5654; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 5655; EG-NEXT: 65535(9.183409e-41), -65536(nan) 5656; EG-NEXT: OR_INT * T0.W, PS, PV.W, 5657; EG-NEXT: MOV * T2.X, PV.W, 5658; EG-NEXT: MOV * T0.Y, PV.X, 5659; EG-NEXT: BFE_INT * T4.X, T0.W, 0.0, literal.x, 5660; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 5661; EG-NEXT: ASHR T4.Y, PV.X, literal.x, 5662; EG-NEXT: LSHR * T0.W, T0.Y, literal.y, 5663; EG-NEXT: 31(4.344025e-44), 8(1.121039e-44) 5664; EG-NEXT: BFE_INT * T4.Z, PV.W, 0.0, literal.x, 5665; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 5666; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 5667; EG-NEXT: ASHR * T4.W, PV.Z, literal.y, 5668; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5669; 5670; GFX12-LABEL: constant_sextload_v2i8_to_v2i64: 5671; GFX12: ; %bb.0: 5672; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5673; GFX12-NEXT: v_mov_b32_e32 v4, 0 5674; GFX12-NEXT: s_wait_kmcnt 0x0 5675; GFX12-NEXT: global_load_u16 v0, v4, s[2:3] 5676; GFX12-NEXT: s_wait_loadcnt 0x0 5677; GFX12-NEXT: v_lshrrev_b32_e32 v1, 8, v0 5678; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 8 5679; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5680; GFX12-NEXT: v_bfe_i32 v2, v1, 0, 8 5681; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5682; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 5683; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5684; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 5685; GFX12-NEXT: s_endpgm 5686 %load = load <2 x i8>, ptr addrspace(4) %in 5687 %ext = sext <2 x i8> %load to <2 x i64> 5688 store <2 x i64> %ext, ptr addrspace(1) %out 5689 ret void 5690} 5691 5692define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5693; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i64: 5694; GFX6-NOHSA: ; %bb.0: 5695; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5696; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5697; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 5698; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 5699; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5700; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 5701; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, v1 5702; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5703; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s4, 0x80008 5704; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s4, 24 5705; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s4, 0x80010 5706; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xff 5707; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 5708; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 5709; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5710; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 5711; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 5712; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 5713; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5714; GFX6-NOHSA-NEXT: s_endpgm 5715; 5716; GFX7-HSA-LABEL: constant_zextload_v4i8_to_v4i64: 5717; GFX7-HSA: ; %bb.0: 5718; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5719; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 5720; GFX7-HSA-NEXT: v_mov_b32_e32 v3, v1 5721; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5722; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 5723; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5724; GFX7-HSA-NEXT: s_bfe_u32 s4, s2, 0x80008 5725; GFX7-HSA-NEXT: s_lshr_b32 s3, s2, 24 5726; GFX7-HSA-NEXT: s_and_b32 s5, s2, 0xff 5727; GFX7-HSA-NEXT: s_bfe_u32 s2, s2, 0x80010 5728; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5729; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 5730; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 5731; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 5732; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 5733; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 5734; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5735; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 5736; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 5737; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 5738; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 5739; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5740; GFX7-HSA-NEXT: s_endpgm 5741; 5742; GFX8-NOHSA-LABEL: constant_zextload_v4i8_to_v4i64: 5743; GFX8-NOHSA: ; %bb.0: 5744; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5745; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5746; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, v1 5747; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5748; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 5749; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5750; GFX8-NOHSA-NEXT: s_lshr_b32 s3, s2, 24 5751; GFX8-NOHSA-NEXT: s_bfe_u32 s4, s2, 0x80008 5752; GFX8-NOHSA-NEXT: s_and_b32 s5, s2, 0xff 5753; GFX8-NOHSA-NEXT: s_bfe_u32 s2, s2, 0x80010 5754; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5755; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 5756; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s3 5757; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 5758; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 5759; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 5760; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5761; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 5762; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s5 5763; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 5764; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 5765; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5766; GFX8-NOHSA-NEXT: s_endpgm 5767; 5768; EG-LABEL: constant_zextload_v4i8_to_v4i64: 5769; EG: ; %bb.0: 5770; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5771; EG-NEXT: TEX 0 @6 5772; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 5773; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T7.X, 0 5774; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T6.X, 1 5775; EG-NEXT: CF_END 5776; EG-NEXT: Fetch clause starting at 6: 5777; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5778; EG-NEXT: ALU clause starting at 8: 5779; EG-NEXT: MOV * T4.X, KC0[2].Z, 5780; EG-NEXT: ALU clause starting at 9: 5781; EG-NEXT: MOV * T0.W, literal.x, 5782; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 5783; EG-NEXT: BFE_UINT T5.X, T4.X, literal.x, PV.W, 5784; EG-NEXT: LSHR * T5.Z, T4.X, literal.y, 5785; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 5786; EG-NEXT: MOV T5.Y, 0.0, 5787; EG-NEXT: BFE_UINT * T4.Z, T4.X, literal.x, T0.W, 5788; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 5789; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 5790; EG-NEXT: MOV T4.Y, 0.0, 5791; EG-NEXT: MOV T5.W, 0.0, 5792; EG-NEXT: MOV * T4.W, 0.0, 5793; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 5794; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5795; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5796; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5797; EG-NEXT: LSHR * T7.X, PV.W, literal.x, 5798; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5799; 5800; GFX12-LABEL: constant_zextload_v4i8_to_v4i64: 5801; GFX12: ; %bb.0: 5802; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5803; GFX12-NEXT: s_wait_kmcnt 0x0 5804; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 5805; GFX12-NEXT: s_wait_kmcnt 0x0 5806; GFX12-NEXT: s_bfe_u32 s3, s2, 0x80010 5807; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 5808; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3 5809; GFX12-NEXT: s_lshr_b32 s4, s2, 24 5810; GFX12-NEXT: s_bfe_u32 s3, s2, 0x80008 5811; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, v1 5812; GFX12-NEXT: s_and_b32 s2, s2, 0xff 5813; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 5814; GFX12-NEXT: s_wait_alu 0xfffe 5815; GFX12-NEXT: v_mov_b32_e32 v0, s2 5816; GFX12-NEXT: v_mov_b32_e32 v2, s3 5817; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 5818; GFX12-NEXT: s_endpgm 5819 %load = load <4 x i8>, ptr addrspace(4) %in 5820 %ext = zext <4 x i8> %load to <4 x i64> 5821 store <4 x i64> %ext, ptr addrspace(1) %out 5822 ret void 5823} 5824 5825define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5826; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i64: 5827; GFX6-NOHSA: ; %bb.0: 5828; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5829; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5830; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 5831; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 5832; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 5833; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5834; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s4, 16 5835; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s4, 24 5836; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s4, 8 5837; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 5838; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 5839; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 5840; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 5841; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 5842; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5 5843; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 5844; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s7 5845; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s8 5846; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s9 5847; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16 5848; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 5849; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10 5850; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s11 5851; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5852; GFX6-NOHSA-NEXT: s_endpgm 5853; 5854; GFX7-HSA-LABEL: constant_sextload_v4i8_to_v4i64: 5855; GFX7-HSA: ; %bb.0: 5856; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5857; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5858; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 5859; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 5860; GFX7-HSA-NEXT: s_lshr_b32 s4, s2, 16 5861; GFX7-HSA-NEXT: s_lshr_b32 s6, s2, 24 5862; GFX7-HSA-NEXT: s_lshr_b32 s8, s2, 8 5863; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000 5864; GFX7-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 5865; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 5866; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 5867; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 5868; GFX7-HSA-NEXT: s_add_u32 s4, s0, 16 5869; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5 5870; GFX7-HSA-NEXT: s_addc_u32 s5, s1, 0 5871; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4 5872; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 5873; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s7 5874; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5 5875; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5876; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 5877; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 5878; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 5879; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s8 5880; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s9 5881; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 5882; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5883; GFX7-HSA-NEXT: s_endpgm 5884; 5885; GFX8-NOHSA-LABEL: constant_sextload_v4i8_to_v4i64: 5886; GFX8-NOHSA: ; %bb.0: 5887; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5888; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5889; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 5890; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5891; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s2, 16 5892; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s2, 24 5893; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s2, 8 5894; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000 5895; GFX8-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 5896; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 5897; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 5898; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 5899; GFX8-NOHSA-NEXT: s_add_u32 s4, s0, 16 5900; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5 5901; GFX8-NOHSA-NEXT: s_addc_u32 s5, s1, 0 5902; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 5903; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 5904; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s7 5905; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5 5906; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5907; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 5908; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 5909; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 5910; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8 5911; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s9 5912; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 5913; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5914; GFX8-NOHSA-NEXT: s_endpgm 5915; 5916; EG-LABEL: constant_sextload_v4i8_to_v4i64: 5917; EG: ; %bb.0: 5918; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5919; EG-NEXT: TEX 0 @6 5920; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] 5921; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T7.X, 0 5922; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 5923; EG-NEXT: CF_END 5924; EG-NEXT: Fetch clause starting at 6: 5925; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5926; EG-NEXT: ALU clause starting at 8: 5927; EG-NEXT: MOV * T4.X, KC0[2].Z, 5928; EG-NEXT: ALU clause starting at 9: 5929; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 5930; EG-NEXT: ASHR T4.W, T4.X, literal.y, 5931; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.z, 5932; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 5933; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5934; EG-NEXT: ASHR T5.Y, PV.X, literal.x, 5935; EG-NEXT: ASHR T4.Z, T4.X, literal.y, 5936; EG-NEXT: LSHR T0.W, T4.X, literal.z, 5937; EG-NEXT: LSHR * T1.W, T4.X, literal.w, 5938; EG-NEXT: 31(4.344025e-44), 24(3.363116e-44) 5939; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 5940; EG-NEXT: BFE_INT T4.X, PS, 0.0, literal.x, 5941; EG-NEXT: BFE_INT T5.Z, PV.W, 0.0, literal.x, 5942; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5943; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 5944; EG-NEXT: LSHR T7.X, PV.W, literal.x, 5945; EG-NEXT: ASHR T4.Y, PV.X, literal.y, 5946; EG-NEXT: ASHR * T5.W, PV.Z, literal.y, 5947; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5948; 5949; GFX12-LABEL: constant_sextload_v4i8_to_v4i64: 5950; GFX12: ; %bb.0: 5951; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5952; GFX12-NEXT: s_wait_kmcnt 0x0 5953; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 5954; GFX12-NEXT: s_wait_kmcnt 0x0 5955; GFX12-NEXT: s_lshr_b32 s4, s2, 16 5956; GFX12-NEXT: s_lshr_b32 s6, s2, 24 5957; GFX12-NEXT: s_lshr_b32 s8, s2, 8 5958; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000 5959; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 5960; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 5961; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 5962; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s3 5963; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s5 5964; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s7 5965; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v3, s9 5966; GFX12-NEXT: v_mov_b32_e32 v2, s8 5967; GFX12-NEXT: s_clause 0x1 5968; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16 5969; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] 5970; GFX12-NEXT: s_endpgm 5971 %load = load <4 x i8>, ptr addrspace(4) %in 5972 %ext = sext <4 x i8> %load to <4 x i64> 5973 store <4 x i64> %ext, ptr addrspace(1) %out 5974 ret void 5975} 5976 5977define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5978; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i64: 5979; GFX6-NOHSA: ; %bb.0: 5980; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5981; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5982; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 5983; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 5984; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 5985; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 5986; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, v1 5987; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 5988; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s4, 24 5989; GFX6-NOHSA-NEXT: s_lshr_b32 s7, s5, 24 5990; GFX6-NOHSA-NEXT: s_bfe_u32 s8, s5, 0x80008 5991; GFX6-NOHSA-NEXT: s_bfe_u32 s9, s4, 0x80008 5992; GFX6-NOHSA-NEXT: s_and_b32 s10, s4, 0xff 5993; GFX6-NOHSA-NEXT: s_and_b32 s11, s5, 0xff 5994; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 5995; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 5996; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 5997; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 5998; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5999; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6000; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 6001; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 6002; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6003; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6004; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s11 6005; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s8 6006; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6007; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6008; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s10 6009; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s9 6010; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6011; GFX6-NOHSA-NEXT: s_endpgm 6012; 6013; GFX7-HSA-LABEL: constant_zextload_v8i8_to_v8i64: 6014; GFX7-HSA: ; %bb.0: 6015; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6016; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 6017; GFX7-HSA-NEXT: v_mov_b32_e32 v3, v1 6018; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6019; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 6020; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6021; GFX7-HSA-NEXT: s_lshr_b32 s4, s2, 24 6022; GFX7-HSA-NEXT: s_lshr_b32 s5, s3, 24 6023; GFX7-HSA-NEXT: s_bfe_u32 s6, s3, 0x80008 6024; GFX7-HSA-NEXT: s_bfe_u32 s7, s2, 0x80008 6025; GFX7-HSA-NEXT: s_and_b32 s8, s2, 0xff 6026; GFX7-HSA-NEXT: s_and_b32 s9, s3, 0xff 6027; GFX7-HSA-NEXT: s_bfe_u32 s10, s2, 0x80010 6028; GFX7-HSA-NEXT: s_bfe_u32 s2, s3, 0x80010 6029; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 6030; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 6031; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6032; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6033; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6034; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 6035; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 6036; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6037; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6038; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6039; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6040; GFX7-HSA-NEXT: s_add_u32 s2, s0, 32 6041; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s10 6042; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 6043; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6044; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6045; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6046; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s9 6047; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 6048; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6049; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6050; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 6051; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8 6052; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s7 6053; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 6054; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6055; GFX7-HSA-NEXT: s_endpgm 6056; 6057; GFX8-NOHSA-LABEL: constant_zextload_v8i8_to_v8i64: 6058; GFX8-NOHSA: ; %bb.0: 6059; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 6060; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0 6061; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, v1 6062; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6063; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 6064; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6065; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s3, 24 6066; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s3, 0x80008 6067; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s2, 24 6068; GFX8-NOHSA-NEXT: s_bfe_u32 s7, s2, 0x80008 6069; GFX8-NOHSA-NEXT: s_and_b32 s8, s2, 0xff 6070; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s2, 0x80010 6071; GFX8-NOHSA-NEXT: s_and_b32 s10, s3, 0xff 6072; GFX8-NOHSA-NEXT: s_bfe_u32 s2, s3, 0x80010 6073; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 6074; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 6075; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6076; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6077; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6078; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 6079; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 6080; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6081; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6082; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6083; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6084; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 6085; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 6086; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s5 6087; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6088; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6089; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6090; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s9 6091; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 6092; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6093; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6094; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 6095; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 6096; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s7 6097; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 6098; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6099; GFX8-NOHSA-NEXT: s_endpgm 6100; 6101; EG-LABEL: constant_zextload_v8i8_to_v8i64: 6102; EG: ; %bb.0: 6103; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6104; EG-NEXT: TEX 0 @8 6105; EG-NEXT: ALU 34, @11, KC0[CB0:0-32], KC1[] 6106; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T12.X, 0 6107; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 0 6108; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 0 6109; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T9.X, 1 6110; EG-NEXT: CF_END 6111; EG-NEXT: Fetch clause starting at 8: 6112; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 6113; EG-NEXT: ALU clause starting at 10: 6114; EG-NEXT: MOV * T5.X, KC0[2].Z, 6115; EG-NEXT: ALU clause starting at 11: 6116; EG-NEXT: MOV * T0.W, literal.x, 6117; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6118; EG-NEXT: BFE_UINT T6.X, T5.Y, literal.x, PV.W, 6119; EG-NEXT: LSHR * T6.Z, T5.Y, literal.y, 6120; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 6121; EG-NEXT: MOV T6.Y, 0.0, 6122; EG-NEXT: BFE_UINT * T7.Z, T5.Y, literal.x, T0.W, 6123; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6124; EG-NEXT: AND_INT T7.X, T5.Y, literal.x, 6125; EG-NEXT: MOV * T7.Y, 0.0, 6126; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 6127; EG-NEXT: BFE_UINT T8.X, T5.X, literal.x, T0.W, 6128; EG-NEXT: LSHR * T8.Z, T5.X, literal.y, 6129; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 6130; EG-NEXT: MOV T8.Y, 0.0, 6131; EG-NEXT: BFE_UINT * T5.Z, T5.X, literal.x, T0.W, 6132; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6133; EG-NEXT: AND_INT T5.X, T5.X, literal.x, 6134; EG-NEXT: MOV T5.Y, 0.0, 6135; EG-NEXT: MOV T6.W, 0.0, 6136; EG-NEXT: MOV * T7.W, 0.0, 6137; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 6138; EG-NEXT: MOV T8.W, 0.0, 6139; EG-NEXT: MOV * T5.W, 0.0, 6140; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x, 6141; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6142; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6143; EG-NEXT: LSHR T10.X, PV.W, literal.x, 6144; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6145; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6146; EG-NEXT: LSHR T11.X, PV.W, literal.x, 6147; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6148; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6149; EG-NEXT: LSHR * T12.X, PV.W, literal.x, 6150; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6151; 6152; GFX12-LABEL: constant_zextload_v8i8_to_v8i64: 6153; GFX12: ; %bb.0: 6154; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 6155; GFX12-NEXT: s_wait_kmcnt 0x0 6156; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 6157; GFX12-NEXT: s_wait_kmcnt 0x0 6158; GFX12-NEXT: s_bfe_u32 s4, s3, 0x80010 6159; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) 6160; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4 6161; GFX12-NEXT: s_lshr_b32 s5, s3, 24 6162; GFX12-NEXT: s_bfe_u32 s4, s3, 0x80008 6163; GFX12-NEXT: s_wait_alu 0xfffe 6164; GFX12-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1 6165; GFX12-NEXT: s_and_b32 s3, s3, 0xff 6166; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 6167; GFX12-NEXT: v_mov_b32_e32 v0, s3 6168; GFX12-NEXT: v_mov_b32_e32 v2, s4 6169; GFX12-NEXT: s_lshr_b32 s3, s2, 24 6170; GFX12-NEXT: s_bfe_u32 s4, s2, 0x80010 6171; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32 6172; GFX12-NEXT: s_wait_alu 0xfffe 6173; GFX12-NEXT: v_mov_b32_e32 v0, s4 6174; GFX12-NEXT: v_mov_b32_e32 v2, s3 6175; GFX12-NEXT: s_bfe_u32 s3, s2, 0x80008 6176; GFX12-NEXT: s_and_b32 s2, s2, 0xff 6177; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 6178; GFX12-NEXT: s_wait_alu 0xfffe 6179; GFX12-NEXT: v_mov_b32_e32 v0, s2 6180; GFX12-NEXT: v_mov_b32_e32 v2, s3 6181; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 6182; GFX12-NEXT: s_endpgm 6183 %load = load <8 x i8>, ptr addrspace(4) %in 6184 %ext = zext <8 x i8> %load to <8 x i64> 6185 store <8 x i64> %ext, ptr addrspace(1) %out 6186 ret void 6187} 6188 6189define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 6190; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i64: 6191; GFX6-NOHSA: ; %bb.0: 6192; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 6193; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6194; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 6195; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 6196; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 6197; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6198; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s5, 16 6199; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s5, 8 6200; GFX6-NOHSA-NEXT: s_mov_b32 s10, s5 6201; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s4, 16 6202; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s4, 24 6203; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s4, 8 6204; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[4:5], 0x80000 6205; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 6206; GFX6-NOHSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 56 6207; GFX6-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 6208; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 6209; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 6210; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 6211; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 6212; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4 6213; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s5 6214; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s10 6215; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s11 6216; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s18 6217; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s19 6218; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 6219; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s7 6220; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6221; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s8 6222; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s9 6223; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 6224; GFX6-NOHSA-NEXT: s_waitcnt expcnt(1) 6225; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s12 6226; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s13 6227; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s14 6228; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s15 6229; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6230; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s16 6231; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s17 6232; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 6233; GFX6-NOHSA-NEXT: s_endpgm 6234; 6235; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i64: 6236; GFX7-HSA: ; %bb.0: 6237; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6238; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6239; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 6240; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6241; GFX7-HSA-NEXT: s_lshr_b32 s4, s3, 16 6242; GFX7-HSA-NEXT: s_lshr_b32 s6, s3, 8 6243; GFX7-HSA-NEXT: s_mov_b32 s8, s3 6244; GFX7-HSA-NEXT: s_lshr_b32 s10, s2, 16 6245; GFX7-HSA-NEXT: s_lshr_b32 s12, s2, 24 6246; GFX7-HSA-NEXT: s_lshr_b32 s14, s2, 8 6247; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000 6248; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 6249; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 6250; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 6251; GFX7-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 6252; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 6253; GFX7-HSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 56 6254; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 6255; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 6256; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 6257; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s3 6258; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6259; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6260; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6261; GFX7-HSA-NEXT: s_add_u32 s2, s0, 32 6262; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 6263; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5 6264; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6265; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6266; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6267; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6268; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 6269; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8 6270; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9 6271; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 6272; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s7 6273; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6274; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6275; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6276; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s10 6277; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s11 6278; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s12 6279; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s13 6280; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6281; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6282; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 6283; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s16 6284; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17 6285; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s14 6286; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s15 6287; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 6288; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6289; GFX7-HSA-NEXT: s_endpgm 6290; 6291; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i64: 6292; GFX8-NOHSA: ; %bb.0: 6293; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 6294; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6295; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 6296; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6297; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s3, 16 6298; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s3, 8 6299; GFX8-NOHSA-NEXT: s_mov_b32 s8, s3 6300; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s2, 16 6301; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s2, 24 6302; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s2, 8 6303; GFX8-NOHSA-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000 6304; GFX8-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 6305; GFX8-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 6306; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 6307; GFX8-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 6308; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 6309; GFX8-NOHSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 56 6310; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 6311; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 6312; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 6313; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s3 6314; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6315; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6316; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6317; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 6318; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 6319; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5 6320; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6321; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6322; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6323; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6324; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 6325; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 6326; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s9 6327; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 6328; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s7 6329; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6330; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6331; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6332; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 6333; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s11 6334; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 6335; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s13 6336; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6337; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6338; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 6339; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s16 6340; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17 6341; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14 6342; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s15 6343; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 6344; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6345; GFX8-NOHSA-NEXT: s_endpgm 6346; 6347; EG-LABEL: constant_sextload_v8i8_to_v8i64: 6348; EG: ; %bb.0: 6349; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6350; EG-NEXT: TEX 0 @8 6351; EG-NEXT: ALU 39, @11, KC0[CB0:0-32], KC1[] 6352; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T12.X, 0 6353; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T9.X, 0 6354; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 0 6355; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T6.X, 1 6356; EG-NEXT: CF_END 6357; EG-NEXT: Fetch clause starting at 8: 6358; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 6359; EG-NEXT: ALU clause starting at 10: 6360; EG-NEXT: MOV * T5.X, KC0[2].Z, 6361; EG-NEXT: ALU clause starting at 11: 6362; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, 6363; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6364; EG-NEXT: BFE_INT T7.X, T5.Y, 0.0, literal.x, 6365; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6366; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 6367; EG-NEXT: LSHR T8.X, PV.W, literal.x, 6368; EG-NEXT: ASHR T7.Y, PV.X, literal.y, 6369; EG-NEXT: LSHR T0.W, T5.Y, literal.z, 6370; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 6371; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6372; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44) 6373; EG-NEXT: LSHR T9.X, PS, literal.x, 6374; EG-NEXT: BFE_INT T7.Z, PV.W, 0.0, literal.y, 6375; EG-NEXT: ASHR * T10.W, T5.X, literal.z, 6376; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 6377; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6378; EG-NEXT: BFE_INT T11.X, T5.X, 0.0, literal.x, 6379; EG-NEXT: ASHR T10.Z, T5.X, literal.y, 6380; EG-NEXT: LSHR T0.W, T5.X, literal.z, 6381; EG-NEXT: ASHR * T5.W, T5.Y, literal.w, 6382; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 6383; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6384; EG-NEXT: BFE_INT T10.X, PV.W, 0.0, literal.x, 6385; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 6386; EG-NEXT: ASHR T5.Z, T5.Y, literal.z, 6387; EG-NEXT: LSHR T0.W, T5.X, literal.x, 6388; EG-NEXT: LSHR * T1.W, T5.Y, literal.w, 6389; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 6390; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 6391; EG-NEXT: BFE_INT T5.X, PS, 0.0, literal.x, 6392; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 6393; EG-NEXT: BFE_INT T11.Z, PV.W, 0.0, literal.x, 6394; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6395; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 6396; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6397; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6398; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 6399; EG-NEXT: ASHR T11.W, PV.Z, literal.y, 6400; EG-NEXT: ASHR * T7.W, T7.Z, literal.y, 6401; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6402; 6403; GFX12-LABEL: constant_sextload_v8i8_to_v8i64: 6404; GFX12: ; %bb.0: 6405; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 6406; GFX12-NEXT: s_wait_kmcnt 0x0 6407; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 6408; GFX12-NEXT: s_wait_kmcnt 0x0 6409; GFX12-NEXT: s_lshr_b32 s4, s3, 16 6410; GFX12-NEXT: s_lshr_b32 s6, s3, 8 6411; GFX12-NEXT: s_mov_b32 s8, s3 6412; GFX12-NEXT: s_lshr_b32 s10, s2, 16 6413; GFX12-NEXT: s_lshr_b32 s12, s2, 24 6414; GFX12-NEXT: s_lshr_b32 s14, s2, 8 6415; GFX12-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000 6416; GFX12-NEXT: s_ashr_i64 s[2:3], s[2:3], 56 6417; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000 6418; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 6419; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000 6420; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v3, s3 6421; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 6422; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 6423; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s17 6424; GFX12-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s5 6425; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v9, s9 6426; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 6427; GFX12-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v11, s7 6428; GFX12-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v13, s11 6429; GFX12-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v15, s13 6430; GFX12-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v7, s15 6431; GFX12-NEXT: v_mov_b32_e32 v6, s14 6432; GFX12-NEXT: s_clause 0x3 6433; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 6434; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:32 6435; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] offset:16 6436; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] 6437; GFX12-NEXT: s_endpgm 6438 %load = load <8 x i8>, ptr addrspace(4) %in 6439 %ext = sext <8 x i8> %load to <8 x i64> 6440 store <8 x i64> %ext, ptr addrspace(1) %out 6441 ret void 6442} 6443 6444define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 6445; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i64: 6446; GFX6-NOHSA: ; %bb.0: 6447; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 6448; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6449; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 6450; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 6451; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 6452; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 6453; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, v1 6454; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6455; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s5, 24 6456; GFX6-NOHSA-NEXT: s_lshr_b32 s9, s4, 24 6457; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s7, 24 6458; GFX6-NOHSA-NEXT: s_lshr_b32 s11, s6, 24 6459; GFX6-NOHSA-NEXT: s_bfe_u32 s12, s6, 0x80008 6460; GFX6-NOHSA-NEXT: s_bfe_u32 s13, s7, 0x80008 6461; GFX6-NOHSA-NEXT: s_bfe_u32 s14, s4, 0x80008 6462; GFX6-NOHSA-NEXT: s_bfe_u32 s15, s5, 0x80008 6463; GFX6-NOHSA-NEXT: s_and_b32 s16, s5, 0xff 6464; GFX6-NOHSA-NEXT: s_and_b32 s17, s4, 0xff 6465; GFX6-NOHSA-NEXT: s_and_b32 s18, s7, 0xff 6466; GFX6-NOHSA-NEXT: s_and_b32 s19, s6, 0xff 6467; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 6468; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 6469; GFX6-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 6470; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 6471; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 6472; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s11 6473; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6474; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6475; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 6476; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10 6477; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6478; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6479; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 6480; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s9 6481; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6482; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6483; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 6484; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s8 6485; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6486; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6487; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s19 6488; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s12 6489; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 6490; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6491; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s18 6492; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s13 6493; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 6494; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6495; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s17 6496; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s14 6497; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6498; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6499; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s16 6500; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s15 6501; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6502; GFX6-NOHSA-NEXT: s_endpgm 6503; 6504; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i64: 6505; GFX7-HSA: ; %bb.0: 6506; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6507; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 6508; GFX7-HSA-NEXT: v_mov_b32_e32 v3, v1 6509; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6510; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 6511; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6512; GFX7-HSA-NEXT: s_lshr_b32 s8, s5, 24 6513; GFX7-HSA-NEXT: s_lshr_b32 s9, s4, 24 6514; GFX7-HSA-NEXT: s_lshr_b32 s10, s7, 24 6515; GFX7-HSA-NEXT: s_lshr_b32 s2, s6, 24 6516; GFX7-HSA-NEXT: s_bfe_u32 s11, s6, 0x80008 6517; GFX7-HSA-NEXT: s_bfe_u32 s12, s7, 0x80008 6518; GFX7-HSA-NEXT: s_bfe_u32 s13, s4, 0x80008 6519; GFX7-HSA-NEXT: s_bfe_u32 s14, s5, 0x80008 6520; GFX7-HSA-NEXT: s_and_b32 s15, s5, 0xff 6521; GFX7-HSA-NEXT: s_and_b32 s16, s4, 0xff 6522; GFX7-HSA-NEXT: s_and_b32 s17, s7, 0xff 6523; GFX7-HSA-NEXT: s_and_b32 s18, s6, 0xff 6524; GFX7-HSA-NEXT: s_bfe_u32 s5, s5, 0x80010 6525; GFX7-HSA-NEXT: s_bfe_u32 s4, s4, 0x80010 6526; GFX7-HSA-NEXT: s_bfe_u32 s7, s7, 0x80010 6527; GFX7-HSA-NEXT: s_bfe_u32 s3, s6, 0x80010 6528; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 6529; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x50 6530; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 6531; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6532; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6533; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6534; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x70 6535; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6536; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6537; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6538; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6539; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 6540; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s7 6541; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s10 6542; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6543; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6544; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6545; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6546; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48 6547; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 6548; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s9 6549; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6550; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6551; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6552; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6553; GFX7-HSA-NEXT: s_add_u32 s2, s0, 64 6554; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 6555; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s8 6556; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6557; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6558; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6559; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6560; GFX7-HSA-NEXT: s_add_u32 s2, s0, 0x60 6561; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s18 6562; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s11 6563; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 6564; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6565; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 6566; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s17 6567; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s12 6568; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 6569; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6570; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 6571; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 6572; GFX7-HSA-NEXT: s_add_u32 s0, s0, 32 6573; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s16 6574; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s13 6575; GFX7-HSA-NEXT: s_addc_u32 s1, s1, 0 6576; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6577; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 6578; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s15 6579; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s14 6580; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 6581; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6582; GFX7-HSA-NEXT: s_endpgm 6583; 6584; GFX8-NOHSA-LABEL: constant_zextload_v16i8_to_v16i64: 6585; GFX8-NOHSA: ; %bb.0: 6586; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 6587; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0 6588; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, v1 6589; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6590; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 6591; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6592; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s5, 24 6593; GFX8-NOHSA-NEXT: s_lshr_b32 s2, s7, 24 6594; GFX8-NOHSA-NEXT: s_lshr_b32 s9, s6, 24 6595; GFX8-NOHSA-NEXT: s_bfe_u32 s10, s6, 0x80008 6596; GFX8-NOHSA-NEXT: s_bfe_u32 s11, s7, 0x80008 6597; GFX8-NOHSA-NEXT: s_bfe_u32 s12, s5, 0x80008 6598; GFX8-NOHSA-NEXT: s_lshr_b32 s13, s4, 24 6599; GFX8-NOHSA-NEXT: s_bfe_u32 s14, s4, 0x80008 6600; GFX8-NOHSA-NEXT: s_and_b32 s15, s4, 0xff 6601; GFX8-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 6602; GFX8-NOHSA-NEXT: s_and_b32 s16, s5, 0xff 6603; GFX8-NOHSA-NEXT: s_and_b32 s17, s7, 0xff 6604; GFX8-NOHSA-NEXT: s_and_b32 s18, s6, 0xff 6605; GFX8-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 6606; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 6607; GFX8-NOHSA-NEXT: s_bfe_u32 s3, s7, 0x80010 6608; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 6609; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x70 6610; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s3 6611; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6612; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6613; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6614; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48 6615; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6616; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6617; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6618; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6619; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x50 6620; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s5 6621; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8 6622; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6623; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6624; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6625; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6626; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 64 6627; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 6628; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s9 6629; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6630; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6631; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6632; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6633; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 0x60 6634; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s18 6635; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s10 6636; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6637; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6638; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6639; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6640; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 32 6641; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s17 6642; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s11 6643; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6644; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6645; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6646; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6647; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 6648; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s16 6649; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 6650; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 6651; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6652; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 6653; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 6654; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s13 6655; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 6656; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6657; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 6658; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s15 6659; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14 6660; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 6661; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6662; GFX8-NOHSA-NEXT: s_endpgm 6663; 6664; EG-LABEL: constant_zextload_v16i8_to_v16i64: 6665; EG: ; %bb.0: 6666; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] 6667; EG-NEXT: TEX 0 @12 6668; EG-NEXT: ALU 68, @15, KC0[CB0:0-32], KC1[] 6669; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T22.X, 0 6670; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T21.X, 0 6671; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T20.X, 0 6672; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 0 6673; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T18.X, 0 6674; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T17.X, 0 6675; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 6676; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T15.X, 1 6677; EG-NEXT: CF_END 6678; EG-NEXT: Fetch clause starting at 12: 6679; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6680; EG-NEXT: ALU clause starting at 14: 6681; EG-NEXT: MOV * T7.X, KC0[2].Z, 6682; EG-NEXT: ALU clause starting at 15: 6683; EG-NEXT: MOV * T0.W, literal.x, 6684; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6685; EG-NEXT: BFE_UINT T8.X, T7.W, literal.x, PV.W, 6686; EG-NEXT: LSHR * T8.Z, T7.W, literal.y, 6687; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 6688; EG-NEXT: MOV T8.Y, 0.0, 6689; EG-NEXT: BFE_UINT * T9.Z, T7.W, literal.x, T0.W, 6690; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6691; EG-NEXT: AND_INT T9.X, T7.W, literal.x, 6692; EG-NEXT: MOV * T9.Y, 0.0, 6693; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 6694; EG-NEXT: BFE_UINT T10.X, T7.Z, literal.x, T0.W, 6695; EG-NEXT: LSHR * T10.Z, T7.Z, literal.y, 6696; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 6697; EG-NEXT: MOV T10.Y, 0.0, 6698; EG-NEXT: BFE_UINT * T11.Z, T7.Z, literal.x, T0.W, 6699; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6700; EG-NEXT: AND_INT T11.X, T7.Z, literal.x, 6701; EG-NEXT: MOV * T11.Y, 0.0, 6702; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 6703; EG-NEXT: BFE_UINT T12.X, T7.Y, literal.x, T0.W, 6704; EG-NEXT: LSHR * T12.Z, T7.Y, literal.y, 6705; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 6706; EG-NEXT: MOV T12.Y, 0.0, 6707; EG-NEXT: BFE_UINT * T13.Z, T7.Y, literal.x, T0.W, 6708; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6709; EG-NEXT: AND_INT T13.X, T7.Y, literal.x, 6710; EG-NEXT: MOV * T13.Y, 0.0, 6711; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 6712; EG-NEXT: BFE_UINT T14.X, T7.X, literal.x, T0.W, 6713; EG-NEXT: LSHR * T14.Z, T7.X, literal.y, 6714; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 6715; EG-NEXT: MOV T14.Y, 0.0, 6716; EG-NEXT: BFE_UINT * T7.Z, T7.X, literal.x, T0.W, 6717; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 6718; EG-NEXT: AND_INT T7.X, T7.X, literal.x, 6719; EG-NEXT: MOV T7.Y, 0.0, 6720; EG-NEXT: MOV T8.W, 0.0, 6721; EG-NEXT: MOV * T9.W, 0.0, 6722; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 6723; EG-NEXT: MOV T10.W, 0.0, 6724; EG-NEXT: MOV * T11.W, 0.0, 6725; EG-NEXT: MOV T12.W, 0.0, 6726; EG-NEXT: MOV * T13.W, 0.0, 6727; EG-NEXT: MOV T14.W, 0.0, 6728; EG-NEXT: MOV * T7.W, 0.0, 6729; EG-NEXT: LSHR T15.X, KC0[2].Y, literal.x, 6730; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6731; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6732; EG-NEXT: LSHR T16.X, PV.W, literal.x, 6733; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6734; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6735; EG-NEXT: LSHR T17.X, PV.W, literal.x, 6736; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6737; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6738; EG-NEXT: LSHR T18.X, PV.W, literal.x, 6739; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6740; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6741; EG-NEXT: LSHR T19.X, PV.W, literal.x, 6742; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6743; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6744; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6745; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6746; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6747; EG-NEXT: LSHR T21.X, PV.W, literal.x, 6748; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6749; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6750; EG-NEXT: LSHR * T22.X, PV.W, literal.x, 6751; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6752; 6753; GFX12-LABEL: constant_zextload_v16i8_to_v16i64: 6754; GFX12: ; %bb.0: 6755; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 6756; GFX12-NEXT: s_wait_kmcnt 0x0 6757; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 6758; GFX12-NEXT: s_wait_kmcnt 0x0 6759; GFX12-NEXT: s_bfe_u32 s2, s7, 0x80010 6760; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) 6761; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 6762; GFX12-NEXT: s_lshr_b32 s3, s7, 24 6763; GFX12-NEXT: s_lshr_b32 s2, s5, 24 6764; GFX12-NEXT: s_wait_alu 0xfffe 6765; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, v1 6766; GFX12-NEXT: s_bfe_u32 s3, s5, 0x80010 6767; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112 6768; GFX12-NEXT: s_wait_alu 0xfffe 6769; GFX12-NEXT: v_mov_b32_e32 v0, s3 6770; GFX12-NEXT: v_mov_b32_e32 v2, s2 6771; GFX12-NEXT: s_lshr_b32 s2, s6, 24 6772; GFX12-NEXT: s_bfe_u32 s3, s6, 0x80010 6773; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 6774; GFX12-NEXT: s_wait_alu 0xfffe 6775; GFX12-NEXT: v_mov_b32_e32 v0, s3 6776; GFX12-NEXT: v_mov_b32_e32 v2, s2 6777; GFX12-NEXT: s_bfe_u32 s2, s6, 0x80008 6778; GFX12-NEXT: s_and_b32 s3, s6, 0xff 6779; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80 6780; GFX12-NEXT: s_wait_alu 0xfffe 6781; GFX12-NEXT: v_mov_b32_e32 v0, s3 6782; GFX12-NEXT: v_mov_b32_e32 v2, s2 6783; GFX12-NEXT: s_bfe_u32 s2, s7, 0x80008 6784; GFX12-NEXT: s_and_b32 s3, s7, 0xff 6785; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64 6786; GFX12-NEXT: s_wait_alu 0xfffe 6787; GFX12-NEXT: v_mov_b32_e32 v0, s3 6788; GFX12-NEXT: v_mov_b32_e32 v2, s2 6789; GFX12-NEXT: s_bfe_u32 s2, s5, 0x80008 6790; GFX12-NEXT: s_and_b32 s3, s5, 0xff 6791; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96 6792; GFX12-NEXT: s_wait_alu 0xfffe 6793; GFX12-NEXT: v_mov_b32_e32 v0, s3 6794; GFX12-NEXT: v_mov_b32_e32 v2, s2 6795; GFX12-NEXT: s_lshr_b32 s2, s4, 24 6796; GFX12-NEXT: s_bfe_u32 s3, s4, 0x80010 6797; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32 6798; GFX12-NEXT: s_wait_alu 0xfffe 6799; GFX12-NEXT: v_mov_b32_e32 v0, s3 6800; GFX12-NEXT: v_mov_b32_e32 v2, s2 6801; GFX12-NEXT: s_bfe_u32 s2, s4, 0x80008 6802; GFX12-NEXT: s_and_b32 s3, s4, 0xff 6803; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 6804; GFX12-NEXT: s_wait_alu 0xfffe 6805; GFX12-NEXT: v_mov_b32_e32 v0, s3 6806; GFX12-NEXT: v_mov_b32_e32 v2, s2 6807; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 6808; GFX12-NEXT: s_endpgm 6809 %load = load <16 x i8>, ptr addrspace(4) %in 6810 %ext = zext <16 x i8> %load to <16 x i64> 6811 store <16 x i64> %ext, ptr addrspace(1) %out 6812 ret void 6813} 6814 6815define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 6816; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i64: 6817; GFX6-NOHSA: ; %bb.0: 6818; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 6819; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6820; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 6821; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 6822; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 6823; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 6824; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s11, 16 6825; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s11, 8 6826; GFX6-NOHSA-NEXT: s_mov_b32 s4, s11 6827; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s10, 16 6828; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s10, 24 6829; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s10, 8 6830; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s9, 16 6831; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s9, 8 6832; GFX6-NOHSA-NEXT: s_mov_b32 s26, s9 6833; GFX6-NOHSA-NEXT: s_lshr_b32 s28, s8, 16 6834; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s8, 24 6835; GFX6-NOHSA-NEXT: s_lshr_b32 s34, s8, 8 6836; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[8:9], 0x80000 6837; GFX6-NOHSA-NEXT: s_ashr_i64 s[36:37], s[8:9], 56 6838; GFX6-NOHSA-NEXT: s_bfe_i64 s[38:39], s[10:11], 0x80000 6839; GFX6-NOHSA-NEXT: s_ashr_i64 s[10:11], s[10:11], 56 6840; GFX6-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 6841; GFX6-NOHSA-NEXT: s_bfe_i64 s[40:41], s[4:5], 0x80000 6842; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[34:35], 0x80000 6843; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[30:31], 0x80000 6844; GFX6-NOHSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 6845; GFX6-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 6846; GFX6-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 6847; GFX6-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 6848; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 6849; GFX6-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 6850; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 6851; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 6852; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10 6853; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s11 6854; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s40 6855; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s41 6856; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s38 6857; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s39 6858; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s36 6859; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s37 6860; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s26 6861; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s27 6862; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s12 6863; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s13 6864; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6865; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6866; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 6867; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s7 6868; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s14 6869; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s15 6870; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96 6871; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s16 6872; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s17 6873; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6874; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s18 6875; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s19 6876; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80 6877; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s20 6878; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s21 6879; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64 6880; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6881; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s22 6882; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s23 6883; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:48 6884; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s24 6885; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s25 6886; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32 6887; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s28 6888; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s29 6889; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s8 6890; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s9 6891; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16 6892; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 6893; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4 6894; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s5 6895; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6896; GFX6-NOHSA-NEXT: s_endpgm 6897; 6898; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i64: 6899; GFX7-HSA: ; %bb.0: 6900; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6901; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6902; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 6903; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 6904; GFX7-HSA-NEXT: s_lshr_b32 s8, s7, 16 6905; GFX7-HSA-NEXT: s_lshr_b32 s10, s7, 8 6906; GFX7-HSA-NEXT: s_mov_b32 s12, s7 6907; GFX7-HSA-NEXT: s_lshr_b32 s14, s6, 16 6908; GFX7-HSA-NEXT: s_lshr_b32 s16, s6, 24 6909; GFX7-HSA-NEXT: s_lshr_b32 s18, s6, 8 6910; GFX7-HSA-NEXT: s_lshr_b32 s20, s5, 16 6911; GFX7-HSA-NEXT: s_lshr_b32 s22, s5, 8 6912; GFX7-HSA-NEXT: s_mov_b32 s24, s5 6913; GFX7-HSA-NEXT: s_lshr_b32 s26, s4, 16 6914; GFX7-HSA-NEXT: s_lshr_b32 s28, s4, 24 6915; GFX7-HSA-NEXT: s_lshr_b32 s30, s4, 8 6916; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000 6917; GFX7-HSA-NEXT: s_ashr_i64 s[34:35], s[4:5], 56 6918; GFX7-HSA-NEXT: s_bfe_i64 s[36:37], s[6:7], 0x80000 6919; GFX7-HSA-NEXT: s_ashr_i64 s[4:5], s[6:7], 56 6920; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[8:9], 0x80000 6921; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 6922; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s7 6923; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 6924; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s5 6925; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[30:31], 0x80000 6926; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x80000 6927; GFX7-HSA-NEXT: s_bfe_i64 s[8:9], s[26:27], 0x80000 6928; GFX7-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 6929; GFX7-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 6930; GFX7-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 6931; GFX7-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 6932; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 6933; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 6934; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 6935; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 6936; GFX7-HSA-NEXT: s_add_u32 s26, s0, 0x70 6937; GFX7-HSA-NEXT: s_addc_u32 s27, s1, 0 6938; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s10 6939; GFX7-HSA-NEXT: s_add_u32 s10, s0, 0x60 6940; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s26 6941; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s11 6942; GFX7-HSA-NEXT: s_addc_u32 s11, s1, 0 6943; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s10 6944; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s27 6945; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s12 6946; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s13 6947; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s11 6948; GFX7-HSA-NEXT: s_add_u32 s10, s0, 0x50 6949; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 6950; GFX7-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 6951; GFX7-HSA-NEXT: s_addc_u32 s11, s1, 0 6952; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s10 6953; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s14 6954; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s15 6955; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s16 6956; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s17 6957; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s11 6958; GFX7-HSA-NEXT: s_add_u32 s10, s0, 64 6959; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6960; GFX7-HSA-NEXT: s_addc_u32 s11, s1, 0 6961; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s10 6962; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s36 6963; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s37 6964; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s18 6965; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s19 6966; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s11 6967; GFX7-HSA-NEXT: s_add_u32 s10, s0, 48 6968; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6969; GFX7-HSA-NEXT: s_addc_u32 s11, s1, 0 6970; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s10 6971; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s20 6972; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s21 6973; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s34 6974; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s35 6975; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s11 6976; GFX7-HSA-NEXT: s_add_u32 s10, s0, 32 6977; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6978; GFX7-HSA-NEXT: s_addc_u32 s11, s1, 0 6979; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s10 6980; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s24 6981; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s25 6982; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s22 6983; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s23 6984; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s11 6985; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6986; GFX7-HSA-NEXT: s_nop 0 6987; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s6 6988; GFX7-HSA-NEXT: s_add_u32 s6, s0, 16 6989; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s7 6990; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 6991; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 6992; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8 6993; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9 6994; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 6995; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6996; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 6997; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 6998; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 6999; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 7000; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s5 7001; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 7002; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7003; GFX7-HSA-NEXT: s_endpgm 7004; 7005; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i64: 7006; GFX8-NOHSA: ; %bb.0: 7007; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7008; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7009; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 7010; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7011; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s7, 16 7012; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s7, 8 7013; GFX8-NOHSA-NEXT: s_mov_b32 s14, s7 7014; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s6, 16 7015; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s6, 24 7016; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s6, 8 7017; GFX8-NOHSA-NEXT: s_lshr_b32 s22, s5, 16 7018; GFX8-NOHSA-NEXT: s_lshr_b32 s24, s5, 8 7019; GFX8-NOHSA-NEXT: s_mov_b32 s26, s5 7020; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s4, 16 7021; GFX8-NOHSA-NEXT: s_lshr_b32 s28, s4, 24 7022; GFX8-NOHSA-NEXT: s_lshr_b32 s30, s4, 8 7023; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000 7024; GFX8-NOHSA-NEXT: s_ashr_i64 s[34:35], s[4:5], 56 7025; GFX8-NOHSA-NEXT: s_bfe_i64 s[36:37], s[6:7], 0x80000 7026; GFX8-NOHSA-NEXT: s_ashr_i64 s[38:39], s[6:7], 56 7027; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[30:31], 0x80000 7028; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x80000 7029; GFX8-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 7030; GFX8-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 7031; GFX8-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 7032; GFX8-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 7033; GFX8-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 7034; GFX8-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 7035; GFX8-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 7036; GFX8-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 7037; GFX8-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 7038; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 7039; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 7040; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 0x70 7041; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s11 7042; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0 7043; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10 7044; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s38 7045; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s39 7046; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11 7047; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 0x60 7048; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7049; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0 7050; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10 7051; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s14 7052; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s15 7053; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 7054; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s13 7055; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11 7056; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 0x50 7057; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7058; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0 7059; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10 7060; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s16 7061; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17 7062; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s18 7063; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s19 7064; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11 7065; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 64 7066; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7067; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0 7068; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10 7069; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s36 7070; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s37 7071; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s20 7072; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s21 7073; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11 7074; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 48 7075; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7076; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0 7077; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10 7078; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s22 7079; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23 7080; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s34 7081; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s35 7082; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11 7083; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 32 7084; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7085; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0 7086; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10 7087; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s26 7088; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s27 7089; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s24 7090; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s25 7091; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11 7092; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7093; GFX8-NOHSA-NEXT: s_nop 0 7094; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 7095; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 16 7096; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s7 7097; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7098; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7099; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 7100; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s9 7101; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7102; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7103; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 7104; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 7105; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 7106; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 7107; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s5 7108; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 7109; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7110; GFX8-NOHSA-NEXT: s_endpgm 7111; 7112; EG-LABEL: constant_sextload_v16i8_to_v16i64: 7113; EG: ; %bb.0: 7114; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] 7115; EG-NEXT: TEX 0 @12 7116; EG-NEXT: ALU 78, @15, KC0[CB0:0-32], KC1[] 7117; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T22.X, 0 7118; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T16.X, 0 7119; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T15.X, 0 7120; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0 7121; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T11.X, 0 7122; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T10.X, 0 7123; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T9.X, 0 7124; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T8.X, 1 7125; EG-NEXT: CF_END 7126; EG-NEXT: Fetch clause starting at 12: 7127; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 7128; EG-NEXT: ALU clause starting at 14: 7129; EG-NEXT: MOV * T7.X, KC0[2].Z, 7130; EG-NEXT: ALU clause starting at 15: 7131; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 7132; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7133; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7134; EG-NEXT: LSHR T9.X, PV.W, literal.x, 7135; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7136; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7137; EG-NEXT: LSHR T10.X, PV.W, literal.x, 7138; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7139; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7140; EG-NEXT: LSHR T11.X, PV.W, literal.x, 7141; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7142; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7143; EG-NEXT: LSHR * T12.X, PV.W, literal.x, 7144; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7145; EG-NEXT: BFE_INT * T13.X, T7.W, 0.0, literal.x, 7146; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7147; EG-NEXT: BFE_INT T14.X, T7.Y, 0.0, literal.x, 7148; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 7149; EG-NEXT: LSHR T0.W, T7.W, literal.x, 7150; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 7151; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 7152; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 7153; EG-NEXT: LSHR T15.X, PS, literal.x, 7154; EG-NEXT: ASHR T14.Y, PV.X, literal.y, 7155; EG-NEXT: BFE_INT T13.Z, PV.W, 0.0, literal.z, 7156; EG-NEXT: LSHR T0.W, T7.Y, literal.z, 7157; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 7158; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7159; EG-NEXT: 8(1.121039e-44), 96(1.345247e-43) 7160; EG-NEXT: LSHR T16.X, PS, literal.x, 7161; EG-NEXT: BFE_INT T14.Z, PV.W, 0.0, literal.y, 7162; EG-NEXT: ASHR * T17.W, T7.X, literal.z, 7163; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 7164; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7165; EG-NEXT: BFE_INT T18.X, T7.X, 0.0, literal.x, 7166; EG-NEXT: ASHR T17.Z, T7.X, literal.y, 7167; EG-NEXT: LSHR T0.W, T7.X, literal.z, 7168; EG-NEXT: ASHR * T19.W, T7.Y, literal.w, 7169; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 7170; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7171; EG-NEXT: BFE_INT T17.X, PV.W, 0.0, literal.x, 7172; EG-NEXT: ASHR T18.Y, PV.X, literal.y, 7173; EG-NEXT: ASHR T19.Z, T7.Y, literal.z, 7174; EG-NEXT: LSHR T0.W, T7.X, literal.x, 7175; EG-NEXT: LSHR * T1.W, T7.Y, literal.w, 7176; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 7177; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 7178; EG-NEXT: BFE_INT T19.X, PS, 0.0, literal.x, 7179; EG-NEXT: ASHR T17.Y, PV.X, literal.y, 7180; EG-NEXT: BFE_INT T18.Z, PV.W, 0.0, literal.x, 7181; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.z, 7182; EG-NEXT: ASHR * T20.W, T7.Z, literal.y, 7183; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 7184; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7185; EG-NEXT: BFE_INT T7.X, T7.Z, 0.0, literal.x, 7186; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7187; EG-NEXT: ASHR T20.Z, T7.Z, literal.z, 7188; EG-NEXT: LSHR T1.W, T7.Z, literal.w, 7189; EG-NEXT: ASHR * T21.W, T7.W, literal.y, 7190; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 7191; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 7192; EG-NEXT: BFE_INT T20.X, PV.W, 0.0, literal.x, 7193; EG-NEXT: ASHR T7.Y, PV.X, literal.y, 7194; EG-NEXT: ASHR T21.Z, T7.W, literal.z, 7195; EG-NEXT: LSHR T1.W, T7.Z, literal.x, 7196; EG-NEXT: LSHR * T2.W, T7.W, literal.w, 7197; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 7198; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 7199; EG-NEXT: BFE_INT T21.X, PS, 0.0, literal.x, 7200; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 7201; EG-NEXT: BFE_INT T7.Z, PV.W, 0.0, literal.x, 7202; EG-NEXT: ASHR T18.W, T18.Z, literal.y, 7203; EG-NEXT: ASHR * T14.W, T14.Z, literal.y, 7204; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 7205; EG-NEXT: LSHR T22.X, T0.W, literal.x, 7206; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 7207; EG-NEXT: ASHR T7.W, PV.Z, literal.y, 7208; EG-NEXT: ASHR * T13.W, T13.Z, literal.y, 7209; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7210; 7211; GFX12-LABEL: constant_sextload_v16i8_to_v16i64: 7212; GFX12: ; %bb.0: 7213; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7214; GFX12-NEXT: s_wait_kmcnt 0x0 7215; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 7216; GFX12-NEXT: s_wait_kmcnt 0x0 7217; GFX12-NEXT: s_lshr_b32 s2, s7, 16 7218; GFX12-NEXT: s_lshr_b32 s8, s7, 8 7219; GFX12-NEXT: s_mov_b32 s10, s7 7220; GFX12-NEXT: s_lshr_b32 s12, s6, 16 7221; GFX12-NEXT: s_lshr_b32 s14, s6, 24 7222; GFX12-NEXT: s_lshr_b32 s16, s6, 8 7223; GFX12-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000 7224; GFX12-NEXT: s_ashr_i64 s[6:7], s[6:7], 56 7225; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000 7226; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000 7227; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000 7228; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v3, s7 7229; GFX12-NEXT: s_lshr_b32 s18, s5, 16 7230; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 7231; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 7232; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v5, s35 7233; GFX12-NEXT: v_dual_mov_b32 v4, s34 :: v_dual_mov_b32 v1, s3 7234; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v9, s11 7235; GFX12-NEXT: s_lshr_b32 s20, s5, 8 7236; GFX12-NEXT: s_mov_b32 s22, s5 7237; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 7238; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s9 7239; GFX12-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v13, s13 7240; GFX12-NEXT: s_lshr_b32 s24, s4, 16 7241; GFX12-NEXT: s_lshr_b32 s26, s4, 24 7242; GFX12-NEXT: s_lshr_b32 s28, s4, 8 7243; GFX12-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x80000 7244; GFX12-NEXT: s_ashr_i64 s[4:5], s[4:5], 56 7245; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 7246; GFX12-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s15 7247; GFX12-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v7, s17 7248; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 7249; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 7250; GFX12-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v17, s19 7251; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 7252; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 7253; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s5 7254; GFX12-NEXT: v_mov_b32_e32 v18, s4 7255; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 7256; GFX12-NEXT: s_clause 0x1 7257; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112 7258; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:96 7259; GFX12-NEXT: v_dual_mov_b32 v1, s23 :: v_dual_mov_b32 v0, s22 7260; GFX12-NEXT: v_dual_mov_b32 v3, s21 :: v_dual_mov_b32 v2, s20 7261; GFX12-NEXT: v_dual_mov_b32 v9, s25 :: v_dual_mov_b32 v8, s24 7262; GFX12-NEXT: v_dual_mov_b32 v11, s27 :: v_dual_mov_b32 v10, s26 7263; GFX12-NEXT: v_dual_mov_b32 v21, s31 :: v_dual_mov_b32 v20, s30 7264; GFX12-NEXT: v_dual_mov_b32 v23, s29 :: v_dual_mov_b32 v22, s28 7265; GFX12-NEXT: s_clause 0x5 7266; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:80 7267; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64 7268; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:48 7269; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:32 7270; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:16 7271; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 7272; GFX12-NEXT: s_endpgm 7273 %load = load <16 x i8>, ptr addrspace(4) %in 7274 %ext = sext <16 x i8> %load to <16 x i64> 7275 store <16 x i64> %ext, ptr addrspace(1) %out 7276 ret void 7277} 7278 7279define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 7280; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i64: 7281; GFX6-NOHSA: ; %bb.0: 7282; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 7283; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7284; GFX6-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 7285; GFX6-NOHSA-NEXT: s_mov_b32 s11, 0xf000 7286; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, 0 7287; GFX6-NOHSA-NEXT: s_mov_b32 s10, -1 7288; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, v1 7289; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7290; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s0, 24 7291; GFX6-NOHSA-NEXT: s_lshr_b32 s13, s1, 24 7292; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s2, 24 7293; GFX6-NOHSA-NEXT: s_lshr_b32 s15, s3, 24 7294; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s4, 24 7295; GFX6-NOHSA-NEXT: s_lshr_b32 s17, s5, 24 7296; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s6, 24 7297; GFX6-NOHSA-NEXT: s_lshr_b32 s19, s7, 24 7298; GFX6-NOHSA-NEXT: s_bfe_u32 s20, s7, 0x80008 7299; GFX6-NOHSA-NEXT: s_bfe_u32 s21, s6, 0x80008 7300; GFX6-NOHSA-NEXT: s_bfe_u32 s22, s5, 0x80008 7301; GFX6-NOHSA-NEXT: s_bfe_u32 s23, s4, 0x80008 7302; GFX6-NOHSA-NEXT: s_bfe_u32 s24, s3, 0x80008 7303; GFX6-NOHSA-NEXT: s_bfe_u32 s25, s2, 0x80008 7304; GFX6-NOHSA-NEXT: s_bfe_u32 s26, s1, 0x80008 7305; GFX6-NOHSA-NEXT: s_bfe_u32 s27, s0, 0x80008 7306; GFX6-NOHSA-NEXT: s_and_b32 s28, s0, 0xff 7307; GFX6-NOHSA-NEXT: s_and_b32 s29, s1, 0xff 7308; GFX6-NOHSA-NEXT: s_and_b32 s30, s2, 0xff 7309; GFX6-NOHSA-NEXT: s_and_b32 s31, s3, 0xff 7310; GFX6-NOHSA-NEXT: s_and_b32 s33, s4, 0xff 7311; GFX6-NOHSA-NEXT: s_and_b32 s34, s5, 0xff 7312; GFX6-NOHSA-NEXT: s_and_b32 s35, s6, 0xff 7313; GFX6-NOHSA-NEXT: s_and_b32 s36, s7, 0xff 7314; GFX6-NOHSA-NEXT: s_bfe_u32 s0, s0, 0x80010 7315; GFX6-NOHSA-NEXT: s_bfe_u32 s1, s1, 0x80010 7316; GFX6-NOHSA-NEXT: s_bfe_u32 s2, s2, 0x80010 7317; GFX6-NOHSA-NEXT: s_bfe_u32 s3, s3, 0x80010 7318; GFX6-NOHSA-NEXT: s_bfe_u32 s4, s4, 0x80010 7319; GFX6-NOHSA-NEXT: s_bfe_u32 s5, s5, 0x80010 7320; GFX6-NOHSA-NEXT: s_bfe_u32 s7, s7, 0x80010 7321; GFX6-NOHSA-NEXT: s_bfe_u32 s6, s6, 0x80010 7322; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s7 7323; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s19 7324; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:240 7325; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7326; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 7327; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s18 7328; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:208 7329; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7330; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 7331; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s17 7332; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:176 7333; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7334; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 7335; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s16 7336; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:144 7337; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7338; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s3 7339; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s15 7340; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112 7341; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7342; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s2 7343; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s14 7344; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80 7345; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7346; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s1 7347; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s13 7348; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 7349; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7350; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s0 7351; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s12 7352; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 7353; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7354; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s36 7355; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s20 7356; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224 7357; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7358; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s35 7359; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s21 7360; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:192 7361; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7362; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s34 7363; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s22 7364; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:160 7365; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7366; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s33 7367; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s23 7368; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:128 7369; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7370; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s31 7371; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s24 7372; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96 7373; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7374; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s30 7375; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s25 7376; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64 7377; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7378; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s29 7379; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s26 7380; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 7381; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 7382; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s28 7383; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s27 7384; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 7385; GFX6-NOHSA-NEXT: s_endpgm 7386; 7387; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i64: 7388; GFX7-HSA: ; %bb.0: 7389; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 7390; GFX7-HSA-NEXT: v_mov_b32_e32 v1, 0 7391; GFX7-HSA-NEXT: v_mov_b32_e32 v3, v1 7392; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 7393; GFX7-HSA-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 7394; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 7395; GFX7-HSA-NEXT: s_lshr_b32 s16, s8, 24 7396; GFX7-HSA-NEXT: s_lshr_b32 s17, s9, 24 7397; GFX7-HSA-NEXT: s_lshr_b32 s18, s10, 24 7398; GFX7-HSA-NEXT: s_lshr_b32 s19, s11, 24 7399; GFX7-HSA-NEXT: s_lshr_b32 s20, s12, 24 7400; GFX7-HSA-NEXT: s_lshr_b32 s21, s13, 24 7401; GFX7-HSA-NEXT: s_lshr_b32 s22, s14, 24 7402; GFX7-HSA-NEXT: s_lshr_b32 s23, s15, 24 7403; GFX7-HSA-NEXT: s_bfe_u32 s24, s15, 0x80008 7404; GFX7-HSA-NEXT: s_bfe_u32 s25, s14, 0x80008 7405; GFX7-HSA-NEXT: s_bfe_u32 s26, s13, 0x80008 7406; GFX7-HSA-NEXT: s_bfe_u32 s27, s12, 0x80008 7407; GFX7-HSA-NEXT: s_bfe_u32 s28, s11, 0x80008 7408; GFX7-HSA-NEXT: s_bfe_u32 s29, s10, 0x80008 7409; GFX7-HSA-NEXT: s_bfe_u32 s4, s9, 0x80008 7410; GFX7-HSA-NEXT: s_bfe_u32 s2, s8, 0x80008 7411; GFX7-HSA-NEXT: s_and_b32 s3, s8, 0xff 7412; GFX7-HSA-NEXT: s_and_b32 s5, s9, 0xff 7413; GFX7-HSA-NEXT: s_and_b32 s30, s10, 0xff 7414; GFX7-HSA-NEXT: s_and_b32 s31, s11, 0xff 7415; GFX7-HSA-NEXT: s_and_b32 s33, s12, 0xff 7416; GFX7-HSA-NEXT: s_and_b32 s34, s13, 0xff 7417; GFX7-HSA-NEXT: s_and_b32 s35, s14, 0xff 7418; GFX7-HSA-NEXT: s_and_b32 s36, s15, 0xff 7419; GFX7-HSA-NEXT: s_bfe_u32 s8, s8, 0x80010 7420; GFX7-HSA-NEXT: s_bfe_u32 s9, s9, 0x80010 7421; GFX7-HSA-NEXT: s_bfe_u32 s10, s10, 0x80010 7422; GFX7-HSA-NEXT: s_bfe_u32 s11, s11, 0x80010 7423; GFX7-HSA-NEXT: s_bfe_u32 s12, s12, 0x80010 7424; GFX7-HSA-NEXT: s_bfe_u32 s13, s13, 0x80010 7425; GFX7-HSA-NEXT: s_bfe_u32 s14, s14, 0x80010 7426; GFX7-HSA-NEXT: s_bfe_u32 s15, s15, 0x80010 7427; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0xf0 7428; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7429; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7430; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7431; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0xd0 7432; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7433; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s6 7434; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s7 7435; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0xb0 7436; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7437; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s7 7438; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s6 7439; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0x90 7440; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7441; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s15 7442; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s23 7443; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s7 7444; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7445; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s6 7446; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s14 7447; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s22 7448; GFX7-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 7449; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0x70 7450; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s13 7451; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s21 7452; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 7453; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7454; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s12 7455; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s20 7456; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7457; GFX7-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 7458; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7459; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s11 7460; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s19 7461; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0x50 7462; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7463; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7464; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7465; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s10 7466; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s18 7467; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7468; GFX7-HSA-NEXT: s_add_u32 s6, s0, 48 7469; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7470; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7471; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7472; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s9 7473; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s17 7474; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7475; GFX7-HSA-NEXT: s_add_u32 s6, s0, 16 7476; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7477; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7478; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7479; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8 7480; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s16 7481; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7482; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0xe0 7483; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7484; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7485; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7486; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s36 7487; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s24 7488; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7489; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0xc0 7490; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7491; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7492; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7493; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s35 7494; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s25 7495; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7496; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0xa0 7497; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7498; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7499; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7500; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s34 7501; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s26 7502; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7503; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0x80 7504; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7505; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7506; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7507; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s33 7508; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s27 7509; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7510; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0x60 7511; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7512; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7513; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7514; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s31 7515; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s28 7516; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7517; GFX7-HSA-NEXT: s_add_u32 s6, s0, 64 7518; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7519; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0 7520; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6 7521; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s30 7522; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s29 7523; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7 7524; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7525; GFX7-HSA-NEXT: s_nop 0 7526; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 7527; GFX7-HSA-NEXT: s_add_u32 s4, s0, 32 7528; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 7529; GFX7-HSA-NEXT: s_addc_u32 s5, s1, 0 7530; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4 7531; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5 7532; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7533; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 7534; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 7535; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 7536; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 7537; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7538; GFX7-HSA-NEXT: s_endpgm 7539; 7540; GFX8-NOHSA-LABEL: constant_zextload_v32i8_to_v32i64: 7541; GFX8-NOHSA: ; %bb.0: 7542; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7543; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, 0 7544; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, v1 7545; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7546; GFX8-NOHSA-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 7547; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7548; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s9, 24 7549; GFX8-NOHSA-NEXT: s_lshr_b32 s17, s11, 24 7550; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s13, 24 7551; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s15, 24 7552; GFX8-NOHSA-NEXT: s_bfe_u32 s19, s15, 0x80008 7553; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s14, 24 7554; GFX8-NOHSA-NEXT: s_bfe_u32 s21, s14, 0x80008 7555; GFX8-NOHSA-NEXT: s_bfe_u32 s22, s13, 0x80008 7556; GFX8-NOHSA-NEXT: s_lshr_b32 s23, s12, 24 7557; GFX8-NOHSA-NEXT: s_bfe_u32 s24, s12, 0x80008 7558; GFX8-NOHSA-NEXT: s_bfe_u32 s25, s11, 0x80008 7559; GFX8-NOHSA-NEXT: s_lshr_b32 s26, s10, 24 7560; GFX8-NOHSA-NEXT: s_bfe_u32 s27, s10, 0x80008 7561; GFX8-NOHSA-NEXT: s_bfe_u32 s28, s9, 0x80008 7562; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s8, 24 7563; GFX8-NOHSA-NEXT: s_bfe_u32 s2, s8, 0x80008 7564; GFX8-NOHSA-NEXT: s_and_b32 s3, s8, 0xff 7565; GFX8-NOHSA-NEXT: s_bfe_u32 s5, s8, 0x80010 7566; GFX8-NOHSA-NEXT: s_and_b32 s8, s9, 0xff 7567; GFX8-NOHSA-NEXT: s_and_b32 s29, s10, 0xff 7568; GFX8-NOHSA-NEXT: s_bfe_u32 s10, s10, 0x80010 7569; GFX8-NOHSA-NEXT: s_and_b32 s30, s11, 0xff 7570; GFX8-NOHSA-NEXT: s_and_b32 s31, s12, 0xff 7571; GFX8-NOHSA-NEXT: s_bfe_u32 s12, s12, 0x80010 7572; GFX8-NOHSA-NEXT: s_and_b32 s33, s13, 0xff 7573; GFX8-NOHSA-NEXT: s_and_b32 s34, s14, 0xff 7574; GFX8-NOHSA-NEXT: s_bfe_u32 s14, s14, 0x80010 7575; GFX8-NOHSA-NEXT: s_and_b32 s35, s15, 0xff 7576; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s9, 0x80010 7577; GFX8-NOHSA-NEXT: s_bfe_u32 s11, s11, 0x80010 7578; GFX8-NOHSA-NEXT: s_bfe_u32 s13, s13, 0x80010 7579; GFX8-NOHSA-NEXT: s_bfe_u32 s7, s15, 0x80010 7580; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s6 7581; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0xf0 7582; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s7 7583; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7584; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7585; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7586; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0xb0 7587; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7588; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7589; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7590; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s13 7591; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s18 7592; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7593; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x70 7594; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7595; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7596; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7597; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s11 7598; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s17 7599; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7600; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 48 7601; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7602; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7603; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7604; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s9 7605; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s16 7606; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7607; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0xe0 7608; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7609; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7610; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7611; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s35 7612; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s19 7613; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7614; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0xd0 7615; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7616; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7617; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7618; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s14 7619; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s20 7620; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7621; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0xc0 7622; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7623; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7624; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7625; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s34 7626; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s21 7627; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7628; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0xa0 7629; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7630; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7631; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7632; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s33 7633; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s22 7634; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7635; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x90 7636; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7637; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7638; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7639; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s12 7640; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s23 7641; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7642; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x80 7643; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7644; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7645; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7646; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s31 7647; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s24 7648; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7649; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x60 7650; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7651; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7652; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7653; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s30 7654; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s25 7655; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7656; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x50 7657; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7658; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7659; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7660; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 7661; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s26 7662; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7663; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 64 7664; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7665; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7666; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7667; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s29 7668; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s27 7669; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7670; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 32 7671; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7672; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0 7673; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 7674; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8 7675; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s28 7676; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 7677; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7678; GFX8-NOHSA-NEXT: s_nop 0 7679; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4 7680; GFX8-NOHSA-NEXT: s_add_u32 s4, s0, 16 7681; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s5 7682; GFX8-NOHSA-NEXT: s_addc_u32 s5, s1, 0 7683; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 7684; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5 7685; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7686; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 7687; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s3 7688; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 7689; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 7690; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7691; GFX8-NOHSA-NEXT: s_endpgm 7692; 7693; EG-LABEL: constant_zextload_v32i8_to_v32i64: 7694; EG: ; %bb.0: 7695; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[] 7696; EG-NEXT: TEX 1 @22 7697; EG-NEXT: ALU 103, @27, KC0[CB0:0-32], KC1[] 7698; EG-NEXT: ALU 33, @131, KC0[CB0:0-32], KC1[] 7699; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T42.X, 0 7700; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T41.X, 0 7701; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T40.X, 0 7702; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T39.X, 0 7703; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T38.X, 0 7704; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T37.X, 0 7705; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T36.X, 0 7706; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T35.X, 0 7707; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T34.X, 0 7708; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T33.X, 0 7709; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0 7710; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T31.X, 0 7711; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T30.X, 0 7712; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T29.X, 0 7713; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T28.X, 0 7714; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T27.X, 1 7715; EG-NEXT: CF_END 7716; EG-NEXT: PAD 7717; EG-NEXT: Fetch clause starting at 22: 7718; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 7719; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 7720; EG-NEXT: ALU clause starting at 26: 7721; EG-NEXT: MOV * T11.X, KC0[2].Z, 7722; EG-NEXT: ALU clause starting at 27: 7723; EG-NEXT: MOV * T0.W, literal.x, 7724; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7725; EG-NEXT: BFE_UINT T13.X, T11.W, literal.x, PV.W, 7726; EG-NEXT: LSHR * T13.Z, T11.W, literal.y, 7727; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7728; EG-NEXT: MOV T13.Y, 0.0, 7729; EG-NEXT: BFE_UINT * T14.Z, T11.W, literal.x, T0.W, 7730; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7731; EG-NEXT: AND_INT T14.X, T11.W, literal.x, 7732; EG-NEXT: MOV * T14.Y, 0.0, 7733; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7734; EG-NEXT: BFE_UINT T15.X, T11.Z, literal.x, T0.W, 7735; EG-NEXT: LSHR * T15.Z, T11.Z, literal.y, 7736; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7737; EG-NEXT: MOV T15.Y, 0.0, 7738; EG-NEXT: BFE_UINT * T16.Z, T11.Z, literal.x, T0.W, 7739; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7740; EG-NEXT: AND_INT T16.X, T11.Z, literal.x, 7741; EG-NEXT: MOV * T16.Y, 0.0, 7742; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7743; EG-NEXT: BFE_UINT T17.X, T11.Y, literal.x, T0.W, 7744; EG-NEXT: LSHR * T17.Z, T11.Y, literal.y, 7745; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7746; EG-NEXT: MOV T17.Y, 0.0, 7747; EG-NEXT: BFE_UINT * T18.Z, T11.Y, literal.x, T0.W, 7748; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7749; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 7750; EG-NEXT: MOV * T18.Y, 0.0, 7751; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7752; EG-NEXT: BFE_UINT T19.X, T11.X, literal.x, T0.W, 7753; EG-NEXT: LSHR * T19.Z, T11.X, literal.y, 7754; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7755; EG-NEXT: MOV T19.Y, 0.0, 7756; EG-NEXT: BFE_UINT * T11.Z, T11.X, literal.x, T0.W, 7757; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7758; EG-NEXT: AND_INT T11.X, T11.X, literal.x, 7759; EG-NEXT: MOV * T11.Y, 0.0, 7760; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7761; EG-NEXT: BFE_UINT T20.X, T12.W, literal.x, T0.W, 7762; EG-NEXT: LSHR * T20.Z, T12.W, literal.y, 7763; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7764; EG-NEXT: MOV T20.Y, 0.0, 7765; EG-NEXT: BFE_UINT * T21.Z, T12.W, literal.x, T0.W, 7766; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7767; EG-NEXT: AND_INT T21.X, T12.W, literal.x, 7768; EG-NEXT: MOV * T21.Y, 0.0, 7769; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7770; EG-NEXT: BFE_UINT T22.X, T12.Z, literal.x, T0.W, 7771; EG-NEXT: LSHR * T22.Z, T12.Z, literal.y, 7772; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7773; EG-NEXT: MOV T22.Y, 0.0, 7774; EG-NEXT: BFE_UINT * T23.Z, T12.Z, literal.x, T0.W, 7775; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7776; EG-NEXT: AND_INT T23.X, T12.Z, literal.x, 7777; EG-NEXT: MOV * T23.Y, 0.0, 7778; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7779; EG-NEXT: BFE_UINT T24.X, T12.Y, literal.x, T0.W, 7780; EG-NEXT: LSHR * T24.Z, T12.Y, literal.y, 7781; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7782; EG-NEXT: MOV T24.Y, 0.0, 7783; EG-NEXT: BFE_UINT * T25.Z, T12.Y, literal.x, T0.W, 7784; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7785; EG-NEXT: AND_INT T25.X, T12.Y, literal.x, 7786; EG-NEXT: MOV * T25.Y, 0.0, 7787; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7788; EG-NEXT: BFE_UINT T26.X, T12.X, literal.x, T0.W, 7789; EG-NEXT: LSHR * T26.Z, T12.X, literal.y, 7790; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) 7791; EG-NEXT: MOV T26.Y, 0.0, 7792; EG-NEXT: BFE_UINT * T12.Z, T12.X, literal.x, T0.W, 7793; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 7794; EG-NEXT: AND_INT T12.X, T12.X, literal.x, 7795; EG-NEXT: MOV T12.Y, 0.0, 7796; EG-NEXT: MOV T13.W, 0.0, 7797; EG-NEXT: MOV * T14.W, 0.0, 7798; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) 7799; EG-NEXT: MOV T15.W, 0.0, 7800; EG-NEXT: MOV * T16.W, 0.0, 7801; EG-NEXT: MOV T17.W, 0.0, 7802; EG-NEXT: MOV * T18.W, 0.0, 7803; EG-NEXT: MOV T19.W, 0.0, 7804; EG-NEXT: MOV * T11.W, 0.0, 7805; EG-NEXT: MOV T20.W, 0.0, 7806; EG-NEXT: MOV * T21.W, 0.0, 7807; EG-NEXT: MOV T22.W, 0.0, 7808; EG-NEXT: MOV * T23.W, 0.0, 7809; EG-NEXT: MOV T24.W, 0.0, 7810; EG-NEXT: MOV * T25.W, 0.0, 7811; EG-NEXT: MOV T26.W, 0.0, 7812; EG-NEXT: MOV * T12.W, 0.0, 7813; EG-NEXT: LSHR T27.X, KC0[2].Y, literal.x, 7814; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7815; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7816; EG-NEXT: LSHR T28.X, PV.W, literal.x, 7817; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7818; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7819; EG-NEXT: LSHR T29.X, PV.W, literal.x, 7820; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7821; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7822; EG-NEXT: LSHR T30.X, PV.W, literal.x, 7823; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7824; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7825; EG-NEXT: LSHR * T31.X, PV.W, literal.x, 7826; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7827; EG-NEXT: ALU clause starting at 131: 7828; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7829; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 7830; EG-NEXT: LSHR T32.X, PV.W, literal.x, 7831; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7832; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7833; EG-NEXT: LSHR T33.X, PV.W, literal.x, 7834; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7835; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7836; EG-NEXT: LSHR T34.X, PV.W, literal.x, 7837; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7838; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7839; EG-NEXT: LSHR T35.X, PV.W, literal.x, 7840; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7841; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7842; EG-NEXT: LSHR T36.X, PV.W, literal.x, 7843; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7844; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7845; EG-NEXT: LSHR T37.X, PV.W, literal.x, 7846; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7847; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7848; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7849; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7850; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7851; EG-NEXT: LSHR T39.X, PV.W, literal.x, 7852; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7853; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7854; EG-NEXT: LSHR T40.X, PV.W, literal.x, 7855; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7856; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7857; EG-NEXT: LSHR T41.X, PV.W, literal.x, 7858; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7859; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7860; EG-NEXT: LSHR * T42.X, PV.W, literal.x, 7861; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7862; 7863; GFX12-LABEL: constant_zextload_v32i8_to_v32i64: 7864; GFX12: ; %bb.0: 7865; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 7866; GFX12-NEXT: s_wait_kmcnt 0x0 7867; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 7868; GFX12-NEXT: s_wait_kmcnt 0x0 7869; GFX12-NEXT: s_bfe_u32 s10, s7, 0x80010 7870; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) 7871; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s10 7872; GFX12-NEXT: s_lshr_b32 s11, s7, 24 7873; GFX12-NEXT: s_lshr_b32 s10, s5, 24 7874; GFX12-NEXT: s_wait_alu 0xfffe 7875; GFX12-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v3, v1 7876; GFX12-NEXT: s_bfe_u32 s11, s5, 0x80010 7877; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:240 7878; GFX12-NEXT: s_wait_alu 0xfffe 7879; GFX12-NEXT: v_mov_b32_e32 v0, s11 7880; GFX12-NEXT: v_mov_b32_e32 v2, s10 7881; GFX12-NEXT: s_lshr_b32 s10, s3, 24 7882; GFX12-NEXT: s_bfe_u32 s11, s3, 0x80010 7883; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:176 7884; GFX12-NEXT: s_wait_alu 0xfffe 7885; GFX12-NEXT: v_mov_b32_e32 v0, s11 7886; GFX12-NEXT: v_mov_b32_e32 v2, s10 7887; GFX12-NEXT: s_lshr_b32 s10, s1, 24 7888; GFX12-NEXT: s_bfe_u32 s11, s1, 0x80010 7889; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:112 7890; GFX12-NEXT: s_wait_alu 0xfffe 7891; GFX12-NEXT: v_mov_b32_e32 v0, s11 7892; GFX12-NEXT: v_mov_b32_e32 v2, s10 7893; GFX12-NEXT: s_bfe_u32 s10, s7, 0x80008 7894; GFX12-NEXT: s_and_b32 s7, s7, 0xff 7895; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:48 7896; GFX12-NEXT: v_mov_b32_e32 v0, s7 7897; GFX12-NEXT: s_wait_alu 0xfffe 7898; GFX12-NEXT: v_mov_b32_e32 v2, s10 7899; GFX12-NEXT: s_lshr_b32 s7, s6, 24 7900; GFX12-NEXT: s_bfe_u32 s10, s6, 0x80010 7901; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:224 7902; GFX12-NEXT: s_wait_alu 0xfffe 7903; GFX12-NEXT: v_mov_b32_e32 v0, s10 7904; GFX12-NEXT: v_mov_b32_e32 v2, s7 7905; GFX12-NEXT: s_bfe_u32 s7, s6, 0x80008 7906; GFX12-NEXT: s_and_b32 s6, s6, 0xff 7907; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:208 7908; GFX12-NEXT: s_wait_alu 0xfffe 7909; GFX12-NEXT: v_mov_b32_e32 v0, s6 7910; GFX12-NEXT: v_mov_b32_e32 v2, s7 7911; GFX12-NEXT: s_bfe_u32 s6, s5, 0x80008 7912; GFX12-NEXT: s_and_b32 s5, s5, 0xff 7913; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:192 7914; GFX12-NEXT: v_mov_b32_e32 v0, s5 7915; GFX12-NEXT: s_wait_alu 0xfffe 7916; GFX12-NEXT: v_mov_b32_e32 v2, s6 7917; GFX12-NEXT: s_lshr_b32 s5, s4, 24 7918; GFX12-NEXT: s_bfe_u32 s6, s4, 0x80010 7919; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:160 7920; GFX12-NEXT: s_wait_alu 0xfffe 7921; GFX12-NEXT: v_mov_b32_e32 v0, s6 7922; GFX12-NEXT: v_mov_b32_e32 v2, s5 7923; GFX12-NEXT: s_bfe_u32 s5, s4, 0x80008 7924; GFX12-NEXT: s_and_b32 s4, s4, 0xff 7925; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:144 7926; GFX12-NEXT: s_wait_alu 0xfffe 7927; GFX12-NEXT: v_mov_b32_e32 v0, s4 7928; GFX12-NEXT: v_mov_b32_e32 v2, s5 7929; GFX12-NEXT: s_bfe_u32 s4, s3, 0x80008 7930; GFX12-NEXT: s_and_b32 s3, s3, 0xff 7931; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:128 7932; GFX12-NEXT: v_mov_b32_e32 v0, s3 7933; GFX12-NEXT: s_wait_alu 0xfffe 7934; GFX12-NEXT: v_mov_b32_e32 v2, s4 7935; GFX12-NEXT: s_lshr_b32 s3, s2, 24 7936; GFX12-NEXT: s_bfe_u32 s4, s2, 0x80010 7937; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:96 7938; GFX12-NEXT: s_wait_alu 0xfffe 7939; GFX12-NEXT: v_mov_b32_e32 v0, s4 7940; GFX12-NEXT: v_mov_b32_e32 v2, s3 7941; GFX12-NEXT: s_bfe_u32 s3, s2, 0x80008 7942; GFX12-NEXT: s_and_b32 s2, s2, 0xff 7943; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:80 7944; GFX12-NEXT: s_wait_alu 0xfffe 7945; GFX12-NEXT: v_mov_b32_e32 v0, s2 7946; GFX12-NEXT: v_mov_b32_e32 v2, s3 7947; GFX12-NEXT: s_bfe_u32 s2, s1, 0x80008 7948; GFX12-NEXT: s_and_b32 s1, s1, 0xff 7949; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:64 7950; GFX12-NEXT: v_mov_b32_e32 v0, s1 7951; GFX12-NEXT: s_wait_alu 0xfffe 7952; GFX12-NEXT: v_mov_b32_e32 v2, s2 7953; GFX12-NEXT: s_lshr_b32 s1, s0, 24 7954; GFX12-NEXT: s_bfe_u32 s2, s0, 0x80010 7955; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:32 7956; GFX12-NEXT: s_wait_alu 0xfffe 7957; GFX12-NEXT: v_mov_b32_e32 v0, s2 7958; GFX12-NEXT: v_mov_b32_e32 v2, s1 7959; GFX12-NEXT: s_bfe_u32 s1, s0, 0x80008 7960; GFX12-NEXT: s_and_b32 s0, s0, 0xff 7961; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:16 7962; GFX12-NEXT: s_wait_alu 0xfffe 7963; GFX12-NEXT: v_mov_b32_e32 v0, s0 7964; GFX12-NEXT: v_mov_b32_e32 v2, s1 7965; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] 7966; GFX12-NEXT: s_endpgm 7967 %load = load <32 x i8>, ptr addrspace(4) %in 7968 %ext = zext <32 x i8> %load to <32 x i64> 7969 store <32 x i64> %ext, ptr addrspace(1) %out 7970 ret void 7971} 7972 7973define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 7974; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i64: 7975; GFX6-NOHSA: ; %bb.0: 7976; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 7977; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7978; GFX6-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 7979; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 7980; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s7, 16 7981; GFX6-NOHSA-NEXT: s_lshr_b32 s28, s7, 8 7982; GFX6-NOHSA-NEXT: s_mov_b32 s38, s7 7983; GFX6-NOHSA-NEXT: s_lshr_b32 s40, s6, 16 7984; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s6, 24 7985; GFX6-NOHSA-NEXT: s_lshr_b32 s36, s6, 8 7986; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s5, 16 7987; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s5, 8 7988; GFX6-NOHSA-NEXT: s_mov_b32 s42, s5 7989; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s4, 16 7990; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s4, 24 7991; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s4, 8 7992; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s3, 16 7993; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s3, 8 7994; GFX6-NOHSA-NEXT: s_mov_b32 s34, s3 7995; GFX6-NOHSA-NEXT: s_lshr_b32 s26, s2, 16 7996; GFX6-NOHSA-NEXT: s_bfe_i64 s[58:59], s[42:43], 0x80000 7997; GFX6-NOHSA-NEXT: s_bfe_i64 s[60:61], s[38:39], 0x80000 7998; GFX6-NOHSA-NEXT: s_bfe_i64 s[62:63], s[10:11], 0x80000 7999; GFX6-NOHSA-NEXT: s_lshr_b32 s42, s2, 24 8000; GFX6-NOHSA-NEXT: s_lshr_b32 s44, s2, 8 8001; GFX6-NOHSA-NEXT: s_lshr_b32 s46, s1, 16 8002; GFX6-NOHSA-NEXT: s_lshr_b32 s48, s1, 8 8003; GFX6-NOHSA-NEXT: s_mov_b32 s54, s1 8004; GFX6-NOHSA-NEXT: s_lshr_b32 s50, s0, 16 8005; GFX6-NOHSA-NEXT: s_lshr_b32 s52, s0, 24 8006; GFX6-NOHSA-NEXT: s_lshr_b32 s56, s0, 8 8007; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000 8008; GFX6-NOHSA-NEXT: s_ashr_i64 s[38:39], s[0:1], 56 8009; GFX6-NOHSA-NEXT: s_ashr_i64 s[64:65], s[2:3], 56 8010; GFX6-NOHSA-NEXT: s_bfe_i64 s[66:67], s[4:5], 0x80000 8011; GFX6-NOHSA-NEXT: s_ashr_i64 s[68:69], s[4:5], 56 8012; GFX6-NOHSA-NEXT: s_bfe_i64 s[70:71], s[6:7], 0x80000 8013; GFX6-NOHSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 56 8014; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x80000 8015; GFX6-NOHSA-NEXT: s_mov_b32 s0, s8 8016; GFX6-NOHSA-NEXT: s_mov_b32 s1, s9 8017; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s6 8018; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s7 8019; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s60 8020; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s61 8021; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s70 8022; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s71 8023; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s68 8024; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s69 8025; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s58 8026; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s59 8027; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s66 8028; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s67 8029; GFX6-NOHSA-NEXT: v_mov_b32_e32 v22, s64 8030; GFX6-NOHSA-NEXT: v_mov_b32_e32 v23, s65 8031; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s62 8032; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s63 8033; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 8034; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[40:41], 0x80000 8035; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 8036; GFX6-NOHSA-NEXT: v_mov_b32_e32 v24, s6 8037; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 8038; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[36:37], 0x80000 8039; GFX6-NOHSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 8040; GFX6-NOHSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 8041; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s28 8042; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s29 8043; GFX6-NOHSA-NEXT: v_mov_b32_e32 v25, s7 8044; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 8045; GFX6-NOHSA-NEXT: v_mov_b32_e32 v26, s30 8046; GFX6-NOHSA-NEXT: v_mov_b32_e32 v27, s31 8047; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s8 8048; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s9 8049; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:208 8050; GFX6-NOHSA-NEXT: s_bfe_i64 s[40:41], s[54:55], 0x80000 8051; GFX6-NOHSA-NEXT: s_bfe_i64 s[54:55], s[34:35], 0x80000 8052; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[56:57], 0x80000 8053; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[52:53], 0x80000 8054; GFX6-NOHSA-NEXT: s_bfe_i64 s[28:29], s[50:51], 0x80000 8055; GFX6-NOHSA-NEXT: s_bfe_i64 s[30:31], s[48:49], 0x80000 8056; GFX6-NOHSA-NEXT: s_bfe_i64 s[34:35], s[46:47], 0x80000 8057; GFX6-NOHSA-NEXT: s_bfe_i64 s[36:37], s[44:45], 0x80000 8058; GFX6-NOHSA-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 8059; GFX6-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 8060; GFX6-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 8061; GFX6-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 8062; GFX6-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 8063; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 8064; GFX6-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 8065; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 8066; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000 8067; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192 8068; GFX6-NOHSA-NEXT: s_waitcnt expcnt(3) 8069; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s54 8070; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s55 8071; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 8072; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s12 8073; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s13 8074; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:176 8075; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s4 8076; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s5 8077; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s14 8078; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s15 8079; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:160 8080; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s16 8081; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s17 8082; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s18 8083; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s19 8084; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:144 8085; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 8086; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s38 8087; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s39 8088; GFX6-NOHSA-NEXT: v_mov_b32_e32 v20, s20 8089; GFX6-NOHSA-NEXT: v_mov_b32_e32 v21, s21 8090; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:128 8091; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s40 8092; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s41 8093; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 8094; GFX6-NOHSA-NEXT: v_mov_b32_e32 v20, s22 8095; GFX6-NOHSA-NEXT: v_mov_b32_e32 v21, s23 8096; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112 8097; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s10 8098; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s11 8099; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s24 8100; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s25 8101; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 8102; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 8103; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s26 8104; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s27 8105; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s42 8106; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s43 8107; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 8108; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s36 8109; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s37 8110; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 8111; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 8112; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s34 8113; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s35 8114; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:48 8115; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s30 8116; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s31 8117; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:32 8118; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s28 8119; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s29 8120; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s8 8121; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s9 8122; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 8123; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s6 8124; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s7 8125; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 8126; GFX6-NOHSA-NEXT: s_endpgm 8127; 8128; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i64: 8129; GFX7-HSA: ; %bb.0: 8130; GFX7-HSA-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0 8131; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 8132; GFX7-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 8133; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 8134; GFX7-HSA-NEXT: s_lshr_b32 s14, s7, 16 8135; GFX7-HSA-NEXT: s_lshr_b32 s50, s7, 8 8136; GFX7-HSA-NEXT: s_mov_b32 s52, s7 8137; GFX7-HSA-NEXT: s_lshr_b32 s54, s6, 16 8138; GFX7-HSA-NEXT: s_lshr_b32 s56, s6, 24 8139; GFX7-HSA-NEXT: s_lshr_b32 s58, s6, 8 8140; GFX7-HSA-NEXT: s_lshr_b32 s60, s5, 16 8141; GFX7-HSA-NEXT: s_lshr_b32 s48, s5, 8 8142; GFX7-HSA-NEXT: s_mov_b32 s62, s5 8143; GFX7-HSA-NEXT: s_lshr_b32 s42, s4, 16 8144; GFX7-HSA-NEXT: s_lshr_b32 s40, s4, 24 8145; GFX7-HSA-NEXT: s_lshr_b32 s38, s4, 8 8146; GFX7-HSA-NEXT: s_lshr_b32 s36, s3, 16 8147; GFX7-HSA-NEXT: s_lshr_b32 s30, s3, 8 8148; GFX7-HSA-NEXT: s_mov_b32 s34, s3 8149; GFX7-HSA-NEXT: s_lshr_b32 s26, s2, 16 8150; GFX7-HSA-NEXT: s_lshr_b32 s24, s2, 24 8151; GFX7-HSA-NEXT: s_lshr_b32 s22, s2, 8 8152; GFX7-HSA-NEXT: s_lshr_b32 s20, s1, 16 8153; GFX7-HSA-NEXT: s_lshr_b32 s64, s1, 8 8154; GFX7-HSA-NEXT: s_mov_b32 s16, s1 8155; GFX7-HSA-NEXT: s_lshr_b32 s66, s0, 16 8156; GFX7-HSA-NEXT: s_lshr_b32 s68, s0, 24 8157; GFX7-HSA-NEXT: s_lshr_b32 s70, s0, 8 8158; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[2:3], 0x80000 8159; GFX7-HSA-NEXT: s_ashr_i64 s[18:19], s[2:3], 56 8160; GFX7-HSA-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x80000 8161; GFX7-HSA-NEXT: s_ashr_i64 s[44:45], s[4:5], 56 8162; GFX7-HSA-NEXT: s_ashr_i64 s[2:3], s[6:7], 56 8163; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[14:15], 0x80000 8164; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000 8165; GFX7-HSA-NEXT: s_ashr_i64 s[0:1], s[0:1], 56 8166; GFX7-HSA-NEXT: s_bfe_i64 s[46:47], s[6:7], 0x80000 8167; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 8168; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5 8169; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 8170; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s3 8171; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[70:71], 0x80000 8172; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[68:69], 0x80000 8173; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[66:67], 0x80000 8174; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000 8175; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[64:65], 0x80000 8176; GFX7-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 8177; GFX7-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 8178; GFX7-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 8179; GFX7-HSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 8180; GFX7-HSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000 8181; GFX7-HSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 8182; GFX7-HSA-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 8183; GFX7-HSA-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000 8184; GFX7-HSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 8185; GFX7-HSA-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 8186; GFX7-HSA-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x80000 8187; GFX7-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000 8188; GFX7-HSA-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x80000 8189; GFX7-HSA-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000 8190; GFX7-HSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000 8191; GFX7-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000 8192; GFX7-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000 8193; GFX7-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000 8194; GFX7-HSA-NEXT: s_add_u32 s64, s8, 0xf0 8195; GFX7-HSA-NEXT: s_addc_u32 s65, s9, 0 8196; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s50 8197; GFX7-HSA-NEXT: s_add_u32 s50, s8, 0xe0 8198; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s51 8199; GFX7-HSA-NEXT: s_addc_u32 s51, s9, 0 8200; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s50 8201; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s51 8202; GFX7-HSA-NEXT: s_add_u32 s50, s8, 0xd0 8203; GFX7-HSA-NEXT: s_addc_u32 s51, s9, 0 8204; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s44 8205; GFX7-HSA-NEXT: s_add_u32 s44, s8, 0xc0 8206; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s45 8207; GFX7-HSA-NEXT: s_addc_u32 s45, s9, 0 8208; GFX7-HSA-NEXT: v_mov_b32_e32 v28, s44 8209; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s64 8210; GFX7-HSA-NEXT: v_mov_b32_e32 v29, s45 8211; GFX7-HSA-NEXT: s_add_u32 s44, s8, 0xb0 8212; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s65 8213; GFX7-HSA-NEXT: s_addc_u32 s45, s9, 0 8214; GFX7-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 8215; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s52 8216; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s42 8217; GFX7-HSA-NEXT: s_add_u32 s42, s8, 0xa0 8218; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s53 8219; GFX7-HSA-NEXT: v_mov_b32_e32 v26, s50 8220; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s43 8221; GFX7-HSA-NEXT: s_addc_u32 s43, s9, 0 8222; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s54 8223; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s55 8224; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s56 8225; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s57 8226; GFX7-HSA-NEXT: v_mov_b32_e32 v27, s51 8227; GFX7-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 8228; GFX7-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 8229; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s28 8230; GFX7-HSA-NEXT: s_add_u32 s28, s8, 0x90 8231; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s29 8232; GFX7-HSA-NEXT: s_addc_u32 s29, s9, 0 8233; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s28 8234; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s42 8235; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s29 8236; GFX7-HSA-NEXT: s_add_u32 s28, s8, 0x80 8237; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s62 8238; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s63 8239; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s48 8240; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s49 8241; GFX7-HSA-NEXT: v_mov_b32_e32 v30, s44 8242; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s43 8243; GFX7-HSA-NEXT: s_addc_u32 s29, s9, 0 8244; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s60 8245; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s61 8246; GFX7-HSA-NEXT: v_mov_b32_e32 v31, s45 8247; GFX7-HSA-NEXT: flat_store_dwordx4 v[10:11], v[20:23] 8248; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s18 8249; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0x70 8250; GFX7-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 8251; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s19 8252; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0 8253; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s18 8254; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s19 8255; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0x60 8256; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0 8257; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s18 8258; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s46 8259; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s47 8260; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s58 8261; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s59 8262; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s40 8263; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s41 8264; GFX7-HSA-NEXT: v_mov_b32_e32 v26, s28 8265; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s19 8266; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0x50 8267; GFX7-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 8268; GFX7-HSA-NEXT: v_mov_b32_e32 v27, s29 8269; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s38 8270; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s39 8271; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s36 8272; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s37 8273; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s34 8274; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s35 8275; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s30 8276; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s31 8277; GFX7-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 8278; GFX7-HSA-NEXT: flat_store_dwordx4 v[26:27], v[4:7] 8279; GFX7-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 8280; GFX7-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 8281; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0 8282; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s18 8283; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s26 8284; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s27 8285; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s24 8286; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s25 8287; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s19 8288; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8289; GFX7-HSA-NEXT: s_nop 0 8290; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s12 8291; GFX7-HSA-NEXT: s_add_u32 s12, s8, 64 8292; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s13 8293; GFX7-HSA-NEXT: s_addc_u32 s13, s9, 0 8294; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s12 8295; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s22 8296; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s23 8297; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s13 8298; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8299; GFX7-HSA-NEXT: s_nop 0 8300; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 8301; GFX7-HSA-NEXT: s_add_u32 s0, s8, 48 8302; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1 8303; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 8304; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 8305; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 8306; GFX7-HSA-NEXT: s_add_u32 s0, s8, 32 8307; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s20 8308; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s21 8309; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 8310; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8311; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 8312; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 8313; GFX7-HSA-NEXT: s_add_u32 s0, s8, 16 8314; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s16 8315; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17 8316; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s14 8317; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s15 8318; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 8319; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8320; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 8321; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 8322; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s7 8323; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4 8324; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s5 8325; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 8326; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8327; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s8 8328; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s10 8329; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s11 8330; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 8331; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s3 8332; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s9 8333; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8334; GFX7-HSA-NEXT: s_endpgm 8335; 8336; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i64: 8337; GFX8-NOHSA: ; %bb.0: 8338; GFX8-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 8339; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 8340; GFX8-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 8341; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 8342; GFX8-NOHSA-NEXT: s_lshr_b32 s46, s7, 16 8343; GFX8-NOHSA-NEXT: s_lshr_b32 s48, s7, 8 8344; GFX8-NOHSA-NEXT: s_mov_b32 s50, s7 8345; GFX8-NOHSA-NEXT: s_lshr_b32 s52, s6, 16 8346; GFX8-NOHSA-NEXT: s_lshr_b32 s54, s6, 24 8347; GFX8-NOHSA-NEXT: s_lshr_b32 s56, s6, 8 8348; GFX8-NOHSA-NEXT: s_lshr_b32 s58, s5, 16 8349; GFX8-NOHSA-NEXT: s_lshr_b32 s60, s5, 8 8350; GFX8-NOHSA-NEXT: s_mov_b32 s62, s5 8351; GFX8-NOHSA-NEXT: s_lshr_b32 s44, s4, 16 8352; GFX8-NOHSA-NEXT: s_lshr_b32 s40, s4, 24 8353; GFX8-NOHSA-NEXT: s_lshr_b32 s38, s4, 8 8354; GFX8-NOHSA-NEXT: s_lshr_b32 s36, s3, 16 8355; GFX8-NOHSA-NEXT: s_lshr_b32 s30, s3, 8 8356; GFX8-NOHSA-NEXT: s_mov_b32 s28, s3 8357; GFX8-NOHSA-NEXT: s_lshr_b32 s24, s2, 16 8358; GFX8-NOHSA-NEXT: s_lshr_b32 s22, s2, 24 8359; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s2, 8 8360; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s1, 16 8361; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s1, 8 8362; GFX8-NOHSA-NEXT: s_mov_b32 s64, s1 8363; GFX8-NOHSA-NEXT: s_lshr_b32 s66, s0, 16 8364; GFX8-NOHSA-NEXT: s_lshr_b32 s68, s0, 24 8365; GFX8-NOHSA-NEXT: s_lshr_b32 s70, s0, 8 8366; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000 8367; GFX8-NOHSA-NEXT: s_ashr_i64 s[12:13], s[0:1], 56 8368; GFX8-NOHSA-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000 8369; GFX8-NOHSA-NEXT: s_ashr_i64 s[26:27], s[2:3], 56 8370; GFX8-NOHSA-NEXT: s_bfe_i64 s[34:35], s[4:5], 0x80000 8371; GFX8-NOHSA-NEXT: s_ashr_i64 s[42:43], s[4:5], 56 8372; GFX8-NOHSA-NEXT: s_bfe_i64 s[72:73], s[6:7], 0x80000 8373; GFX8-NOHSA-NEXT: s_ashr_i64 s[74:75], s[6:7], 56 8374; GFX8-NOHSA-NEXT: s_bfe_i64 s[0:1], s[70:71], 0x80000 8375; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x80000 8376; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[66:67], 0x80000 8377; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[64:65], 0x80000 8378; GFX8-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 8379; GFX8-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 8380; GFX8-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 8381; GFX8-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 8382; GFX8-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 8383; GFX8-NOHSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 8384; GFX8-NOHSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 8385; GFX8-NOHSA-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 8386; GFX8-NOHSA-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000 8387; GFX8-NOHSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 8388; GFX8-NOHSA-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000 8389; GFX8-NOHSA-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x80000 8390; GFX8-NOHSA-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x80000 8391; GFX8-NOHSA-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000 8392; GFX8-NOHSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000 8393; GFX8-NOHSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000 8394; GFX8-NOHSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000 8395; GFX8-NOHSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000 8396; GFX8-NOHSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000 8397; GFX8-NOHSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000 8398; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s46 8399; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0xf0 8400; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s47 8401; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0 8402; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46 8403; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s74 8404; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s75 8405; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47 8406; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0xe0 8407; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8408; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0 8409; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46 8410; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s50 8411; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s51 8412; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s48 8413; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s49 8414; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47 8415; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0xd0 8416; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8417; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0 8418; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46 8419; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s52 8420; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s53 8421; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s54 8422; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s55 8423; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47 8424; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0xc0 8425; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8426; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0 8427; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46 8428; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s72 8429; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s73 8430; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s56 8431; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s57 8432; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47 8433; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8434; GFX8-NOHSA-NEXT: s_nop 0 8435; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s42 8436; GFX8-NOHSA-NEXT: s_add_u32 s42, s8, 0xb0 8437; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s43 8438; GFX8-NOHSA-NEXT: s_addc_u32 s43, s9, 0 8439; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s42 8440; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s58 8441; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s59 8442; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s43 8443; GFX8-NOHSA-NEXT: s_add_u32 s42, s8, 0xa0 8444; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8445; GFX8-NOHSA-NEXT: s_addc_u32 s43, s9, 0 8446; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s42 8447; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s62 8448; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s63 8449; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s60 8450; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s61 8451; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s43 8452; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8453; GFX8-NOHSA-NEXT: s_nop 0 8454; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s40 8455; GFX8-NOHSA-NEXT: s_add_u32 s40, s8, 0x90 8456; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s41 8457; GFX8-NOHSA-NEXT: s_addc_u32 s41, s9, 0 8458; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s40 8459; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s44 8460; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s45 8461; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s41 8462; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8463; GFX8-NOHSA-NEXT: s_nop 0 8464; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s34 8465; GFX8-NOHSA-NEXT: s_add_u32 s34, s8, 0x80 8466; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s35 8467; GFX8-NOHSA-NEXT: s_addc_u32 s35, s9, 0 8468; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s34 8469; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s38 8470; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s39 8471; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s35 8472; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8473; GFX8-NOHSA-NEXT: s_nop 0 8474; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s26 8475; GFX8-NOHSA-NEXT: s_add_u32 s26, s8, 0x70 8476; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s27 8477; GFX8-NOHSA-NEXT: s_addc_u32 s27, s9, 0 8478; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s26 8479; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s36 8480; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s37 8481; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s27 8482; GFX8-NOHSA-NEXT: s_add_u32 s26, s8, 0x60 8483; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8484; GFX8-NOHSA-NEXT: s_addc_u32 s27, s9, 0 8485; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s26 8486; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s28 8487; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s29 8488; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s30 8489; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s31 8490; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s27 8491; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8492; GFX8-NOHSA-NEXT: s_nop 0 8493; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s22 8494; GFX8-NOHSA-NEXT: s_add_u32 s22, s8, 0x50 8495; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s23 8496; GFX8-NOHSA-NEXT: s_addc_u32 s23, s9, 0 8497; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s22 8498; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s24 8499; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s25 8500; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s23 8501; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8502; GFX8-NOHSA-NEXT: s_nop 0 8503; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s16 8504; GFX8-NOHSA-NEXT: s_add_u32 s16, s8, 64 8505; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17 8506; GFX8-NOHSA-NEXT: s_addc_u32 s17, s9, 0 8507; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s16 8508; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s20 8509; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s21 8510; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s17 8511; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8512; GFX8-NOHSA-NEXT: s_nop 0 8513; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 8514; GFX8-NOHSA-NEXT: s_add_u32 s12, s8, 48 8515; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s13 8516; GFX8-NOHSA-NEXT: s_addc_u32 s13, s9, 0 8517; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s12 8518; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s18 8519; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s19 8520; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s13 8521; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8522; GFX8-NOHSA-NEXT: s_nop 0 8523; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 8524; GFX8-NOHSA-NEXT: s_add_u32 s6, s8, 32 8525; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s7 8526; GFX8-NOHSA-NEXT: s_addc_u32 s7, s9, 0 8527; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6 8528; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14 8529; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s15 8530; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7 8531; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8532; GFX8-NOHSA-NEXT: s_nop 0 8533; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 8534; GFX8-NOHSA-NEXT: s_add_u32 s2, s8, 16 8535; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s3 8536; GFX8-NOHSA-NEXT: s_addc_u32 s3, s9, 0 8537; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 8538; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 8539; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5 8540; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 8541; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8542; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s8 8543; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10 8544; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s11 8545; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 8546; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s1 8547; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s9 8548; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8549; GFX8-NOHSA-NEXT: s_endpgm 8550; 8551; EG-LABEL: constant_sextload_v32i8_to_v32i64: 8552; EG: ; %bb.0: 8553; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[] 8554; EG-NEXT: TEX 1 @22 8555; EG-NEXT: ALU 84, @27, KC0[CB0:0-32], KC1[] 8556; EG-NEXT: ALU 71, @112, KC0[], KC1[] 8557; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T42.X, 0 8558; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T31.X, 0 8559; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0 8560; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T25.X, 0 8561; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T24.X, 0 8562; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T23.X, 0 8563; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T22.X, 0 8564; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T21.X, 0 8565; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T20.X, 0 8566; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 0 8567; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T18.X, 0 8568; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T17.X, 0 8569; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T16.X, 0 8570; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T15.X, 0 8571; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T14.X, 0 8572; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T13.X, 1 8573; EG-NEXT: CF_END 8574; EG-NEXT: PAD 8575; EG-NEXT: Fetch clause starting at 22: 8576; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 8577; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 8578; EG-NEXT: ALU clause starting at 26: 8579; EG-NEXT: MOV * T11.X, KC0[2].Z, 8580; EG-NEXT: ALU clause starting at 27: 8581; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 8582; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8583; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8584; EG-NEXT: LSHR T14.X, PV.W, literal.x, 8585; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8586; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8587; EG-NEXT: LSHR T15.X, PV.W, literal.x, 8588; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8589; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8590; EG-NEXT: LSHR T16.X, PV.W, literal.x, 8591; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8592; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8593; EG-NEXT: LSHR T17.X, PV.W, literal.x, 8594; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8595; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8596; EG-NEXT: LSHR T18.X, PV.W, literal.x, 8597; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8598; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8599; EG-NEXT: LSHR T19.X, PV.W, literal.x, 8600; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8601; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8602; EG-NEXT: LSHR T20.X, PV.W, literal.x, 8603; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8604; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8605; EG-NEXT: LSHR T21.X, PV.W, literal.x, 8606; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8607; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8608; EG-NEXT: LSHR T22.X, PV.W, literal.x, 8609; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8610; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8611; EG-NEXT: LSHR T23.X, PV.W, literal.x, 8612; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8613; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8614; EG-NEXT: LSHR T24.X, PV.W, literal.x, 8615; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8616; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8617; EG-NEXT: LSHR * T25.X, PV.W, literal.x, 8618; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8619; EG-NEXT: BFE_INT * T26.X, T11.W, 0.0, literal.x, 8620; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 8621; EG-NEXT: BFE_INT T27.X, T11.Y, 0.0, literal.x, 8622; EG-NEXT: ASHR T26.Y, PV.X, literal.y, 8623; EG-NEXT: LSHR * T0.W, T11.W, literal.x, 8624; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8625; EG-NEXT: BFE_INT T28.X, T11.X, 0.0, literal.x, 8626; EG-NEXT: ASHR T27.Y, PV.X, literal.y, 8627; EG-NEXT: BFE_INT T26.Z, PV.W, 0.0, literal.x, 8628; EG-NEXT: LSHR * T0.W, T11.Y, literal.x, 8629; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8630; EG-NEXT: BFE_INT T29.X, T12.W, 0.0, literal.x, 8631; EG-NEXT: ASHR T28.Y, PV.X, literal.y, 8632; EG-NEXT: BFE_INT T27.Z, PV.W, 0.0, literal.x, 8633; EG-NEXT: LSHR T0.W, T11.X, literal.x, 8634; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 8635; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8636; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 8637; EG-NEXT: LSHR T30.X, PS, literal.x, 8638; EG-NEXT: ASHR T29.Y, PV.X, literal.y, 8639; EG-NEXT: BFE_INT T28.Z, PV.W, 0.0, literal.z, 8640; EG-NEXT: LSHR T0.W, T12.W, literal.z, 8641; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 8642; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8643; EG-NEXT: 8(1.121039e-44), 224(3.138909e-43) 8644; EG-NEXT: LSHR T31.X, PS, literal.x, 8645; EG-NEXT: BFE_INT T29.Z, PV.W, 0.0, literal.y, 8646; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.z, 8647; EG-NEXT: ASHR * T32.W, T12.X, literal.w, 8648; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44) 8649; EG-NEXT: 240(3.363116e-43), 31(4.344025e-44) 8650; EG-NEXT: BFE_INT T33.X, T12.Z, 0.0, literal.x, 8651; EG-NEXT: LSHR T0.Y, T11.Z, literal.x, BS:VEC_120/SCL_212 8652; EG-NEXT: ASHR T32.Z, T12.X, literal.y, 8653; EG-NEXT: LSHR T1.W, T12.X, literal.z, 8654; EG-NEXT: ASHR * T34.W, T12.Y, literal.w, 8655; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44) 8656; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8657; EG-NEXT: BFE_INT T32.X, PV.W, 0.0, literal.x, 8658; EG-NEXT: ASHR T33.Y, PV.X, literal.y, 8659; EG-NEXT: ASHR T34.Z, T12.Y, literal.z, 8660; EG-NEXT: LSHR T1.W, T12.Z, literal.x, 8661; EG-NEXT: LSHR * T2.W, T12.Y, literal.w, 8662; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8663; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 8664; EG-NEXT: BFE_INT * T34.X, PS, 0.0, literal.x, 8665; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 8666; EG-NEXT: ALU clause starting at 112: 8667; EG-NEXT: ASHR T32.Y, T32.X, literal.x, 8668; EG-NEXT: BFE_INT T33.Z, T1.W, 0.0, literal.y, 8669; EG-NEXT: LSHR T1.W, T11.W, literal.z, BS:VEC_120/SCL_212 8670; EG-NEXT: ASHR * T35.W, T12.Z, literal.x, 8671; EG-NEXT: 31(4.344025e-44), 8(1.121039e-44) 8672; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8673; EG-NEXT: BFE_INT T36.X, T12.X, 0.0, literal.x, 8674; EG-NEXT: ASHR T34.Y, T34.X, literal.y, BS:VEC_120/SCL_212 8675; EG-NEXT: ASHR T35.Z, T12.Z, literal.z, 8676; EG-NEXT: LSHR T2.W, T12.Z, literal.w, 8677; EG-NEXT: ASHR * T37.W, T12.W, literal.y, 8678; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8679; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 8680; EG-NEXT: BFE_INT T35.X, PV.W, 0.0, literal.x, 8681; EG-NEXT: ASHR T36.Y, PV.X, literal.y, 8682; EG-NEXT: ASHR T37.Z, T12.W, literal.z, 8683; EG-NEXT: LSHR T2.W, T12.X, literal.x, 8684; EG-NEXT: LSHR * T3.W, T12.W, literal.w, 8685; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8686; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 8687; EG-NEXT: BFE_INT T37.X, PS, 0.0, literal.x, 8688; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 8689; EG-NEXT: BFE_INT T36.Z, PV.W, 0.0, literal.x, 8690; EG-NEXT: LSHR T2.W, T11.Z, literal.z, 8691; EG-NEXT: ASHR * T12.W, T11.X, literal.y, 8692; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8693; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8694; EG-NEXT: BFE_INT T38.X, T12.Y, 0.0, literal.x, 8695; EG-NEXT: ASHR T37.Y, PV.X, literal.y, 8696; EG-NEXT: ASHR T12.Z, T11.X, literal.z, 8697; EG-NEXT: LSHR T3.W, T11.X, literal.w, 8698; EG-NEXT: ASHR * T39.W, T11.Y, literal.y, 8699; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8700; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 8701; EG-NEXT: BFE_INT T12.X, PV.W, 0.0, literal.x, 8702; EG-NEXT: ASHR T38.Y, PV.X, literal.y, 8703; EG-NEXT: ASHR T39.Z, T11.Y, literal.z, 8704; EG-NEXT: LSHR T3.W, T12.Y, literal.x, BS:VEC_120/SCL_212 8705; EG-NEXT: LSHR * T4.W, T11.Y, literal.w, 8706; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8707; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 8708; EG-NEXT: BFE_INT T39.X, PS, 0.0, literal.x, 8709; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 8710; EG-NEXT: BFE_INT T38.Z, PV.W, 0.0, literal.x, 8711; EG-NEXT: ASHR T36.W, T36.Z, literal.y, 8712; EG-NEXT: ASHR * T40.W, T11.Z, literal.y, 8713; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8714; EG-NEXT: BFE_INT T11.X, T11.Z, 0.0, literal.x, 8715; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 8716; EG-NEXT: ASHR T40.Z, T11.Z, literal.z, 8717; EG-NEXT: ASHR T38.W, PV.Z, literal.y, 8718; EG-NEXT: ASHR * T41.W, T11.W, literal.y, 8719; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8720; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 8721; EG-NEXT: BFE_INT T40.X, T2.W, 0.0, literal.x, 8722; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 8723; EG-NEXT: ASHR T41.Z, T11.W, literal.z, BS:VEC_120/SCL_212 8724; EG-NEXT: ASHR T33.W, T33.Z, literal.y, 8725; EG-NEXT: ASHR * T29.W, T29.Z, literal.y, 8726; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8727; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 8728; EG-NEXT: BFE_INT T41.X, T1.W, 0.0, literal.x, 8729; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 8730; EG-NEXT: BFE_INT T11.Z, T0.Y, 0.0, literal.x, 8731; EG-NEXT: ASHR T28.W, T28.Z, literal.y, 8732; EG-NEXT: ASHR * T27.W, T27.Z, literal.y, 8733; EG-NEXT: 8(1.121039e-44), 31(4.344025e-44) 8734; EG-NEXT: LSHR T42.X, T0.W, literal.x, 8735; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 8736; EG-NEXT: ASHR T11.W, PV.Z, literal.y, 8737; EG-NEXT: ASHR * T26.W, T26.Z, literal.y, 8738; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8739; 8740; GFX12-LABEL: constant_sextload_v32i8_to_v32i64: 8741; GFX12: ; %bb.0: 8742; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 8743; GFX12-NEXT: s_wait_kmcnt 0x0 8744; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 8745; GFX12-NEXT: s_wait_kmcnt 0x0 8746; GFX12-NEXT: s_lshr_b32 s36, s7, 16 8747; GFX12-NEXT: s_lshr_b32 s38, s7, 8 8748; GFX12-NEXT: s_mov_b32 s40, s7 8749; GFX12-NEXT: s_lshr_b32 s42, s6, 16 8750; GFX12-NEXT: s_lshr_b32 s44, s6, 24 8751; GFX12-NEXT: s_ashr_i64 s[74:75], s[6:7], 56 8752; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 8753; GFX12-NEXT: s_lshr_b32 s46, s6, 8 8754; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 8755; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000 8756; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s37 8757; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000 8758; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 8759; GFX12-NEXT: s_wait_alu 0xfffe 8760; GFX12-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s75 8761; GFX12-NEXT: v_dual_mov_b32 v2, s74 :: v_dual_mov_b32 v5, s41 8762; GFX12-NEXT: s_lshr_b32 s48, s5, 16 8763; GFX12-NEXT: s_bfe_i64 s[72:73], s[6:7], 0x80000 8764; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000 8765; GFX12-NEXT: v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s39 8766; GFX12-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v9, s43 8767; GFX12-NEXT: s_lshr_b32 s50, s5, 8 8768; GFX12-NEXT: s_mov_b32 s52, s5 8769; GFX12-NEXT: v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45 8770; GFX12-NEXT: v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s73 8771; GFX12-NEXT: s_lshr_b32 s54, s4, 16 8772; GFX12-NEXT: s_lshr_b32 s56, s4, 24 8773; GFX12-NEXT: s_ashr_i64 s[70:71], s[4:5], 56 8774; GFX12-NEXT: v_dual_mov_b32 v12, s72 :: v_dual_mov_b32 v15, s47 8775; GFX12-NEXT: s_bfe_i64 s[36:37], s[48:49], 0x80000 8776; GFX12-NEXT: v_mov_b32_e32 v14, s46 8777; GFX12-NEXT: s_lshr_b32 s58, s4, 8 8778; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000 8779; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000 8780; GFX12-NEXT: s_lshr_b32 s60, s3, 16 8781; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000 8782; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000 8783; GFX12-NEXT: s_clause 0x3 8784; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:240 8785; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:224 8786; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:208 8787; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:192 8788; GFX12-NEXT: s_wait_alu 0xfffe 8789; GFX12-NEXT: v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v0, s36 8790; GFX12-NEXT: v_dual_mov_b32 v3, s71 :: v_dual_mov_b32 v2, s70 8791; GFX12-NEXT: v_mov_b32_e32 v5, s53 8792; GFX12-NEXT: s_lshr_b32 s34, s3, 8 8793; GFX12-NEXT: s_mov_b32 s30, s3 8794; GFX12-NEXT: s_lshr_b32 s24, s2, 16 8795; GFX12-NEXT: s_lshr_b32 s22, s2, 24 8796; GFX12-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x80000 8797; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000 8798; GFX12-NEXT: v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s51 8799; GFX12-NEXT: v_dual_mov_b32 v6, s50 :: v_dual_mov_b32 v9, s55 8800; GFX12-NEXT: s_lshr_b32 s20, s2, 8 8801; GFX12-NEXT: s_ashr_i64 s[26:27], s[2:3], 56 8802; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x80000 8803; GFX12-NEXT: v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s57 8804; GFX12-NEXT: v_dual_mov_b32 v10, s56 :: v_dual_mov_b32 v13, s29 8805; GFX12-NEXT: s_lshr_b32 s18, s1, 16 8806; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 8807; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 8808; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 8809; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000 8810; GFX12-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v15, s59 8811; GFX12-NEXT: v_dual_mov_b32 v14, s58 :: v_dual_mov_b32 v17, s61 8812; GFX12-NEXT: s_lshr_b32 s14, s1, 8 8813; GFX12-NEXT: s_mov_b32 s62, s1 8814; GFX12-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000 8815; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 8816; GFX12-NEXT: v_dual_mov_b32 v16, s60 :: v_dual_mov_b32 v19, s27 8817; GFX12-NEXT: v_dual_mov_b32 v18, s26 :: v_dual_mov_b32 v21, s31 8818; GFX12-NEXT: s_lshr_b32 s64, s0, 16 8819; GFX12-NEXT: s_lshr_b32 s66, s0, 24 8820; GFX12-NEXT: s_ashr_i64 s[12:13], s[0:1], 56 8821; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000 8822; GFX12-NEXT: v_dual_mov_b32 v20, s30 :: v_dual_mov_b32 v23, s35 8823; GFX12-NEXT: v_mov_b32_e32 v22, s34 8824; GFX12-NEXT: s_clause 0x5 8825; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:176 8826; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:160 8827; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:144 8828; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:128 8829; GFX12-NEXT: global_store_b128 v24, v[16:19], s[8:9] offset:112 8830; GFX12-NEXT: global_store_b128 v24, v[20:23], s[8:9] offset:96 8831; GFX12-NEXT: v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v0, s24 8832; GFX12-NEXT: v_dual_mov_b32 v3, s23 :: v_dual_mov_b32 v2, s22 8833; GFX12-NEXT: v_mov_b32_e32 v5, s17 8834; GFX12-NEXT: s_lshr_b32 s68, s0, 8 8835; GFX12-NEXT: s_bfe_i64 s[6:7], s[62:63], 0x80000 8836; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000 8837; GFX12-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v7, s21 8838; GFX12-NEXT: v_dual_mov_b32 v6, s20 :: v_dual_mov_b32 v9, s19 8839; GFX12-NEXT: s_bfe_i64 s[2:3], s[66:67], 0x80000 8840; GFX12-NEXT: s_bfe_i64 s[4:5], s[64:65], 0x80000 8841; GFX12-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v11, s13 8842; GFX12-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v13, s7 8843; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000 8844; GFX12-NEXT: s_bfe_i64 s[0:1], s[68:69], 0x80000 8845; GFX12-NEXT: v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s15 8846; GFX12-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v17, s5 8847; GFX12-NEXT: v_dual_mov_b32 v16, s4 :: v_dual_mov_b32 v19, s3 8848; GFX12-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v21, s11 8849; GFX12-NEXT: v_dual_mov_b32 v20, s10 :: v_dual_mov_b32 v23, s1 8850; GFX12-NEXT: v_mov_b32_e32 v22, s0 8851; GFX12-NEXT: s_clause 0x5 8852; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:80 8853; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:64 8854; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:48 8855; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:32 8856; GFX12-NEXT: global_store_b128 v24, v[16:19], s[8:9] offset:16 8857; GFX12-NEXT: global_store_b128 v24, v[20:23], s[8:9] 8858; GFX12-NEXT: s_endpgm 8859 %load = load <32 x i8>, ptr addrspace(4) %in 8860 %ext = sext <32 x i8> %load to <32 x i64> 8861 store <32 x i64> %ext, ptr addrspace(1) %out 8862 ret void 8863} 8864 8865; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64: 8866; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8867; %load = load <64 x i8>, ptr addrspace(4) %in 8868; %ext = zext <64 x i8> %load to <64 x i64> 8869; store <64 x i64> %ext, ptr addrspace(1) %out 8870; ret void 8871; } 8872 8873; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64: 8874; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8875; %load = load <64 x i8>, ptr addrspace(4) %in 8876; %ext = sext <64 x i8> %load to <64 x i64> 8877; store <64 x i64> %ext, ptr addrspace(1) %out 8878; ret void 8879; } 8880 8881define amdgpu_kernel void @constant_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8882; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i16: 8883; GFX6-NOHSA: ; %bb.0: 8884; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8885; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 8886; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 8887; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 8888; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 8889; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 8890; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 8891; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 8892; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 8893; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 8894; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 8895; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 8896; GFX6-NOHSA-NEXT: buffer_store_short v0, off, s[4:7], 0 8897; GFX6-NOHSA-NEXT: s_endpgm 8898; 8899; GFX7-HSA-LABEL: constant_zextload_i8_to_i16: 8900; GFX7-HSA: ; %bb.0: 8901; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 8902; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 8903; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 8904; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 8905; GFX7-HSA-NEXT: flat_load_ubyte v2, v[0:1] 8906; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 8907; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 8908; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 8909; GFX7-HSA-NEXT: flat_store_short v[0:1], v2 8910; GFX7-HSA-NEXT: s_endpgm 8911; 8912; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i16: 8913; GFX8-NOHSA: ; %bb.0: 8914; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8915; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 8916; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 8917; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 8918; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1] 8919; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 8920; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 8921; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 8922; GFX8-NOHSA-NEXT: flat_store_short v[0:1], v2 8923; GFX8-NOHSA-NEXT: s_endpgm 8924; 8925; EG-LABEL: constant_zextload_i8_to_i16: 8926; EG: ; %bb.0: 8927; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 8928; EG-NEXT: TEX 0 @6 8929; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 8930; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 8931; EG-NEXT: CF_END 8932; EG-NEXT: PAD 8933; EG-NEXT: Fetch clause starting at 6: 8934; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 8935; EG-NEXT: ALU clause starting at 8: 8936; EG-NEXT: MOV * T0.X, KC0[2].Z, 8937; EG-NEXT: ALU clause starting at 9: 8938; EG-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x, 8939; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 8940; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 8941; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 8942; EG-NEXT: LSHL T0.X, T0.X, PV.W, 8943; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 8944; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 8945; EG-NEXT: MOV T0.Y, 0.0, 8946; EG-NEXT: MOV * T0.Z, 0.0, 8947; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 8948; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8949; 8950; GFX12-LABEL: constant_zextload_i8_to_i16: 8951; GFX12: ; %bb.0: 8952; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 8953; GFX12-NEXT: v_mov_b32_e32 v0, 0 8954; GFX12-NEXT: s_wait_kmcnt 0x0 8955; GFX12-NEXT: global_load_u8 v1, v0, s[2:3] 8956; GFX12-NEXT: s_wait_loadcnt 0x0 8957; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] 8958; GFX12-NEXT: s_endpgm 8959 %a = load i8, ptr addrspace(4) %in 8960 %ext = zext i8 %a to i16 8961 store i16 %ext, ptr addrspace(1) %out 8962 ret void 8963} 8964 8965define amdgpu_kernel void @constant_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8966; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i16: 8967; GFX6-NOHSA: ; %bb.0: 8968; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8969; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 8970; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 8971; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 8972; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 8973; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 8974; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 8975; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 8976; GFX6-NOHSA-NEXT: buffer_load_sbyte v0, off, s[8:11], 0 8977; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 8978; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 8979; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 8980; GFX6-NOHSA-NEXT: buffer_store_short v0, off, s[4:7], 0 8981; GFX6-NOHSA-NEXT: s_endpgm 8982; 8983; GFX7-HSA-LABEL: constant_sextload_i8_to_i16: 8984; GFX7-HSA: ; %bb.0: 8985; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 8986; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 8987; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 8988; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 8989; GFX7-HSA-NEXT: flat_load_sbyte v2, v[0:1] 8990; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 8991; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 8992; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 8993; GFX7-HSA-NEXT: flat_store_short v[0:1], v2 8994; GFX7-HSA-NEXT: s_endpgm 8995; 8996; GFX8-NOHSA-LABEL: constant_sextload_i8_to_i16: 8997; GFX8-NOHSA: ; %bb.0: 8998; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8999; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9000; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 9001; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 9002; GFX8-NOHSA-NEXT: flat_load_sbyte v2, v[0:1] 9003; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9004; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9005; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 9006; GFX8-NOHSA-NEXT: flat_store_short v[0:1], v2 9007; GFX8-NOHSA-NEXT: s_endpgm 9008; 9009; EG-LABEL: constant_sextload_i8_to_i16: 9010; EG: ; %bb.0: 9011; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 9012; EG-NEXT: TEX 0 @6 9013; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 9014; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 9015; EG-NEXT: CF_END 9016; EG-NEXT: PAD 9017; EG-NEXT: Fetch clause starting at 6: 9018; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 9019; EG-NEXT: ALU clause starting at 8: 9020; EG-NEXT: MOV * T0.X, KC0[2].Z, 9021; EG-NEXT: ALU clause starting at 9: 9022; EG-NEXT: BFE_INT T0.W, T0.X, 0.0, literal.x, 9023; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.y, 9024; EG-NEXT: 8(1.121039e-44), 3(4.203895e-45) 9025; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 9026; EG-NEXT: LSHL * T1.W, PS, literal.y, 9027; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 9028; EG-NEXT: LSHL T0.X, PV.W, PS, 9029; EG-NEXT: LSHL * T0.W, literal.x, PS, 9030; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 9031; EG-NEXT: MOV T0.Y, 0.0, 9032; EG-NEXT: MOV * T0.Z, 0.0, 9033; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 9034; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9035; 9036; GFX12-LABEL: constant_sextload_i8_to_i16: 9037; GFX12: ; %bb.0: 9038; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9039; GFX12-NEXT: v_mov_b32_e32 v0, 0 9040; GFX12-NEXT: s_wait_kmcnt 0x0 9041; GFX12-NEXT: global_load_i8 v1, v0, s[2:3] 9042; GFX12-NEXT: s_wait_loadcnt 0x0 9043; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] 9044; GFX12-NEXT: s_endpgm 9045 %a = load i8, ptr addrspace(4) %in 9046 %ext = sext i8 %a to i16 9047 store i16 %ext, ptr addrspace(1) %out 9048 ret void 9049} 9050 9051define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9052; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i16: 9053; GFX6-NOHSA: ; %bb.0: 9054; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9055; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 9056; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 9057; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 9058; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 9059; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9060; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 9061; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 9062; GFX6-NOHSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 9063; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 9064; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 9065; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 9066; GFX6-NOHSA-NEXT: buffer_store_short v0, off, s[4:7], 0 9067; GFX6-NOHSA-NEXT: s_endpgm 9068; 9069; GFX7-HSA-LABEL: constant_zextload_v1i8_to_v1i16: 9070; GFX7-HSA: ; %bb.0: 9071; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9072; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9073; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 9074; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 9075; GFX7-HSA-NEXT: flat_load_ubyte v2, v[0:1] 9076; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9077; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 9078; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 9079; GFX7-HSA-NEXT: flat_store_short v[0:1], v2 9080; GFX7-HSA-NEXT: s_endpgm 9081; 9082; GFX8-NOHSA-LABEL: constant_zextload_v1i8_to_v1i16: 9083; GFX8-NOHSA: ; %bb.0: 9084; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9085; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9086; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 9087; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 9088; GFX8-NOHSA-NEXT: flat_load_ubyte v2, v[0:1] 9089; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9090; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9091; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 9092; GFX8-NOHSA-NEXT: flat_store_short v[0:1], v2 9093; GFX8-NOHSA-NEXT: s_endpgm 9094; 9095; EG-LABEL: constant_zextload_v1i8_to_v1i16: 9096; EG: ; %bb.0: 9097; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 9098; EG-NEXT: TEX 0 @6 9099; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 9100; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 9101; EG-NEXT: CF_END 9102; EG-NEXT: PAD 9103; EG-NEXT: Fetch clause starting at 6: 9104; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 9105; EG-NEXT: ALU clause starting at 8: 9106; EG-NEXT: MOV * T0.X, KC0[2].Z, 9107; EG-NEXT: ALU clause starting at 9: 9108; EG-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x, 9109; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 9110; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 9111; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 9112; EG-NEXT: LSHL T0.X, T0.X, PV.W, 9113; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 9114; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 9115; EG-NEXT: MOV T0.Y, 0.0, 9116; EG-NEXT: MOV * T0.Z, 0.0, 9117; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 9118; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9119; 9120; GFX12-LABEL: constant_zextload_v1i8_to_v1i16: 9121; GFX12: ; %bb.0: 9122; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9123; GFX12-NEXT: v_mov_b32_e32 v0, 0 9124; GFX12-NEXT: s_wait_kmcnt 0x0 9125; GFX12-NEXT: global_load_u8 v1, v0, s[2:3] 9126; GFX12-NEXT: s_wait_loadcnt 0x0 9127; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] 9128; GFX12-NEXT: s_endpgm 9129 %load = load <1 x i8>, ptr addrspace(4) %in 9130 %ext = zext <1 x i8> %load to <1 x i16> 9131 store <1 x i16> %ext, ptr addrspace(1) %out 9132 ret void 9133} 9134 9135define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9136; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i16: 9137; GFX6-NOHSA: ; %bb.0: 9138; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9139; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 9140; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 9141; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 9142; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 9143; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9144; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 9145; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 9146; GFX6-NOHSA-NEXT: buffer_load_sbyte v0, off, s[8:11], 0 9147; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 9148; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 9149; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 9150; GFX6-NOHSA-NEXT: buffer_store_short v0, off, s[4:7], 0 9151; GFX6-NOHSA-NEXT: s_endpgm 9152; 9153; GFX7-HSA-LABEL: constant_sextload_v1i8_to_v1i16: 9154; GFX7-HSA: ; %bb.0: 9155; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9156; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9157; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 9158; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 9159; GFX7-HSA-NEXT: flat_load_sbyte v2, v[0:1] 9160; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9161; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 9162; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 9163; GFX7-HSA-NEXT: flat_store_short v[0:1], v2 9164; GFX7-HSA-NEXT: s_endpgm 9165; 9166; GFX8-NOHSA-LABEL: constant_sextload_v1i8_to_v1i16: 9167; GFX8-NOHSA: ; %bb.0: 9168; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9169; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9170; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 9171; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 9172; GFX8-NOHSA-NEXT: flat_load_sbyte v2, v[0:1] 9173; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9174; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9175; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 9176; GFX8-NOHSA-NEXT: flat_store_short v[0:1], v2 9177; GFX8-NOHSA-NEXT: s_endpgm 9178; 9179; EG-LABEL: constant_sextload_v1i8_to_v1i16: 9180; EG: ; %bb.0: 9181; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 9182; EG-NEXT: TEX 0 @6 9183; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 9184; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 9185; EG-NEXT: CF_END 9186; EG-NEXT: PAD 9187; EG-NEXT: Fetch clause starting at 6: 9188; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 9189; EG-NEXT: ALU clause starting at 8: 9190; EG-NEXT: MOV * T0.X, KC0[2].Z, 9191; EG-NEXT: ALU clause starting at 9: 9192; EG-NEXT: BFE_INT T0.W, T0.X, 0.0, literal.x, 9193; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.y, 9194; EG-NEXT: 8(1.121039e-44), 3(4.203895e-45) 9195; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 9196; EG-NEXT: LSHL * T1.W, PS, literal.y, 9197; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 9198; EG-NEXT: LSHL T0.X, PV.W, PS, 9199; EG-NEXT: LSHL * T0.W, literal.x, PS, 9200; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 9201; EG-NEXT: MOV T0.Y, 0.0, 9202; EG-NEXT: MOV * T0.Z, 0.0, 9203; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 9204; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9205; 9206; GFX12-LABEL: constant_sextload_v1i8_to_v1i16: 9207; GFX12: ; %bb.0: 9208; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9209; GFX12-NEXT: v_mov_b32_e32 v0, 0 9210; GFX12-NEXT: s_wait_kmcnt 0x0 9211; GFX12-NEXT: global_load_i8 v1, v0, s[2:3] 9212; GFX12-NEXT: s_wait_loadcnt 0x0 9213; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] 9214; GFX12-NEXT: s_endpgm 9215 %load = load <1 x i8>, ptr addrspace(4) %in 9216 %ext = sext <1 x i8> %load to <1 x i16> 9217 store <1 x i16> %ext, ptr addrspace(1) %out 9218 ret void 9219} 9220 9221define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9222; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i16: 9223; GFX6-NOHSA: ; %bb.0: 9224; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9225; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 9226; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 9227; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 9228; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 9229; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9230; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 9231; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 9232; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 9233; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 9234; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 9235; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 9236; GFX6-NOHSA-NEXT: v_lshlrev_b32_e32 v1, 8, v0 9237; GFX6-NOHSA-NEXT: v_or_b32_e32 v0, v0, v1 9238; GFX6-NOHSA-NEXT: v_and_b32_e32 v0, 0xff00ff, v0 9239; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 9240; GFX6-NOHSA-NEXT: s_endpgm 9241; 9242; GFX7-HSA-LABEL: constant_zextload_v2i8_to_v2i16: 9243; GFX7-HSA: ; %bb.0: 9244; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9245; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9246; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 9247; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 9248; GFX7-HSA-NEXT: flat_load_ushort v2, v[0:1] 9249; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9250; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 9251; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 9252; GFX7-HSA-NEXT: v_lshlrev_b32_e32 v3, 8, v2 9253; GFX7-HSA-NEXT: v_or_b32_e32 v2, v2, v3 9254; GFX7-HSA-NEXT: v_and_b32_e32 v2, 0xff00ff, v2 9255; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 9256; GFX7-HSA-NEXT: s_endpgm 9257; 9258; GFX8-NOHSA-LABEL: constant_zextload_v2i8_to_v2i16: 9259; GFX8-NOHSA: ; %bb.0: 9260; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9261; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9262; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 9263; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 9264; GFX8-NOHSA-NEXT: flat_load_ushort v2, v[0:1] 9265; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9266; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9267; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 9268; GFX8-NOHSA-NEXT: v_lshlrev_b32_e32 v3, 8, v2 9269; GFX8-NOHSA-NEXT: v_and_b32_e32 v3, 0xff0000, v3 9270; GFX8-NOHSA-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 9271; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 9272; GFX8-NOHSA-NEXT: s_endpgm 9273; 9274; EG-LABEL: constant_zextload_v2i8_to_v2i16: 9275; EG: ; %bb.0: 9276; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 9277; EG-NEXT: TEX 0 @6 9278; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[] 9279; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.X, T6.X, 1 9280; EG-NEXT: CF_END 9281; EG-NEXT: PAD 9282; EG-NEXT: Fetch clause starting at 6: 9283; EG-NEXT: VTX_READ_16 T5.X, T5.X, 0, #1 9284; EG-NEXT: ALU clause starting at 8: 9285; EG-NEXT: MOV * T5.X, KC0[2].Z, 9286; EG-NEXT: ALU clause starting at 9: 9287; EG-NEXT: LSHL * T0.W, T5.X, literal.x, 9288; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9289; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 9290; EG-NEXT: AND_INT * T1.W, T5.X, literal.y, 9291; EG-NEXT: 16711680(2.341805e-38), 255(3.573311e-43) 9292; EG-NEXT: OR_INT T5.X, PS, PV.W, 9293; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, 9294; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9295; 9296; GFX12-LABEL: constant_zextload_v2i8_to_v2i16: 9297; GFX12: ; %bb.0: 9298; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9299; GFX12-NEXT: v_mov_b32_e32 v0, 0 9300; GFX12-NEXT: s_wait_kmcnt 0x0 9301; GFX12-NEXT: global_load_u16 v1, v0, s[2:3] 9302; GFX12-NEXT: s_wait_loadcnt 0x0 9303; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v1 9304; GFX12-NEXT: v_and_b32_e32 v1, 0xff, v1 9305; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 9306; GFX12-NEXT: v_lshrrev_b32_e32 v2, 8, v2 9307; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1 9308; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 9309; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1 9310; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 9311; GFX12-NEXT: s_endpgm 9312 %load = load <2 x i8>, ptr addrspace(4) %in 9313 %ext = zext <2 x i8> %load to <2 x i16> 9314 store <2 x i16> %ext, ptr addrspace(1) %out 9315 ret void 9316} 9317 9318define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9319; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i16: 9320; GFX6-NOHSA: ; %bb.0: 9321; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9322; GFX6-NOHSA-NEXT: s_mov_b32 s7, 0xf000 9323; GFX6-NOHSA-NEXT: s_mov_b32 s6, -1 9324; GFX6-NOHSA-NEXT: s_mov_b32 s10, s6 9325; GFX6-NOHSA-NEXT: s_mov_b32 s11, s7 9326; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9327; GFX6-NOHSA-NEXT: s_mov_b32 s8, s2 9328; GFX6-NOHSA-NEXT: s_mov_b32 s9, s3 9329; GFX6-NOHSA-NEXT: buffer_load_ushort v0, off, s[8:11], 0 9330; GFX6-NOHSA-NEXT: s_mov_b32 s4, s0 9331; GFX6-NOHSA-NEXT: s_mov_b32 s5, s1 9332; GFX6-NOHSA-NEXT: s_waitcnt vmcnt(0) 9333; GFX6-NOHSA-NEXT: v_bfe_i32 v1, v0, 8, 8 9334; GFX6-NOHSA-NEXT: v_bfe_i32 v0, v0, 0, 8 9335; GFX6-NOHSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1 9336; GFX6-NOHSA-NEXT: v_and_b32_e32 v0, 0xffff, v0 9337; GFX6-NOHSA-NEXT: v_or_b32_e32 v0, v0, v1 9338; GFX6-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 9339; GFX6-NOHSA-NEXT: s_endpgm 9340; 9341; GFX7-HSA-LABEL: constant_sextload_v2i8_to_v2i16: 9342; GFX7-HSA: ; %bb.0: 9343; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9344; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9345; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 9346; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 9347; GFX7-HSA-NEXT: flat_load_ushort v2, v[0:1] 9348; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9349; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 9350; GFX7-HSA-NEXT: s_waitcnt vmcnt(0) 9351; GFX7-HSA-NEXT: v_bfe_i32 v3, v2, 8, 8 9352; GFX7-HSA-NEXT: v_bfe_i32 v2, v2, 0, 8 9353; GFX7-HSA-NEXT: v_lshlrev_b32_e32 v3, 16, v3 9354; GFX7-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v2 9355; GFX7-HSA-NEXT: v_or_b32_e32 v2, v2, v3 9356; GFX7-HSA-NEXT: flat_store_dword v[0:1], v2 9357; GFX7-HSA-NEXT: s_endpgm 9358; 9359; GFX8-NOHSA-LABEL: constant_sextload_v2i8_to_v2i16: 9360; GFX8-NOHSA: ; %bb.0: 9361; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9362; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, 0xffff 9363; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, 8 9364; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9365; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 9366; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 9367; GFX8-NOHSA-NEXT: flat_load_ushort v2, v[0:1] 9368; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9369; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9370; GFX8-NOHSA-NEXT: s_waitcnt vmcnt(0) 9371; GFX8-NOHSA-NEXT: v_and_b32_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 9372; GFX8-NOHSA-NEXT: v_lshlrev_b32_sdwa v2, v4, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 9373; GFX8-NOHSA-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 9374; GFX8-NOHSA-NEXT: v_or_b32_e32 v2, v3, v2 9375; GFX8-NOHSA-NEXT: flat_store_dword v[0:1], v2 9376; GFX8-NOHSA-NEXT: s_endpgm 9377; 9378; EG-LABEL: constant_sextload_v2i8_to_v2i16: 9379; EG: ; %bb.0: 9380; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 9381; EG-NEXT: TEX 0 @6 9382; EG-NEXT: ALU 16, @10, KC0[CB0:0-32], KC1[] 9383; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.X, T6.X, 1 9384; EG-NEXT: CF_END 9385; EG-NEXT: PAD 9386; EG-NEXT: Fetch clause starting at 6: 9387; EG-NEXT: VTX_READ_16 T5.X, T5.X, 0, #1 9388; EG-NEXT: ALU clause starting at 8: 9389; EG-NEXT: MOV * T0.Y, T2.X, 9390; EG-NEXT: MOV * T5.X, KC0[2].Z, 9391; EG-NEXT: ALU clause starting at 10: 9392; EG-NEXT: AND_INT T0.W, T5.X, literal.x, 9393; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 9394; EG-NEXT: 65535(9.183409e-41), -65536(nan) 9395; EG-NEXT: OR_INT * T0.W, PS, PV.W, 9396; EG-NEXT: MOV * T2.X, PV.W, 9397; EG-NEXT: MOV * T0.Y, PV.X, 9398; EG-NEXT: LSHR * T1.W, PV.Y, literal.x, 9399; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9400; EG-NEXT: BFE_INT T0.Z, T0.W, 0.0, literal.x, 9401; EG-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x, 9402; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9403; EG-NEXT: LSHL T0.W, PV.W, literal.x, 9404; EG-NEXT: AND_INT * T1.W, PV.Z, literal.y, 9405; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41) 9406; EG-NEXT: OR_INT T5.X, PS, PV.W, 9407; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, 9408; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9409; 9410; GFX12-LABEL: constant_sextload_v2i8_to_v2i16: 9411; GFX12: ; %bb.0: 9412; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9413; GFX12-NEXT: v_mov_b32_e32 v0, 0 9414; GFX12-NEXT: s_wait_kmcnt 0x0 9415; GFX12-NEXT: global_load_u16 v1, v0, s[2:3] 9416; GFX12-NEXT: s_wait_loadcnt 0x0 9417; GFX12-NEXT: v_bfe_i32 v2, v1, 0, 16 9418; GFX12-NEXT: v_bfe_i32 v1, v1, 0, 8 9419; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 9420; GFX12-NEXT: v_lshrrev_b32_e32 v2, 8, v2 9421; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1 9422; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 9423; GFX12-NEXT: v_lshl_or_b32 v1, v2, 16, v1 9424; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 9425; GFX12-NEXT: s_endpgm 9426 %load = load <2 x i8>, ptr addrspace(4) %in 9427 %ext = sext <2 x i8> %load to <2 x i16> 9428 store <2 x i16> %ext, ptr addrspace(1) %out 9429 ret void 9430} 9431 9432define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9433; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i16: 9434; GFX6-NOHSA: ; %bb.0: 9435; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9436; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9437; GFX6-NOHSA-NEXT: s_load_dword s4, s[2:3], 0x0 9438; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 9439; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 9440; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9441; GFX6-NOHSA-NEXT: s_and_b32 s5, s4, 0xff00 9442; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s4, 24 9443; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 9444; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xff 9445; GFX6-NOHSA-NEXT: v_alignbit_b32 v0, s6, v0, 16 9446; GFX6-NOHSA-NEXT: s_lshl_b32 s5, s5, 8 9447; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s5 9448; GFX6-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 9449; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 9450; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 9451; GFX6-NOHSA-NEXT: s_endpgm 9452; 9453; GFX7-HSA-LABEL: constant_zextload_v4i8_to_v4i16: 9454; GFX7-HSA: ; %bb.0: 9455; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9456; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9457; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 9458; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9459; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 9460; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9461; GFX7-HSA-NEXT: s_and_b32 s0, s2, 0xff00 9462; GFX7-HSA-NEXT: s_lshr_b32 s1, s2, 24 9463; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2 9464; GFX7-HSA-NEXT: s_and_b32 s2, s2, 0xff 9465; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 8 9466; GFX7-HSA-NEXT: v_alignbit_b32 v2, s1, v2, 16 9467; GFX7-HSA-NEXT: s_or_b32 s0, s2, s0 9468; GFX7-HSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v2 9469; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0 9470; GFX7-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 9471; GFX7-HSA-NEXT: s_endpgm 9472; 9473; GFX8-NOHSA-LABEL: constant_zextload_v4i8_to_v4i16: 9474; GFX8-NOHSA: ; %bb.0: 9475; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9476; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9477; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 9478; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9479; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9480; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9481; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s2, 24 9482; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 9483; GFX8-NOHSA-NEXT: s_and_b32 s1, s2, 0xff 9484; GFX8-NOHSA-NEXT: s_lshl_b32 s2, s2, 8 9485; GFX8-NOHSA-NEXT: v_alignbit_b32 v2, s0, v2, 16 9486; GFX8-NOHSA-NEXT: s_and_b32 s0, s2, 0xff0000 9487; GFX8-NOHSA-NEXT: s_or_b32 s0, s1, s0 9488; GFX8-NOHSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v2 9489; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 9490; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 9491; GFX8-NOHSA-NEXT: s_endpgm 9492; 9493; EG-LABEL: constant_zextload_v4i8_to_v4i16: 9494; EG: ; %bb.0: 9495; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 9496; EG-NEXT: TEX 0 @6 9497; EG-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[] 9498; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1 9499; EG-NEXT: CF_END 9500; EG-NEXT: PAD 9501; EG-NEXT: Fetch clause starting at 6: 9502; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1 9503; EG-NEXT: ALU clause starting at 8: 9504; EG-NEXT: MOV * T0.Y, T4.X, 9505; EG-NEXT: MOV * T7.X, KC0[2].Z, 9506; EG-NEXT: ALU clause starting at 10: 9507; EG-NEXT: AND_INT T0.W, T7.X, literal.x, 9508; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 9509; EG-NEXT: 255(3.573311e-43), -65536(nan) 9510; EG-NEXT: OR_INT * T0.W, PS, PV.W, 9511; EG-NEXT: MOV * T4.X, PV.W, 9512; EG-NEXT: MOV T0.Y, PV.X, 9513; EG-NEXT: LSHL * T0.W, T7.X, literal.x, 9514; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9515; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 9516; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 9517; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 9518; EG-NEXT: OR_INT * T0.W, PV.W, PS, 9519; EG-NEXT: MOV T4.X, PV.W, 9520; EG-NEXT: MOV T0.Y, T5.X, 9521; EG-NEXT: MOV * T0.W, literal.x, 9522; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9523; EG-NEXT: BFE_UINT T0.W, T7.X, literal.x, PV.W, 9524; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 9525; EG-NEXT: 16(2.242078e-44), -65536(nan) 9526; EG-NEXT: OR_INT * T0.W, PS, PV.W, 9527; EG-NEXT: MOV * T5.X, PV.W, 9528; EG-NEXT: MOV T0.Y, PV.X, 9529; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 9530; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9531; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 9532; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 9533; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 9534; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 9535; EG-NEXT: OR_INT * T8.Y, PV.W, PS, 9536; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9537; EG-NEXT: MOV T5.X, PV.Y, 9538; EG-NEXT: MOV * T8.X, T4.X, 9539; 9540; GFX12-LABEL: constant_zextload_v4i8_to_v4i16: 9541; GFX12: ; %bb.0: 9542; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9543; GFX12-NEXT: s_wait_kmcnt 0x0 9544; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 9545; GFX12-NEXT: s_wait_kmcnt 0x0 9546; GFX12-NEXT: s_bfe_u32 s3, s2, 0x80008 9547; GFX12-NEXT: s_lshr_b32 s4, s2, 24 9548; GFX12-NEXT: s_and_b32 s5, s2, 0xff 9549; GFX12-NEXT: s_bfe_u32 s2, s2, 0x80010 9550; GFX12-NEXT: s_pack_ll_b32_b16 s3, s5, s3 9551; GFX12-NEXT: s_pack_ll_b32_b16 s2, s2, s4 9552; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 9553; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2 9554; GFX12-NEXT: v_mov_b32_e32 v0, s3 9555; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 9556; GFX12-NEXT: s_endpgm 9557 %load = load <4 x i8>, ptr addrspace(4) %in 9558 %ext = zext <4 x i8> %load to <4 x i16> 9559 store <4 x i16> %ext, ptr addrspace(1) %out 9560 ret void 9561} 9562 9563define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9564; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i16: 9565; GFX6-NOHSA: ; %bb.0: 9566; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9567; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9568; GFX6-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 9569; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 9570; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9571; GFX6-NOHSA-NEXT: s_ashr_i32 s4, s2, 24 9572; GFX6-NOHSA-NEXT: s_bfe_i32 s5, s2, 0x80010 9573; GFX6-NOHSA-NEXT: s_bfe_i32 s6, s2, 0x80008 9574; GFX6-NOHSA-NEXT: s_sext_i32_i8 s2, s2 9575; GFX6-NOHSA-NEXT: s_lshl_b32 s4, s4, 16 9576; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xffff 9577; GFX6-NOHSA-NEXT: s_lshl_b32 s6, s6, 16 9578; GFX6-NOHSA-NEXT: s_and_b32 s2, s2, 0xffff 9579; GFX6-NOHSA-NEXT: s_or_b32 s4, s5, s4 9580; GFX6-NOHSA-NEXT: s_or_b32 s5, s2, s6 9581; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 9582; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 9583; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s4 9584; GFX6-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 9585; GFX6-NOHSA-NEXT: s_endpgm 9586; 9587; GFX7-HSA-LABEL: constant_sextload_v4i8_to_v4i16: 9588; GFX7-HSA: ; %bb.0: 9589; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9590; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9591; GFX7-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 9592; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9593; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1 9594; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9595; GFX7-HSA-NEXT: s_ashr_i32 s0, s2, 24 9596; GFX7-HSA-NEXT: s_bfe_i32 s1, s2, 0x80010 9597; GFX7-HSA-NEXT: s_bfe_i32 s3, s2, 0x80008 9598; GFX7-HSA-NEXT: s_sext_i32_i8 s2, s2 9599; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 9600; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 9601; GFX7-HSA-NEXT: s_lshl_b32 s3, s3, 16 9602; GFX7-HSA-NEXT: s_and_b32 s2, s2, 0xffff 9603; GFX7-HSA-NEXT: s_or_b32 s0, s1, s0 9604; GFX7-HSA-NEXT: s_or_b32 s1, s2, s3 9605; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s1 9606; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 9607; GFX7-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 9608; GFX7-HSA-NEXT: s_endpgm 9609; 9610; GFX8-NOHSA-LABEL: constant_sextload_v4i8_to_v4i16: 9611; GFX8-NOHSA: ; %bb.0: 9612; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9613; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9614; GFX8-NOHSA-NEXT: s_load_dword s2, s[2:3], 0x0 9615; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9616; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 9617; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9618; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s2, 16 9619; GFX8-NOHSA-NEXT: s_sext_i32_i16 s1, s2 9620; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s2, 0x80000 9621; GFX8-NOHSA-NEXT: s_ashr_i32 s2, s2, 24 9622; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s1, 8 9623; GFX8-NOHSA-NEXT: s_bfe_i32 s0, s0, 0x80000 9624; GFX8-NOHSA-NEXT: s_and_b32 s3, 0xffff, s3 9625; GFX8-NOHSA-NEXT: s_lshl_b32 s2, s2, 16 9626; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xffff0000 9627; GFX8-NOHSA-NEXT: s_and_b32 s0, 0xffff, s0 9628; GFX8-NOHSA-NEXT: s_or_b32 s1, s3, s1 9629; GFX8-NOHSA-NEXT: s_or_b32 s0, s0, s2 9630; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s1 9631; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s0 9632; GFX8-NOHSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 9633; GFX8-NOHSA-NEXT: s_endpgm 9634; 9635; EG-LABEL: constant_sextload_v4i8_to_v4i16: 9636; EG: ; %bb.0: 9637; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 9638; EG-NEXT: TEX 0 @6 9639; EG-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[] 9640; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1 9641; EG-NEXT: CF_END 9642; EG-NEXT: PAD 9643; EG-NEXT: Fetch clause starting at 6: 9644; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1 9645; EG-NEXT: ALU clause starting at 8: 9646; EG-NEXT: MOV * T0.Y, T4.X, 9647; EG-NEXT: MOV * T7.X, KC0[2].Z, 9648; EG-NEXT: ALU clause starting at 10: 9649; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, 9650; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9651; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 9652; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 9653; EG-NEXT: 65535(9.183409e-41), -65536(nan) 9654; EG-NEXT: OR_INT * T0.W, PS, PV.W, 9655; EG-NEXT: MOV * T4.X, PV.W, 9656; EG-NEXT: MOV T0.Y, PV.X, 9657; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 9658; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9659; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 9660; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 9661; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 9662; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 9663; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 9664; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 9665; EG-NEXT: MOV T4.X, PV.W, 9666; EG-NEXT: MOV T0.Y, T5.X, 9667; EG-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212 9668; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 9669; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 9670; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 9671; EG-NEXT: 8(1.121039e-44), -65536(nan) 9672; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 9673; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 9674; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 9675; EG-NEXT: MOV * T5.X, PV.W, 9676; EG-NEXT: MOV T0.Y, PV.X, 9677; EG-NEXT: ASHR * T0.W, T7.X, literal.x, 9678; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 9679; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 9680; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 9681; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 9682; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 9683; EG-NEXT: OR_INT * T8.Y, PV.W, PS, 9684; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9685; EG-NEXT: MOV T5.X, PV.Y, 9686; EG-NEXT: MOV * T8.X, T4.X, 9687; 9688; GFX12-LABEL: constant_sextload_v4i8_to_v4i16: 9689; GFX12: ; %bb.0: 9690; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9691; GFX12-NEXT: s_wait_kmcnt 0x0 9692; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 9693; GFX12-NEXT: s_wait_kmcnt 0x0 9694; GFX12-NEXT: s_lshr_b32 s3, s2, 16 9695; GFX12-NEXT: s_sext_i32_i16 s5, s2 9696; GFX12-NEXT: s_ashr_i32 s4, s2, 24 9697; GFX12-NEXT: s_bfe_i32 s2, s2, 0x80000 9698; GFX12-NEXT: s_lshr_b32 s5, s5, 8 9699; GFX12-NEXT: s_bfe_i32 s3, s3, 0x80000 9700; GFX12-NEXT: s_pack_ll_b32_b16 s2, s2, s5 9701; GFX12-NEXT: s_pack_ll_b32_b16 s3, s3, s4 9702; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 9703; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 9704; GFX12-NEXT: v_mov_b32_e32 v0, s2 9705; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 9706; GFX12-NEXT: s_endpgm 9707 %load = load <4 x i8>, ptr addrspace(4) %in 9708 %ext = sext <4 x i8> %load to <4 x i16> 9709 store <4 x i16> %ext, ptr addrspace(1) %out 9710 ret void 9711} 9712 9713define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9714; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i16: 9715; GFX6-NOHSA: ; %bb.0: 9716; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9717; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9718; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 9719; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 9720; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 9721; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9722; GFX6-NOHSA-NEXT: s_and_b32 s6, s4, 0xff00 9723; GFX6-NOHSA-NEXT: s_lshr_b32 s7, s4, 24 9724; GFX6-NOHSA-NEXT: s_and_b32 s8, s5, 0xff00 9725; GFX6-NOHSA-NEXT: s_lshr_b32 s9, s5, 24 9726; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 9727; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xff 9728; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s4 9729; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xff 9730; GFX6-NOHSA-NEXT: v_alignbit_b32 v0, s9, v0, 16 9731; GFX6-NOHSA-NEXT: s_lshl_b32 s8, s8, 8 9732; GFX6-NOHSA-NEXT: v_alignbit_b32 v1, s7, v1, 16 9733; GFX6-NOHSA-NEXT: s_lshl_b32 s6, s6, 8 9734; GFX6-NOHSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 9735; GFX6-NOHSA-NEXT: s_or_b32 s5, s5, s8 9736; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s6 9737; GFX6-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v1 9738; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 9739; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 9740; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 9741; GFX6-NOHSA-NEXT: s_endpgm 9742; 9743; GFX7-HSA-LABEL: constant_zextload_v8i8_to_v8i16: 9744; GFX7-HSA: ; %bb.0: 9745; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9746; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9747; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 9748; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 9749; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 9750; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9751; GFX7-HSA-NEXT: s_lshr_b32 s5, s3, 24 9752; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 9753; GFX7-HSA-NEXT: v_alignbit_b32 v0, s5, v0, 16 9754; GFX7-HSA-NEXT: s_and_b32 s0, s2, 0xff00 9755; GFX7-HSA-NEXT: s_lshr_b32 s1, s2, 24 9756; GFX7-HSA-NEXT: s_and_b32 s4, s3, 0xff00 9757; GFX7-HSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 9758; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 9759; GFX7-HSA-NEXT: s_and_b32 s3, s3, 0xff 9760; GFX7-HSA-NEXT: s_lshl_b32 s4, s4, 8 9761; GFX7-HSA-NEXT: v_alignbit_b32 v0, s1, v0, 16 9762; GFX7-HSA-NEXT: s_and_b32 s1, s2, 0xff 9763; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 8 9764; GFX7-HSA-NEXT: s_or_b32 s3, s3, s4 9765; GFX7-HSA-NEXT: s_or_b32 s0, s1, s0 9766; GFX7-HSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 9767; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 9768; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 9769; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9770; GFX7-HSA-NEXT: s_endpgm 9771; 9772; GFX8-NOHSA-LABEL: constant_zextload_v8i8_to_v8i16: 9773; GFX8-NOHSA: ; %bb.0: 9774; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9775; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9776; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 9777; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 9778; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 9779; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9780; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s2, 24 9781; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 9782; GFX8-NOHSA-NEXT: s_lshr_b32 s1, s3, 24 9783; GFX8-NOHSA-NEXT: s_bfe_u32 s4, s3, 0x80010 9784; GFX8-NOHSA-NEXT: s_and_b32 s5, s3, 0xff 9785; GFX8-NOHSA-NEXT: s_lshl_b32 s3, s3, 8 9786; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s0, v0, 16 9787; GFX8-NOHSA-NEXT: s_and_b32 s0, s2, 0xff 9788; GFX8-NOHSA-NEXT: s_lshl_b32 s2, s2, 8 9789; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s1, 16 9790; GFX8-NOHSA-NEXT: s_and_b32 s3, s3, 0xff0000 9791; GFX8-NOHSA-NEXT: s_and_b32 s2, s2, 0xff0000 9792; GFX8-NOHSA-NEXT: s_or_b32 s1, s4, s1 9793; GFX8-NOHSA-NEXT: s_or_b32 s3, s5, s3 9794; GFX8-NOHSA-NEXT: s_or_b32 s0, s0, s2 9795; GFX8-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 9796; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 9797; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s3 9798; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s1 9799; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9800; GFX8-NOHSA-NEXT: s_endpgm 9801; 9802; EG-LABEL: constant_zextload_v8i8_to_v8i16: 9803; EG: ; %bb.0: 9804; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 9805; EG-NEXT: TEX 0 @6 9806; EG-NEXT: ALU 61, @10, KC0[CB0:0-32], KC1[] 9807; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1 9808; EG-NEXT: CF_END 9809; EG-NEXT: PAD 9810; EG-NEXT: Fetch clause starting at 6: 9811; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1 9812; EG-NEXT: ALU clause starting at 8: 9813; EG-NEXT: MOV * T0.Y, T8.X, 9814; EG-NEXT: MOV * T11.X, KC0[2].Z, 9815; EG-NEXT: ALU clause starting at 10: 9816; EG-NEXT: AND_INT T0.W, T11.X, literal.x, 9817; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 9818; EG-NEXT: 255(3.573311e-43), -65536(nan) 9819; EG-NEXT: OR_INT * T0.W, PS, PV.W, 9820; EG-NEXT: MOV * T8.X, PV.W, 9821; EG-NEXT: MOV T0.Y, PV.X, 9822; EG-NEXT: LSHL * T0.W, T11.X, literal.x, 9823; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9824; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 9825; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 9826; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 9827; EG-NEXT: OR_INT * T0.W, PV.W, PS, 9828; EG-NEXT: MOV T8.X, PV.W, 9829; EG-NEXT: MOV T0.Y, T9.X, 9830; EG-NEXT: MOV * T0.W, literal.x, 9831; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9832; EG-NEXT: BFE_UINT T1.W, T11.X, literal.x, PV.W, 9833; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y, 9834; EG-NEXT: 16(2.242078e-44), -65536(nan) 9835; EG-NEXT: OR_INT * T1.W, PS, PV.W, 9836; EG-NEXT: MOV * T9.X, PV.W, 9837; EG-NEXT: MOV T0.Y, PV.X, 9838; EG-NEXT: LSHR * T1.W, T11.X, literal.x, 9839; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9840; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 9841; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 9842; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 9843; EG-NEXT: OR_INT * T12.Y, PV.W, PS, 9844; EG-NEXT: MOV T9.X, PV.Y, 9845; EG-NEXT: MOV * T0.Y, T4.X, 9846; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 9847; EG-NEXT: AND_INT * T2.W, T11.Y, literal.y, 9848; EG-NEXT: -65536(nan), 255(3.573311e-43) 9849; EG-NEXT: OR_INT * T1.W, PV.W, PS, 9850; EG-NEXT: MOV * T4.X, PV.W, 9851; EG-NEXT: MOV T0.Y, PV.X, 9852; EG-NEXT: LSHL * T1.W, T11.Y, literal.x, 9853; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9854; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 9855; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 9856; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 9857; EG-NEXT: OR_INT * T1.W, PV.W, PS, 9858; EG-NEXT: MOV T4.X, PV.W, 9859; EG-NEXT: MOV T0.Y, T5.X, 9860; EG-NEXT: BFE_UINT * T0.W, T11.Y, literal.x, T0.W, 9861; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 9862; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x, 9863; EG-NEXT: -65536(nan), 0(0.000000e+00) 9864; EG-NEXT: OR_INT * T0.W, PV.W, T0.W, 9865; EG-NEXT: MOV * T5.X, PV.W, 9866; EG-NEXT: MOV T0.Y, PV.X, 9867; EG-NEXT: LSHR * T0.W, T11.Y, literal.x, 9868; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 9869; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 9870; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 9871; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 9872; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 9873; EG-NEXT: OR_INT * T12.W, PV.W, PS, 9874; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9875; EG-NEXT: MOV T5.X, PV.W, 9876; EG-NEXT: MOV * T12.X, T8.X, 9877; EG-NEXT: MOV * T12.Z, T4.X, 9878; 9879; GFX12-LABEL: constant_zextload_v8i8_to_v8i16: 9880; GFX12: ; %bb.0: 9881; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9882; GFX12-NEXT: s_wait_kmcnt 0x0 9883; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 9884; GFX12-NEXT: s_wait_kmcnt 0x0 9885; GFX12-NEXT: s_bfe_u32 s4, s2, 0x80008 9886; GFX12-NEXT: s_lshr_b32 s5, s2, 24 9887; GFX12-NEXT: s_bfe_u32 s6, s3, 0x80008 9888; GFX12-NEXT: s_lshr_b32 s7, s3, 24 9889; GFX12-NEXT: s_bfe_u32 s8, s3, 0x80010 9890; GFX12-NEXT: s_and_b32 s3, s3, 0xff 9891; GFX12-NEXT: s_bfe_u32 s9, s2, 0x80010 9892; GFX12-NEXT: s_and_b32 s2, s2, 0xff 9893; GFX12-NEXT: s_pack_ll_b32_b16 s7, s8, s7 9894; GFX12-NEXT: s_pack_ll_b32_b16 s3, s3, s6 9895; GFX12-NEXT: s_pack_ll_b32_b16 s2, s2, s4 9896; GFX12-NEXT: s_pack_ll_b32_b16 s4, s9, s5 9897; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 9898; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s4 9899; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s7 9900; GFX12-NEXT: v_mov_b32_e32 v2, s3 9901; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 9902; GFX12-NEXT: s_endpgm 9903 %load = load <8 x i8>, ptr addrspace(4) %in 9904 %ext = zext <8 x i8> %load to <8 x i16> 9905 store <8 x i16> %ext, ptr addrspace(1) %out 9906 ret void 9907} 9908 9909define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 9910; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i16: 9911; GFX6-NOHSA: ; %bb.0: 9912; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 9913; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9914; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 9915; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 9916; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9917; GFX6-NOHSA-NEXT: s_ashr_i32 s2, s5, 24 9918; GFX6-NOHSA-NEXT: s_bfe_i32 s6, s5, 0x80010 9919; GFX6-NOHSA-NEXT: s_bfe_i32 s7, s5, 0x80008 9920; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 9921; GFX6-NOHSA-NEXT: s_ashr_i32 s8, s4, 24 9922; GFX6-NOHSA-NEXT: s_bfe_i32 s9, s4, 0x80010 9923; GFX6-NOHSA-NEXT: s_bfe_i32 s10, s4, 0x80008 9924; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 9925; GFX6-NOHSA-NEXT: s_lshl_b32 s2, s2, 16 9926; GFX6-NOHSA-NEXT: s_and_b32 s6, s6, 0xffff 9927; GFX6-NOHSA-NEXT: s_lshl_b32 s7, s7, 16 9928; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xffff 9929; GFX6-NOHSA-NEXT: s_lshl_b32 s8, s8, 16 9930; GFX6-NOHSA-NEXT: s_and_b32 s9, s9, 0xffff 9931; GFX6-NOHSA-NEXT: s_lshl_b32 s10, s10, 16 9932; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xffff 9933; GFX6-NOHSA-NEXT: s_or_b32 s6, s6, s2 9934; GFX6-NOHSA-NEXT: s_or_b32 s5, s5, s7 9935; GFX6-NOHSA-NEXT: s_or_b32 s7, s9, s8 9936; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s10 9937; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 9938; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 9939; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s7 9940; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 9941; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s6 9942; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 9943; GFX6-NOHSA-NEXT: s_endpgm 9944; 9945; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i16: 9946; GFX7-HSA: ; %bb.0: 9947; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 9948; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9949; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 9950; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 9951; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 9952; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 9953; GFX7-HSA-NEXT: s_ashr_i32 s0, s3, 24 9954; GFX7-HSA-NEXT: s_bfe_i32 s1, s3, 0x80010 9955; GFX7-HSA-NEXT: s_bfe_i32 s4, s3, 0x80008 9956; GFX7-HSA-NEXT: s_sext_i32_i8 s3, s3 9957; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 9958; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 9959; GFX7-HSA-NEXT: s_lshl_b32 s4, s4, 16 9960; GFX7-HSA-NEXT: s_and_b32 s3, s3, 0xffff 9961; GFX7-HSA-NEXT: s_or_b32 s0, s1, s0 9962; GFX7-HSA-NEXT: s_or_b32 s1, s3, s4 9963; GFX7-HSA-NEXT: s_ashr_i32 s3, s2, 24 9964; GFX7-HSA-NEXT: s_bfe_i32 s4, s2, 0x80010 9965; GFX7-HSA-NEXT: s_lshl_b32 s3, s3, 16 9966; GFX7-HSA-NEXT: s_and_b32 s4, s4, 0xffff 9967; GFX7-HSA-NEXT: s_or_b32 s3, s4, s3 9968; GFX7-HSA-NEXT: s_bfe_i32 s4, s2, 0x80008 9969; GFX7-HSA-NEXT: s_sext_i32_i8 s2, s2 9970; GFX7-HSA-NEXT: s_lshl_b32 s4, s4, 16 9971; GFX7-HSA-NEXT: s_and_b32 s2, s2, 0xffff 9972; GFX7-HSA-NEXT: s_or_b32 s2, s2, s4 9973; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 9974; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3 9975; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s1 9976; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 9977; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9978; GFX7-HSA-NEXT: s_endpgm 9979; 9980; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i16: 9981; GFX8-NOHSA: ; %bb.0: 9982; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 9983; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9984; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 9985; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 9986; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 9987; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 9988; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s3 9989; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s3, 0x80000 9990; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 9991; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 9992; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 9993; GFX8-NOHSA-NEXT: s_or_b32 s7, s1, s0 9994; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s2 9995; GFX8-NOHSA-NEXT: s_bfe_i32 s6, s2, 0x80000 9996; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 9997; GFX8-NOHSA-NEXT: s_and_b32 s6, 0xffff, s6 9998; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 9999; GFX8-NOHSA-NEXT: s_lshr_b32 s5, s3, 16 10000; GFX8-NOHSA-NEXT: s_or_b32 s6, s6, s0 10001; GFX8-NOHSA-NEXT: s_ashr_i64 s[0:1], s[2:3], 56 10002; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s5, 0x80000 10003; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s2, 16 10004; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 10005; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 10006; GFX8-NOHSA-NEXT: s_or_b32 s0, s1, s0 10007; GFX8-NOHSA-NEXT: s_ashr_i32 s1, s2, 24 10008; GFX8-NOHSA-NEXT: s_bfe_i32 s2, s4, 0x80000 10009; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s1, 16 10010; GFX8-NOHSA-NEXT: s_and_b32 s2, 0xffff, s2 10011; GFX8-NOHSA-NEXT: s_or_b32 s1, s2, s1 10012; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 10013; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s1 10014; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s7 10015; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s0 10016; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10017; GFX8-NOHSA-NEXT: s_endpgm 10018; 10019; EG-LABEL: constant_sextload_v8i8_to_v8i16: 10020; EG: ; %bb.0: 10021; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[] 10022; EG-NEXT: TEX 0 @6 10023; EG-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[] 10024; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1 10025; EG-NEXT: CF_END 10026; EG-NEXT: PAD 10027; EG-NEXT: Fetch clause starting at 6: 10028; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1 10029; EG-NEXT: ALU clause starting at 8: 10030; EG-NEXT: MOV * T0.Y, T8.X, 10031; EG-NEXT: MOV * T11.X, KC0[2].Z, 10032; EG-NEXT: ALU clause starting at 10: 10033; EG-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x, 10034; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10035; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 10036; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 10037; EG-NEXT: 65535(9.183409e-41), -65536(nan) 10038; EG-NEXT: OR_INT * T0.W, PS, PV.W, 10039; EG-NEXT: MOV * T8.X, PV.W, 10040; EG-NEXT: MOV T0.Y, PV.X, 10041; EG-NEXT: LSHR * T0.W, T11.X, literal.x, 10042; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10043; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10044; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10045; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 10046; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 10047; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10048; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10049; EG-NEXT: MOV T8.X, PV.W, 10050; EG-NEXT: MOV T0.Y, T9.X, 10051; EG-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212 10052; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10053; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10054; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10055; EG-NEXT: 8(1.121039e-44), -65536(nan) 10056; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 10057; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 10058; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10059; EG-NEXT: MOV * T9.X, PV.W, 10060; EG-NEXT: MOV T0.Y, PV.X, 10061; EG-NEXT: ASHR * T0.W, T11.X, literal.x, 10062; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 10063; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10064; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 10065; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 10066; EG-NEXT: OR_INT * T12.Y, PV.W, PS, 10067; EG-NEXT: MOV T9.X, PV.Y, 10068; EG-NEXT: MOV T0.Y, T4.X, 10069; EG-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x, 10070; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10071; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10072; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 10073; EG-NEXT: -65536(nan), 65535(9.183409e-41) 10074; EG-NEXT: OR_INT * T0.W, PV.W, PS, 10075; EG-NEXT: MOV * T4.X, PV.W, 10076; EG-NEXT: MOV T0.Y, PV.X, 10077; EG-NEXT: LSHR * T0.W, T11.Y, literal.x, 10078; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10079; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10080; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10081; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 10082; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 10083; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10084; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10085; EG-NEXT: MOV T4.X, PV.W, 10086; EG-NEXT: MOV T0.Y, T5.X, 10087; EG-NEXT: LSHR * T0.W, T11.Y, literal.x, 10088; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10089; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10090; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10091; EG-NEXT: 8(1.121039e-44), -65536(nan) 10092; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 10093; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 10094; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10095; EG-NEXT: MOV * T5.X, PV.W, 10096; EG-NEXT: MOV T0.Y, PV.X, 10097; EG-NEXT: ASHR * T0.W, T11.Y, literal.x, 10098; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 10099; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10100; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 10101; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 10102; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 10103; EG-NEXT: OR_INT * T12.W, PV.W, PS, 10104; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 10105; EG-NEXT: MOV T5.X, PV.W, 10106; EG-NEXT: MOV * T12.X, T8.X, 10107; EG-NEXT: MOV * T12.Z, T4.X, 10108; 10109; GFX12-LABEL: constant_sextload_v8i8_to_v8i16: 10110; GFX12: ; %bb.0: 10111; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 10112; GFX12-NEXT: s_wait_kmcnt 0x0 10113; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 10114; GFX12-NEXT: s_wait_kmcnt 0x0 10115; GFX12-NEXT: s_ashr_i64 s[4:5], s[2:3], 56 10116; GFX12-NEXT: s_lshr_b32 s6, s2, 16 10117; GFX12-NEXT: s_lshr_b32 s7, s3, 16 10118; GFX12-NEXT: s_bfe_i32 s5, s3, 0x80000 10119; GFX12-NEXT: s_sext_i32_i16 s3, s3 10120; GFX12-NEXT: s_ashr_i32 s8, s2, 24 10121; GFX12-NEXT: s_bfe_i32 s9, s2, 0x80000 10122; GFX12-NEXT: s_sext_i32_i16 s2, s2 10123; GFX12-NEXT: s_bfe_i32 s7, s7, 0x80000 10124; GFX12-NEXT: s_lshr_b32 s3, s3, 8 10125; GFX12-NEXT: s_bfe_i32 s6, s6, 0x80000 10126; GFX12-NEXT: s_lshr_b32 s2, s2, 8 10127; GFX12-NEXT: s_pack_ll_b32_b16 s4, s7, s4 10128; GFX12-NEXT: s_pack_ll_b32_b16 s3, s5, s3 10129; GFX12-NEXT: s_pack_ll_b32_b16 s2, s9, s2 10130; GFX12-NEXT: s_pack_ll_b32_b16 s5, s6, s8 10131; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 10132; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s5 10133; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s4 10134; GFX12-NEXT: v_mov_b32_e32 v2, s3 10135; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 10136; GFX12-NEXT: s_endpgm 10137 %load = load <8 x i8>, ptr addrspace(4) %in 10138 %ext = sext <8 x i8> %load to <8 x i16> 10139 store <8 x i16> %ext, ptr addrspace(1) %out 10140 ret void 10141} 10142 10143define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 10144; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i16: 10145; GFX6-NOHSA: ; %bb.0: 10146; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 10147; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10148; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 10149; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 10150; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 10151; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10152; GFX6-NOHSA-NEXT: s_and_b32 s8, s6, 0xff00 10153; GFX6-NOHSA-NEXT: s_lshr_b32 s9, s6, 24 10154; GFX6-NOHSA-NEXT: s_and_b32 s10, s7, 0xff00 10155; GFX6-NOHSA-NEXT: s_lshr_b32 s11, s7, 24 10156; GFX6-NOHSA-NEXT: s_and_b32 s12, s4, 0xff00 10157; GFX6-NOHSA-NEXT: s_lshr_b32 s13, s4, 24 10158; GFX6-NOHSA-NEXT: s_and_b32 s14, s5, 0xff00 10159; GFX6-NOHSA-NEXT: s_lshr_b32 s15, s5, 24 10160; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s5 10161; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xff 10162; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s4 10163; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xff 10164; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 10165; GFX6-NOHSA-NEXT: s_and_b32 s7, s7, 0xff 10166; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s6 10167; GFX6-NOHSA-NEXT: s_and_b32 s6, s6, 0xff 10168; GFX6-NOHSA-NEXT: v_alignbit_b32 v0, s15, v0, 16 10169; GFX6-NOHSA-NEXT: s_lshl_b32 s14, s14, 8 10170; GFX6-NOHSA-NEXT: v_alignbit_b32 v1, s13, v1, 16 10171; GFX6-NOHSA-NEXT: s_lshl_b32 s12, s12, 8 10172; GFX6-NOHSA-NEXT: v_alignbit_b32 v2, s11, v2, 16 10173; GFX6-NOHSA-NEXT: s_lshl_b32 s10, s10, 8 10174; GFX6-NOHSA-NEXT: v_alignbit_b32 v4, s9, v3, 16 10175; GFX6-NOHSA-NEXT: s_lshl_b32 s8, s8, 8 10176; GFX6-NOHSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 10177; GFX6-NOHSA-NEXT: s_or_b32 s5, s5, s14 10178; GFX6-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v1 10179; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s12 10180; GFX6-NOHSA-NEXT: v_and_b32_e32 v7, 0xff00ff, v2 10181; GFX6-NOHSA-NEXT: s_or_b32 s7, s7, s10 10182; GFX6-NOHSA-NEXT: s_or_b32 s6, s6, s8 10183; GFX6-NOHSA-NEXT: v_and_b32_e32 v5, 0xff00ff, v4 10184; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s6 10185; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s7 10186; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 10187; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 10188; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 10189; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 10190; GFX6-NOHSA-NEXT: s_endpgm 10191; 10192; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i16: 10193; GFX7-HSA: ; %bb.0: 10194; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 10195; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 10196; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 10197; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 10198; GFX7-HSA-NEXT: s_lshr_b32 s13, s5, 24 10199; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 10200; GFX7-HSA-NEXT: v_alignbit_b32 v0, s13, v0, 16 10201; GFX7-HSA-NEXT: s_lshr_b32 s11, s4, 24 10202; GFX7-HSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 10203; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 10204; GFX7-HSA-NEXT: v_alignbit_b32 v0, s11, v0, 16 10205; GFX7-HSA-NEXT: s_lshr_b32 s9, s7, 24 10206; GFX7-HSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 10207; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s7 10208; GFX7-HSA-NEXT: v_alignbit_b32 v0, s9, v0, 16 10209; GFX7-HSA-NEXT: s_and_b32 s2, s6, 0xff00 10210; GFX7-HSA-NEXT: s_lshr_b32 s3, s6, 24 10211; GFX7-HSA-NEXT: s_and_b32 s8, s7, 0xff00 10212; GFX7-HSA-NEXT: s_and_b32 s10, s4, 0xff00 10213; GFX7-HSA-NEXT: s_and_b32 s12, s5, 0xff00 10214; GFX7-HSA-NEXT: v_and_b32_e32 v7, 0xff00ff, v0 10215; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 10216; GFX7-HSA-NEXT: s_and_b32 s5, s5, 0xff 10217; GFX7-HSA-NEXT: s_lshl_b32 s12, s12, 8 10218; GFX7-HSA-NEXT: s_and_b32 s4, s4, 0xff 10219; GFX7-HSA-NEXT: s_lshl_b32 s10, s10, 8 10220; GFX7-HSA-NEXT: s_and_b32 s7, s7, 0xff 10221; GFX7-HSA-NEXT: s_lshl_b32 s8, s8, 8 10222; GFX7-HSA-NEXT: v_alignbit_b32 v0, s3, v0, 16 10223; GFX7-HSA-NEXT: s_and_b32 s3, s6, 0xff 10224; GFX7-HSA-NEXT: s_lshl_b32 s2, s2, 8 10225; GFX7-HSA-NEXT: s_or_b32 s5, s5, s12 10226; GFX7-HSA-NEXT: s_or_b32 s4, s4, s10 10227; GFX7-HSA-NEXT: s_or_b32 s7, s7, s8 10228; GFX7-HSA-NEXT: s_or_b32 s2, s3, s2 10229; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 10230; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 10231; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 10232; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s3 10233; GFX7-HSA-NEXT: v_and_b32_e32 v5, 0xff00ff, v0 10234; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s7 10235; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s2 10236; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 10237; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 10238; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 10239; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 10240; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 10241; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10242; GFX7-HSA-NEXT: s_endpgm 10243; 10244; GFX8-NOHSA-LABEL: constant_zextload_v16i8_to_v16i16: 10245; GFX8-NOHSA: ; %bb.0: 10246; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 10247; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10248; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 10249; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10250; GFX8-NOHSA-NEXT: s_lshr_b32 s3, s4, 24 10251; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 10252; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s3, v0, 16 10253; GFX8-NOHSA-NEXT: s_and_b32 s3, s4, 0xff 10254; GFX8-NOHSA-NEXT: s_lshl_b32 s4, s4, 8 10255; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s5, 24 10256; GFX8-NOHSA-NEXT: s_and_b32 s4, s4, 0xff0000 10257; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s5, 0x80010 10258; GFX8-NOHSA-NEXT: s_lshl_b32 s8, s8, 16 10259; GFX8-NOHSA-NEXT: s_or_b32 s4, s3, s4 10260; GFX8-NOHSA-NEXT: s_lshr_b32 s3, s7, 24 10261; GFX8-NOHSA-NEXT: s_lshr_b32 s2, s6, 24 10262; GFX8-NOHSA-NEXT: s_or_b32 s8, s9, s8 10263; GFX8-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 10264; GFX8-NOHSA-NEXT: s_lshl_b32 s3, s3, 16 10265; GFX8-NOHSA-NEXT: s_bfe_u32 s9, s7, 0x80010 10266; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 10267; GFX8-NOHSA-NEXT: s_and_b32 s10, s5, 0xff 10268; GFX8-NOHSA-NEXT: s_lshl_b32 s5, s5, 8 10269; GFX8-NOHSA-NEXT: s_or_b32 s3, s9, s3 10270; GFX8-NOHSA-NEXT: s_and_b32 s9, s7, 0xff 10271; GFX8-NOHSA-NEXT: s_lshl_b32 s7, s7, 8 10272; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s2, v0, 16 10273; GFX8-NOHSA-NEXT: s_and_b32 s2, s6, 0xff 10274; GFX8-NOHSA-NEXT: s_lshl_b32 s6, s6, 8 10275; GFX8-NOHSA-NEXT: s_and_b32 s5, s5, 0xff0000 10276; GFX8-NOHSA-NEXT: s_and_b32 s7, s7, 0xff0000 10277; GFX8-NOHSA-NEXT: s_and_b32 s6, s6, 0xff0000 10278; GFX8-NOHSA-NEXT: s_or_b32 s5, s10, s5 10279; GFX8-NOHSA-NEXT: s_or_b32 s7, s9, s7 10280; GFX8-NOHSA-NEXT: s_or_b32 s2, s2, s6 10281; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 10282; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 10283; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 10284; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 10285; GFX8-NOHSA-NEXT: v_mov_b32_e32 v7, s3 10286; GFX8-NOHSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 10287; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s7 10288; GFX8-NOHSA-NEXT: v_mov_b32_e32 v6, s2 10289; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[6:7], v[2:5] 10290; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 10291; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 10292; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s5 10293; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s8 10294; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 10295; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10296; GFX8-NOHSA-NEXT: s_endpgm 10297; 10298; EG-LABEL: constant_zextload_v16i8_to_v16i16: 10299; EG: ; %bb.0: 10300; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[] 10301; EG-NEXT: TEX 0 @8 10302; EG-NEXT: ALU 103, @12, KC0[], KC1[] 10303; EG-NEXT: ALU 20, @116, KC0[CB0:0-32], KC1[] 10304; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0 10305; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1 10306; EG-NEXT: CF_END 10307; EG-NEXT: PAD 10308; EG-NEXT: Fetch clause starting at 8: 10309; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1 10310; EG-NEXT: ALU clause starting at 10: 10311; EG-NEXT: MOV * T0.Y, T16.X, 10312; EG-NEXT: MOV * T19.X, KC0[2].Z, 10313; EG-NEXT: ALU clause starting at 12: 10314; EG-NEXT: AND_INT T0.W, T19.X, literal.x, 10315; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 10316; EG-NEXT: 255(3.573311e-43), -65536(nan) 10317; EG-NEXT: OR_INT * T0.W, PS, PV.W, 10318; EG-NEXT: MOV * T16.X, PV.W, 10319; EG-NEXT: MOV T0.Y, PV.X, 10320; EG-NEXT: LSHL * T0.W, T19.X, literal.x, 10321; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10322; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10323; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 10324; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10325; EG-NEXT: OR_INT * T0.W, PV.W, PS, 10326; EG-NEXT: MOV T16.X, PV.W, 10327; EG-NEXT: MOV T0.Y, T17.X, 10328; EG-NEXT: MOV * T0.W, literal.x, 10329; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10330; EG-NEXT: BFE_UINT T1.W, T19.X, literal.x, PV.W, 10331; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y, 10332; EG-NEXT: 16(2.242078e-44), -65536(nan) 10333; EG-NEXT: OR_INT * T1.W, PS, PV.W, 10334; EG-NEXT: MOV * T17.X, PV.W, 10335; EG-NEXT: MOV T0.Y, PV.X, 10336; EG-NEXT: LSHR * T1.W, T19.X, literal.x, 10337; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10338; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 10339; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 10340; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10341; EG-NEXT: OR_INT * T20.Y, PV.W, PS, 10342; EG-NEXT: MOV T17.X, PV.Y, 10343; EG-NEXT: MOV * T0.Y, T12.X, 10344; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10345; EG-NEXT: AND_INT * T2.W, T19.Y, literal.y, 10346; EG-NEXT: -65536(nan), 255(3.573311e-43) 10347; EG-NEXT: OR_INT * T1.W, PV.W, PS, 10348; EG-NEXT: MOV * T12.X, PV.W, 10349; EG-NEXT: MOV T0.Y, PV.X, 10350; EG-NEXT: LSHL * T1.W, T19.Y, literal.x, 10351; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10352; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 10353; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 10354; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10355; EG-NEXT: OR_INT * T1.W, PV.W, PS, 10356; EG-NEXT: MOV T12.X, PV.W, 10357; EG-NEXT: MOV T0.Y, T13.X, 10358; EG-NEXT: BFE_UINT * T1.W, T19.Y, literal.x, T0.W, 10359; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10360; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 10361; EG-NEXT: -65536(nan), 0(0.000000e+00) 10362; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 10363; EG-NEXT: MOV * T13.X, PV.W, 10364; EG-NEXT: MOV T0.Y, PV.X, 10365; EG-NEXT: LSHR * T1.W, T19.Y, literal.x, 10366; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10367; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 10368; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 10369; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10370; EG-NEXT: OR_INT * T20.W, PV.W, PS, 10371; EG-NEXT: MOV T13.X, PV.W, 10372; EG-NEXT: MOV * T0.Y, T8.X, 10373; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10374; EG-NEXT: AND_INT * T2.W, T19.Z, literal.y, 10375; EG-NEXT: -65536(nan), 255(3.573311e-43) 10376; EG-NEXT: OR_INT * T1.W, PV.W, PS, 10377; EG-NEXT: MOV * T8.X, PV.W, 10378; EG-NEXT: MOV T0.Y, PV.X, 10379; EG-NEXT: LSHL * T1.W, T19.Z, literal.x, 10380; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10381; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 10382; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 10383; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10384; EG-NEXT: OR_INT * T1.W, PV.W, PS, 10385; EG-NEXT: MOV T8.X, PV.W, 10386; EG-NEXT: MOV T0.Y, T9.X, 10387; EG-NEXT: BFE_UINT * T1.W, T19.Z, literal.x, T0.W, 10388; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10389; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 10390; EG-NEXT: -65536(nan), 0(0.000000e+00) 10391; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 10392; EG-NEXT: MOV * T9.X, PV.W, 10393; EG-NEXT: MOV T0.Y, PV.X, 10394; EG-NEXT: LSHR * T1.W, T19.Z, literal.x, 10395; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10396; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 10397; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 10398; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10399; EG-NEXT: OR_INT * T19.Y, PV.W, PS, 10400; EG-NEXT: MOV T9.X, PV.Y, 10401; EG-NEXT: MOV * T0.Y, T4.X, 10402; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10403; EG-NEXT: AND_INT * T2.W, T19.W, literal.y, 10404; EG-NEXT: -65536(nan), 255(3.573311e-43) 10405; EG-NEXT: OR_INT * T1.W, PV.W, PS, 10406; EG-NEXT: MOV * T4.X, PV.W, 10407; EG-NEXT: MOV T0.Y, PV.X, 10408; EG-NEXT: LSHL * T1.W, T19.W, literal.x, 10409; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10410; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 10411; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 10412; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 10413; EG-NEXT: OR_INT * T1.W, PV.W, PS, 10414; EG-NEXT: MOV T4.X, PV.W, 10415; EG-NEXT: MOV T0.Y, T5.X, 10416; EG-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W, 10417; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10418; EG-NEXT: ALU clause starting at 116: 10419; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x, 10420; EG-NEXT: -65536(nan), 0(0.000000e+00) 10421; EG-NEXT: OR_INT * T0.W, PV.W, T0.W, 10422; EG-NEXT: MOV * T5.X, PV.W, 10423; EG-NEXT: MOV T0.Y, PV.X, 10424; EG-NEXT: LSHR T0.W, T19.W, literal.x, 10425; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 10426; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44) 10427; EG-NEXT: LSHR T21.X, PS, literal.x, 10428; EG-NEXT: AND_INT T1.W, PV.Y, literal.y, 10429; EG-NEXT: AND_INT * T0.W, PV.W, literal.z, 10430; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 10431; EG-NEXT: 16711680(2.341805e-38), 0(0.000000e+00) 10432; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x, 10433; EG-NEXT: OR_INT * T19.W, PV.W, PS, 10434; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 10435; EG-NEXT: MOV T5.X, PV.W, 10436; EG-NEXT: MOV * T20.X, T16.X, 10437; EG-NEXT: MOV * T20.Z, T12.X, 10438; EG-NEXT: MOV T19.X, T8.X, 10439; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212 10440; 10441; GFX12-LABEL: constant_zextload_v16i8_to_v16i16: 10442; GFX12: ; %bb.0: 10443; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 10444; GFX12-NEXT: s_wait_kmcnt 0x0 10445; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 10446; GFX12-NEXT: s_wait_kmcnt 0x0 10447; GFX12-NEXT: s_bfe_u32 s2, s6, 0x80008 10448; GFX12-NEXT: s_lshr_b32 s3, s6, 24 10449; GFX12-NEXT: s_bfe_u32 s8, s7, 0x80008 10450; GFX12-NEXT: s_lshr_b32 s9, s7, 24 10451; GFX12-NEXT: s_bfe_u32 s16, s7, 0x80010 10452; GFX12-NEXT: s_and_b32 s7, s7, 0xff 10453; GFX12-NEXT: s_bfe_u32 s17, s6, 0x80010 10454; GFX12-NEXT: s_and_b32 s6, s6, 0xff 10455; GFX12-NEXT: s_bfe_u32 s10, s4, 0x80008 10456; GFX12-NEXT: s_lshr_b32 s11, s4, 24 10457; GFX12-NEXT: s_bfe_u32 s12, s5, 0x80008 10458; GFX12-NEXT: s_lshr_b32 s13, s5, 24 10459; GFX12-NEXT: s_bfe_u32 s14, s5, 0x80010 10460; GFX12-NEXT: s_and_b32 s5, s5, 0xff 10461; GFX12-NEXT: s_bfe_u32 s15, s4, 0x80010 10462; GFX12-NEXT: s_and_b32 s4, s4, 0xff 10463; GFX12-NEXT: s_pack_ll_b32_b16 s9, s16, s9 10464; GFX12-NEXT: s_pack_ll_b32_b16 s7, s7, s8 10465; GFX12-NEXT: s_pack_ll_b32_b16 s2, s6, s2 10466; GFX12-NEXT: s_pack_ll_b32_b16 s3, s17, s3 10467; GFX12-NEXT: s_pack_ll_b32_b16 s13, s14, s13 10468; GFX12-NEXT: s_pack_ll_b32_b16 s5, s5, s12 10469; GFX12-NEXT: s_pack_ll_b32_b16 s11, s15, s11 10470; GFX12-NEXT: s_pack_ll_b32_b16 s4, s4, s10 10471; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s3 10472; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s9 10473; GFX12-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s11 10474; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s13 10475; GFX12-NEXT: v_mov_b32_e32 v6, s5 10476; GFX12-NEXT: s_clause 0x1 10477; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 10478; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 10479; GFX12-NEXT: s_endpgm 10480 %load = load <16 x i8>, ptr addrspace(4) %in 10481 %ext = zext <16 x i8> %load to <16 x i16> 10482 store <16 x i16> %ext, ptr addrspace(1) %out 10483 ret void 10484} 10485 10486define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 10487; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i16: 10488; GFX6-NOHSA: ; %bb.0: 10489; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 10490; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10491; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 10492; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000 10493; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1 10494; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10495; GFX6-NOHSA-NEXT: s_ashr_i32 s8, s5, 24 10496; GFX6-NOHSA-NEXT: s_bfe_i32 s9, s5, 0x80010 10497; GFX6-NOHSA-NEXT: s_bfe_i32 s10, s5, 0x80008 10498; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 10499; GFX6-NOHSA-NEXT: s_ashr_i32 s11, s4, 24 10500; GFX6-NOHSA-NEXT: s_bfe_i32 s12, s4, 0x80010 10501; GFX6-NOHSA-NEXT: s_bfe_i32 s13, s4, 0x80008 10502; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 10503; GFX6-NOHSA-NEXT: s_ashr_i32 s14, s7, 24 10504; GFX6-NOHSA-NEXT: s_bfe_i32 s15, s7, 0x80010 10505; GFX6-NOHSA-NEXT: s_bfe_i32 s16, s7, 0x80008 10506; GFX6-NOHSA-NEXT: s_sext_i32_i8 s7, s7 10507; GFX6-NOHSA-NEXT: s_ashr_i32 s17, s6, 24 10508; GFX6-NOHSA-NEXT: s_bfe_i32 s18, s6, 0x80010 10509; GFX6-NOHSA-NEXT: s_bfe_i32 s19, s6, 0x80008 10510; GFX6-NOHSA-NEXT: s_sext_i32_i8 s6, s6 10511; GFX6-NOHSA-NEXT: s_lshl_b32 s8, s8, 16 10512; GFX6-NOHSA-NEXT: s_and_b32 s9, s9, 0xffff 10513; GFX6-NOHSA-NEXT: s_lshl_b32 s10, s10, 16 10514; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xffff 10515; GFX6-NOHSA-NEXT: s_lshl_b32 s11, s11, 16 10516; GFX6-NOHSA-NEXT: s_and_b32 s12, s12, 0xffff 10517; GFX6-NOHSA-NEXT: s_lshl_b32 s13, s13, 16 10518; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xffff 10519; GFX6-NOHSA-NEXT: s_lshl_b32 s14, s14, 16 10520; GFX6-NOHSA-NEXT: s_and_b32 s15, s15, 0xffff 10521; GFX6-NOHSA-NEXT: s_lshl_b32 s16, s16, 16 10522; GFX6-NOHSA-NEXT: s_and_b32 s7, s7, 0xffff 10523; GFX6-NOHSA-NEXT: s_lshl_b32 s17, s17, 16 10524; GFX6-NOHSA-NEXT: s_and_b32 s18, s18, 0xffff 10525; GFX6-NOHSA-NEXT: s_lshl_b32 s19, s19, 16 10526; GFX6-NOHSA-NEXT: s_and_b32 s6, s6, 0xffff 10527; GFX6-NOHSA-NEXT: s_or_b32 s8, s9, s8 10528; GFX6-NOHSA-NEXT: s_or_b32 s5, s5, s10 10529; GFX6-NOHSA-NEXT: s_or_b32 s9, s12, s11 10530; GFX6-NOHSA-NEXT: s_or_b32 s10, s15, s14 10531; GFX6-NOHSA-NEXT: s_or_b32 s7, s7, s16 10532; GFX6-NOHSA-NEXT: s_or_b32 s11, s18, s17 10533; GFX6-NOHSA-NEXT: s_or_b32 s6, s6, s19 10534; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s13 10535; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 10536; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s11 10537; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 10538; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s10 10539; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 10540; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 10541; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 10542; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s9 10543; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 10544; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s8 10545; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 10546; GFX6-NOHSA-NEXT: s_endpgm 10547; 10548; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i16: 10549; GFX7-HSA: ; %bb.0: 10550; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 10551; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 10552; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 10553; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 10554; GFX7-HSA-NEXT: s_ashr_i32 s2, s5, 24 10555; GFX7-HSA-NEXT: s_bfe_i32 s3, s5, 0x80010 10556; GFX7-HSA-NEXT: s_lshl_b32 s2, s2, 16 10557; GFX7-HSA-NEXT: s_and_b32 s3, s3, 0xffff 10558; GFX7-HSA-NEXT: s_bfe_i32 s8, s5, 0x80008 10559; GFX7-HSA-NEXT: s_sext_i32_i8 s5, s5 10560; GFX7-HSA-NEXT: s_ashr_i32 s9, s4, 24 10561; GFX7-HSA-NEXT: s_or_b32 s10, s3, s2 10562; GFX7-HSA-NEXT: s_bfe_i32 s3, s4, 0x80010 10563; GFX7-HSA-NEXT: s_lshl_b32 s8, s8, 16 10564; GFX7-HSA-NEXT: s_and_b32 s5, s5, 0xffff 10565; GFX7-HSA-NEXT: s_lshl_b32 s2, s9, 16 10566; GFX7-HSA-NEXT: s_and_b32 s3, s3, 0xffff 10567; GFX7-HSA-NEXT: s_or_b32 s5, s5, s8 10568; GFX7-HSA-NEXT: s_or_b32 s8, s3, s2 10569; GFX7-HSA-NEXT: s_bfe_i32 s2, s4, 0x80008 10570; GFX7-HSA-NEXT: s_sext_i32_i8 s3, s4 10571; GFX7-HSA-NEXT: s_lshl_b32 s2, s2, 16 10572; GFX7-HSA-NEXT: s_and_b32 s3, s3, 0xffff 10573; GFX7-HSA-NEXT: s_or_b32 s4, s3, s2 10574; GFX7-HSA-NEXT: s_ashr_i32 s2, s7, 24 10575; GFX7-HSA-NEXT: s_bfe_i32 s3, s7, 0x80010 10576; GFX7-HSA-NEXT: s_lshl_b32 s2, s2, 16 10577; GFX7-HSA-NEXT: s_and_b32 s3, s3, 0xffff 10578; GFX7-HSA-NEXT: s_or_b32 s2, s3, s2 10579; GFX7-HSA-NEXT: s_bfe_i32 s3, s7, 0x80008 10580; GFX7-HSA-NEXT: s_sext_i32_i8 s7, s7 10581; GFX7-HSA-NEXT: s_lshl_b32 s3, s3, 16 10582; GFX7-HSA-NEXT: s_and_b32 s7, s7, 0xffff 10583; GFX7-HSA-NEXT: s_or_b32 s3, s7, s3 10584; GFX7-HSA-NEXT: s_ashr_i32 s7, s6, 24 10585; GFX7-HSA-NEXT: s_bfe_i32 s9, s6, 0x80010 10586; GFX7-HSA-NEXT: s_lshl_b32 s7, s7, 16 10587; GFX7-HSA-NEXT: s_and_b32 s9, s9, 0xffff 10588; GFX7-HSA-NEXT: s_or_b32 s7, s9, s7 10589; GFX7-HSA-NEXT: s_bfe_i32 s9, s6, 0x80008 10590; GFX7-HSA-NEXT: s_sext_i32_i8 s6, s6 10591; GFX7-HSA-NEXT: s_lshl_b32 s9, s9, 16 10592; GFX7-HSA-NEXT: s_and_b32 s6, s6, 0xffff 10593; GFX7-HSA-NEXT: s_or_b32 s6, s6, s9 10594; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s2 10595; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16 10596; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 10597; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0 10598; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3 10599; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 10600; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s7 10601; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 10602; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10603; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 10604; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 10605; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s8 10606; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 10607; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s10 10608; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 10609; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10610; GFX7-HSA-NEXT: s_endpgm 10611; 10612; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i16: 10613; GFX8-NOHSA: ; %bb.0: 10614; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 10615; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10616; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 10617; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10618; GFX8-NOHSA-NEXT: s_lshr_b32 s3, s5, 16 10619; GFX8-NOHSA-NEXT: s_sext_i32_i16 s10, s5 10620; GFX8-NOHSA-NEXT: s_bfe_i32 s11, s5, 0x80000 10621; GFX8-NOHSA-NEXT: s_ashr_i32 s5, s5, 16 10622; GFX8-NOHSA-NEXT: s_lshl_b32 s5, s5, 8 10623; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s3, 0x80000 10624; GFX8-NOHSA-NEXT: s_lshr_b32 s2, s4, 16 10625; GFX8-NOHSA-NEXT: s_and_b32 s5, s5, 0xffff0000 10626; GFX8-NOHSA-NEXT: s_and_b32 s3, 0xffff, s3 10627; GFX8-NOHSA-NEXT: s_sext_i32_i16 s12, s4 10628; GFX8-NOHSA-NEXT: s_lshl_b32 s10, s10, 8 10629; GFX8-NOHSA-NEXT: s_or_b32 s5, s3, s5 10630; GFX8-NOHSA-NEXT: s_ashr_i32 s3, s4, 24 10631; GFX8-NOHSA-NEXT: s_bfe_i32 s2, s2, 0x80000 10632; GFX8-NOHSA-NEXT: s_and_b32 s11, 0xffff, s11 10633; GFX8-NOHSA-NEXT: s_lshl_b32 s12, s12, 8 10634; GFX8-NOHSA-NEXT: s_and_b32 s10, s10, 0xffff0000 10635; GFX8-NOHSA-NEXT: s_lshl_b32 s3, s3, 16 10636; GFX8-NOHSA-NEXT: s_and_b32 s2, 0xffff, s2 10637; GFX8-NOHSA-NEXT: s_or_b32 s10, s11, s10 10638; GFX8-NOHSA-NEXT: s_and_b32 s11, s12, 0xffff0000 10639; GFX8-NOHSA-NEXT: s_bfe_i32 s12, s4, 0x80000 10640; GFX8-NOHSA-NEXT: s_or_b32 s4, s2, s3 10641; GFX8-NOHSA-NEXT: s_sext_i32_i16 s2, s7 10642; GFX8-NOHSA-NEXT: s_lshl_b32 s2, s2, 8 10643; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s7, 0x80000 10644; GFX8-NOHSA-NEXT: s_and_b32 s12, 0xffff, s12 10645; GFX8-NOHSA-NEXT: s_and_b32 s2, s2, 0xffff0000 10646; GFX8-NOHSA-NEXT: s_and_b32 s3, 0xffff, s3 10647; GFX8-NOHSA-NEXT: s_or_b32 s11, s12, s11 10648; GFX8-NOHSA-NEXT: s_or_b32 s12, s3, s2 10649; GFX8-NOHSA-NEXT: s_sext_i32_i16 s2, s6 10650; GFX8-NOHSA-NEXT: s_lshl_b32 s2, s2, 8 10651; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s6, 0x80000 10652; GFX8-NOHSA-NEXT: s_and_b32 s2, s2, 0xffff0000 10653; GFX8-NOHSA-NEXT: s_and_b32 s3, 0xffff, s3 10654; GFX8-NOHSA-NEXT: s_lshr_b32 s9, s7, 16 10655; GFX8-NOHSA-NEXT: s_or_b32 s13, s3, s2 10656; GFX8-NOHSA-NEXT: s_ashr_i64 s[2:3], s[6:7], 56 10657; GFX8-NOHSA-NEXT: s_bfe_i32 s3, s9, 0x80000 10658; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s6, 16 10659; GFX8-NOHSA-NEXT: s_lshl_b32 s2, s2, 16 10660; GFX8-NOHSA-NEXT: s_and_b32 s3, 0xffff, s3 10661; GFX8-NOHSA-NEXT: s_or_b32 s2, s3, s2 10662; GFX8-NOHSA-NEXT: s_ashr_i32 s3, s6, 24 10663; GFX8-NOHSA-NEXT: s_bfe_i32 s6, s8, 0x80000 10664; GFX8-NOHSA-NEXT: s_lshl_b32 s3, s3, 16 10665; GFX8-NOHSA-NEXT: s_and_b32 s6, 0xffff, s6 10666; GFX8-NOHSA-NEXT: s_or_b32 s3, s6, s3 10667; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s2 10668; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16 10669; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3 10670; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0 10671; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3 10672; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s13 10673; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12 10674; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2 10675; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10676; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 10677; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s11 10678; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s4 10679; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s10 10680; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s5 10681; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 10682; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 10683; GFX8-NOHSA-NEXT: s_endpgm 10684; 10685; EG-LABEL: constant_sextload_v16i8_to_v16i16: 10686; EG: ; %bb.0: 10687; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[] 10688; EG-NEXT: TEX 0 @8 10689; EG-NEXT: ALU 104, @12, KC0[], KC1[] 10690; EG-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[] 10691; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0 10692; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1 10693; EG-NEXT: CF_END 10694; EG-NEXT: PAD 10695; EG-NEXT: Fetch clause starting at 8: 10696; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1 10697; EG-NEXT: ALU clause starting at 10: 10698; EG-NEXT: MOV * T0.Y, T16.X, 10699; EG-NEXT: MOV * T19.X, KC0[2].Z, 10700; EG-NEXT: ALU clause starting at 12: 10701; EG-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x, 10702; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10703; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 10704; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 10705; EG-NEXT: 65535(9.183409e-41), -65536(nan) 10706; EG-NEXT: OR_INT * T0.W, PS, PV.W, 10707; EG-NEXT: MOV * T16.X, PV.W, 10708; EG-NEXT: MOV T0.Y, PV.X, 10709; EG-NEXT: LSHR * T0.W, T19.X, literal.x, 10710; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10711; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10712; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10713; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 10714; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 10715; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10716; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10717; EG-NEXT: MOV T16.X, PV.W, 10718; EG-NEXT: MOV T0.Y, T17.X, 10719; EG-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212 10720; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10721; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10722; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10723; EG-NEXT: 8(1.121039e-44), -65536(nan) 10724; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 10725; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 10726; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10727; EG-NEXT: MOV * T17.X, PV.W, 10728; EG-NEXT: MOV T0.Y, PV.X, 10729; EG-NEXT: ASHR * T0.W, T19.X, literal.x, 10730; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 10731; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10732; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 10733; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 10734; EG-NEXT: OR_INT * T20.Y, PV.W, PS, 10735; EG-NEXT: MOV T17.X, PV.Y, 10736; EG-NEXT: MOV T0.Y, T12.X, 10737; EG-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x, 10738; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10739; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10740; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 10741; EG-NEXT: -65536(nan), 65535(9.183409e-41) 10742; EG-NEXT: OR_INT * T0.W, PV.W, PS, 10743; EG-NEXT: MOV * T12.X, PV.W, 10744; EG-NEXT: MOV T0.Y, PV.X, 10745; EG-NEXT: LSHR * T0.W, T19.Y, literal.x, 10746; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10747; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10748; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10749; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 10750; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 10751; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10752; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10753; EG-NEXT: MOV T12.X, PV.W, 10754; EG-NEXT: MOV T0.Y, T13.X, 10755; EG-NEXT: LSHR * T0.W, T19.Y, literal.x, 10756; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10757; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10758; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10759; EG-NEXT: 8(1.121039e-44), -65536(nan) 10760; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 10761; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 10762; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10763; EG-NEXT: MOV * T13.X, PV.W, 10764; EG-NEXT: MOV T0.Y, PV.X, 10765; EG-NEXT: ASHR * T0.W, T19.Y, literal.x, 10766; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 10767; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10768; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 10769; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 10770; EG-NEXT: OR_INT * T20.W, PV.W, PS, 10771; EG-NEXT: MOV T13.X, PV.W, 10772; EG-NEXT: MOV T0.Y, T8.X, 10773; EG-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x, 10774; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10775; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10776; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 10777; EG-NEXT: -65536(nan), 65535(9.183409e-41) 10778; EG-NEXT: OR_INT * T0.W, PV.W, PS, 10779; EG-NEXT: MOV * T8.X, PV.W, 10780; EG-NEXT: MOV T0.Y, PV.X, 10781; EG-NEXT: LSHR * T0.W, T19.Z, literal.x, 10782; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10783; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10784; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10785; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 10786; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 10787; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10788; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10789; EG-NEXT: MOV T8.X, PV.W, 10790; EG-NEXT: MOV T0.Y, T9.X, 10791; EG-NEXT: LSHR * T0.W, T19.Z, literal.x, 10792; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10793; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10794; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10795; EG-NEXT: 8(1.121039e-44), -65536(nan) 10796; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 10797; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 10798; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10799; EG-NEXT: MOV * T9.X, PV.W, 10800; EG-NEXT: MOV T0.Y, PV.X, 10801; EG-NEXT: ASHR * T0.W, T19.Z, literal.x, 10802; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 10803; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10804; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 10805; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 10806; EG-NEXT: ALU clause starting at 117: 10807; EG-NEXT: OR_INT * T19.Y, T1.W, T0.W, 10808; EG-NEXT: MOV T9.X, PV.Y, 10809; EG-NEXT: MOV T0.Y, T4.X, 10810; EG-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x, 10811; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10812; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 10813; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 10814; EG-NEXT: -65536(nan), 65535(9.183409e-41) 10815; EG-NEXT: OR_INT * T0.W, PV.W, PS, 10816; EG-NEXT: MOV * T4.X, PV.W, 10817; EG-NEXT: MOV T0.Y, PV.X, 10818; EG-NEXT: LSHR * T0.W, T19.W, literal.x, 10819; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 10820; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10821; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10822; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 10823; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 10824; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10825; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10826; EG-NEXT: MOV T4.X, PV.W, 10827; EG-NEXT: MOV T0.Y, T5.X, 10828; EG-NEXT: LSHR * T0.W, T19.W, literal.x, 10829; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10830; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 10831; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 10832; EG-NEXT: 8(1.121039e-44), -65536(nan) 10833; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 10834; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 10835; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 10836; EG-NEXT: MOV * T5.X, PV.W, 10837; EG-NEXT: MOV T0.Y, PV.X, 10838; EG-NEXT: ASHR T0.W, T19.W, literal.x, 10839; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 10840; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44) 10841; EG-NEXT: LSHR T21.X, PS, literal.x, 10842; EG-NEXT: AND_INT T1.W, PV.Y, literal.y, 10843; EG-NEXT: LSHL * T0.W, PV.W, literal.z, 10844; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 10845; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 10846; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x, 10847; EG-NEXT: OR_INT * T19.W, PV.W, PS, 10848; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 10849; EG-NEXT: MOV T5.X, PV.W, 10850; EG-NEXT: MOV * T20.X, T16.X, 10851; EG-NEXT: MOV * T20.Z, T12.X, 10852; EG-NEXT: MOV T19.X, T8.X, 10853; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212 10854; 10855; GFX12-LABEL: constant_sextload_v16i8_to_v16i16: 10856; GFX12: ; %bb.0: 10857; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 10858; GFX12-NEXT: s_wait_kmcnt 0x0 10859; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 10860; GFX12-NEXT: s_wait_kmcnt 0x0 10861; GFX12-NEXT: s_ashr_i64 s[2:3], s[6:7], 56 10862; GFX12-NEXT: s_lshr_b32 s8, s6, 16 10863; GFX12-NEXT: s_lshr_b32 s9, s7, 16 10864; GFX12-NEXT: s_bfe_i32 s3, s7, 0x80000 10865; GFX12-NEXT: s_sext_i32_i16 s7, s7 10866; GFX12-NEXT: s_ashr_i32 s16, s6, 24 10867; GFX12-NEXT: s_bfe_i32 s17, s6, 0x80000 10868; GFX12-NEXT: s_sext_i32_i16 s6, s6 10869; GFX12-NEXT: s_lshr_b32 s10, s4, 16 10870; GFX12-NEXT: s_lshr_b32 s11, s5, 16 10871; GFX12-NEXT: s_ashr_i32 s12, s5, 16 10872; GFX12-NEXT: s_bfe_i32 s13, s5, 0x80000 10873; GFX12-NEXT: s_sext_i32_i16 s5, s5 10874; GFX12-NEXT: s_ashr_i32 s14, s4, 24 10875; GFX12-NEXT: s_bfe_i32 s15, s4, 0x80000 10876; GFX12-NEXT: s_sext_i32_i16 s4, s4 10877; GFX12-NEXT: s_bfe_i32 s9, s9, 0x80000 10878; GFX12-NEXT: s_lshr_b32 s7, s7, 8 10879; GFX12-NEXT: s_bfe_i32 s8, s8, 0x80000 10880; GFX12-NEXT: s_lshr_b32 s6, s6, 8 10881; GFX12-NEXT: s_lshr_b32 s12, s12, 8 10882; GFX12-NEXT: s_bfe_i32 s11, s11, 0x80000 10883; GFX12-NEXT: s_lshr_b32 s5, s5, 8 10884; GFX12-NEXT: s_bfe_i32 s10, s10, 0x80000 10885; GFX12-NEXT: s_lshr_b32 s4, s4, 8 10886; GFX12-NEXT: s_pack_ll_b32_b16 s2, s9, s2 10887; GFX12-NEXT: s_pack_ll_b32_b16 s3, s3, s7 10888; GFX12-NEXT: s_pack_ll_b32_b16 s6, s17, s6 10889; GFX12-NEXT: s_pack_ll_b32_b16 s7, s8, s16 10890; GFX12-NEXT: s_pack_ll_b32_b16 s11, s11, s12 10891; GFX12-NEXT: s_pack_ll_b32_b16 s5, s13, s5 10892; GFX12-NEXT: s_pack_ll_b32_b16 s10, s10, s14 10893; GFX12-NEXT: s_pack_ll_b32_b16 s4, s15, s4 10894; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s7 10895; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v3, s2 10896; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s10 10897; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s11 10898; GFX12-NEXT: v_mov_b32_e32 v6, s5 10899; GFX12-NEXT: s_clause 0x1 10900; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 10901; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 10902; GFX12-NEXT: s_endpgm 10903 %load = load <16 x i8>, ptr addrspace(4) %in 10904 %ext = sext <16 x i8> %load to <16 x i16> 10905 store <16 x i16> %ext, ptr addrspace(1) %out 10906 ret void 10907} 10908 10909define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 10910; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i16: 10911; GFX6-NOHSA: ; %bb.0: 10912; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 10913; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10914; GFX6-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 10915; GFX6-NOHSA-NEXT: s_mov_b32 s11, 0xf000 10916; GFX6-NOHSA-NEXT: s_mov_b32 s10, -1 10917; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 10918; GFX6-NOHSA-NEXT: s_and_b32 s12, s6, 0xff00 10919; GFX6-NOHSA-NEXT: s_lshr_b32 s13, s6, 24 10920; GFX6-NOHSA-NEXT: s_and_b32 s14, s7, 0xff00 10921; GFX6-NOHSA-NEXT: s_lshr_b32 s15, s7, 24 10922; GFX6-NOHSA-NEXT: s_and_b32 s16, s4, 0xff00 10923; GFX6-NOHSA-NEXT: s_lshr_b32 s17, s4, 24 10924; GFX6-NOHSA-NEXT: s_and_b32 s18, s5, 0xff00 10925; GFX6-NOHSA-NEXT: s_lshr_b32 s19, s5, 24 10926; GFX6-NOHSA-NEXT: s_and_b32 s20, s2, 0xff00 10927; GFX6-NOHSA-NEXT: s_lshr_b32 s21, s2, 24 10928; GFX6-NOHSA-NEXT: s_and_b32 s22, s3, 0xff00 10929; GFX6-NOHSA-NEXT: s_lshr_b32 s23, s3, 24 10930; GFX6-NOHSA-NEXT: s_and_b32 s24, s0, 0xff00 10931; GFX6-NOHSA-NEXT: s_lshr_b32 s25, s0, 24 10932; GFX6-NOHSA-NEXT: s_and_b32 s26, s1, 0xff00 10933; GFX6-NOHSA-NEXT: s_lshr_b32 s27, s1, 24 10934; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s1 10935; GFX6-NOHSA-NEXT: s_and_b32 s1, s1, 0xff 10936; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s0 10937; GFX6-NOHSA-NEXT: s_and_b32 s0, s0, 0xff 10938; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s3 10939; GFX6-NOHSA-NEXT: s_and_b32 s3, s3, 0xff 10940; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s2 10941; GFX6-NOHSA-NEXT: s_and_b32 s2, s2, 0xff 10942; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s5 10943; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xff 10944; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s4 10945; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xff 10946; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s7 10947; GFX6-NOHSA-NEXT: s_and_b32 s7, s7, 0xff 10948; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s6 10949; GFX6-NOHSA-NEXT: s_and_b32 s6, s6, 0xff 10950; GFX6-NOHSA-NEXT: v_alignbit_b32 v0, s27, v0, 16 10951; GFX6-NOHSA-NEXT: s_lshl_b32 s26, s26, 8 10952; GFX6-NOHSA-NEXT: v_alignbit_b32 v1, s25, v1, 16 10953; GFX6-NOHSA-NEXT: s_lshl_b32 s24, s24, 8 10954; GFX6-NOHSA-NEXT: v_alignbit_b32 v2, s23, v2, 16 10955; GFX6-NOHSA-NEXT: s_lshl_b32 s22, s22, 8 10956; GFX6-NOHSA-NEXT: v_alignbit_b32 v8, s21, v3, 16 10957; GFX6-NOHSA-NEXT: s_lshl_b32 s20, s20, 8 10958; GFX6-NOHSA-NEXT: v_alignbit_b32 v4, s19, v4, 16 10959; GFX6-NOHSA-NEXT: s_lshl_b32 s18, s18, 8 10960; GFX6-NOHSA-NEXT: v_alignbit_b32 v9, s17, v5, 16 10961; GFX6-NOHSA-NEXT: s_lshl_b32 s16, s16, 8 10962; GFX6-NOHSA-NEXT: v_alignbit_b32 v6, s15, v6, 16 10963; GFX6-NOHSA-NEXT: s_lshl_b32 s14, s14, 8 10964; GFX6-NOHSA-NEXT: v_alignbit_b32 v10, s13, v7, 16 10965; GFX6-NOHSA-NEXT: s_lshl_b32 s12, s12, 8 10966; GFX6-NOHSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 10967; GFX6-NOHSA-NEXT: s_or_b32 s1, s1, s26 10968; GFX6-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v1 10969; GFX6-NOHSA-NEXT: s_or_b32 s0, s0, s24 10970; GFX6-NOHSA-NEXT: v_and_b32_e32 v7, 0xff00ff, v2 10971; GFX6-NOHSA-NEXT: s_or_b32 s3, s3, s22 10972; GFX6-NOHSA-NEXT: v_and_b32_e32 v5, 0xff00ff, v8 10973; GFX6-NOHSA-NEXT: s_or_b32 s2, s2, s20 10974; GFX6-NOHSA-NEXT: v_and_b32_e32 v11, 0xff00ff, v4 10975; GFX6-NOHSA-NEXT: s_or_b32 s5, s5, s18 10976; GFX6-NOHSA-NEXT: v_and_b32_e32 v9, 0xff00ff, v9 10977; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s16 10978; GFX6-NOHSA-NEXT: v_and_b32_e32 v15, 0xff00ff, v6 10979; GFX6-NOHSA-NEXT: s_or_b32 s7, s7, s14 10980; GFX6-NOHSA-NEXT: s_or_b32 s6, s6, s12 10981; GFX6-NOHSA-NEXT: v_and_b32_e32 v13, 0xff00ff, v10 10982; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s6 10983; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s7 10984; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[8:11], 0 offset:48 10985; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s4 10986; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s5 10987; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:32 10988; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s2 10989; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s3 10990; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16 10991; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s0 10992; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s1 10993; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 10994; GFX6-NOHSA-NEXT: s_endpgm 10995; 10996; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i16: 10997; GFX7-HSA: ; %bb.0: 10998; GFX7-HSA-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0 10999; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 11000; GFX7-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 11001; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 11002; GFX7-HSA-NEXT: s_lshr_b32 s25, s1, 24 11003; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s1 11004; GFX7-HSA-NEXT: v_alignbit_b32 v0, s25, v0, 16 11005; GFX7-HSA-NEXT: s_lshr_b32 s23, s0, 24 11006; GFX7-HSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 11007; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0 11008; GFX7-HSA-NEXT: v_alignbit_b32 v0, s23, v0, 16 11009; GFX7-HSA-NEXT: s_lshr_b32 s21, s3, 24 11010; GFX7-HSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 11011; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s3 11012; GFX7-HSA-NEXT: v_alignbit_b32 v0, s21, v0, 16 11013; GFX7-HSA-NEXT: s_lshr_b32 s19, s2, 24 11014; GFX7-HSA-NEXT: s_and_b32 s24, s1, 0xff00 11015; GFX7-HSA-NEXT: v_and_b32_e32 v7, 0xff00ff, v0 11016; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 11017; GFX7-HSA-NEXT: s_and_b32 s22, s0, 0xff00 11018; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xff 11019; GFX7-HSA-NEXT: s_lshl_b32 s24, s24, 8 11020; GFX7-HSA-NEXT: v_alignbit_b32 v0, s19, v0, 16 11021; GFX7-HSA-NEXT: s_lshr_b32 s17, s5, 24 11022; GFX7-HSA-NEXT: s_and_b32 s20, s3, 0xff00 11023; GFX7-HSA-NEXT: s_or_b32 s24, s1, s24 11024; GFX7-HSA-NEXT: s_and_b32 s0, s0, 0xff 11025; GFX7-HSA-NEXT: s_lshl_b32 s1, s22, 8 11026; GFX7-HSA-NEXT: v_and_b32_e32 v5, 0xff00ff, v0 11027; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s5 11028; GFX7-HSA-NEXT: s_and_b32 s18, s2, 0xff00 11029; GFX7-HSA-NEXT: s_or_b32 s22, s0, s1 11030; GFX7-HSA-NEXT: s_and_b32 s0, s3, 0xff 11031; GFX7-HSA-NEXT: s_lshl_b32 s1, s20, 8 11032; GFX7-HSA-NEXT: v_alignbit_b32 v0, s17, v0, 16 11033; GFX7-HSA-NEXT: s_lshr_b32 s15, s4, 24 11034; GFX7-HSA-NEXT: s_and_b32 s16, s5, 0xff00 11035; GFX7-HSA-NEXT: s_or_b32 s3, s0, s1 11036; GFX7-HSA-NEXT: s_and_b32 s0, s2, 0xff 11037; GFX7-HSA-NEXT: s_lshl_b32 s1, s18, 8 11038; GFX7-HSA-NEXT: v_and_b32_e32 v11, 0xff00ff, v0 11039; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 11040; GFX7-HSA-NEXT: s_and_b32 s14, s4, 0xff00 11041; GFX7-HSA-NEXT: s_or_b32 s2, s0, s1 11042; GFX7-HSA-NEXT: s_and_b32 s0, s5, 0xff 11043; GFX7-HSA-NEXT: s_lshl_b32 s1, s16, 8 11044; GFX7-HSA-NEXT: v_alignbit_b32 v0, s15, v0, 16 11045; GFX7-HSA-NEXT: s_and_b32 s12, s7, 0xff00 11046; GFX7-HSA-NEXT: s_lshr_b32 s13, s7, 24 11047; GFX7-HSA-NEXT: s_or_b32 s5, s0, s1 11048; GFX7-HSA-NEXT: v_and_b32_e32 v9, 0xff00ff, v0 11049; GFX7-HSA-NEXT: s_and_b32 s0, s4, 0xff 11050; GFX7-HSA-NEXT: s_lshl_b32 s1, s14, 8 11051; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s7 11052; GFX7-HSA-NEXT: s_and_b32 s10, s6, 0xff00 11053; GFX7-HSA-NEXT: s_or_b32 s4, s0, s1 11054; GFX7-HSA-NEXT: v_alignbit_b32 v0, s13, v0, 16 11055; GFX7-HSA-NEXT: s_and_b32 s0, s7, 0xff 11056; GFX7-HSA-NEXT: s_lshl_b32 s1, s12, 8 11057; GFX7-HSA-NEXT: s_lshr_b32 s11, s6, 24 11058; GFX7-HSA-NEXT: v_and_b32_e32 v15, 0xff00ff, v0 11059; GFX7-HSA-NEXT: s_or_b32 s0, s0, s1 11060; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 11061; GFX7-HSA-NEXT: s_and_b32 s1, s6, 0xff 11062; GFX7-HSA-NEXT: s_lshl_b32 s6, s10, 8 11063; GFX7-HSA-NEXT: s_or_b32 s1, s1, s6 11064; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s0 11065; GFX7-HSA-NEXT: s_add_u32 s0, s8, 48 11066; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s1 11067; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 11068; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s1 11069; GFX7-HSA-NEXT: v_alignbit_b32 v0, s11, v0, 16 11070; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s0 11071; GFX7-HSA-NEXT: s_add_u32 s0, s8, 32 11072; GFX7-HSA-NEXT: v_and_b32_e32 v13, 0xff00ff, v0 11073; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 11074; GFX7-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 11075; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s4 11076; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s1 11077; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s0 11078; GFX7-HSA-NEXT: s_add_u32 s0, s8, 16 11079; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s5 11080; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 11081; GFX7-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 11082; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2 11083; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s1 11084; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s3 11085; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s0 11086; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 11087; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s22 11088; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s8 11089; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s24 11090; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s9 11091; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11092; GFX7-HSA-NEXT: s_endpgm 11093; 11094; GFX8-NOHSA-LABEL: constant_zextload_v32i8_to_v32i16: 11095; GFX8-NOHSA: ; %bb.0: 11096; GFX8-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 11097; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 11098; GFX8-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 11099; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 11100; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s1, 24 11101; GFX8-NOHSA-NEXT: s_bfe_u32 s15, s1, 0x80010 11102; GFX8-NOHSA-NEXT: s_and_b32 s16, s1, 0xff 11103; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s1, 8 11104; GFX8-NOHSA-NEXT: s_lshl_b32 s14, s14, 16 11105; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xff0000 11106; GFX8-NOHSA-NEXT: s_lshr_b32 s13, s0, 24 11107; GFX8-NOHSA-NEXT: s_or_b32 s14, s15, s14 11108; GFX8-NOHSA-NEXT: s_or_b32 s15, s16, s1 11109; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s0 11110; GFX8-NOHSA-NEXT: s_and_b32 s1, s0, 0xff 11111; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11112; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xff0000 11113; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s13, v0, 16 11114; GFX8-NOHSA-NEXT: s_or_b32 s13, s1, s0 11115; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s3, 24 11116; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11117; GFX8-NOHSA-NEXT: s_bfe_u32 s1, s3, 0x80010 11118; GFX8-NOHSA-NEXT: s_or_b32 s16, s1, s0 11119; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s3, 8 11120; GFX8-NOHSA-NEXT: s_and_b32 s0, s3, 0xff 11121; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xff0000 11122; GFX8-NOHSA-NEXT: s_or_b32 s3, s0, s1 11123; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s2, 8 11124; GFX8-NOHSA-NEXT: s_and_b32 s0, s2, 0xff 11125; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xff0000 11126; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s2, 24 11127; GFX8-NOHSA-NEXT: v_and_b32_e32 v1, 0xff00ff, v0 11128; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2 11129; GFX8-NOHSA-NEXT: s_or_b32 s2, s0, s1 11130; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s5, 24 11131; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11132; GFX8-NOHSA-NEXT: s_bfe_u32 s1, s5, 0x80010 11133; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s12, v0, 16 11134; GFX8-NOHSA-NEXT: s_or_b32 s12, s1, s0 11135; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s5, 8 11136; GFX8-NOHSA-NEXT: s_and_b32 s0, s5, 0xff 11137; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xff0000 11138; GFX8-NOHSA-NEXT: s_or_b32 s5, s0, s1 11139; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s4, 8 11140; GFX8-NOHSA-NEXT: s_and_b32 s0, s4, 0xff 11141; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xff0000 11142; GFX8-NOHSA-NEXT: s_lshr_b32 s11, s4, 24 11143; GFX8-NOHSA-NEXT: v_and_b32_e32 v3, 0xff00ff, v0 11144; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4 11145; GFX8-NOHSA-NEXT: s_or_b32 s4, s0, s1 11146; GFX8-NOHSA-NEXT: s_lshr_b32 s0, s7, 24 11147; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11148; GFX8-NOHSA-NEXT: s_bfe_u32 s1, s7, 0x80010 11149; GFX8-NOHSA-NEXT: s_or_b32 s0, s1, s0 11150; GFX8-NOHSA-NEXT: s_and_b32 s1, s7, 0xff 11151; GFX8-NOHSA-NEXT: s_lshl_b32 s7, s7, 8 11152; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s11, v0, 16 11153; GFX8-NOHSA-NEXT: s_and_b32 s7, s7, 0xff0000 11154; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s6, 24 11155; GFX8-NOHSA-NEXT: v_and_b32_e32 v5, 0xff00ff, v0 11156; GFX8-NOHSA-NEXT: s_or_b32 s1, s1, s7 11157; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6 11158; GFX8-NOHSA-NEXT: s_and_b32 s7, s6, 0xff 11159; GFX8-NOHSA-NEXT: s_lshl_b32 s6, s6, 8 11160; GFX8-NOHSA-NEXT: s_and_b32 s6, s6, 0xff0000 11161; GFX8-NOHSA-NEXT: s_or_b32 s6, s7, s6 11162; GFX8-NOHSA-NEXT: v_mov_b32_e32 v9, s0 11163; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 48 11164; GFX8-NOHSA-NEXT: v_mov_b32_e32 v8, s1 11165; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0 11166; GFX8-NOHSA-NEXT: v_mov_b32_e32 v11, s1 11167; GFX8-NOHSA-NEXT: v_alignbit_b32 v0, s10, v0, 16 11168; GFX8-NOHSA-NEXT: v_mov_b32_e32 v10, s0 11169; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 32 11170; GFX8-NOHSA-NEXT: v_and_b32_e32 v7, 0xff00ff, v0 11171; GFX8-NOHSA-NEXT: v_mov_b32_e32 v6, s6 11172; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0 11173; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[10:11], v[6:9] 11174; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4 11175; GFX8-NOHSA-NEXT: v_mov_b32_e32 v9, s1 11176; GFX8-NOHSA-NEXT: v_mov_b32_e32 v8, s0 11177; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 16 11178; GFX8-NOHSA-NEXT: v_mov_b32_e32 v6, s5 11179; GFX8-NOHSA-NEXT: v_mov_b32_e32 v7, s12 11180; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0 11181; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 11182; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s2 11183; GFX8-NOHSA-NEXT: v_mov_b32_e32 v7, s1 11184; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s3 11185; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s16 11186; GFX8-NOHSA-NEXT: v_mov_b32_e32 v6, s0 11187; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[6:7], v[2:5] 11188; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s13 11189; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s8 11190; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s15 11191; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s14 11192; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s9 11193; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11194; GFX8-NOHSA-NEXT: s_endpgm 11195; 11196; EG-LABEL: constant_zextload_v32i8_to_v32i16: 11197; EG: ; %bb.0: 11198; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[] 11199; EG-NEXT: TEX 1 @10 11200; EG-NEXT: ALU 103, @16, KC0[], KC1[] 11201; EG-NEXT: ALU 104, @120, KC0[], KC1[] 11202; EG-NEXT: ALU 41, @225, KC0[CB0:0-32], KC1[] 11203; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0 11204; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0 11205; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0 11206; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1 11207; EG-NEXT: CF_END 11208; EG-NEXT: Fetch clause starting at 10: 11209; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1 11210; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1 11211; EG-NEXT: ALU clause starting at 14: 11212; EG-NEXT: MOV * T0.Y, T16.X, 11213; EG-NEXT: MOV * T35.X, KC0[2].Z, 11214; EG-NEXT: ALU clause starting at 16: 11215; EG-NEXT: AND_INT T0.W, T37.X, literal.x, 11216; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 11217; EG-NEXT: 255(3.573311e-43), -65536(nan) 11218; EG-NEXT: OR_INT * T0.W, PS, PV.W, 11219; EG-NEXT: MOV * T16.X, PV.W, 11220; EG-NEXT: MOV T0.Y, PV.X, 11221; EG-NEXT: LSHL * T0.W, T37.X, literal.x, 11222; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11223; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11224; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 11225; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11226; EG-NEXT: OR_INT * T0.W, PV.W, PS, 11227; EG-NEXT: MOV T16.X, PV.W, 11228; EG-NEXT: MOV T0.Y, T17.X, 11229; EG-NEXT: MOV * T0.W, literal.x, 11230; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11231; EG-NEXT: BFE_UINT T1.W, T37.X, literal.x, PV.W, 11232; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y, 11233; EG-NEXT: 16(2.242078e-44), -65536(nan) 11234; EG-NEXT: OR_INT * T1.W, PS, PV.W, 11235; EG-NEXT: MOV * T17.X, PV.W, 11236; EG-NEXT: MOV T0.Y, PV.X, 11237; EG-NEXT: LSHR * T1.W, T37.X, literal.x, 11238; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11239; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11240; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11241; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11242; EG-NEXT: OR_INT * T36.Y, PV.W, PS, 11243; EG-NEXT: MOV T17.X, PV.Y, 11244; EG-NEXT: MOV * T0.Y, T12.X, 11245; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11246; EG-NEXT: AND_INT * T2.W, T37.Y, literal.y, 11247; EG-NEXT: -65536(nan), 255(3.573311e-43) 11248; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11249; EG-NEXT: MOV * T12.X, PV.W, 11250; EG-NEXT: MOV T0.Y, PV.X, 11251; EG-NEXT: LSHL * T1.W, T37.Y, literal.x, 11252; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11253; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11254; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11255; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11256; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11257; EG-NEXT: MOV T12.X, PV.W, 11258; EG-NEXT: MOV T0.Y, T13.X, 11259; EG-NEXT: BFE_UINT * T1.W, T37.Y, literal.x, T0.W, 11260; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11261; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 11262; EG-NEXT: -65536(nan), 0(0.000000e+00) 11263; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 11264; EG-NEXT: MOV * T13.X, PV.W, 11265; EG-NEXT: MOV T0.Y, PV.X, 11266; EG-NEXT: LSHR * T1.W, T37.Y, literal.x, 11267; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11268; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11269; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11270; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11271; EG-NEXT: OR_INT * T36.W, PV.W, PS, 11272; EG-NEXT: MOV T13.X, PV.W, 11273; EG-NEXT: MOV * T0.Y, T8.X, 11274; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11275; EG-NEXT: AND_INT * T2.W, T37.Z, literal.y, 11276; EG-NEXT: -65536(nan), 255(3.573311e-43) 11277; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11278; EG-NEXT: MOV * T8.X, PV.W, 11279; EG-NEXT: MOV T0.Y, PV.X, 11280; EG-NEXT: LSHL * T1.W, T37.Z, literal.x, 11281; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11282; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11283; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11284; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11285; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11286; EG-NEXT: MOV T8.X, PV.W, 11287; EG-NEXT: MOV T0.Y, T9.X, 11288; EG-NEXT: BFE_UINT * T1.W, T37.Z, literal.x, T0.W, 11289; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11290; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 11291; EG-NEXT: -65536(nan), 0(0.000000e+00) 11292; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 11293; EG-NEXT: MOV * T9.X, PV.W, 11294; EG-NEXT: MOV T0.Y, PV.X, 11295; EG-NEXT: LSHR * T1.W, T37.Z, literal.x, 11296; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11297; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11298; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11299; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11300; EG-NEXT: OR_INT * T37.Y, PV.W, PS, 11301; EG-NEXT: MOV T9.X, PV.Y, 11302; EG-NEXT: MOV * T0.Y, T4.X, 11303; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11304; EG-NEXT: AND_INT * T2.W, T37.W, literal.y, 11305; EG-NEXT: -65536(nan), 255(3.573311e-43) 11306; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11307; EG-NEXT: MOV * T4.X, PV.W, 11308; EG-NEXT: MOV T0.Y, PV.X, 11309; EG-NEXT: LSHL * T1.W, T37.W, literal.x, 11310; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11311; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11312; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11313; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11314; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11315; EG-NEXT: MOV T4.X, PV.W, 11316; EG-NEXT: MOV T0.Y, T5.X, 11317; EG-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W, 11318; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11319; EG-NEXT: ALU clause starting at 120: 11320; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x, 11321; EG-NEXT: -65536(nan), 0(0.000000e+00) 11322; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 11323; EG-NEXT: MOV * T5.X, PV.W, 11324; EG-NEXT: MOV T0.Y, PV.X, 11325; EG-NEXT: LSHR * T1.W, T37.W, literal.x, 11326; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11327; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11328; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11329; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11330; EG-NEXT: OR_INT * T37.W, PV.W, PS, 11331; EG-NEXT: MOV T5.X, PV.W, 11332; EG-NEXT: MOV * T0.Y, T32.X, 11333; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11334; EG-NEXT: AND_INT * T2.W, T35.X, literal.y, 11335; EG-NEXT: -65536(nan), 255(3.573311e-43) 11336; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11337; EG-NEXT: MOV * T32.X, PV.W, 11338; EG-NEXT: MOV T0.Y, PV.X, 11339; EG-NEXT: LSHL * T1.W, T35.X, literal.x, 11340; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11341; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11342; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11343; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11344; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11345; EG-NEXT: MOV T32.X, PV.W, 11346; EG-NEXT: MOV T0.Y, T33.X, 11347; EG-NEXT: BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212 11348; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11349; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 11350; EG-NEXT: -65536(nan), 0(0.000000e+00) 11351; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 11352; EG-NEXT: MOV * T33.X, PV.W, 11353; EG-NEXT: MOV T0.Y, PV.X, 11354; EG-NEXT: LSHR * T1.W, T35.X, literal.x, 11355; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11356; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11357; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11358; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11359; EG-NEXT: OR_INT * T38.Y, PV.W, PS, 11360; EG-NEXT: MOV T33.X, PV.Y, 11361; EG-NEXT: MOV * T0.Y, T28.X, 11362; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11363; EG-NEXT: AND_INT * T2.W, T35.Y, literal.y, 11364; EG-NEXT: -65536(nan), 255(3.573311e-43) 11365; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11366; EG-NEXT: MOV * T28.X, PV.W, 11367; EG-NEXT: MOV T0.Y, PV.X, 11368; EG-NEXT: LSHL * T1.W, T35.Y, literal.x, 11369; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11370; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11371; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11372; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11373; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11374; EG-NEXT: MOV T28.X, PV.W, 11375; EG-NEXT: MOV T0.Y, T29.X, 11376; EG-NEXT: BFE_UINT * T1.W, T35.Y, literal.x, T0.W, 11377; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11378; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 11379; EG-NEXT: -65536(nan), 0(0.000000e+00) 11380; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 11381; EG-NEXT: MOV * T29.X, PV.W, 11382; EG-NEXT: MOV T0.Y, PV.X, 11383; EG-NEXT: LSHR * T1.W, T35.Y, literal.x, 11384; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11385; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11386; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11387; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11388; EG-NEXT: OR_INT * T38.W, PV.W, PS, 11389; EG-NEXT: MOV T29.X, PV.W, 11390; EG-NEXT: MOV * T0.Y, T24.X, 11391; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11392; EG-NEXT: AND_INT * T2.W, T35.Z, literal.y, 11393; EG-NEXT: -65536(nan), 255(3.573311e-43) 11394; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11395; EG-NEXT: MOV * T24.X, PV.W, 11396; EG-NEXT: MOV T0.Y, PV.X, 11397; EG-NEXT: LSHL * T1.W, T35.Z, literal.x, 11398; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11399; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11400; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11401; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11402; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11403; EG-NEXT: MOV T24.X, PV.W, 11404; EG-NEXT: MOV T0.Y, T25.X, 11405; EG-NEXT: BFE_UINT * T1.W, T35.Z, literal.x, T0.W, 11406; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11407; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x, 11408; EG-NEXT: -65536(nan), 0(0.000000e+00) 11409; EG-NEXT: OR_INT * T1.W, PV.W, T1.W, 11410; EG-NEXT: MOV * T25.X, PV.W, 11411; EG-NEXT: MOV T0.Y, PV.X, 11412; EG-NEXT: LSHR * T1.W, T35.Z, literal.x, 11413; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11414; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11415; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11416; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11417; EG-NEXT: OR_INT * T35.Y, PV.W, PS, 11418; EG-NEXT: MOV T25.X, PV.Y, 11419; EG-NEXT: MOV * T0.Y, T20.X, 11420; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11421; EG-NEXT: AND_INT * T2.W, T35.W, literal.y, 11422; EG-NEXT: -65536(nan), 255(3.573311e-43) 11423; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11424; EG-NEXT: MOV * T20.X, PV.W, 11425; EG-NEXT: ALU clause starting at 225: 11426; EG-NEXT: MOV T0.Y, T20.X, 11427; EG-NEXT: LSHL * T1.W, T35.W, literal.x, 11428; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11429; EG-NEXT: AND_INT T2.W, PV.Y, literal.x, 11430; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, 11431; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38) 11432; EG-NEXT: OR_INT * T1.W, PV.W, PS, 11433; EG-NEXT: MOV T20.X, PV.W, 11434; EG-NEXT: MOV T0.Y, T21.X, 11435; EG-NEXT: BFE_UINT * T0.W, T35.W, literal.x, T0.W, 11436; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11437; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x, 11438; EG-NEXT: -65536(nan), 0(0.000000e+00) 11439; EG-NEXT: OR_INT * T0.W, PV.W, T0.W, 11440; EG-NEXT: MOV * T21.X, PV.W, 11441; EG-NEXT: MOV T0.Y, PV.X, 11442; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 11443; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11444; EG-NEXT: LSHR T39.X, PV.W, literal.x, 11445; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x, 11446; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 11447; EG-NEXT: LSHR T0.W, T35.W, literal.x, 11448; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 11449; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44) 11450; EG-NEXT: LSHR T41.X, PS, literal.x, 11451; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y, 11452; EG-NEXT: AND_INT T0.W, PV.W, literal.z, 11453; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 11454; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 11455; EG-NEXT: 16711680(2.341805e-38), 32(4.484155e-44) 11456; EG-NEXT: LSHR T42.X, PS, literal.x, 11457; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W, 11458; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 11459; EG-NEXT: MOV T21.X, PV.W, 11460; EG-NEXT: MOV * T36.X, T16.X, 11461; EG-NEXT: MOV * T36.Z, T12.X, 11462; EG-NEXT: MOV T37.X, T8.X, 11463; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212 11464; EG-NEXT: MOV * T38.X, T32.X, 11465; EG-NEXT: MOV * T38.Z, T28.X, 11466; EG-NEXT: MOV T35.X, T24.X, 11467; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212 11468; 11469; GFX12-LABEL: constant_zextload_v32i8_to_v32i16: 11470; GFX12: ; %bb.0: 11471; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 11472; GFX12-NEXT: s_wait_kmcnt 0x0 11473; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 11474; GFX12-NEXT: s_wait_kmcnt 0x0 11475; GFX12-NEXT: s_bfe_u32 s12, s7, 0x80008 11476; GFX12-NEXT: s_lshr_b32 s13, s7, 24 11477; GFX12-NEXT: s_bfe_u32 s33, s7, 0x80010 11478; GFX12-NEXT: s_and_b32 s7, s7, 0xff 11479; GFX12-NEXT: s_bfe_u32 s10, s6, 0x80008 11480; GFX12-NEXT: s_lshr_b32 s11, s6, 24 11481; GFX12-NEXT: s_pack_ll_b32_b16 s7, s7, s12 11482; GFX12-NEXT: s_and_b32 s12, s6, 0xff 11483; GFX12-NEXT: s_bfe_u32 s6, s6, 0x80010 11484; GFX12-NEXT: s_bfe_u32 s14, s4, 0x80008 11485; GFX12-NEXT: s_lshr_b32 s15, s4, 24 11486; GFX12-NEXT: s_bfe_u32 s16, s5, 0x80008 11487; GFX12-NEXT: s_lshr_b32 s17, s5, 24 11488; GFX12-NEXT: s_bfe_u32 s30, s5, 0x80010 11489; GFX12-NEXT: s_and_b32 s5, s5, 0xff 11490; GFX12-NEXT: s_bfe_u32 s31, s4, 0x80010 11491; GFX12-NEXT: s_and_b32 s4, s4, 0xff 11492; GFX12-NEXT: s_bfe_u32 s18, s2, 0x80008 11493; GFX12-NEXT: s_lshr_b32 s19, s2, 24 11494; GFX12-NEXT: s_bfe_u32 s20, s3, 0x80008 11495; GFX12-NEXT: s_lshr_b32 s21, s3, 24 11496; GFX12-NEXT: s_bfe_u32 s28, s3, 0x80010 11497; GFX12-NEXT: s_and_b32 s3, s3, 0xff 11498; GFX12-NEXT: s_bfe_u32 s29, s2, 0x80010 11499; GFX12-NEXT: s_and_b32 s2, s2, 0xff 11500; GFX12-NEXT: s_pack_ll_b32_b16 s13, s33, s13 11501; GFX12-NEXT: s_pack_ll_b32_b16 s10, s12, s10 11502; GFX12-NEXT: s_pack_ll_b32_b16 s6, s6, s11 11503; GFX12-NEXT: s_bfe_u32 s22, s0, 0x80008 11504; GFX12-NEXT: s_lshr_b32 s23, s0, 24 11505; GFX12-NEXT: s_bfe_u32 s24, s1, 0x80008 11506; GFX12-NEXT: s_lshr_b32 s25, s1, 24 11507; GFX12-NEXT: s_bfe_u32 s26, s1, 0x80010 11508; GFX12-NEXT: s_and_b32 s1, s1, 0xff 11509; GFX12-NEXT: s_bfe_u32 s27, s0, 0x80010 11510; GFX12-NEXT: s_and_b32 s0, s0, 0xff 11511; GFX12-NEXT: s_pack_ll_b32_b16 s17, s30, s17 11512; GFX12-NEXT: s_pack_ll_b32_b16 s5, s5, s16 11513; GFX12-NEXT: s_pack_ll_b32_b16 s15, s31, s15 11514; GFX12-NEXT: s_pack_ll_b32_b16 s4, s4, s14 11515; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s6 11516; GFX12-NEXT: s_pack_ll_b32_b16 s21, s28, s21 11517; GFX12-NEXT: s_pack_ll_b32_b16 s3, s3, s20 11518; GFX12-NEXT: s_pack_ll_b32_b16 s19, s29, s19 11519; GFX12-NEXT: s_pack_ll_b32_b16 s2, s2, s18 11520; GFX12-NEXT: v_dual_mov_b32 v0, s10 :: v_dual_mov_b32 v3, s13 11521; GFX12-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s15 11522; GFX12-NEXT: s_pack_ll_b32_b16 s25, s26, s25 11523; GFX12-NEXT: s_pack_ll_b32_b16 s1, s1, s24 11524; GFX12-NEXT: s_pack_ll_b32_b16 s23, s27, s23 11525; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s22 11526; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s17 11527; GFX12-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v9, s19 11528; GFX12-NEXT: v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s21 11529; GFX12-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s23 11530; GFX12-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s25 11531; GFX12-NEXT: v_mov_b32_e32 v14, s1 11532; GFX12-NEXT: s_clause 0x3 11533; GFX12-NEXT: global_store_b128 v16, v[0:3], s[8:9] offset:48 11534; GFX12-NEXT: global_store_b128 v16, v[4:7], s[8:9] offset:32 11535; GFX12-NEXT: global_store_b128 v16, v[8:11], s[8:9] offset:16 11536; GFX12-NEXT: global_store_b128 v16, v[12:15], s[8:9] 11537; GFX12-NEXT: s_endpgm 11538 %load = load <32 x i8>, ptr addrspace(4) %in 11539 %ext = zext <32 x i8> %load to <32 x i16> 11540 store <32 x i16> %ext, ptr addrspace(1) %out 11541 ret void 11542} 11543 11544define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 11545; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i16: 11546; GFX6-NOHSA: ; %bb.0: 11547; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 11548; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 11549; GFX6-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 11550; GFX6-NOHSA-NEXT: s_mov_b32 s11, 0xf000 11551; GFX6-NOHSA-NEXT: s_mov_b32 s10, -1 11552; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 11553; GFX6-NOHSA-NEXT: s_ashr_i32 s12, s1, 24 11554; GFX6-NOHSA-NEXT: s_bfe_i32 s13, s1, 0x80010 11555; GFX6-NOHSA-NEXT: s_bfe_i32 s14, s1, 0x80008 11556; GFX6-NOHSA-NEXT: s_sext_i32_i8 s1, s1 11557; GFX6-NOHSA-NEXT: s_ashr_i32 s15, s0, 24 11558; GFX6-NOHSA-NEXT: s_bfe_i32 s16, s0, 0x80010 11559; GFX6-NOHSA-NEXT: s_bfe_i32 s17, s0, 0x80008 11560; GFX6-NOHSA-NEXT: s_sext_i32_i8 s0, s0 11561; GFX6-NOHSA-NEXT: s_ashr_i32 s18, s3, 24 11562; GFX6-NOHSA-NEXT: s_bfe_i32 s19, s3, 0x80010 11563; GFX6-NOHSA-NEXT: s_bfe_i32 s20, s3, 0x80008 11564; GFX6-NOHSA-NEXT: s_sext_i32_i8 s3, s3 11565; GFX6-NOHSA-NEXT: s_ashr_i32 s21, s2, 24 11566; GFX6-NOHSA-NEXT: s_bfe_i32 s22, s2, 0x80010 11567; GFX6-NOHSA-NEXT: s_bfe_i32 s23, s2, 0x80008 11568; GFX6-NOHSA-NEXT: s_sext_i32_i8 s2, s2 11569; GFX6-NOHSA-NEXT: s_ashr_i32 s24, s5, 24 11570; GFX6-NOHSA-NEXT: s_bfe_i32 s25, s5, 0x80010 11571; GFX6-NOHSA-NEXT: s_bfe_i32 s26, s5, 0x80008 11572; GFX6-NOHSA-NEXT: s_sext_i32_i8 s5, s5 11573; GFX6-NOHSA-NEXT: s_ashr_i32 s27, s4, 24 11574; GFX6-NOHSA-NEXT: s_bfe_i32 s28, s4, 0x80010 11575; GFX6-NOHSA-NEXT: s_bfe_i32 s29, s4, 0x80008 11576; GFX6-NOHSA-NEXT: s_sext_i32_i8 s4, s4 11577; GFX6-NOHSA-NEXT: s_ashr_i32 s30, s7, 24 11578; GFX6-NOHSA-NEXT: s_bfe_i32 s31, s7, 0x80010 11579; GFX6-NOHSA-NEXT: s_bfe_i32 s33, s7, 0x80008 11580; GFX6-NOHSA-NEXT: s_sext_i32_i8 s7, s7 11581; GFX6-NOHSA-NEXT: s_ashr_i32 s34, s6, 24 11582; GFX6-NOHSA-NEXT: s_bfe_i32 s35, s6, 0x80010 11583; GFX6-NOHSA-NEXT: s_bfe_i32 s36, s6, 0x80008 11584; GFX6-NOHSA-NEXT: s_sext_i32_i8 s6, s6 11585; GFX6-NOHSA-NEXT: s_lshl_b32 s12, s12, 16 11586; GFX6-NOHSA-NEXT: s_and_b32 s13, s13, 0xffff 11587; GFX6-NOHSA-NEXT: s_lshl_b32 s14, s14, 16 11588; GFX6-NOHSA-NEXT: s_and_b32 s1, s1, 0xffff 11589; GFX6-NOHSA-NEXT: s_lshl_b32 s15, s15, 16 11590; GFX6-NOHSA-NEXT: s_and_b32 s16, s16, 0xffff 11591; GFX6-NOHSA-NEXT: s_lshl_b32 s17, s17, 16 11592; GFX6-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff 11593; GFX6-NOHSA-NEXT: s_lshl_b32 s18, s18, 16 11594; GFX6-NOHSA-NEXT: s_and_b32 s19, s19, 0xffff 11595; GFX6-NOHSA-NEXT: s_lshl_b32 s20, s20, 16 11596; GFX6-NOHSA-NEXT: s_and_b32 s3, s3, 0xffff 11597; GFX6-NOHSA-NEXT: s_lshl_b32 s21, s21, 16 11598; GFX6-NOHSA-NEXT: s_and_b32 s22, s22, 0xffff 11599; GFX6-NOHSA-NEXT: s_lshl_b32 s23, s23, 16 11600; GFX6-NOHSA-NEXT: s_and_b32 s2, s2, 0xffff 11601; GFX6-NOHSA-NEXT: s_lshl_b32 s24, s24, 16 11602; GFX6-NOHSA-NEXT: s_and_b32 s25, s25, 0xffff 11603; GFX6-NOHSA-NEXT: s_lshl_b32 s26, s26, 16 11604; GFX6-NOHSA-NEXT: s_and_b32 s5, s5, 0xffff 11605; GFX6-NOHSA-NEXT: s_lshl_b32 s27, s27, 16 11606; GFX6-NOHSA-NEXT: s_and_b32 s28, s28, 0xffff 11607; GFX6-NOHSA-NEXT: s_lshl_b32 s29, s29, 16 11608; GFX6-NOHSA-NEXT: s_and_b32 s4, s4, 0xffff 11609; GFX6-NOHSA-NEXT: s_lshl_b32 s30, s30, 16 11610; GFX6-NOHSA-NEXT: s_and_b32 s31, s31, 0xffff 11611; GFX6-NOHSA-NEXT: s_lshl_b32 s33, s33, 16 11612; GFX6-NOHSA-NEXT: s_and_b32 s7, s7, 0xffff 11613; GFX6-NOHSA-NEXT: s_lshl_b32 s34, s34, 16 11614; GFX6-NOHSA-NEXT: s_and_b32 s35, s35, 0xffff 11615; GFX6-NOHSA-NEXT: s_lshl_b32 s36, s36, 16 11616; GFX6-NOHSA-NEXT: s_and_b32 s6, s6, 0xffff 11617; GFX6-NOHSA-NEXT: s_or_b32 s12, s13, s12 11618; GFX6-NOHSA-NEXT: s_or_b32 s1, s1, s14 11619; GFX6-NOHSA-NEXT: s_or_b32 s13, s16, s15 11620; GFX6-NOHSA-NEXT: s_or_b32 s0, s0, s17 11621; GFX6-NOHSA-NEXT: s_or_b32 s14, s19, s18 11622; GFX6-NOHSA-NEXT: s_or_b32 s3, s3, s20 11623; GFX6-NOHSA-NEXT: s_or_b32 s15, s22, s21 11624; GFX6-NOHSA-NEXT: s_or_b32 s2, s2, s23 11625; GFX6-NOHSA-NEXT: s_or_b32 s16, s25, s24 11626; GFX6-NOHSA-NEXT: s_or_b32 s5, s5, s26 11627; GFX6-NOHSA-NEXT: s_or_b32 s17, s28, s27 11628; GFX6-NOHSA-NEXT: s_or_b32 s18, s31, s30 11629; GFX6-NOHSA-NEXT: s_or_b32 s7, s7, s33 11630; GFX6-NOHSA-NEXT: s_or_b32 s19, s35, s34 11631; GFX6-NOHSA-NEXT: s_or_b32 s6, s6, s36 11632; GFX6-NOHSA-NEXT: s_or_b32 s4, s4, s29 11633; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6 11634; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s19 11635; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s7 11636; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s18 11637; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 11638; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 11639; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4 11640; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s17 11641; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s5 11642; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s16 11643; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 11644; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 11645; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s2 11646; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s15 11647; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s3 11648; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s14 11649; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 11650; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0) 11651; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s0 11652; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s13 11653; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s1 11654; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s12 11655; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 11656; GFX6-NOHSA-NEXT: s_endpgm 11657; 11658; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i16: 11659; GFX7-HSA: ; %bb.0: 11660; GFX7-HSA-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0 11661; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 11662; GFX7-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 11663; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0) 11664; GFX7-HSA-NEXT: s_ashr_i32 s10, s1, 24 11665; GFX7-HSA-NEXT: s_bfe_i32 s11, s1, 0x80010 11666; GFX7-HSA-NEXT: s_bfe_i32 s12, s1, 0x80008 11667; GFX7-HSA-NEXT: s_sext_i32_i8 s1, s1 11668; GFX7-HSA-NEXT: s_lshl_b32 s10, s10, 16 11669; GFX7-HSA-NEXT: s_and_b32 s11, s11, 0xffff 11670; GFX7-HSA-NEXT: s_lshl_b32 s12, s12, 16 11671; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11672; GFX7-HSA-NEXT: s_ashr_i32 s13, s0, 24 11673; GFX7-HSA-NEXT: s_or_b32 s10, s11, s10 11674; GFX7-HSA-NEXT: s_or_b32 s11, s1, s12 11675; GFX7-HSA-NEXT: s_bfe_i32 s12, s0, 0x80010 11676; GFX7-HSA-NEXT: s_lshl_b32 s1, s13, 16 11677; GFX7-HSA-NEXT: s_and_b32 s12, s12, 0xffff 11678; GFX7-HSA-NEXT: s_or_b32 s12, s12, s1 11679; GFX7-HSA-NEXT: s_bfe_i32 s1, s0, 0x80008 11680; GFX7-HSA-NEXT: s_sext_i32_i8 s0, s0 11681; GFX7-HSA-NEXT: s_lshl_b32 s1, s1, 16 11682; GFX7-HSA-NEXT: s_and_b32 s0, s0, 0xffff 11683; GFX7-HSA-NEXT: s_or_b32 s13, s0, s1 11684; GFX7-HSA-NEXT: s_ashr_i32 s0, s3, 24 11685; GFX7-HSA-NEXT: s_bfe_i32 s1, s3, 0x80010 11686; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11687; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11688; GFX7-HSA-NEXT: s_or_b32 s14, s1, s0 11689; GFX7-HSA-NEXT: s_bfe_i32 s0, s3, 0x80008 11690; GFX7-HSA-NEXT: s_sext_i32_i8 s1, s3 11691; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11692; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11693; GFX7-HSA-NEXT: s_or_b32 s3, s1, s0 11694; GFX7-HSA-NEXT: s_ashr_i32 s0, s2, 24 11695; GFX7-HSA-NEXT: s_bfe_i32 s1, s2, 0x80010 11696; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11697; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11698; GFX7-HSA-NEXT: s_or_b32 s15, s1, s0 11699; GFX7-HSA-NEXT: s_bfe_i32 s0, s2, 0x80008 11700; GFX7-HSA-NEXT: s_sext_i32_i8 s1, s2 11701; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11702; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11703; GFX7-HSA-NEXT: s_or_b32 s2, s1, s0 11704; GFX7-HSA-NEXT: s_ashr_i32 s0, s5, 24 11705; GFX7-HSA-NEXT: s_bfe_i32 s1, s5, 0x80010 11706; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11707; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11708; GFX7-HSA-NEXT: s_or_b32 s16, s1, s0 11709; GFX7-HSA-NEXT: s_bfe_i32 s0, s5, 0x80008 11710; GFX7-HSA-NEXT: s_sext_i32_i8 s1, s5 11711; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11712; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11713; GFX7-HSA-NEXT: s_or_b32 s5, s1, s0 11714; GFX7-HSA-NEXT: s_ashr_i32 s0, s4, 24 11715; GFX7-HSA-NEXT: s_bfe_i32 s1, s4, 0x80010 11716; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11717; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11718; GFX7-HSA-NEXT: s_or_b32 s17, s1, s0 11719; GFX7-HSA-NEXT: s_bfe_i32 s0, s4, 0x80008 11720; GFX7-HSA-NEXT: s_sext_i32_i8 s1, s4 11721; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11722; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11723; GFX7-HSA-NEXT: s_or_b32 s4, s1, s0 11724; GFX7-HSA-NEXT: s_ashr_i32 s0, s7, 24 11725; GFX7-HSA-NEXT: s_bfe_i32 s1, s7, 0x80010 11726; GFX7-HSA-NEXT: s_lshl_b32 s0, s0, 16 11727; GFX7-HSA-NEXT: s_and_b32 s1, s1, 0xffff 11728; GFX7-HSA-NEXT: s_or_b32 s0, s1, s0 11729; GFX7-HSA-NEXT: s_bfe_i32 s1, s7, 0x80008 11730; GFX7-HSA-NEXT: s_sext_i32_i8 s7, s7 11731; GFX7-HSA-NEXT: s_lshl_b32 s1, s1, 16 11732; GFX7-HSA-NEXT: s_and_b32 s7, s7, 0xffff 11733; GFX7-HSA-NEXT: s_or_b32 s1, s7, s1 11734; GFX7-HSA-NEXT: s_ashr_i32 s7, s6, 24 11735; GFX7-HSA-NEXT: s_bfe_i32 s18, s6, 0x80010 11736; GFX7-HSA-NEXT: s_lshl_b32 s7, s7, 16 11737; GFX7-HSA-NEXT: s_and_b32 s18, s18, 0xffff 11738; GFX7-HSA-NEXT: s_or_b32 s7, s18, s7 11739; GFX7-HSA-NEXT: s_bfe_i32 s18, s6, 0x80008 11740; GFX7-HSA-NEXT: s_sext_i32_i8 s6, s6 11741; GFX7-HSA-NEXT: s_lshl_b32 s18, s18, 16 11742; GFX7-HSA-NEXT: s_and_b32 s6, s6, 0xffff 11743; GFX7-HSA-NEXT: s_or_b32 s6, s6, s18 11744; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s0 11745; GFX7-HSA-NEXT: s_add_u32 s0, s8, 48 11746; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s1 11747; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 11748; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 11749; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 11750; GFX7-HSA-NEXT: s_add_u32 s0, s8, 32 11751; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6 11752; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s7 11753; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 11754; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11755; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 11756; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 11757; GFX7-HSA-NEXT: s_add_u32 s0, s8, 16 11758; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4 11759; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17 11760; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s5 11761; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s16 11762; GFX7-HSA-NEXT: s_addc_u32 s1, s9, 0 11763; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11764; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1 11765; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2 11766; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s15 11767; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s3 11768; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s14 11769; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0 11770; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11771; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s8 11772; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s13 11773; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s12 11774; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s11 11775; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s10 11776; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s9 11777; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11778; GFX7-HSA-NEXT: s_endpgm 11779; 11780; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i16: 11781; GFX8-NOHSA: ; %bb.0: 11782; GFX8-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 11783; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 11784; GFX8-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 11785; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0) 11786; GFX8-NOHSA-NEXT: s_lshr_b32 s17, s1, 16 11787; GFX8-NOHSA-NEXT: s_sext_i32_i16 s18, s1 11788; GFX8-NOHSA-NEXT: s_bfe_i32 s19, s1, 0x80000 11789; GFX8-NOHSA-NEXT: s_ashr_i32 s1, s1, 16 11790; GFX8-NOHSA-NEXT: s_lshl_b32 s18, s18, 8 11791; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s1, 8 11792; GFX8-NOHSA-NEXT: s_bfe_i32 s17, s17, 0x80000 11793; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s0, 16 11794; GFX8-NOHSA-NEXT: s_and_b32 s18, s18, 0xffff0000 11795; GFX8-NOHSA-NEXT: s_and_b32 s19, 0xffff, s19 11796; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xffff0000 11797; GFX8-NOHSA-NEXT: s_and_b32 s17, 0xffff, s17 11798; GFX8-NOHSA-NEXT: s_or_b32 s18, s19, s18 11799; GFX8-NOHSA-NEXT: s_sext_i32_i16 s19, s0 11800; GFX8-NOHSA-NEXT: s_bfe_i32 s20, s0, 0x80000 11801; GFX8-NOHSA-NEXT: s_or_b32 s17, s17, s1 11802; GFX8-NOHSA-NEXT: s_ashr_i32 s0, s0, 24 11803; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s16, 0x80000 11804; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11805; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11806; GFX8-NOHSA-NEXT: s_or_b32 s16, s1, s0 11807; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s3 11808; GFX8-NOHSA-NEXT: s_lshl_b32 s19, s19, 8 11809; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11810; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s3, 0x80000 11811; GFX8-NOHSA-NEXT: s_and_b32 s19, s19, 0xffff0000 11812; GFX8-NOHSA-NEXT: s_and_b32 s20, 0xffff, s20 11813; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 11814; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11815; GFX8-NOHSA-NEXT: s_or_b32 s19, s20, s19 11816; GFX8-NOHSA-NEXT: s_or_b32 s20, s1, s0 11817; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s2 11818; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11819; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s2, 0x80000 11820; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 11821; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11822; GFX8-NOHSA-NEXT: s_lshr_b32 s15, s3, 16 11823; GFX8-NOHSA-NEXT: s_or_b32 s21, s1, s0 11824; GFX8-NOHSA-NEXT: s_ashr_i32 s0, s3, 16 11825; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11826; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s15, 0x80000 11827; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s2, 16 11828; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 11829; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11830; GFX8-NOHSA-NEXT: s_or_b32 s3, s1, s0 11831; GFX8-NOHSA-NEXT: s_ashr_i32 s0, s2, 24 11832; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s14, 0x80000 11833; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11834; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11835; GFX8-NOHSA-NEXT: s_or_b32 s2, s1, s0 11836; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s5 11837; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11838; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s5, 0x80000 11839; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 11840; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11841; GFX8-NOHSA-NEXT: s_or_b32 s14, s1, s0 11842; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s4 11843; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11844; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s4, 0x80000 11845; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 11846; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11847; GFX8-NOHSA-NEXT: s_lshr_b32 s13, s5, 16 11848; GFX8-NOHSA-NEXT: s_or_b32 s15, s1, s0 11849; GFX8-NOHSA-NEXT: s_ashr_i64 s[0:1], s[4:5], 56 11850; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s13, 0x80000 11851; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s4, 16 11852; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11853; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11854; GFX8-NOHSA-NEXT: s_or_b32 s5, s1, s0 11855; GFX8-NOHSA-NEXT: s_ashr_i32 s0, s4, 24 11856; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s12, 0x80000 11857; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 16 11858; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11859; GFX8-NOHSA-NEXT: s_or_b32 s4, s1, s0 11860; GFX8-NOHSA-NEXT: s_sext_i32_i16 s0, s7 11861; GFX8-NOHSA-NEXT: s_lshl_b32 s0, s0, 8 11862; GFX8-NOHSA-NEXT: s_bfe_i32 s1, s7, 0x80000 11863; GFX8-NOHSA-NEXT: s_and_b32 s0, s0, 0xffff0000 11864; GFX8-NOHSA-NEXT: s_and_b32 s1, 0xffff, s1 11865; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s6, 16 11866; GFX8-NOHSA-NEXT: s_lshr_b32 s11, s7, 16 11867; GFX8-NOHSA-NEXT: s_or_b32 s0, s1, s0 11868; GFX8-NOHSA-NEXT: s_sext_i32_i16 s1, s6 11869; GFX8-NOHSA-NEXT: s_ashr_i32 s7, s7, 16 11870; GFX8-NOHSA-NEXT: s_lshl_b32 s1, s1, 8 11871; GFX8-NOHSA-NEXT: s_bfe_i32 s12, s6, 0x80000 11872; GFX8-NOHSA-NEXT: s_lshl_b32 s7, s7, 8 11873; GFX8-NOHSA-NEXT: s_bfe_i32 s11, s11, 0x80000 11874; GFX8-NOHSA-NEXT: s_ashr_i32 s6, s6, 24 11875; GFX8-NOHSA-NEXT: s_bfe_i32 s10, s10, 0x80000 11876; GFX8-NOHSA-NEXT: s_and_b32 s1, s1, 0xffff0000 11877; GFX8-NOHSA-NEXT: s_and_b32 s12, 0xffff, s12 11878; GFX8-NOHSA-NEXT: s_and_b32 s7, s7, 0xffff0000 11879; GFX8-NOHSA-NEXT: s_and_b32 s11, 0xffff, s11 11880; GFX8-NOHSA-NEXT: s_lshl_b32 s6, s6, 16 11881; GFX8-NOHSA-NEXT: s_and_b32 s10, 0xffff, s10 11882; GFX8-NOHSA-NEXT: s_or_b32 s1, s12, s1 11883; GFX8-NOHSA-NEXT: s_or_b32 s7, s11, s7 11884; GFX8-NOHSA-NEXT: s_or_b32 s6, s10, s6 11885; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s0 11886; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 48 11887; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s1 11888; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0 11889; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 11890; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 11891; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 32 11892; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s6 11893; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s7 11894; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0 11895; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11896; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 11897; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 11898; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 16 11899; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s15 11900; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s4 11901; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14 11902; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s5 11903; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0 11904; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11905; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1 11906; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s21 11907; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s2 11908; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s20 11909; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s3 11910; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0 11911; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11912; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s8 11913; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s19 11914; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s16 11915; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s18 11916; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s17 11917; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s9 11918; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 11919; GFX8-NOHSA-NEXT: s_endpgm 11920; 11921; EG-LABEL: constant_sextload_v32i8_to_v32i16: 11922; EG: ; %bb.0: 11923; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[] 11924; EG-NEXT: TEX 1 @10 11925; EG-NEXT: ALU 104, @16, KC0[], KC1[] 11926; EG-NEXT: ALU 104, @121, KC0[], KC1[] 11927; EG-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[] 11928; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0 11929; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0 11930; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0 11931; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1 11932; EG-NEXT: CF_END 11933; EG-NEXT: Fetch clause starting at 10: 11934; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1 11935; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1 11936; EG-NEXT: ALU clause starting at 14: 11937; EG-NEXT: MOV * T0.Y, T16.X, 11938; EG-NEXT: MOV * T35.X, KC0[2].Z, 11939; EG-NEXT: ALU clause starting at 16: 11940; EG-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x, 11941; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11942; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 11943; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y, 11944; EG-NEXT: 65535(9.183409e-41), -65536(nan) 11945; EG-NEXT: OR_INT * T0.W, PS, PV.W, 11946; EG-NEXT: MOV * T16.X, PV.W, 11947; EG-NEXT: MOV T0.Y, PV.X, 11948; EG-NEXT: LSHR * T0.W, T37.X, literal.x, 11949; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11950; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 11951; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 11952; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 11953; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 11954; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11955; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 11956; EG-NEXT: MOV T16.X, PV.W, 11957; EG-NEXT: MOV T0.Y, T17.X, 11958; EG-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212 11959; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11960; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 11961; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 11962; EG-NEXT: 8(1.121039e-44), -65536(nan) 11963; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 11964; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 11965; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 11966; EG-NEXT: MOV * T17.X, PV.W, 11967; EG-NEXT: MOV T0.Y, PV.X, 11968; EG-NEXT: ASHR * T0.W, T37.X, literal.x, 11969; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 11970; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11971; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 11972; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 11973; EG-NEXT: OR_INT * T36.Y, PV.W, PS, 11974; EG-NEXT: MOV T17.X, PV.Y, 11975; EG-NEXT: MOV T0.Y, T12.X, 11976; EG-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x, 11977; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11978; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 11979; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 11980; EG-NEXT: -65536(nan), 65535(9.183409e-41) 11981; EG-NEXT: OR_INT * T0.W, PV.W, PS, 11982; EG-NEXT: MOV * T12.X, PV.W, 11983; EG-NEXT: MOV T0.Y, PV.X, 11984; EG-NEXT: LSHR * T0.W, T37.Y, literal.x, 11985; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 11986; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 11987; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 11988; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 11989; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 11990; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11991; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 11992; EG-NEXT: MOV T12.X, PV.W, 11993; EG-NEXT: MOV T0.Y, T13.X, 11994; EG-NEXT: LSHR * T0.W, T37.Y, literal.x, 11995; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 11996; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 11997; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 11998; EG-NEXT: 8(1.121039e-44), -65536(nan) 11999; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12000; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12001; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12002; EG-NEXT: MOV * T13.X, PV.W, 12003; EG-NEXT: MOV T0.Y, PV.X, 12004; EG-NEXT: ASHR * T0.W, T37.Y, literal.x, 12005; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 12006; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12007; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 12008; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 12009; EG-NEXT: OR_INT * T36.W, PV.W, PS, 12010; EG-NEXT: MOV T13.X, PV.W, 12011; EG-NEXT: MOV T0.Y, T8.X, 12012; EG-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x, 12013; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12014; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12015; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 12016; EG-NEXT: -65536(nan), 65535(9.183409e-41) 12017; EG-NEXT: OR_INT * T0.W, PV.W, PS, 12018; EG-NEXT: MOV * T8.X, PV.W, 12019; EG-NEXT: MOV T0.Y, PV.X, 12020; EG-NEXT: LSHR * T0.W, T37.Z, literal.x, 12021; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12022; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12023; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12024; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 12025; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 12026; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12027; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12028; EG-NEXT: MOV T8.X, PV.W, 12029; EG-NEXT: MOV T0.Y, T9.X, 12030; EG-NEXT: LSHR * T0.W, T37.Z, literal.x, 12031; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12032; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12033; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12034; EG-NEXT: 8(1.121039e-44), -65536(nan) 12035; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12036; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12037; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12038; EG-NEXT: MOV * T9.X, PV.W, 12039; EG-NEXT: MOV T0.Y, PV.X, 12040; EG-NEXT: ASHR * T0.W, T37.Z, literal.x, 12041; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 12042; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12043; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 12044; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 12045; EG-NEXT: ALU clause starting at 121: 12046; EG-NEXT: OR_INT * T37.Y, T1.W, T0.W, 12047; EG-NEXT: MOV T9.X, PV.Y, 12048; EG-NEXT: MOV T0.Y, T4.X, 12049; EG-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x, 12050; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12051; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12052; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 12053; EG-NEXT: -65536(nan), 65535(9.183409e-41) 12054; EG-NEXT: OR_INT * T0.W, PV.W, PS, 12055; EG-NEXT: MOV * T4.X, PV.W, 12056; EG-NEXT: MOV T0.Y, PV.X, 12057; EG-NEXT: LSHR * T0.W, T37.W, literal.x, 12058; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12059; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12060; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12061; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 12062; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 12063; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12064; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12065; EG-NEXT: MOV T4.X, PV.W, 12066; EG-NEXT: MOV T0.Y, T5.X, 12067; EG-NEXT: LSHR * T0.W, T37.W, literal.x, 12068; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12069; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12070; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12071; EG-NEXT: 8(1.121039e-44), -65536(nan) 12072; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12073; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12074; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12075; EG-NEXT: MOV * T5.X, PV.W, 12076; EG-NEXT: MOV T0.Y, PV.X, 12077; EG-NEXT: ASHR * T0.W, T37.W, literal.x, 12078; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 12079; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12080; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 12081; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 12082; EG-NEXT: OR_INT * T37.W, PV.W, PS, 12083; EG-NEXT: MOV T5.X, PV.W, 12084; EG-NEXT: MOV T0.Y, T32.X, 12085; EG-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212 12086; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12087; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12088; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 12089; EG-NEXT: -65536(nan), 65535(9.183409e-41) 12090; EG-NEXT: OR_INT * T0.W, PV.W, PS, 12091; EG-NEXT: MOV * T32.X, PV.W, 12092; EG-NEXT: MOV T0.Y, PV.X, 12093; EG-NEXT: LSHR * T0.W, T35.X, literal.x, 12094; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12095; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12096; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12097; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 12098; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 12099; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12100; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12101; EG-NEXT: MOV T32.X, PV.W, 12102; EG-NEXT: MOV T0.Y, T33.X, 12103; EG-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212 12104; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12105; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12106; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12107; EG-NEXT: 8(1.121039e-44), -65536(nan) 12108; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12109; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12110; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12111; EG-NEXT: MOV * T33.X, PV.W, 12112; EG-NEXT: MOV T0.Y, PV.X, 12113; EG-NEXT: ASHR * T0.W, T35.X, literal.x, 12114; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 12115; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12116; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 12117; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 12118; EG-NEXT: OR_INT * T38.Y, PV.W, PS, 12119; EG-NEXT: MOV T33.X, PV.Y, 12120; EG-NEXT: MOV T0.Y, T28.X, 12121; EG-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x, 12122; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12123; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12124; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 12125; EG-NEXT: -65536(nan), 65535(9.183409e-41) 12126; EG-NEXT: OR_INT * T0.W, PV.W, PS, 12127; EG-NEXT: MOV * T28.X, PV.W, 12128; EG-NEXT: MOV T0.Y, PV.X, 12129; EG-NEXT: LSHR * T0.W, T35.Y, literal.x, 12130; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12131; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12132; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12133; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 12134; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 12135; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12136; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12137; EG-NEXT: MOV T28.X, PV.W, 12138; EG-NEXT: MOV T0.Y, T29.X, 12139; EG-NEXT: LSHR * T0.W, T35.Y, literal.x, 12140; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12141; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12142; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12143; EG-NEXT: 8(1.121039e-44), -65536(nan) 12144; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12145; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12146; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12147; EG-NEXT: MOV * T29.X, PV.W, 12148; EG-NEXT: MOV T0.Y, PV.X, 12149; EG-NEXT: ASHR * T0.W, T35.Y, literal.x, 12150; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 12151; EG-NEXT: ALU clause starting at 226: 12152; EG-NEXT: AND_INT T1.W, T0.Y, literal.x, 12153; EG-NEXT: LSHL * T0.W, T0.W, literal.y, 12154; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 12155; EG-NEXT: OR_INT * T38.W, PV.W, PS, 12156; EG-NEXT: MOV T29.X, PV.W, 12157; EG-NEXT: MOV T0.Y, T24.X, 12158; EG-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x, 12159; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12160; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12161; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 12162; EG-NEXT: -65536(nan), 65535(9.183409e-41) 12163; EG-NEXT: OR_INT * T0.W, PV.W, PS, 12164; EG-NEXT: MOV * T24.X, PV.W, 12165; EG-NEXT: MOV T0.Y, PV.X, 12166; EG-NEXT: LSHR * T0.W, T35.Z, literal.x, 12167; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12168; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12169; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12170; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 12171; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 12172; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12173; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12174; EG-NEXT: MOV T24.X, PV.W, 12175; EG-NEXT: MOV T0.Y, T25.X, 12176; EG-NEXT: LSHR * T0.W, T35.Z, literal.x, 12177; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12178; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12179; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12180; EG-NEXT: 8(1.121039e-44), -65536(nan) 12181; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12182; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12183; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12184; EG-NEXT: MOV * T25.X, PV.W, 12185; EG-NEXT: MOV T0.Y, PV.X, 12186; EG-NEXT: ASHR * T0.W, T35.Z, literal.x, 12187; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) 12188; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12189; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 12190; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 12191; EG-NEXT: OR_INT * T35.Y, PV.W, PS, 12192; EG-NEXT: MOV T25.X, PV.Y, 12193; EG-NEXT: MOV T0.Y, T20.X, 12194; EG-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x, 12195; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12196; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 12197; EG-NEXT: AND_INT * T0.W, PV.W, literal.y, 12198; EG-NEXT: -65536(nan), 65535(9.183409e-41) 12199; EG-NEXT: OR_INT * T0.W, PV.W, PS, 12200; EG-NEXT: MOV * T20.X, PV.W, 12201; EG-NEXT: MOV T0.Y, PV.X, 12202; EG-NEXT: LSHR * T0.W, T35.W, literal.x, 12203; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 12204; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12205; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12206; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41) 12207; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 12208; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12209; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12210; EG-NEXT: MOV T20.X, PV.W, 12211; EG-NEXT: MOV T0.Y, T21.X, 12212; EG-NEXT: LSHR * T0.W, T35.W, literal.x, 12213; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12214; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, 12215; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y, 12216; EG-NEXT: 8(1.121039e-44), -65536(nan) 12217; EG-NEXT: AND_INT * T0.W, PV.W, literal.x, 12218; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 12219; EG-NEXT: OR_INT * T0.W, T1.W, PV.W, 12220; EG-NEXT: MOV * T21.X, PV.W, 12221; EG-NEXT: MOV T0.Y, PV.X, 12222; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 12223; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 12224; EG-NEXT: LSHR T39.X, PV.W, literal.x, 12225; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x, 12226; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 12227; EG-NEXT: ASHR T0.W, T35.W, literal.x, 12228; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 12229; EG-NEXT: 24(3.363116e-44), 48(6.726233e-44) 12230; EG-NEXT: LSHR T41.X, PS, literal.x, 12231; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y, 12232; EG-NEXT: LSHL T0.W, PV.W, literal.z, 12233; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 12234; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 12235; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 12236; EG-NEXT: LSHR T42.X, PS, literal.x, 12237; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W, 12238; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 12239; EG-NEXT: MOV T21.X, PV.W, 12240; EG-NEXT: MOV * T36.X, T16.X, 12241; EG-NEXT: MOV * T36.Z, T12.X, 12242; EG-NEXT: MOV T37.X, T8.X, 12243; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212 12244; EG-NEXT: MOV * T38.X, T32.X, 12245; EG-NEXT: MOV * T38.Z, T28.X, 12246; EG-NEXT: MOV T35.X, T24.X, 12247; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212 12248; 12249; GFX12-LABEL: constant_sextload_v32i8_to_v32i16: 12250; GFX12: ; %bb.0: 12251; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 12252; GFX12-NEXT: s_wait_kmcnt 0x0 12253; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 12254; GFX12-NEXT: s_wait_kmcnt 0x0 12255; GFX12-NEXT: s_lshr_b32 s13, s5, 16 12256; GFX12-NEXT: s_lshr_b32 s16, s0, 16 12257; GFX12-NEXT: s_lshr_b32 s17, s1, 16 12258; GFX12-NEXT: s_ashr_i32 s18, s1, 16 12259; GFX12-NEXT: s_bfe_i32 s19, s1, 0x80000 12260; GFX12-NEXT: s_sext_i32_i16 s20, s1 12261; GFX12-NEXT: s_ashr_i32 s21, s0, 24 12262; GFX12-NEXT: s_bfe_i32 s22, s0, 0x80000 12263; GFX12-NEXT: s_sext_i32_i16 s23, s0 12264; GFX12-NEXT: s_ashr_i64 s[0:1], s[4:5], 56 12265; GFX12-NEXT: s_lshr_b32 s12, s4, 16 12266; GFX12-NEXT: s_bfe_i32 s1, s5, 0x80000 12267; GFX12-NEXT: s_sext_i32_i16 s5, s5 12268; GFX12-NEXT: s_bfe_i32 s13, s13, 0x80000 12269; GFX12-NEXT: s_lshr_b32 s5, s5, 8 12270; GFX12-NEXT: s_pack_ll_b32_b16 s0, s13, s0 12271; GFX12-NEXT: s_ashr_i32 s13, s4, 24 12272; GFX12-NEXT: s_bfe_i32 s12, s12, 0x80000 12273; GFX12-NEXT: s_pack_ll_b32_b16 s1, s1, s5 12274; GFX12-NEXT: s_pack_ll_b32_b16 s5, s12, s13 12275; GFX12-NEXT: s_sext_i32_i16 s12, s4 12276; GFX12-NEXT: s_bfe_i32 s4, s4, 0x80000 12277; GFX12-NEXT: s_lshr_b32 s12, s12, 8 12278; GFX12-NEXT: s_ashr_i32 s13, s7, 16 12279; GFX12-NEXT: s_pack_ll_b32_b16 s4, s4, s12 12280; GFX12-NEXT: s_lshr_b32 s12, s13, 8 12281; GFX12-NEXT: s_sext_i32_i16 s13, s7 12282; GFX12-NEXT: s_lshr_b32 s11, s7, 16 12283; GFX12-NEXT: s_bfe_i32 s7, s7, 0x80000 12284; GFX12-NEXT: s_lshr_b32 s13, s13, 8 12285; GFX12-NEXT: s_lshr_b32 s10, s6, 16 12286; GFX12-NEXT: s_bfe_i32 s11, s11, 0x80000 12287; GFX12-NEXT: s_pack_ll_b32_b16 s7, s7, s13 12288; GFX12-NEXT: s_sext_i32_i16 s13, s6 12289; GFX12-NEXT: s_lshr_b32 s14, s2, 16 12290; GFX12-NEXT: s_lshr_b32 s15, s3, 16 12291; GFX12-NEXT: s_ashr_i32 s24, s3, 16 12292; GFX12-NEXT: s_bfe_i32 s25, s3, 0x80000 12293; GFX12-NEXT: s_sext_i32_i16 s3, s3 12294; GFX12-NEXT: s_ashr_i32 s26, s2, 24 12295; GFX12-NEXT: s_bfe_i32 s27, s2, 0x80000 12296; GFX12-NEXT: s_sext_i32_i16 s2, s2 12297; GFX12-NEXT: s_pack_ll_b32_b16 s11, s11, s12 12298; GFX12-NEXT: s_ashr_i32 s12, s6, 24 12299; GFX12-NEXT: s_bfe_i32 s6, s6, 0x80000 12300; GFX12-NEXT: s_lshr_b32 s13, s13, 8 12301; GFX12-NEXT: s_bfe_i32 s10, s10, 0x80000 12302; GFX12-NEXT: s_lshr_b32 s24, s24, 8 12303; GFX12-NEXT: s_bfe_i32 s15, s15, 0x80000 12304; GFX12-NEXT: s_lshr_b32 s3, s3, 8 12305; GFX12-NEXT: s_bfe_i32 s14, s14, 0x80000 12306; GFX12-NEXT: s_lshr_b32 s2, s2, 8 12307; GFX12-NEXT: s_pack_ll_b32_b16 s6, s6, s13 12308; GFX12-NEXT: s_pack_ll_b32_b16 s10, s10, s12 12309; GFX12-NEXT: s_lshr_b32 s18, s18, 8 12310; GFX12-NEXT: s_bfe_i32 s17, s17, 0x80000 12311; GFX12-NEXT: s_lshr_b32 s20, s20, 8 12312; GFX12-NEXT: s_bfe_i32 s16, s16, 0x80000 12313; GFX12-NEXT: s_lshr_b32 s23, s23, 8 12314; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s10 12315; GFX12-NEXT: s_pack_ll_b32_b16 s15, s15, s24 12316; GFX12-NEXT: s_pack_ll_b32_b16 s3, s25, s3 12317; GFX12-NEXT: s_pack_ll_b32_b16 s14, s14, s26 12318; GFX12-NEXT: s_pack_ll_b32_b16 s2, s27, s2 12319; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v3, s11 12320; GFX12-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s5 12321; GFX12-NEXT: s_pack_ll_b32_b16 s17, s17, s18 12322; GFX12-NEXT: s_pack_ll_b32_b16 s18, s19, s20 12323; GFX12-NEXT: s_pack_ll_b32_b16 s16, s16, s21 12324; GFX12-NEXT: s_pack_ll_b32_b16 s19, s22, s23 12325; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s0 12326; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v9, s14 12327; GFX12-NEXT: v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s15 12328; GFX12-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s16 12329; GFX12-NEXT: v_dual_mov_b32 v12, s19 :: v_dual_mov_b32 v15, s17 12330; GFX12-NEXT: v_mov_b32_e32 v14, s18 12331; GFX12-NEXT: s_clause 0x3 12332; GFX12-NEXT: global_store_b128 v16, v[0:3], s[8:9] offset:48 12333; GFX12-NEXT: global_store_b128 v16, v[4:7], s[8:9] offset:32 12334; GFX12-NEXT: global_store_b128 v16, v[8:11], s[8:9] offset:16 12335; GFX12-NEXT: global_store_b128 v16, v[12:15], s[8:9] 12336; GFX12-NEXT: s_endpgm 12337 %load = load <32 x i8>, ptr addrspace(4) %in 12338 %ext = sext <32 x i8> %load to <32 x i16> 12339 store <32 x i16> %ext, ptr addrspace(1) %out 12340 ret void 12341} 12342 12343; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16: 12344; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 12345; %load = load <64 x i8>, ptr addrspace(4) %in 12346; %ext = zext <64 x i8> %load to <64 x i16> 12347; store <64 x i16> %ext, ptr addrspace(1) %out 12348; ret void 12349; } 12350 12351; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16: 12352; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 12353; %load = load <64 x i8>, ptr addrspace(4) %in 12354; %ext = sext <64 x i8> %load to <64 x i16> 12355; store <64 x i16> %ext, ptr addrspace(1) %out 12356; ret void 12357; } 12358 12359attributes #0 = { nounwind } 12360