; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s
; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX12 %s

; Loads of i16 and <N x i16> values through a `ptr addrspace(4)` argument,
; stored to `ptr addrspace(1)`, compiled with the five llc invocations listed
; at the top of this file (one FileCheck prefix per invocation).
; The assertion lines are generated output: after changing any IR or any llc
; invocation, regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.

; Scalar i16: loaded from %in and stored unchanged to %out. The assertions
; expect a 16-bit memory load on every target (buffer_load_ushort,
; flat_load_ushort, VTX_READ_16, global_load_u16).
define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
; GCN-NOHSA-SI-LABEL: constant_load_i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    flat_store_short v[0:1], v2
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:    LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T0.X, T1.W, PV.W,
; EG-NEXT:    LSHL * T0.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_i16:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load i16, ptr addrspace(4) %in
  store i16 %ld, ptr addrspace(1) %out
  ret void
}

; <2 x i16>: loaded from %in and stored unchanged to %out. The GCN assertions
; that follow expect one scalar dword load (s_load_dword) and one dword store
; for the whole vector.
define amdgpu_kernel void @constant_load_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_load_dword s4, s[2:3], 0x0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_load_v2i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v2i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
145; 146; GFX12-LABEL: constant_load_v2i16: 147; GFX12: ; %bb.0: ; %entry 148; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 149; GFX12-NEXT: s_wait_kmcnt 0x0 150; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 151; GFX12-NEXT: s_wait_kmcnt 0x0 152; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 153; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 154; GFX12-NEXT: s_endpgm 155entry: 156 %ld = load <2 x i16>, ptr addrspace(4) %in 157 store <2 x i16> %ld, ptr addrspace(1) %out 158 ret void 159} 160 161define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrspace(4) %in) { 162; GCN-NOHSA-SI-LABEL: constant_load_v3i16: 163; GCN-NOHSA-SI: ; %bb.0: ; %entry 164; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 165; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 166; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 167; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 168; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 169; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 170; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 171; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 172; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 173; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 174; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 175; GCN-NOHSA-SI-NEXT: s_endpgm 176; 177; GCN-HSA-LABEL: constant_load_v3i16: 178; GCN-HSA: ; %bb.0: ; %entry 179; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 180; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 181; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 182; GCN-HSA-NEXT: s_add_u32 s4, s0, 4 183; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 184; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 185; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 186; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 187; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 188; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3 189; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 190; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2 191; GCN-HSA-NEXT: flat_store_short v[2:3], v4 192; GCN-HSA-NEXT: flat_store_dword v[0:1], v5 193; GCN-HSA-NEXT: s_endpgm 194; 195; 
GCN-NOHSA-VI-LABEL: constant_load_v3i16: 196; GCN-NOHSA-VI: ; %bb.0: ; %entry 197; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 198; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 199; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 200; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s0, 4 201; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s1, 0 202; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 203; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 204; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 205; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 206; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s3 207; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 208; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s2 209; GCN-NOHSA-VI-NEXT: flat_store_short v[2:3], v4 210; GCN-NOHSA-VI-NEXT: flat_store_dword v[0:1], v5 211; GCN-NOHSA-VI-NEXT: s_endpgm 212; 213; EG-LABEL: constant_load_v3i16: 214; EG: ; %bb.0: ; %entry 215; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 216; EG-NEXT: TEX 2 @6 217; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 218; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0 219; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X 220; EG-NEXT: CF_END 221; EG-NEXT: Fetch clause starting at 6: 222; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 223; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 224; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 225; EG-NEXT: ALU clause starting at 12: 226; EG-NEXT: MOV * T5.X, KC0[2].Z, 227; EG-NEXT: ALU clause starting at 13: 228; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 229; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 230; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 231; EG-NEXT: AND_INT * T2.W, T5.X, literal.y, 232; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 233; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 234; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 235; EG-NEXT: LSHL T5.X, T2.W, PV.W, 236; EG-NEXT: LSHL * T5.W, literal.x, PV.W, 237; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 238; EG-NEXT: MOV T5.Y, 0.0, 239; EG-NEXT: MOV * T5.Z, 0.0, 240; EG-NEXT: LSHR T8.X, T0.W, literal.x, 241; EG-NEXT: LSHL T0.W, T7.X, literal.y, 242; EG-NEXT: 
AND_INT * T1.W, T6.X, literal.z, 243; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 244; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 245; EG-NEXT: OR_INT T6.X, PV.W, PS, 246; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 247; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 248; 249; GFX12-LABEL: constant_load_v3i16: 250; GFX12: ; %bb.0: ; %entry 251; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 252; GFX12-NEXT: s_wait_kmcnt 0x0 253; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 254; GFX12-NEXT: s_wait_kmcnt 0x0 255; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s3 256; GFX12-NEXT: v_mov_b32_e32 v2, s2 257; GFX12-NEXT: s_clause 0x1 258; GFX12-NEXT: global_store_b16 v0, v1, s[0:1] offset:4 259; GFX12-NEXT: global_store_b32 v0, v2, s[0:1] 260; GFX12-NEXT: s_endpgm 261entry: 262 %ld = load <3 x i16>, ptr addrspace(4) %in 263 store <3 x i16> %ld, ptr addrspace(1) %out 264 ret void 265} 266 267define amdgpu_kernel void @constant_load_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) { 268; GCN-NOHSA-SI-LABEL: constant_load_v4i16: 269; GCN-NOHSA-SI: ; %bb.0: ; %entry 270; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 271; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 272; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 273; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 274; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 275; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 276; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 277; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 278; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 279; GCN-NOHSA-SI-NEXT: s_endpgm 280; 281; GCN-HSA-LABEL: constant_load_v4i16: 282; GCN-HSA: ; %bb.0: ; %entry 283; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 284; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 285; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 286; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 287; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 288; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 289; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 290; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 291; 
GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 292; GCN-HSA-NEXT: s_endpgm 293; 294; GCN-NOHSA-VI-LABEL: constant_load_v4i16: 295; GCN-NOHSA-VI: ; %bb.0: ; %entry 296; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 297; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 298; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 299; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 300; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 301; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 302; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 303; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 304; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 305; GCN-NOHSA-VI-NEXT: s_endpgm 306; 307; EG-LABEL: constant_load_v4i16: 308; EG: ; %bb.0: ; %entry 309; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 310; EG-NEXT: TEX 0 @6 311; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 312; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 313; EG-NEXT: CF_END 314; EG-NEXT: PAD 315; EG-NEXT: Fetch clause starting at 6: 316; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 317; EG-NEXT: ALU clause starting at 8: 318; EG-NEXT: MOV * T0.X, KC0[2].Z, 319; EG-NEXT: ALU clause starting at 9: 320; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 321; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 322; 323; GFX12-LABEL: constant_load_v4i16: 324; GFX12: ; %bb.0: ; %entry 325; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 326; GFX12-NEXT: s_wait_kmcnt 0x0 327; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 328; GFX12-NEXT: v_mov_b32_e32 v2, 0 329; GFX12-NEXT: s_wait_kmcnt 0x0 330; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 331; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 332; GFX12-NEXT: s_endpgm 333entry: 334 %ld = load <4 x i16>, ptr addrspace(4) %in 335 store <4 x i16> %ld, ptr addrspace(1) %out 336 ret void 337} 338 339define amdgpu_kernel void @constant_load_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) { 340; GCN-NOHSA-SI-LABEL: constant_load_v8i16: 341; GCN-NOHSA-SI: ; %bb.0: ; %entry 342; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x9 343; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 344; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 345; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 346; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 347; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 348; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 349; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 350; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 351; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 352; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 353; GCN-NOHSA-SI-NEXT: s_endpgm 354; 355; GCN-HSA-LABEL: constant_load_v8i16: 356; GCN-HSA: ; %bb.0: ; %entry 357; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 358; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 359; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 360; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 361; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 362; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 363; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 364; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 365; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 366; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 367; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 368; GCN-HSA-NEXT: s_endpgm 369; 370; GCN-NOHSA-VI-LABEL: constant_load_v8i16: 371; GCN-NOHSA-VI: ; %bb.0: ; %entry 372; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 373; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 374; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 375; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 377; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 378; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 380; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 382; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 383; GCN-NOHSA-VI-NEXT: s_endpgm 384; 385; EG-LABEL: constant_load_v8i16: 386; EG: ; %bb.0: ; %entry 387; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 388; EG-NEXT: TEX 0 @6 389; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 390; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T0.XYZW, T1.X, 1 391; EG-NEXT: CF_END 392; EG-NEXT: PAD 393; EG-NEXT: Fetch clause starting at 6: 394; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 395; EG-NEXT: ALU clause starting at 8: 396; EG-NEXT: MOV * T0.X, KC0[2].Z, 397; EG-NEXT: ALU clause starting at 9: 398; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 399; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 400; 401; GFX12-LABEL: constant_load_v8i16: 402; GFX12: ; %bb.0: ; %entry 403; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 404; GFX12-NEXT: s_wait_kmcnt 0x0 405; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 406; GFX12-NEXT: v_mov_b32_e32 v4, 0 407; GFX12-NEXT: s_wait_kmcnt 0x0 408; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7 409; GFX12-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6 410; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 411; GFX12-NEXT: s_endpgm 412entry: 413 %ld = load <8 x i16>, ptr addrspace(4) %in 414 store <8 x i16> %ld, ptr addrspace(1) %out 415 ret void 416} 417 418define amdgpu_kernel void @constant_load_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) { 419; GCN-NOHSA-SI-LABEL: constant_load_v16i16: 420; GCN-NOHSA-SI: ; %bb.0: ; %entry 421; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 422; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 423; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 424; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000 425; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 426; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 427; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 428; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 429; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 430; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 431; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 432; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 433; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0 434; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s1 435; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s2 436; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s3 437; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 
0 438; GCN-NOHSA-SI-NEXT: s_endpgm 439; 440; GCN-HSA-LABEL: constant_load_v16i16: 441; GCN-HSA: ; %bb.0: ; %entry 442; GCN-HSA-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0 443; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 444; GCN-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 445; GCN-HSA-NEXT: s_add_u32 s10, s8, 16 446; GCN-HSA-NEXT: s_addc_u32 s11, s9, 0 447; GCN-HSA-NEXT: v_mov_b32_e32 v6, s10 448; GCN-HSA-NEXT: v_mov_b32_e32 v7, s11 449; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 450; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 451; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 452; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 453; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 454; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 455; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 456; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 457; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 458; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2 459; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 460; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 461; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 462; GCN-HSA-NEXT: s_endpgm 463; 464; GCN-NOHSA-VI-LABEL: constant_load_v16i16: 465; GCN-NOHSA-VI: ; %bb.0: ; %entry 466; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 467; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 468; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 469; GCN-NOHSA-VI-NEXT: s_add_u32 s10, s8, 16 470; GCN-NOHSA-VI-NEXT: s_addc_u32 s11, s9, 0 471; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s10 472; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s11 473; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 474; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 475; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 476; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 477; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 478; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 479; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 480; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 481; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 482; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2 483; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3 484; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 
485; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 486; GCN-NOHSA-VI-NEXT: s_endpgm 487; 488; EG-LABEL: constant_load_v16i16: 489; EG: ; %bb.0: ; %entry 490; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 491; EG-NEXT: TEX 0 @8 492; EG-NEXT: ALU 3, @13, KC0[CB0:0-32], KC1[] 493; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 494; EG-NEXT: TEX 0 @10 495; EG-NEXT: ALU 1, @17, KC0[CB0:0-32], KC1[] 496; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 497; EG-NEXT: CF_END 498; EG-NEXT: Fetch clause starting at 8: 499; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 500; EG-NEXT: Fetch clause starting at 10: 501; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 502; EG-NEXT: ALU clause starting at 12: 503; EG-NEXT: MOV * T0.X, KC0[2].Z, 504; EG-NEXT: ALU clause starting at 13: 505; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 506; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 507; EG-NEXT: LSHR * T2.X, PV.W, literal.x, 508; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 509; EG-NEXT: ALU clause starting at 17: 510; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 511; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 512; 513; GFX12-LABEL: constant_load_v16i16: 514; GFX12: ; %bb.0: ; %entry 515; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 516; GFX12-NEXT: s_wait_kmcnt 0x0 517; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 518; GFX12-NEXT: s_wait_kmcnt 0x0 519; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s5 520; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7 521; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v5, s1 522; GFX12-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v7, s3 523; GFX12-NEXT: v_mov_b32_e32 v6, s2 524; GFX12-NEXT: s_clause 0x1 525; GFX12-NEXT: global_store_b128 v8, v[0:3], s[8:9] offset:16 526; GFX12-NEXT: global_store_b128 v8, v[4:7], s[8:9] 527; GFX12-NEXT: s_endpgm 528entry: 529 %ld = load <16 x i16>, ptr addrspace(4) %in 530 store <16 x i16> %ld, ptr addrspace(1) %out 531 ret void 532} 533 534define amdgpu_kernel void 
@constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #0 { 535; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2: 536; GCN-NOHSA-SI: ; %bb.0: ; %entry 537; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 538; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 539; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 540; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 541; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 542; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2 543; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4 544; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6 545; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 546; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10 547; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12 548; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14 549; GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16 550; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18 551; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20 552; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22 553; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24 554; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26 555; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28 556; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30 557; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8) 558; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 559; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3 560; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2 561; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1 562; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 563; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15 564; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13 565; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11 566; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, 
v9 567; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6 568; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5 569; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4 570; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0 571; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14 572; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12 573; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10 574; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8 575; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 576; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 577; GCN-NOHSA-SI-NEXT: s_endpgm 578; 579; GCN-HSA-LABEL: constant_load_v16i16_align2: 580; GCN-HSA: ; %bb.0: ; %entry 581; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 582; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 583; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 584; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 585; GCN-HSA-NEXT: s_add_u32 s0, s0, 16 586; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 587; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 588; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 589; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 590; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 591; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 592; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[0:3] 593; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 594; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 595; GCN-HSA-NEXT: s_endpgm 596; 597; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2: 598; GCN-NOHSA-VI: ; %bb.0: ; %entry 599; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 600; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 601; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 14 602; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 603; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 604; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 605; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 12 606; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 607; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 608; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 609; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 10 610; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 611; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 612; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 613; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 8 614; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 615; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3 616; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2 617; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 6 618; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 619; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s3 620; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s2 621; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 4 622; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 623; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, s3 624; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, s2 625; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 30 626; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 627; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, s3 628; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v12, s2 629; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 28 630; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 631; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, s3 632; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, s2 633; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 26 634; GCN-NOHSA-VI-NEXT: flat_load_ushort v16, v[0:1] 635; GCN-NOHSA-VI-NEXT: flat_load_ushort v17, v[2:3] 636; GCN-NOHSA-VI-NEXT: flat_load_ushort v18, v[4:5] 637; GCN-NOHSA-VI-NEXT: flat_load_ushort v19, v[6:7] 638; GCN-NOHSA-VI-NEXT: flat_load_ushort v20, v[8:9] 639; GCN-NOHSA-VI-NEXT: flat_load_ushort v21, v[10:11] 640; GCN-NOHSA-VI-NEXT: flat_load_ushort v12, v[12:13] 641; GCN-NOHSA-VI-NEXT: flat_load_ushort v13, v[14:15] 642; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 643; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 644; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 645; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 24 646; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 647; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 648; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 649; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 22 650; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 651; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 652; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 653; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 20 654; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, 
s1, 0 655; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3 656; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2 657; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 18 658; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 659; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s3 660; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s2 661; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 662; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 663; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, s3 664; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, s2 665; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 2 666; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 667; GCN-NOHSA-VI-NEXT: flat_load_ushort v14, v[0:1] 668; GCN-NOHSA-VI-NEXT: flat_load_ushort v15, v[2:3] 669; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 670; GCN-NOHSA-VI-NEXT: flat_load_ushort v4, v[4:5] 671; GCN-NOHSA-VI-NEXT: flat_load_ushort v5, v[6:7] 672; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s1 673; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 674; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 675; GCN-NOHSA-VI-NEXT: flat_load_ushort v8, v[8:9] 676; GCN-NOHSA-VI-NEXT: flat_load_ushort v9, v[10:11] 677; GCN-NOHSA-VI-NEXT: flat_load_ushort v0, v[0:1] 678; GCN-NOHSA-VI-NEXT: flat_load_ushort v10, v[2:3] 679; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 680; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v16 681; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v17, v1 682; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(13) 683; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v18 684; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 685; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v19, v1 686; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(11) 687; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v20 688; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10) 689; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v21, v1 690; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(9) 691; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v6, 16, v12 692; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 693; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v13, v6 694; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) 695; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v6, 16, v14 696; GCN-NOHSA-VI-NEXT: s_waitcnt 
vmcnt(6) 697; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v15, v6 698; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5) 699; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 700; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 701; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v5, v4 702; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 703; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v4, 16, v8 704; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 705; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v9, v4 706; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 707; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 708; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 709; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v10, v0 710; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 711; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[0:1], v[0:3] 712; GCN-NOHSA-VI-NEXT: s_endpgm 713; 714; EG-LABEL: constant_load_v16i16_align2: 715; EG: ; %bb.0: ; %entry 716; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 717; EG-NEXT: TEX 1 @6 718; EG-NEXT: ALU 1, @11, KC0[], KC1[] 719; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 720; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 721; EG-NEXT: CF_END 722; EG-NEXT: Fetch clause starting at 6: 723; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 724; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 725; EG-NEXT: ALU clause starting at 10: 726; EG-NEXT: MOV * T0.X, KC0[2].Y, 727; EG-NEXT: ALU clause starting at 11: 728; EG-NEXT: MOV * T2.X, literal.x, 729; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) 730; 731; GFX12-LABEL: constant_load_v16i16_align2: 732; GFX12: ; %bb.0: ; %entry 733; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 734; GFX12-NEXT: v_mov_b32_e32 v8, 0 735; GFX12-NEXT: s_wait_kmcnt 0x0 736; GFX12-NEXT: s_clause 0x7 737; GFX12-NEXT: global_load_u16 v3, v8, s[0:1] offset:28 738; GFX12-NEXT: global_load_u16 v2, v8, s[0:1] offset:24 739; GFX12-NEXT: global_load_u16 v1, v8, s[0:1] offset:20 740; GFX12-NEXT: global_load_u16 v0, v8, s[0:1] offset:16 741; GFX12-NEXT: global_load_u16 v7, v8, s[0:1] offset:12 742; GFX12-NEXT: global_load_u16 v6, v8, 
s[0:1] offset:8
; GFX12-NEXT:    global_load_u16 v5, v8, s[0:1] offset:4
; GFX12-NEXT:    global_load_u16 v4, v8, s[0:1]
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
; GFX12-NEXT:    s_wait_loadcnt 0x7
; GFX12-NEXT:    global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
; GFX12-NEXT:    s_wait_loadcnt 0x4
; GFX12-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <16 x i16>, ptr addrspace(4) %ptr0, align 2
  ; The kernel has no output pointer argument, so the store address is a
  ; placeholder. It is spelled `poison` rather than the deprecated `undef`.
  ; NOTE(review): the generated assertions are expected to be unaffected;
  ; re-run utils/update_llc_test_checks.py to confirm.
  store <16 x i16> %ld, ptr addrspace(1) poison, align 32
  ret void
}

; Zero-extending load: the i16 read from %in is widened to i32 with `zext`
; before the 32-bit store to %out. The assertions expect an unsigned 16-bit
; load (buffer_load_ushort, flat_load_ushort, VTX_READ_16, s_load_u16).
define amdgpu_kernel void @constant_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-SI:       ; %bb.0:
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
; GCN-HSA:       ; %bb.0:
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-VI:       ; %bb.0:
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: constant_zextload_i16_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_i16_to_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_u16 s2, s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %a = load i16, ptr addrspace(4) %in
  %ext = zext i16 %a to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-SI:       ; %bb.0:
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
; GCN-HSA:       ; %bb.0:
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-VI:       ; %bb.0:
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NOHSA-VI-NEXT:    flat_load_sshort v2, v[0:1]
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NOHSA-VI-NEXT:
v_mov_b32_e32 v1, s1 887; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 888; GCN-NOHSA-VI-NEXT: flat_store_dword v[0:1], v2 889; GCN-NOHSA-VI-NEXT: s_endpgm 890; 891; EG-LABEL: constant_sextload_i16_to_i32: 892; EG: ; %bb.0: 893; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 894; EG-NEXT: TEX 0 @6 895; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 896; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 897; EG-NEXT: CF_END 898; EG-NEXT: PAD 899; EG-NEXT: Fetch clause starting at 6: 900; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 901; EG-NEXT: ALU clause starting at 8: 902; EG-NEXT: MOV * T0.X, KC0[2].Z, 903; EG-NEXT: ALU clause starting at 9: 904; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 905; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 906; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 907; 908; GFX12-LABEL: constant_sextload_i16_to_i32: 909; GFX12: ; %bb.0: 910; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 911; GFX12-NEXT: s_wait_kmcnt 0x0 912; GFX12-NEXT: s_load_i16 s2, s[2:3], 0x0 913; GFX12-NEXT: s_wait_kmcnt 0x0 914; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 915; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 916; GFX12-NEXT: s_endpgm 917 %a = load i16, ptr addrspace(4) %in 918 %ext = sext i16 %a to i32 919 store i32 %ext, ptr addrspace(1) %out 920 ret void 921} 922 923define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 924; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32: 925; GCN-NOHSA-SI: ; %bb.0: 926; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 927; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 928; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 929; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 930; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 931; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 932; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 933; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 934; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 935; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 936; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 937; 
GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 938; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 939; GCN-NOHSA-SI-NEXT: s_endpgm 940; 941; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32: 942; GCN-HSA: ; %bb.0: 943; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 944; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 945; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 946; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 947; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 948; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 949; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 950; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 951; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 952; GCN-HSA-NEXT: s_endpgm 953; 954; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32: 955; GCN-NOHSA-VI: ; %bb.0: 956; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 957; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 958; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 959; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 960; GCN-NOHSA-VI-NEXT: flat_load_ushort v2, v[0:1] 961; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 962; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 963; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 964; GCN-NOHSA-VI-NEXT: flat_store_dword v[0:1], v2 965; GCN-NOHSA-VI-NEXT: s_endpgm 966; 967; EG-LABEL: constant_zextload_v1i16_to_v1i32: 968; EG: ; %bb.0: 969; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 970; EG-NEXT: TEX 0 @6 971; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 972; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 973; EG-NEXT: CF_END 974; EG-NEXT: PAD 975; EG-NEXT: Fetch clause starting at 6: 976; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 977; EG-NEXT: ALU clause starting at 8: 978; EG-NEXT: MOV * T0.X, KC0[2].Z, 979; EG-NEXT: ALU clause starting at 9: 980; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 981; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 982; 983; GFX12-LABEL: constant_zextload_v1i16_to_v1i32: 984; GFX12: ; %bb.0: 985; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 986; GFX12-NEXT: s_wait_kmcnt 0x0 987; GFX12-NEXT: s_load_u16 s2, s[2:3], 0x0 988; GFX12-NEXT: 
s_wait_kmcnt 0x0 989; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 990; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 991; GFX12-NEXT: s_endpgm 992 %load = load <1 x i16>, ptr addrspace(4) %in 993 %ext = zext <1 x i16> %load to <1 x i32> 994 store <1 x i32> %ext, ptr addrspace(1) %out 995 ret void 996} 997 998define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 999; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32: 1000; GCN-NOHSA-SI: ; %bb.0: 1001; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1002; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1003; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1004; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1005; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1006; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1007; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1008; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1009; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 1010; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1011; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1012; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1013; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1014; GCN-NOHSA-SI-NEXT: s_endpgm 1015; 1016; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32: 1017; GCN-HSA: ; %bb.0: 1018; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1019; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1020; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1021; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1022; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 1023; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1024; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1025; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1026; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 1027; GCN-HSA-NEXT: s_endpgm 1028; 1029; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32: 1030; GCN-NOHSA-VI: ; %bb.0: 1031; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1032; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1033; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 1035; 
GCN-NOHSA-VI-NEXT: flat_load_sshort v2, v[0:1] 1036; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 1037; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 1038; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1039; GCN-NOHSA-VI-NEXT: flat_store_dword v[0:1], v2 1040; GCN-NOHSA-VI-NEXT: s_endpgm 1041; 1042; EG-LABEL: constant_sextload_v1i16_to_v1i32: 1043; EG: ; %bb.0: 1044; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1045; EG-NEXT: TEX 0 @6 1046; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 1047; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1048; EG-NEXT: CF_END 1049; EG-NEXT: PAD 1050; EG-NEXT: Fetch clause starting at 6: 1051; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1052; EG-NEXT: ALU clause starting at 8: 1053; EG-NEXT: MOV * T0.X, KC0[2].Z, 1054; EG-NEXT: ALU clause starting at 9: 1055; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1056; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1057; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1058; 1059; GFX12-LABEL: constant_sextload_v1i16_to_v1i32: 1060; GFX12: ; %bb.0: 1061; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1062; GFX12-NEXT: s_wait_kmcnt 0x0 1063; GFX12-NEXT: s_load_i16 s2, s[2:3], 0x0 1064; GFX12-NEXT: s_wait_kmcnt 0x0 1065; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1066; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 1067; GFX12-NEXT: s_endpgm 1068 %load = load <1 x i16>, ptr addrspace(4) %in 1069 %ext = sext <1 x i16> %load to <1 x i32> 1070 store <1 x i32> %ext, ptr addrspace(1) %out 1071 ret void 1072} 1073 1074define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1075; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32: 1076; GCN-NOHSA-SI: ; %bb.0: 1077; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1078; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1079; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 1080; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1081; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1082; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 1083; 
GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 1084; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1085; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1086; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 1087; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1088; GCN-NOHSA-SI-NEXT: s_endpgm 1089; 1090; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32: 1091; GCN-HSA: ; %bb.0: 1092; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1093; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1094; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 1095; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1096; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1097; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1098; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 1099; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 1100; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1101; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1102; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1103; GCN-HSA-NEXT: s_endpgm 1104; 1105; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32: 1106; GCN-NOHSA-VI: ; %bb.0: 1107; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1108; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1109; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 1110; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 1111; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 1112; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1113; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s2, 16 1114; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s2, 0xffff 1115; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 1116; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1117; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1118; GCN-NOHSA-VI-NEXT: s_endpgm 1119; 1120; EG-LABEL: constant_zextload_v2i16_to_v2i32: 1121; EG: ; %bb.0: 1122; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1123; EG-NEXT: TEX 0 @6 1124; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 1125; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 1126; EG-NEXT: CF_END 1127; EG-NEXT: PAD 1128; EG-NEXT: Fetch clause starting at 6: 1129; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1130; EG-NEXT: ALU clause starting at 8: 1131; EG-NEXT: 
MOV * T4.X, KC0[2].Z, 1132; EG-NEXT: ALU clause starting at 9: 1133; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 1134; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1135; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 1136; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 1137; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1138; 1139; GFX12-LABEL: constant_zextload_v2i16_to_v2i32: 1140; GFX12: ; %bb.0: 1141; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1142; GFX12-NEXT: s_wait_kmcnt 0x0 1143; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 1144; GFX12-NEXT: s_wait_kmcnt 0x0 1145; GFX12-NEXT: s_and_b32 s3, s2, 0xffff 1146; GFX12-NEXT: s_lshr_b32 s2, s2, 16 1147; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1148; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2 1149; GFX12-NEXT: v_mov_b32_e32 v0, s3 1150; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1151; GFX12-NEXT: s_endpgm 1152 %load = load <2 x i16>, ptr addrspace(4) %in 1153 %ext = zext <2 x i16> %load to <2 x i32> 1154 store <2 x i32> %ext, ptr addrspace(1) %out 1155 ret void 1156} 1157 1158; TODO: We should use ASHR instead of LSHR + BFE 1159define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1160; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32: 1161; GCN-NOHSA-SI: ; %bb.0: 1162; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1163; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1164; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 1165; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1166; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1167; GCN-NOHSA-SI-NEXT: s_ashr_i32 s4, s2, 16 1168; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s2 1169; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1170; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1171; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 1172; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1173; GCN-NOHSA-SI-NEXT: s_endpgm 1174; 1175; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32: 1176; GCN-HSA: ; %bb.0: 1177; GCN-HSA-NEXT: s_load_dwordx4 
s[0:3], s[8:9], 0x0 1178; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1179; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 1180; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1181; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1182; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1183; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 1184; GCN-HSA-NEXT: s_sext_i32_i16 s1, s2 1185; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1186; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1187; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1188; GCN-HSA-NEXT: s_endpgm 1189; 1190; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32: 1191; GCN-NOHSA-VI: ; %bb.0: 1192; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1193; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1194; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 1195; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 1196; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 1197; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1198; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s2, 16 1199; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s1, s2 1200; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 1201; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1202; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1203; GCN-NOHSA-VI-NEXT: s_endpgm 1204; 1205; EG-LABEL: constant_sextload_v2i16_to_v2i32: 1206; EG: ; %bb.0: 1207; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1208; EG-NEXT: TEX 0 @6 1209; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1210; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1211; EG-NEXT: CF_END 1212; EG-NEXT: PAD 1213; EG-NEXT: Fetch clause starting at 6: 1214; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1215; EG-NEXT: ALU clause starting at 8: 1216; EG-NEXT: MOV * T4.X, KC0[2].Z, 1217; EG-NEXT: ALU clause starting at 9: 1218; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1219; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1220; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y, 1221; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1222; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1223; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1224; 1225; GFX12-LABEL: 
constant_sextload_v2i16_to_v2i32: 1226; GFX12: ; %bb.0: 1227; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1228; GFX12-NEXT: s_wait_kmcnt 0x0 1229; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 1230; GFX12-NEXT: s_wait_kmcnt 0x0 1231; GFX12-NEXT: s_sext_i32_i16 s3, s2 1232; GFX12-NEXT: s_ashr_i32 s2, s2, 16 1233; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1234; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2 1235; GFX12-NEXT: v_mov_b32_e32 v0, s3 1236; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1237; GFX12-NEXT: s_endpgm 1238 %load = load <2 x i16>, ptr addrspace(4) %in 1239 %ext = sext <2 x i16> %load to <2 x i32> 1240 store <2 x i32> %ext, ptr addrspace(1) %out 1241 ret void 1242} 1243 1244define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { 1245; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32: 1246; GCN-NOHSA-SI: ; %bb.0: ; %entry 1247; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1248; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1249; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1250; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1251; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1252; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1253; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s4, 16 1254; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1255; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1256; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1257; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1258; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1259; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1260; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6 1261; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1262; GCN-NOHSA-SI-NEXT: s_endpgm 1263; 1264; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32: 1265; GCN-HSA: ; %bb.0: ; %entry 1266; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1267; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1268; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1269; GCN-HSA-NEXT: 
v_mov_b32_e32 v3, s0 1270; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1271; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1272; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 1273; GCN-HSA-NEXT: s_and_b32 s1, s3, 0xffff 1274; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 1275; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1276; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0 1277; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1278; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1279; GCN-HSA-NEXT: s_endpgm 1280; 1281; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32: 1282; GCN-NOHSA-VI: ; %bb.0: ; %entry 1283; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1284; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1285; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1286; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1287; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s1 1288; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1289; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s3, 0xffff 1290; GCN-NOHSA-VI-NEXT: s_lshr_b32 s1, s2, 16 1291; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 1292; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1293; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 1294; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 1295; GCN-NOHSA-VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1296; GCN-NOHSA-VI-NEXT: s_endpgm 1297; 1298; EG-LABEL: constant_zextload_v3i16_to_v3i32: 1299; EG: ; %bb.0: ; %entry 1300; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1301; EG-NEXT: TEX 2 @6 1302; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1303; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1304; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1305; EG-NEXT: CF_END 1306; EG-NEXT: Fetch clause starting at 6: 1307; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1308; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1309; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1310; EG-NEXT: ALU clause starting at 12: 1311; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1312; EG-NEXT: MOV * T1.X, KC0[2].Z, 1313; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1314; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1315; EG-NEXT: 8(1.121039e-44), 
0(0.000000e+00) 1316; EG-NEXT: ALU clause starting at 17: 1317; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1318; EG-NEXT: MOV * T3.Y, T1.X, 1319; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1320; 1321; GFX12-LABEL: constant_zextload_v3i16_to_v3i32: 1322; GFX12: ; %bb.0: ; %entry 1323; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1324; GFX12-NEXT: s_wait_kmcnt 0x0 1325; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 1326; GFX12-NEXT: s_wait_kmcnt 0x0 1327; GFX12-NEXT: s_and_b32 s3, s3, 0xffff 1328; GFX12-NEXT: s_and_b32 s4, s2, 0xffff 1329; GFX12-NEXT: s_lshr_b32 s2, s2, 16 1330; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4 1331; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3 1332; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1] 1333; GFX12-NEXT: s_endpgm 1334entry: 1335 %ld = load <3 x i16>, ptr addrspace(4) %in 1336 %ext = zext <3 x i16> %ld to <3 x i32> 1337 store <3 x i32> %ext, ptr addrspace(1) %out 1338 ret void 1339} 1340 1341define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { 1342; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32: 1343; GCN-NOHSA-SI: ; %bb.0: ; %entry 1344; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1345; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1346; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1347; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1348; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1349; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1350; GCN-NOHSA-SI-NEXT: s_ashr_i32 s6, s4, 16 1351; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1352; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1353; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1354; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1355; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1356; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1357; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6 1358; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1359; GCN-NOHSA-SI-NEXT: s_endpgm 1360; 1361; GCN-HSA-LABEL: 
constant_sextload_v3i16_to_v3i32: 1362; GCN-HSA: ; %bb.0: ; %entry 1363; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1364; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1365; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1366; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1367; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1368; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1369; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 1370; GCN-HSA-NEXT: s_sext_i32_i16 s1, s3 1371; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 1372; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1373; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0 1374; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1375; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1376; GCN-HSA-NEXT: s_endpgm 1377; 1378; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32: 1379; GCN-NOHSA-VI: ; %bb.0: ; %entry 1380; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1381; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1382; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1383; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1384; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s1 1385; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1386; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s2, 16 1387; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s1, s3 1388; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s2, s2 1389; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1390; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s0 1391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 1392; GCN-NOHSA-VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1393; GCN-NOHSA-VI-NEXT: s_endpgm 1394; 1395; EG-LABEL: constant_sextload_v3i16_to_v3i32: 1396; EG: ; %bb.0: ; %entry 1397; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1398; EG-NEXT: TEX 2 @6 1399; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1400; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1401; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1402; EG-NEXT: CF_END 1403; EG-NEXT: Fetch clause starting at 6: 1404; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1405; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1406; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1407; EG-NEXT: ALU clause 
starting at 12: 1408; EG-NEXT: MOV * T0.X, KC0[2].Z, 1409; EG-NEXT: ALU clause starting at 13: 1410; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1411; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1412; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1413; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1414; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1415; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1416; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1417; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1418; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1419; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1420; 1421; GFX12-LABEL: constant_sextload_v3i16_to_v3i32: 1422; GFX12: ; %bb.0: ; %entry 1423; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1424; GFX12-NEXT: s_wait_kmcnt 0x0 1425; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 1426; GFX12-NEXT: s_wait_kmcnt 0x0 1427; GFX12-NEXT: s_ashr_i32 s4, s2, 16 1428; GFX12-NEXT: s_sext_i32_i16 s2, s2 1429; GFX12-NEXT: s_sext_i32_i16 s3, s3 1430; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s2 1431; GFX12-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s3 1432; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1] 1433; GFX12-NEXT: s_endpgm 1434entry: 1435 %ld = load <3 x i16>, ptr addrspace(4) %in 1436 %ext = sext <3 x i16> %ld to <3 x i32> 1437 store <3 x i32> %ext, ptr addrspace(1) %out 1438 ret void 1439} 1440 1441; v4i16 is naturally 8 byte aligned 1442; TODO: This should use LD, but for some reason there are redundant MOVs 1443define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1444; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32: 1445; GCN-NOHSA-SI: ; %bb.0: 1446; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1447; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1448; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1449; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1450; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1451; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16 1452; 
GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 1453; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1454; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1455; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1456; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1457; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 1458; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1459; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1460; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1461; GCN-NOHSA-SI-NEXT: s_endpgm 1462; 1463; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32: 1464; GCN-HSA: ; %bb.0: 1465; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1466; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1467; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1468; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1469; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1470; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1471; GCN-HSA-NEXT: s_lshr_b32 s0, s3, 16 1472; GCN-HSA-NEXT: s_lshr_b32 s1, s2, 16 1473; GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff 1474; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 1475; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1476; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1477; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 1478; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1479; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1480; GCN-HSA-NEXT: s_endpgm 1481; 1482; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32: 1483; GCN-NOHSA-VI: ; %bb.0: 1484; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1485; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1486; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1487; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 1488; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 1489; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1490; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s3, 16 1491; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s3, 0xffff 1492; GCN-NOHSA-VI-NEXT: s_lshr_b32 s3, s2, 16 1493; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 1494; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1495; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 1496; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 1497; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1498; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1499; GCN-NOHSA-VI-NEXT: s_endpgm 1500; 1501; EG-LABEL: constant_zextload_v4i16_to_v4i32: 1502; EG: ; %bb.0: 1503; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1504; EG-NEXT: TEX 0 @6 1505; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 1506; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1507; EG-NEXT: CF_END 1508; EG-NEXT: PAD 1509; EG-NEXT: Fetch clause starting at 6: 1510; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1511; EG-NEXT: ALU clause starting at 8: 1512; EG-NEXT: MOV * T5.X, KC0[2].Z, 1513; EG-NEXT: ALU clause starting at 9: 1514; EG-NEXT: LSHR * T5.W, T5.Y, literal.x, 1515; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1516; EG-NEXT: AND_INT * T5.Z, T5.Y, literal.x, 1517; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1518; EG-NEXT: LSHR * T5.Y, T5.X, literal.x, 1519; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1520; EG-NEXT: AND_INT T5.X, T5.X, literal.x, 1521; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1522; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1523; 1524; GFX12-LABEL: constant_zextload_v4i16_to_v4i32: 1525; GFX12: ; %bb.0: 1526; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1527; GFX12-NEXT: s_wait_kmcnt 0x0 1528; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 1529; GFX12-NEXT: s_wait_kmcnt 0x0 1530; GFX12-NEXT: s_lshr_b32 s4, s3, 16 1531; GFX12-NEXT: s_and_b32 s3, s3, 0xffff 1532; GFX12-NEXT: s_and_b32 s5, s2, 0xffff 1533; GFX12-NEXT: s_lshr_b32 s2, s2, 16 1534; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1535; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2 1536; GFX12-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s4 1537; GFX12-NEXT: v_mov_b32_e32 v2, s3 1538; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 1539; GFX12-NEXT: s_endpgm 1540 %load = load <4 x i16>, ptr addrspace(4) %in 1541 %ext = zext <4 x i16> %load to <4 x i32> 1542 store <4 x i32> %ext, ptr addrspace(1) %out 1543 ret void 1544} 1545 1546; v4i16 is naturally 8 byte aligned 
1547; TODO: This should use LD, but for some reason there are redundant MOVs 1548; TODO: We should use ASHR instead of LSHR + BFE 1549define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1550; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32: 1551; GCN-NOHSA-SI: ; %bb.0: 1552; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1553; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1554; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1555; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1556; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1557; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s4, 16 1558; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[4:5], 48 1559; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1560; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1561; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1562; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1563; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8 1564; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1565; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1566; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1567; GCN-NOHSA-SI-NEXT: s_endpgm 1568; 1569; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32: 1570; GCN-HSA: ; %bb.0: 1571; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1572; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1573; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1574; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1575; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1576; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1577; GCN-HSA-NEXT: s_ashr_i64 s[0:1], s[2:3], 48 1578; GCN-HSA-NEXT: s_ashr_i32 s4, s2, 16 1579; GCN-HSA-NEXT: s_sext_i32_i16 s1, s3 1580; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 1581; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1582; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4 1583; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1584; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1585; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1586; GCN-HSA-NEXT: s_endpgm 1587; 1588; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32: 1589; GCN-NOHSA-VI: ; %bb.0: 1590; 
GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1591; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1592; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1593; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 1594; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 1595; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1596; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s3, 16 1597; GCN-NOHSA-VI-NEXT: s_ashr_i32 s1, s2, 16 1598; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s3, s3 1599; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s2, s2 1600; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1601; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 1602; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 1603; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1604; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1605; GCN-NOHSA-VI-NEXT: s_endpgm 1606; 1607; EG-LABEL: constant_sextload_v4i16_to_v4i32: 1608; EG: ; %bb.0: 1609; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1610; EG-NEXT: TEX 0 @6 1611; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1612; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1 1613; EG-NEXT: CF_END 1614; EG-NEXT: PAD 1615; EG-NEXT: Fetch clause starting at 6: 1616; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1617; EG-NEXT: ALU clause starting at 8: 1618; EG-NEXT: MOV * T5.X, KC0[2].Z, 1619; EG-NEXT: ALU clause starting at 9: 1620; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x, 1621; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1622; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x, 1623; EG-NEXT: LSHR * T0.W, T5.Y, literal.x, 1624; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1625; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x, 1626; EG-NEXT: LSHR * T0.W, T5.X, literal.x, 1627; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1628; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 1629; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y, 1630; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1631; 1632; GFX12-LABEL: constant_sextload_v4i16_to_v4i32: 1633; GFX12: ; %bb.0: 1634; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1635; GFX12-NEXT: s_wait_kmcnt 0x0 1636; GFX12-NEXT: 
s_load_b64 s[2:3], s[2:3], 0x0 1637; GFX12-NEXT: s_wait_kmcnt 0x0 1638; GFX12-NEXT: s_ashr_i32 s4, s3, 16 1639; GFX12-NEXT: s_ashr_i32 s5, s2, 16 1640; GFX12-NEXT: s_sext_i32_i16 s2, s2 1641; GFX12-NEXT: s_sext_i32_i16 s3, s3 1642; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s5 1643; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s4 1644; GFX12-NEXT: v_mov_b32_e32 v2, s3 1645; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 1646; GFX12-NEXT: s_endpgm 1647 %load = load <4 x i16>, ptr addrspace(4) %in 1648 %ext = sext <4 x i16> %load to <4 x i32> 1649 store <4 x i32> %ext, ptr addrspace(1) %out 1650 ret void 1651} 1652 1653; v8i16 is naturally 16 byte aligned 1654; TODO: These should use LSHR instead of BFE_UINT 1655; TODO: This should use DST, but for some reason there are redundant MOVs 1656define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1657; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32: 1658; GCN-NOHSA-SI: ; %bb.0: 1659; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1660; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1661; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1662; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1663; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1664; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1665; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s5, 16 1666; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s4, 16 1667; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s7, 16 1668; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s6, 16 1669; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1670; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 1671; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 1672; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1673; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1674; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 1675; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1676; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10 1677; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1678; GCN-NOHSA-SI-NEXT: 
s_waitcnt expcnt(0) 1679; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1680; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 1681; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1682; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8 1683; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1684; GCN-NOHSA-SI-NEXT: s_endpgm 1685; 1686; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32: 1687; GCN-HSA: ; %bb.0: 1688; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1689; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1690; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1691; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1692; GCN-HSA-NEXT: s_lshr_b32 s8, s5, 16 1693; GCN-HSA-NEXT: s_lshr_b32 s9, s4, 16 1694; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16 1695; GCN-HSA-NEXT: s_lshr_b32 s3, s6, 16 1696; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 1697; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 1698; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 1699; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 1700; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1701; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1702; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1703; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1704; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1705; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1706; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1707; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1708; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1709; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1710; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1711; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1712; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1713; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1714; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1715; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1716; GCN-HSA-NEXT: s_endpgm 1717; 1718; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32: 1719; GCN-NOHSA-VI: ; %bb.0: 1720; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1721; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1722; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1723; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1724; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s5, 16 
1725; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 1726; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s4, 16 1727; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 1728; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s7, 16 1729; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s7, 0xffff 1730; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s6, 16 1731; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 1732; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s2 1733; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 1734; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 1735; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 1736; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 1737; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1738; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 1739; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 1740; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1741; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 1742; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1743; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 1744; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1745; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8 1746; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 1747; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1748; GCN-NOHSA-VI-NEXT: s_endpgm 1749; 1750; EG-LABEL: constant_zextload_v8i16_to_v8i32: 1751; EG: ; %bb.0: 1752; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1753; EG-NEXT: TEX 0 @6 1754; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1755; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1756; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1757; EG-NEXT: CF_END 1758; EG-NEXT: Fetch clause starting at 6: 1759; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1760; EG-NEXT: ALU clause starting at 8: 1761; EG-NEXT: MOV * T7.X, KC0[2].Z, 1762; EG-NEXT: ALU clause starting at 9: 1763; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1764; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1765; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1766; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1767; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1768; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1769; EG-NEXT: 16(2.242078e-44), 
0(0.000000e+00) 1770; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1771; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1772; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1773; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1774; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1775; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1776; EG-NEXT: AND_INT T9.X, T7.Z, literal.x, 1777; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1778; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1779; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1780; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1781; 1782; GFX12-LABEL: constant_zextload_v8i16_to_v8i32: 1783; GFX12: ; %bb.0: 1784; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1785; GFX12-NEXT: s_wait_kmcnt 0x0 1786; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 1787; GFX12-NEXT: s_wait_kmcnt 0x0 1788; GFX12-NEXT: s_lshr_b32 s8, s7, 16 1789; GFX12-NEXT: s_and_b32 s7, s7, 0xffff 1790; GFX12-NEXT: s_and_b32 s9, s6, 0xffff 1791; GFX12-NEXT: s_lshr_b32 s6, s6, 16 1792; GFX12-NEXT: s_lshr_b32 s2, s5, 16 1793; GFX12-NEXT: s_and_b32 s3, s5, 0xffff 1794; GFX12-NEXT: s_lshr_b32 s5, s4, 16 1795; GFX12-NEXT: s_and_b32 s4, s4, 0xffff 1796; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s6 1797; GFX12-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s8 1798; GFX12-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s5 1799; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s2 1800; GFX12-NEXT: v_mov_b32_e32 v6, s3 1801; GFX12-NEXT: s_clause 0x1 1802; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 1803; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 1804; GFX12-NEXT: s_endpgm 1805 %load = load <8 x i16>, ptr addrspace(4) %in 1806 %ext = zext <8 x i16> %load to <8 x i32> 1807 store <8 x i32> %ext, ptr addrspace(1) %out 1808 ret void 1809} 1810 1811; v8i16 is naturally 16 byte aligned 1812; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT 1813; TODO: This should use DST, but for some reason there are redundant MOVs 1814define amdgpu_kernel void 
@constant_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1815; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32: 1816; GCN-NOHSA-SI: ; %bb.0: 1817; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1818; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1819; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1820; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1821; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1822; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1823; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s5, 16 1824; GCN-NOHSA-SI-NEXT: s_ashr_i32 s9, s4, 16 1825; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1826; GCN-NOHSA-SI-NEXT: s_ashr_i32 s10, s7, 16 1827; GCN-NOHSA-SI-NEXT: s_ashr_i32 s11, s6, 16 1828; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1829; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 1830; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1831; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1832; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 1833; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1834; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10 1835; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1836; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1837; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1838; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 1839; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1840; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8 1841; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1842; GCN-NOHSA-SI-NEXT: s_endpgm 1843; 1844; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32: 1845; GCN-HSA: ; %bb.0: 1846; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1847; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1848; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1849; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1850; GCN-HSA-NEXT: s_ashr_i32 s8, s5, 16 1851; GCN-HSA-NEXT: s_ashr_i32 s9, s4, 16 1852; GCN-HSA-NEXT: s_ashr_i32 s2, s7, 16 1853; GCN-HSA-NEXT: s_ashr_i32 s3, s6, 16 1854; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1855; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1856; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s3 1857; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1858; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1859; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1860; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1861; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1862; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1863; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1864; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1865; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1866; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1867; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1868; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1869; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1870; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1871; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1872; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1873; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1874; GCN-HSA-NEXT: s_endpgm 1875; 1876; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32: 1877; GCN-NOHSA-VI: ; %bb.0: 1878; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1879; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1880; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1881; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1882; GCN-NOHSA-VI-NEXT: s_ashr_i32 s8, s5, 16 1883; GCN-NOHSA-VI-NEXT: s_ashr_i32 s9, s4, 16 1884; GCN-NOHSA-VI-NEXT: s_ashr_i32 s2, s7, 16 1885; GCN-NOHSA-VI-NEXT: s_ashr_i32 s3, s6, 16 1886; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s2 1887; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 1888; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 1889; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 1890; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 1891; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 1892; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 1893; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1894; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1895; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 1896; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1897; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1898; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1899; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 1900; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1901; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 1902; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1903; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8 1904; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 1905; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1906; GCN-NOHSA-VI-NEXT: s_endpgm 1907; 1908; EG-LABEL: constant_sextload_v8i16_to_v8i32: 1909; EG: ; %bb.0: 1910; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1911; EG-NEXT: TEX 0 @6 1912; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 1913; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1914; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1915; EG-NEXT: CF_END 1916; EG-NEXT: Fetch clause starting at 6: 1917; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1918; EG-NEXT: ALU clause starting at 8: 1919; EG-NEXT: MOV * T7.X, KC0[2].Z, 1920; EG-NEXT: ALU clause starting at 9: 1921; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 1922; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1923; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 1924; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 1925; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 1926; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1927; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 1928; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 1929; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 1930; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 1931; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1932; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 1933; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 1934; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 1935; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 1936; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1937; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1938; EG-NEXT: LSHR T10.X, PS, literal.x, 1939; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 1940; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1941; 1942; GFX12-LABEL: constant_sextload_v8i16_to_v8i32: 1943; GFX12: ; %bb.0: 1944; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1945; GFX12-NEXT: s_wait_kmcnt 0x0 1946; GFX12-NEXT: s_load_b128 s[4:7], 
s[2:3], 0x0 1947; GFX12-NEXT: s_wait_kmcnt 0x0 1948; GFX12-NEXT: s_ashr_i32 s8, s7, 16 1949; GFX12-NEXT: s_ashr_i32 s9, s6, 16 1950; GFX12-NEXT: s_sext_i32_i16 s6, s6 1951; GFX12-NEXT: s_sext_i32_i16 s7, s7 1952; GFX12-NEXT: s_ashr_i32 s2, s5, 16 1953; GFX12-NEXT: s_ashr_i32 s3, s4, 16 1954; GFX12-NEXT: s_sext_i32_i16 s5, s5 1955; GFX12-NEXT: s_sext_i32_i16 s4, s4 1956; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s9 1957; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v3, s8 1958; GFX12-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s3 1959; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s2 1960; GFX12-NEXT: v_mov_b32_e32 v6, s5 1961; GFX12-NEXT: s_clause 0x1 1962; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 1963; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 1964; GFX12-NEXT: s_endpgm 1965 %load = load <8 x i16>, ptr addrspace(4) %in 1966 %ext = sext <8 x i16> %load to <8 x i32> 1967 store <8 x i32> %ext, ptr addrspace(1) %out 1968 ret void 1969} 1970 1971define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 1972; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32: 1973; GCN-NOHSA-SI: ; %bb.0: 1974; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1975; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1976; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1977; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1978; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1979; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1980; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16 1981; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s4, 16 1982; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s7, 16 1983; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s6, 16 1984; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s9, 16 1985; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s8, 16 1986; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s11, 16 1987; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s10, 16 1988; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1989; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 
0xffff 1990; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 1991; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 1992; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 1993; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 1994; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 1995; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 1996; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 1997; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 1998; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 1999; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2000; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2001; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2002; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2003; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 2004; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2005; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16 2006; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2007; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2008; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2009; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 2010; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2011; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14 2012; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2013; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2014; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2015; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 2016; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2017; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12 2018; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2019; GCN-NOHSA-SI-NEXT: s_endpgm 2020; 2021; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32: 2022; GCN-HSA: ; %bb.0: 2023; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2024; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2025; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2026; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2027; GCN-HSA-NEXT: s_lshr_b32 s12, s5, 16 2028; GCN-HSA-NEXT: s_lshr_b32 s13, s4, 16 2029; GCN-HSA-NEXT: s_lshr_b32 s14, s7, 16 2030; GCN-HSA-NEXT: s_lshr_b32 s15, s6, 16 2031; 
GCN-HSA-NEXT: s_lshr_b32 s16, s9, 16 2032; GCN-HSA-NEXT: s_lshr_b32 s17, s8, 16 2033; GCN-HSA-NEXT: s_lshr_b32 s2, s11, 16 2034; GCN-HSA-NEXT: s_lshr_b32 s3, s10, 16 2035; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 2036; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 2037; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 2038; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 2039; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 2040; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 2041; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 2042; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 2043; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 2044; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2045; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2046; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2047; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2048; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2049; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2050; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2051; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2052; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2053; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2054; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2055; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2056; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2057; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2058; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 2059; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2060; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 2061; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2062; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2063; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2064; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2065; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 2066; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2067; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 2068; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2069; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2070; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2071; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2072; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 2073; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2074; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 2075; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2076; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2077; 
GCN-HSA-NEXT: s_endpgm 2078; 2079; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32: 2080; GCN-NOHSA-VI: ; %bb.0: 2081; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2082; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2083; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2084; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2085; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16 2086; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 2087; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s4, 16 2088; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 2089; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s7, 16 2090; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 2091; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s6, 16 2092; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 2093; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s9, 16 2094; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 2095; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s8, 16 2096; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 2097; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s11, 16 2098; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s11, 0xffff 2099; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s10, 16 2100; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 2101; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s2 2102; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 48 2103; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 2104; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 2105; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 2106; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 2107; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 32 2108; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2109; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 2110; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 2111; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2112; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 2113; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 2114; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 2115; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2116; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 2117; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2118; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16 2119; GCN-NOHSA-VI-NEXT: 
s_addc_u32 s3, s1, 0 2120; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2121; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 2122; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2123; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 2124; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2125; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14 2126; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 2127; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2128; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2129; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2130; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 2131; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2132; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12 2133; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2134; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2135; GCN-NOHSA-VI-NEXT: s_endpgm 2136; 2137; EG-LABEL: constant_zextload_v16i16_to_v16i32: 2138; EG: ; %bb.0: 2139; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2140; EG-NEXT: TEX 1 @8 2141; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 2142; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 2143; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 2144; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 2145; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 2146; EG-NEXT: CF_END 2147; EG-NEXT: Fetch clause starting at 8: 2148; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 2149; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 2150; EG-NEXT: ALU clause starting at 12: 2151; EG-NEXT: MOV * T11.X, KC0[2].Z, 2152; EG-NEXT: ALU clause starting at 13: 2153; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 2154; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2155; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 2156; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2157; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 2158; EG-NEXT: LSHR * T14.W, T12.W, literal.x, 2159; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2160; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 2161; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 2162; EG-NEXT: LSHR * T12.X, KC0[2].Y, 
literal.y, 2163; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2164; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 2165; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 2166; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2167; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 2168; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 2169; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2170; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2171; EG-NEXT: LSHR T16.X, PV.W, literal.x, 2172; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 2173; EG-NEXT: LSHR T17.W, T11.W, literal.y, 2174; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 2175; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2176; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2177; EG-NEXT: AND_INT T17.Z, T11.W, literal.x, 2178; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2179; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2180; EG-NEXT: LSHR T11.X, PV.W, literal.x, 2181; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 2182; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 2183; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2184; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2185; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2186; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2187; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 2188; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2189; 2190; GFX12-LABEL: constant_zextload_v16i16_to_v16i32: 2191; GFX12: ; %bb.0: 2192; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2193; GFX12-NEXT: s_wait_kmcnt 0x0 2194; GFX12-NEXT: s_load_b256 s[4:11], s[2:3], 0x0 2195; GFX12-NEXT: s_wait_kmcnt 0x0 2196; GFX12-NEXT: s_lshr_b32 s16, s11, 16 2197; GFX12-NEXT: s_and_b32 s11, s11, 0xffff 2198; GFX12-NEXT: s_and_b32 s17, s10, 0xffff 2199; GFX12-NEXT: s_lshr_b32 s10, s10, 16 2200; GFX12-NEXT: s_lshr_b32 s14, s9, 16 2201; GFX12-NEXT: s_and_b32 s9, s9, 0xffff 2202; GFX12-NEXT: s_lshr_b32 s15, s8, 16 2203; GFX12-NEXT: s_and_b32 s8, s8, 0xffff 2204; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s10 2205; GFX12-NEXT: s_lshr_b32 s12, s7, 16 2206; GFX12-NEXT: 
s_and_b32 s7, s7, 0xffff 2207; GFX12-NEXT: s_lshr_b32 s13, s6, 16 2208; GFX12-NEXT: s_and_b32 s6, s6, 0xffff 2209; GFX12-NEXT: v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s16 2210; GFX12-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v5, s15 2211; GFX12-NEXT: s_lshr_b32 s2, s5, 16 2212; GFX12-NEXT: s_and_b32 s3, s5, 0xffff 2213; GFX12-NEXT: s_lshr_b32 s5, s4, 16 2214; GFX12-NEXT: s_and_b32 s4, s4, 0xffff 2215; GFX12-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s14 2216; GFX12-NEXT: v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v9, s13 2217; GFX12-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v11, s12 2218; GFX12-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v13, s5 2219; GFX12-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s2 2220; GFX12-NEXT: v_mov_b32_e32 v14, s3 2221; GFX12-NEXT: s_clause 0x3 2222; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 2223; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 2224; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 2225; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 2226; GFX12-NEXT: s_endpgm 2227 %load = load <16 x i16>, ptr addrspace(4) %in 2228 %ext = zext <16 x i16> %load to <16 x i32> 2229 store <16 x i32> %ext, ptr addrspace(1) %out 2230 ret void 2231} 2232 2233define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 2234; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32: 2235; GCN-NOHSA-SI: ; %bb.0: 2236; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2237; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2238; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2239; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2240; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2241; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2242; GCN-NOHSA-SI-NEXT: s_ashr_i32 s12, s5, 16 2243; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s4, 16 2244; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 2245; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 2246; GCN-NOHSA-SI-NEXT: 
s_ashr_i32 s14, s7, 16 2247; GCN-NOHSA-SI-NEXT: s_ashr_i32 s15, s6, 16 2248; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 2249; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 2250; GCN-NOHSA-SI-NEXT: s_ashr_i32 s16, s9, 16 2251; GCN-NOHSA-SI-NEXT: s_ashr_i32 s17, s8, 16 2252; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 2253; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s11, 16 2254; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s10, 16 2255; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 2256; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 2257; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 2258; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2259; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 2260; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2261; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2262; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2263; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2264; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2265; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 2266; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2267; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16 2268; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2269; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2270; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2271; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 2272; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2273; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14 2274; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2275; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2276; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2277; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 2278; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2279; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12 2280; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2281; GCN-NOHSA-SI-NEXT: s_endpgm 2282; 2283; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32: 2284; GCN-HSA: ; %bb.0: 2285; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2286; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2287; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], 
s[2:3], 0x0 2288; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2289; GCN-HSA-NEXT: s_ashr_i32 s12, s5, 16 2290; GCN-HSA-NEXT: s_ashr_i32 s13, s4, 16 2291; GCN-HSA-NEXT: s_ashr_i32 s14, s7, 16 2292; GCN-HSA-NEXT: s_ashr_i32 s15, s6, 16 2293; GCN-HSA-NEXT: s_ashr_i32 s16, s9, 16 2294; GCN-HSA-NEXT: s_ashr_i32 s17, s8, 16 2295; GCN-HSA-NEXT: s_ashr_i32 s2, s11, 16 2296; GCN-HSA-NEXT: s_ashr_i32 s3, s10, 16 2297; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 2298; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2299; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2300; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2301; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2302; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 2303; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 2304; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2305; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2306; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2307; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2308; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2309; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2310; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2311; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 2312; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 2313; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2314; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2315; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2316; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 2317; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2318; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 2319; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2320; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 2321; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 2322; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2323; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2324; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2325; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 2326; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2327; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 2328; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2329; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 2330; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 2331; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2332; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2333; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2334; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s13 2335; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2336; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 2337; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2338; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2339; GCN-HSA-NEXT: s_endpgm 2340; 2341; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32: 2342; GCN-NOHSA-VI: ; %bb.0: 2343; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2344; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2345; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 2346; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2347; GCN-NOHSA-VI-NEXT: s_ashr_i32 s12, s5, 16 2348; GCN-NOHSA-VI-NEXT: s_ashr_i32 s13, s4, 16 2349; GCN-NOHSA-VI-NEXT: s_ashr_i32 s14, s7, 16 2350; GCN-NOHSA-VI-NEXT: s_ashr_i32 s15, s6, 16 2351; GCN-NOHSA-VI-NEXT: s_ashr_i32 s16, s9, 16 2352; GCN-NOHSA-VI-NEXT: s_ashr_i32 s17, s8, 16 2353; GCN-NOHSA-VI-NEXT: s_ashr_i32 s2, s11, 16 2354; GCN-NOHSA-VI-NEXT: s_ashr_i32 s3, s10, 16 2355; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s2 2356; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 48 2357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 2358; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 2359; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 2360; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 2361; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 2362; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 2363; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 32 2364; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2365; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2366; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 2367; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2368; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 2369; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 2370; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 2371; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 2372; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 2373; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2374; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 2375; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16 2377; GCN-NOHSA-VI-NEXT: s_addc_u32 
s3, s1, 0 2378; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 2379; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 2380; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 2382; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2383; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 2384; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2385; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14 2386; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 2387; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 2388; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 2389; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2390; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 2393; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2394; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12 2395; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2396; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2397; GCN-NOHSA-VI-NEXT: s_endpgm 2398; 2399; EG-LABEL: constant_sextload_v16i16_to_v16i32: 2400; EG: ; %bb.0: 2401; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2402; EG-NEXT: TEX 1 @8 2403; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[] 2404; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0 2405; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0 2406; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0 2407; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1 2408; EG-NEXT: CF_END 2409; EG-NEXT: Fetch clause starting at 8: 2410; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2411; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2412; EG-NEXT: ALU clause starting at 12: 2413; EG-NEXT: MOV * T11.X, KC0[2].Z, 2414; EG-NEXT: ALU clause starting at 13: 2415; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2416; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2417; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2418; EG-NEXT: LSHR T14.X, PV.W, literal.x, 2419; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y, 2420; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 
2421; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x, 2422; EG-NEXT: LSHR T0.Y, T12.W, literal.x, 2423; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212 2424; EG-NEXT: LSHR T0.W, T12.Y, literal.x, 2425; EG-NEXT: LSHR * T1.W, T11.Y, literal.x, 2426; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2427; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x, 2428; EG-NEXT: LSHR T1.Y, T11.W, literal.x, 2429; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x, 2430; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x, 2431; EG-NEXT: LSHR * T1.W, T11.X, literal.x, 2432; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2433; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x, 2434; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x, 2435; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x, 2436; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x, 2437; EG-NEXT: LSHR * T1.W, T11.Z, literal.x, 2438; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2439; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x, 2440; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x, 2441; EG-NEXT: LSHR T0.Z, T12.X, literal.x, 2442; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x, 2443; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2444; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 2445; EG-NEXT: LSHR T11.X, PS, literal.x, 2446; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y, 2447; EG-NEXT: LSHR T0.Z, T12.Z, literal.y, 2448; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y, 2449; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2450; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2451; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2452; EG-NEXT: LSHR T12.X, PS, literal.x, 2453; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y, 2454; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2455; 2456; GFX12-LABEL: constant_sextload_v16i16_to_v16i32: 2457; GFX12: ; %bb.0: 2458; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2459; GFX12-NEXT: s_wait_kmcnt 0x0 2460; GFX12-NEXT: s_load_b256 s[4:11], s[2:3], 0x0 2461; GFX12-NEXT: s_wait_kmcnt 0x0 2462; GFX12-NEXT: s_ashr_i32 s16, s11, 16 2463; 
GFX12-NEXT: s_ashr_i32 s17, s10, 16 2464; GFX12-NEXT: s_sext_i32_i16 s10, s10 2465; GFX12-NEXT: s_sext_i32_i16 s11, s11 2466; GFX12-NEXT: s_ashr_i32 s14, s9, 16 2467; GFX12-NEXT: s_ashr_i32 s15, s8, 16 2468; GFX12-NEXT: s_sext_i32_i16 s9, s9 2469; GFX12-NEXT: s_sext_i32_i16 s8, s8 2470; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s17 2471; GFX12-NEXT: s_ashr_i32 s12, s7, 16 2472; GFX12-NEXT: s_ashr_i32 s13, s6, 16 2473; GFX12-NEXT: s_sext_i32_i16 s7, s7 2474; GFX12-NEXT: s_sext_i32_i16 s6, s6 2475; GFX12-NEXT: v_dual_mov_b32 v0, s10 :: v_dual_mov_b32 v3, s16 2476; GFX12-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v5, s15 2477; GFX12-NEXT: s_ashr_i32 s2, s5, 16 2478; GFX12-NEXT: s_ashr_i32 s3, s4, 16 2479; GFX12-NEXT: s_sext_i32_i16 s5, s5 2480; GFX12-NEXT: s_sext_i32_i16 s4, s4 2481; GFX12-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s14 2482; GFX12-NEXT: v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v9, s13 2483; GFX12-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v11, s12 2484; GFX12-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v13, s3 2485; GFX12-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s2 2486; GFX12-NEXT: v_mov_b32_e32 v14, s5 2487; GFX12-NEXT: s_clause 0x3 2488; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 2489; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 2490; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 2491; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 2492; GFX12-NEXT: s_endpgm 2493 %load = load <16 x i16>, ptr addrspace(4) %in 2494 %ext = sext <16 x i16> %load to <16 x i32> 2495 store <16 x i32> %ext, ptr addrspace(1) %out 2496 ret void 2497} 2498 ; Test kernel: loads <32 x i16> through %in (ptr addrspace(4)), zero-extends each element to i32 with zext, and stores the <32 x i32> result through %out (ptr addrspace(1)). The per-target assertion lines that follow are autogenerated by utils/update_llc_test_checks.py, so regenerate them with that script instead of editing them by hand. 2499define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 2500; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32: 2501; GCN-NOHSA-SI: ; %bb.0: 2502; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9 2503; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2504; 
GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2505; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2506; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s1, 16 2507; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s0, 16 2508; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s3, 16 2509; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s2, 16 2510; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s5, 16 2511; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s4, 16 2512; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s7, 16 2513; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s6, 16 2514; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s9, 16 2515; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s8, 16 2516; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s11, 16 2517; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s10, 16 2518; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s13, 16 2519; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s12, 16 2520; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s15, 16 2521; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s14, 16 2522; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s1, 0xffff 2523; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s0, 0xffff 2524; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s3, 0xffff 2525; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s2, 0xffff 2526; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 2527; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 2528; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 2529; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 2530; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 2531; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 2532; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 2533; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 2534; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 0xffff 2535; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, 0xffff 2536; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, 0xffff 2537; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, 0xffff 2538; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2539; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2540; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2541; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2542; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 2543; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 2544; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v2, s15 2545; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 2546; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2547; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 2549; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 2550; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 2551; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 2552; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2553; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2555; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 2556; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2557; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 2558; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2559; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2560; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2561; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 2562; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2563; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 2564; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2565; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2566; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2567; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s25 2568; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2569; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s24 2570; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2571; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2572; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2573; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 2574; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2575; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 2576; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2577; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2578; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 2579; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21 2580; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s37 2581; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s20 2582; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, 
s[0:3], 0 offset:16 2583; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2584; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 2585; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 2586; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 2587; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2588; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2589; GCN-NOHSA-SI-NEXT: s_endpgm 2590; 2591; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32: 2592; GCN-HSA: ; %bb.0: 2593; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 2594; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2595; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2596; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2597; GCN-HSA-NEXT: s_lshr_b32 s18, s1, 16 2598; GCN-HSA-NEXT: s_lshr_b32 s19, s0, 16 2599; GCN-HSA-NEXT: s_lshr_b32 s20, s3, 16 2600; GCN-HSA-NEXT: s_lshr_b32 s21, s2, 16 2601; GCN-HSA-NEXT: s_lshr_b32 s22, s5, 16 2602; GCN-HSA-NEXT: s_lshr_b32 s23, s4, 16 2603; GCN-HSA-NEXT: s_lshr_b32 s24, s7, 16 2604; GCN-HSA-NEXT: s_lshr_b32 s25, s6, 16 2605; GCN-HSA-NEXT: s_lshr_b32 s26, s9, 16 2606; GCN-HSA-NEXT: s_lshr_b32 s27, s8, 16 2607; GCN-HSA-NEXT: s_lshr_b32 s28, s11, 16 2608; GCN-HSA-NEXT: s_lshr_b32 s29, s10, 16 2609; GCN-HSA-NEXT: s_lshr_b32 s30, s13, 16 2610; GCN-HSA-NEXT: s_lshr_b32 s31, s12, 16 2611; GCN-HSA-NEXT: s_lshr_b32 s33, s15, 16 2612; GCN-HSA-NEXT: s_lshr_b32 s34, s14, 16 2613; GCN-HSA-NEXT: s_and_b32 s35, s1, 0xffff 2614; GCN-HSA-NEXT: s_and_b32 s36, s0, 0xffff 2615; GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff 2616; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 2617; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 2618; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 2619; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 2620; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 2621; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 2622; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 2623; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 2624; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 2625; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff 2626; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff 2627; GCN-HSA-NEXT: 
s_and_b32 s15, s15, 0xffff 2628; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff 2629; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x70 2630; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2631; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 2632; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 2633; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 2634; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2635; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 2636; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 2637; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 2638; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 2639; GCN-HSA-NEXT: v_mov_b32_e32 v1, s34 2640; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 2641; GCN-HSA-NEXT: v_mov_b32_e32 v3, s33 2642; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12 2643; GCN-HSA-NEXT: v_mov_b32_e32 v5, s31 2644; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2645; GCN-HSA-NEXT: v_mov_b32_e32 v6, s13 2646; GCN-HSA-NEXT: v_mov_b32_e32 v7, s30 2647; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2648; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2649; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2650; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2651; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2652; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 2653; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 2654; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2655; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 2656; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2657; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2658; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2659; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2660; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 2661; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2662; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 2663; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2664; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 2665; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2666; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2667; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2668; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2669; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 2670; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2671; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 2672; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2673; GCN-HSA-NEXT: v_mov_b32_e32 
v3, s24 2674; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2675; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2676; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2677; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2678; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 2679; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2680; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 2681; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2682; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 2683; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2684; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2685; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2686; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2687; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 2688; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 2689; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 2690; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2691; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2692; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 2693; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 2694; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 2695; GCN-HSA-NEXT: v_mov_b32_e32 v2, s35 2696; GCN-HSA-NEXT: v_mov_b32_e32 v3, s18 2697; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 2698; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2699; GCN-HSA-NEXT: s_endpgm 2700; 2701; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32: 2702; GCN-NOHSA-VI: ; %bb.0: 2703; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24 2704; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2705; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2706; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2707; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s1, 16 2708; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s1, 0xffff 2709; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s0, 16 2710; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s0, 0xffff 2711; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s3, 16 2712; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 2713; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s2, 16 2714; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 2715; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s5, 16 2716; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 2717; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s4, 16 2718; 
GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 2719; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s7, 16 2720; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 2721; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s6, 16 2722; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 2723; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s9, 16 2724; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 2725; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s8, 16 2726; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 2727; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s11, 16 2728; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 2729; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s10, 16 2730; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 2731; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s13, 16 2732; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, 0xffff 2733; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s12, 16 2734; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, 0xffff 2735; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s15, 16 2736; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s15, 0xffff 2737; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s14, 16 2738; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, 0xffff 2739; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 2740; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 0x70 2741; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 2742; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2743; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2744; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2745; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 0x60 2746; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2747; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 2748; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2749; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2750; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2751; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2752; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 0x50 2753; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2754; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 2755; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2756; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 2757; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2758; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], 
v[0:3] 2759; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2760; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2761; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 64 2762; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2763; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 2764; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2765; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30 2766; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2767; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2768; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2769; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2770; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 48 2771; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2772; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 2773; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2774; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 2775; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2776; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2777; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2778; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2779; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 32 2780; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2781; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 2782; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2783; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26 2784; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2785; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2786; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2787; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 2788; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 16 2789; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2790; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 2791; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2792; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 2793; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 2794; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2795; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 2796; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 2797; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 2798; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 2799; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22 2800; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v4, s0 2801; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2802; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s16 2803; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s21 2804; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s20 2805; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 2806; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18 2807; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s17 2808; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2809; GCN-NOHSA-VI-NEXT: s_endpgm 2810; 2811; EG-LABEL: constant_zextload_v32i16_to_v32i32: 2812; EG: ; %bb.0: 2813; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2814; EG-NEXT: TEX 3 @12 2815; EG-NEXT: ALU 71, @21, KC0[CB0:0-32], KC1[] 2816; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 2817; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2818; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0 2819; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0 2820; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0 2821; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0 2822; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0 2823; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1 2824; EG-NEXT: CF_END 2825; EG-NEXT: Fetch clause starting at 12: 2826; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2827; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2828; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2829; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2830; EG-NEXT: ALU clause starting at 20: 2831; EG-NEXT: MOV * T19.X, KC0[2].Z, 2832; EG-NEXT: ALU clause starting at 21: 2833; EG-NEXT: LSHR * T23.W, T20.Y, literal.x, 2834; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2835; EG-NEXT: AND_INT * T23.Z, T20.Y, literal.x, 2836; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2837; EG-NEXT: LSHR T23.Y, T20.X, literal.x, 2838; EG-NEXT: LSHR * T24.W, T20.W, literal.x, 2839; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2840; EG-NEXT: AND_INT T23.X, T20.X, literal.x, 2841; EG-NEXT: AND_INT T24.Z, T20.W, literal.x, 2842; EG-NEXT: LSHR 
* T20.X, KC0[2].Y, literal.y, 2843; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2844; EG-NEXT: LSHR T24.Y, T20.Z, literal.x, 2845; EG-NEXT: LSHR * T25.W, T19.Y, literal.x, 2846; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2847; EG-NEXT: AND_INT T24.X, T20.Z, literal.x, 2848; EG-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2849; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2850; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2851; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2852; EG-NEXT: LSHR T25.Y, T19.X, literal.y, 2853; EG-NEXT: LSHR T27.W, T19.W, literal.y, 2854; EG-NEXT: AND_INT * T25.X, T19.X, literal.z, 2855; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2856; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2857; EG-NEXT: AND_INT T27.Z, T19.W, literal.x, 2858; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2859; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2860; EG-NEXT: LSHR T19.X, PV.W, literal.x, 2861; EG-NEXT: LSHR T27.Y, T19.Z, literal.y, 2862; EG-NEXT: LSHR T28.W, T22.Y, literal.y, 2863; EG-NEXT: AND_INT * T27.X, T19.Z, literal.z, 2864; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2865; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2866; EG-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2867; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2868; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2869; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2870; EG-NEXT: LSHR T28.Y, T22.X, literal.y, 2871; EG-NEXT: LSHR T30.W, T22.W, literal.y, 2872; EG-NEXT: AND_INT * T28.X, T22.X, literal.z, 2873; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2874; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2875; EG-NEXT: AND_INT T30.Z, T22.W, literal.x, 2876; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2877; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2878; EG-NEXT: LSHR T22.X, PV.W, literal.x, 2879; EG-NEXT: LSHR T30.Y, T22.Z, literal.y, 2880; EG-NEXT: LSHR T31.W, T21.Y, literal.y, 2881; EG-NEXT: AND_INT * T30.X, T22.Z, literal.z, 2882; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2883; EG-NEXT: 
65535(9.183409e-41), 0(0.000000e+00) 2884; EG-NEXT: AND_INT T31.Z, T21.Y, literal.x, 2885; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2886; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2887; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2888; EG-NEXT: LSHR T31.Y, T21.X, literal.y, 2889; EG-NEXT: LSHR T33.W, T21.W, literal.y, 2890; EG-NEXT: AND_INT * T31.X, T21.X, literal.z, 2891; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2892; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2893; EG-NEXT: AND_INT T33.Z, T21.W, literal.x, 2894; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2895; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2896; EG-NEXT: LSHR T21.X, PV.W, literal.x, 2897; EG-NEXT: LSHR T33.Y, T21.Z, literal.y, 2898; EG-NEXT: AND_INT * T33.X, T21.Z, literal.z, 2899; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2900; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2901; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2902; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2903; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2904; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2905; 2906; GFX12-LABEL: constant_zextload_v32i16_to_v32i32: 2907; GFX12: ; %bb.0: 2908; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24 2909; GFX12-NEXT: s_wait_kmcnt 0x0 2910; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 2911; GFX12-NEXT: s_wait_kmcnt 0x0 2912; GFX12-NEXT: s_lshr_b32 s33, s15, 16 2913; GFX12-NEXT: s_and_b32 s15, s15, 0xffff 2914; GFX12-NEXT: s_and_b32 s34, s14, 0xffff 2915; GFX12-NEXT: s_lshr_b32 s14, s14, 16 2916; GFX12-NEXT: s_lshr_b32 s30, s13, 16 2917; GFX12-NEXT: s_and_b32 s13, s13, 0xffff 2918; GFX12-NEXT: s_lshr_b32 s31, s12, 16 2919; GFX12-NEXT: s_and_b32 s12, s12, 0xffff 2920; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s14 2921; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s33 2922; GFX12-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s31 2923; GFX12-NEXT: s_lshr_b32 s29, s10, 16 2924; GFX12-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s30 2925; GFX12-NEXT: 
v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s29 2926; GFX12-NEXT: s_lshr_b32 s28, s11, 16 2927; GFX12-NEXT: s_and_b32 s11, s11, 0xffff 2928; GFX12-NEXT: s_and_b32 s10, s10, 0xffff 2929; GFX12-NEXT: s_lshr_b32 s26, s9, 16 2930; GFX12-NEXT: s_and_b32 s9, s9, 0xffff 2931; GFX12-NEXT: s_lshr_b32 s27, s8, 16 2932; GFX12-NEXT: s_and_b32 s8, s8, 0xffff 2933; GFX12-NEXT: s_lshr_b32 s24, s7, 16 2934; GFX12-NEXT: s_and_b32 s7, s7, 0xffff 2935; GFX12-NEXT: s_lshr_b32 s25, s6, 16 2936; GFX12-NEXT: s_and_b32 s6, s6, 0xffff 2937; GFX12-NEXT: s_wait_alu 0xfffe 2938; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s28 2939; GFX12-NEXT: v_mov_b32_e32 v10, s11 2940; GFX12-NEXT: s_lshr_b32 s22, s5, 16 2941; GFX12-NEXT: s_and_b32 s5, s5, 0xffff 2942; GFX12-NEXT: s_lshr_b32 s23, s4, 16 2943; GFX12-NEXT: s_and_b32 s4, s4, 0xffff 2944; GFX12-NEXT: s_clause 0x1 2945; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:112 2946; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:96 2947; GFX12-NEXT: v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v0, s8 2948; GFX12-NEXT: v_dual_mov_b32 v3, s26 :: v_dual_mov_b32 v2, s9 2949; GFX12-NEXT: v_mov_b32_e32 v5, s25 2950; GFX12-NEXT: s_lshr_b32 s20, s3, 16 2951; GFX12-NEXT: s_and_b32 s3, s3, 0xffff 2952; GFX12-NEXT: s_lshr_b32 s21, s2, 16 2953; GFX12-NEXT: s_and_b32 s2, s2, 0xffff 2954; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s24 2955; GFX12-NEXT: v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v13, s23 2956; GFX12-NEXT: s_lshr_b32 s18, s1, 16 2957; GFX12-NEXT: s_and_b32 s1, s1, 0xffff 2958; GFX12-NEXT: s_lshr_b32 s19, s0, 16 2959; GFX12-NEXT: s_and_b32 s0, s0, 0xffff 2960; GFX12-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s22 2961; GFX12-NEXT: v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v17, s21 2962; GFX12-NEXT: v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s20 2963; GFX12-NEXT: v_dual_mov_b32 v18, s3 :: v_dual_mov_b32 v21, s19 2964; GFX12-NEXT: v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v23, s18 2965; GFX12-NEXT: 
v_mov_b32_e32 v22, s1 2966; GFX12-NEXT: s_clause 0x5 2967; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:80 2968; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:64 2969; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:48 2970; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:32 2971; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:16 2972; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] 2973; GFX12-NEXT: s_endpgm 2974 %load = load <32 x i16>, ptr addrspace(4) %in 2975 %ext = zext <32 x i16> %load to <32 x i32> 2976 store <32 x i32> %ext, ptr addrspace(1) %out 2977 ret void 2978} 2979 2980define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 2981; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32: 2982; GCN-NOHSA-SI: ; %bb.0: 2983; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9 2984; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2985; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2986; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2987; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s1, 16 2988; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s0, 16 2989; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s1 2990; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s0 2991; GCN-NOHSA-SI-NEXT: s_ashr_i32 s22, s3, 16 2992; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s2, 16 2993; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s3 2994; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s2 2995; GCN-NOHSA-SI-NEXT: s_ashr_i32 s26, s5, 16 2996; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s4, 16 2997; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 2998; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 2999; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s7, 16 3000; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s6, 16 3001; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 3002; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 3003; GCN-NOHSA-SI-NEXT: s_ashr_i32 s30, s9, 16 3004; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s8, 16 3005; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 3006; 
GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 3007; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s11, 16 3008; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s10, 16 3009; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 3010; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 3011; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s13, 16 3012; GCN-NOHSA-SI-NEXT: s_ashr_i32 s36, s12, 16 3013; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 3014; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 3015; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s15, 16 3016; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s14, 16 3017; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 3018; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 3019; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3020; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3021; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 3022; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 3023; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 3024; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 3025; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 3026; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 3027; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3028; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3029; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 3030; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36 3031; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 3032; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35 3033; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3034; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3035; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 3036; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 3037; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 3038; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 3039; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3040; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3041; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 3042; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 3043; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 3044; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 3045; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 
offset:64 3046; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3047; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3048; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 3049; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3050; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 3051; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3052; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3053; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3054; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 3055; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3056; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 3057; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3058; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3059; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s25 3060; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 3061; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 3062; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 3063; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3064; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3065; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s21 3066; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 3067; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 3068; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 3069; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3070; GCN-NOHSA-SI-NEXT: s_endpgm 3071; 3072; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32: 3073; GCN-HSA: ; %bb.0: 3074; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 3075; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3076; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3077; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3078; GCN-HSA-NEXT: s_ashr_i32 s18, s1, 16 3079; GCN-HSA-NEXT: s_ashr_i32 s19, s0, 16 3080; GCN-HSA-NEXT: s_ashr_i32 s22, s3, 16 3081; GCN-HSA-NEXT: s_ashr_i32 s23, s2, 16 3082; GCN-HSA-NEXT: s_ashr_i32 s24, s5, 16 3083; GCN-HSA-NEXT: s_ashr_i32 s25, s4, 16 3084; GCN-HSA-NEXT: s_ashr_i32 s26, s7, 16 3085; GCN-HSA-NEXT: s_ashr_i32 s27, s6, 16 3086; GCN-HSA-NEXT: s_ashr_i32 s28, s9, 16 3087; GCN-HSA-NEXT: s_ashr_i32 s29, s8, 16 3088; 
GCN-HSA-NEXT: s_ashr_i32 s30, s11, 16 3089; GCN-HSA-NEXT: s_ashr_i32 s31, s10, 16 3090; GCN-HSA-NEXT: s_ashr_i32 s33, s13, 16 3091; GCN-HSA-NEXT: s_ashr_i32 s34, s12, 16 3092; GCN-HSA-NEXT: s_ashr_i32 s35, s15, 16 3093; GCN-HSA-NEXT: s_ashr_i32 s36, s14, 16 3094; GCN-HSA-NEXT: s_sext_i32_i16 s21, s0 3095; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x70 3096; GCN-HSA-NEXT: s_sext_i32_i16 s20, s1 3097; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3098; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 3099; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 3100; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 3101; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3102; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 3103; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12 3104; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15 3105; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14 3106; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 3107; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 3108; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13 3109; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 3110; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36 3111; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 3112; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35 3113; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12 3114; GCN-HSA-NEXT: v_mov_b32_e32 v5, s34 3115; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3116; GCN-HSA-NEXT: v_mov_b32_e32 v6, s13 3117; GCN-HSA-NEXT: v_mov_b32_e32 v7, s33 3118; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 3119; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 3120; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 3121; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3122; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 3123; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3124; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 3125; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 3126; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 3127; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 3128; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 3129; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3130; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3131; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3132; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 3133; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 3134; 
GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3135; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 3136; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 3137; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 3138; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 3139; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 3140; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3141; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3142; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3143; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 3144; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 3145; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3146; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 3147; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 3148; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 3149; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 3150; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 3151; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3152; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3153; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3154; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 3155; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 3156; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3157; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 3158; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3159; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 3160; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 3161; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3162; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3163; GCN-HSA-NEXT: s_sext_i32_i16 s3, s3 3164; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 3165; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3166; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3167; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3168; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 3169; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 3170; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 3171; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3172; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3173; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 3174; GCN-HSA-NEXT: v_mov_b32_e32 v0, s21 3175; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 3176; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20 3177; GCN-HSA-NEXT: v_mov_b32_e32 v3, s18 3178; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 3179; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3180; GCN-HSA-NEXT: s_endpgm 
3181; 3182; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32: 3183; GCN-NOHSA-VI: ; %bb.0: 3184; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24 3185; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3186; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3187; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3188; GCN-NOHSA-VI-NEXT: s_ashr_i32 s18, s1, 16 3189; GCN-NOHSA-VI-NEXT: s_ashr_i32 s19, s0, 16 3190; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s20, s1 3191; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s21, s0 3192; GCN-NOHSA-VI-NEXT: s_ashr_i32 s22, s3, 16 3193; GCN-NOHSA-VI-NEXT: s_ashr_i32 s23, s2, 16 3194; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s5, 16 3195; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s4, 16 3196; GCN-NOHSA-VI-NEXT: s_ashr_i32 s26, s7, 16 3197; GCN-NOHSA-VI-NEXT: s_ashr_i32 s27, s6, 16 3198; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s9, 16 3199; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s8, 16 3200; GCN-NOHSA-VI-NEXT: s_ashr_i32 s30, s11, 16 3201; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s10, 16 3202; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s13, 16 3203; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s12, 16 3204; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s15, 16 3205; GCN-NOHSA-VI-NEXT: s_ashr_i32 s1, s14, 16 3206; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 3207; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 0x70 3208; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 3209; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3210; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3211; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 3212; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 3213; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3214; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 0x60 3215; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 3216; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 3217; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3218; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3219; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3220; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 3221; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 3222; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, 
s0 3223; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 0x50 3224; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 3225; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 3226; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 3227; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 3228; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3229; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3230; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3231; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 3232; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 3233; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3234; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 64 3235; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 3236; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 3237; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 3238; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30 3239; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3240; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3241; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3242; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 3243; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 3244; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3245; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 48 3246; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 3247; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 3248; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 3249; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 3250; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3251; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3252; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3253; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 3254; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 3255; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3256; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 32 3257; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 3258; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 3259; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 3260; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26 3261; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3262; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3263; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3264; GCN-NOHSA-VI-NEXT: 
s_sext_i32_i16 s5, s5 3265; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 3266; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3267; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 16 3268; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 3269; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 3270; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 3271; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 3272; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 3273; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s3, s3 3274; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s2, s2 3275; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3276; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3277; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 3278; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 3279; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 3280; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22 3281; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3282; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3283; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s16 3284; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s21 3285; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 3286; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 3287; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18 3288; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s17 3289; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3290; GCN-NOHSA-VI-NEXT: s_endpgm 3291; 3292; EG-LABEL: constant_sextload_v32i16_to_v32i32: 3293; EG: ; %bb.0: 3294; EG-NEXT: ALU 8, @20, KC0[CB0:0-32], KC1[] 3295; EG-NEXT: TEX 3 @12 3296; EG-NEXT: ALU 73, @29, KC0[CB0:0-32], KC1[] 3297; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0 3298; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0 3299; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0 3300; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 3301; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0 3302; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 3303; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 3304; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 3305; EG-NEXT: CF_END 3306; EG-NEXT: Fetch 
clause starting at 12: 3307; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 3308; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1 3309; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 3310; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 3311; EG-NEXT: ALU clause starting at 20: 3312; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 3313; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3314; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3315; EG-NEXT: LSHR T20.X, PV.W, literal.x, 3316; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3317; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 3318; EG-NEXT: LSHR T21.X, PV.W, literal.x, 3319; EG-NEXT: MOV * T22.X, KC0[2].Z, 3320; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3321; EG-NEXT: ALU clause starting at 29: 3322; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3323; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3324; EG-NEXT: LSHR T26.X, PV.W, literal.x, 3325; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3326; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 3327; EG-NEXT: LSHR T27.X, PV.W, literal.x, 3328; EG-NEXT: LSHR T0.W, T22.W, literal.y, 3329; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3330; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3331; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 3332; EG-NEXT: LSHR T28.X, PS, literal.x, 3333; EG-NEXT: LSHR T0.Y, T22.Y, literal.y, 3334; EG-NEXT: BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212 3335; EG-NEXT: LSHR T1.W, T24.W, literal.y, 3336; EG-NEXT: LSHR * T2.W, T24.Y, literal.y, 3337; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3338; EG-NEXT: BFE_INT T29.X, T25.X, 0.0, literal.x, 3339; EG-NEXT: LSHR T1.Y, T23.W, literal.x, 3340; EG-NEXT: BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212 3341; EG-NEXT: LSHR T3.W, T23.Y, literal.x, 3342; EG-NEXT: LSHR * T4.W, T25.Y, literal.x, 3343; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3344; EG-NEXT: BFE_INT T30.X, T25.Z, 0.0, literal.x, 3345; EG-NEXT: LSHR T2.Y, T25.W, literal.x, 3346; EG-NEXT: BFE_INT T31.Z, T23.Y, 0.0, literal.x, 3347; EG-NEXT: BFE_INT 
T29.W, PS, 0.0, literal.x, 3348; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 3349; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3350; EG-NEXT: BFE_INT T31.X, T23.X, 0.0, literal.x, 3351; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 3352; EG-NEXT: BFE_INT T32.Z, T23.W, 0.0, literal.x, 3353; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 3354; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 3355; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3356; EG-NEXT: BFE_INT T32.X, T23.Z, 0.0, literal.x, 3357; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 3358; EG-NEXT: BFE_INT T25.Z, T24.Y, 0.0, literal.x, 3359; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, 3360; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 3361; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3362; EG-NEXT: BFE_INT T25.X, T24.X, 0.0, literal.x, 3363; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 3364; EG-NEXT: BFE_INT T33.Z, T24.W, 0.0, literal.x, 3365; EG-NEXT: BFE_INT T32.W, T1.Y, 0.0, literal.x, 3366; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 3367; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3368; EG-NEXT: BFE_INT T33.X, T24.Z, 0.0, literal.x, 3369; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 3370; EG-NEXT: BFE_INT T23.Z, T22.Y, 0.0, literal.x, 3371; EG-NEXT: BFE_INT T25.W, T2.W, 0.0, literal.x, 3372; EG-NEXT: LSHR * T2.W, T24.X, literal.x, 3373; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3374; EG-NEXT: BFE_INT T23.X, T22.X, 0.0, literal.x, 3375; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 3376; EG-NEXT: BFE_INT T34.Z, T22.W, 0.0, literal.x, 3377; EG-NEXT: BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 3378; EG-NEXT: LSHR * T1.W, T24.Z, literal.x, 3379; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3380; EG-NEXT: BFE_INT T34.X, T22.Z, 0.0, literal.x, 3381; EG-NEXT: BFE_INT T33.Y, PS, 0.0, literal.x, 3382; EG-NEXT: LSHR T0.Z, T22.X, literal.x, 3383; EG-NEXT: BFE_INT T23.W, T0.Y, 0.0, literal.x, 3384; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3385; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43) 3386; EG-NEXT: LSHR T22.X, PS, literal.x, 
3387; EG-NEXT: BFE_INT T23.Y, PV.Z, 0.0, literal.y, 3388; EG-NEXT: LSHR T0.Z, T22.Z, literal.y, 3389; EG-NEXT: BFE_INT T34.W, T0.W, 0.0, literal.y, 3390; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3391; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3392; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 3393; EG-NEXT: LSHR T24.X, PS, literal.x, 3394; EG-NEXT: BFE_INT * T34.Y, PV.Z, 0.0, literal.y, 3395; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3396; 3397; GFX12-LABEL: constant_sextload_v32i16_to_v32i32: 3398; GFX12: ; %bb.0: 3399; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24 3400; GFX12-NEXT: s_wait_kmcnt 0x0 3401; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 3402; GFX12-NEXT: s_wait_kmcnt 0x0 3403; GFX12-NEXT: s_ashr_i32 s33, s15, 16 3404; GFX12-NEXT: s_ashr_i32 s34, s14, 16 3405; GFX12-NEXT: s_sext_i32_i16 s14, s14 3406; GFX12-NEXT: s_sext_i32_i16 s15, s15 3407; GFX12-NEXT: s_ashr_i32 s30, s13, 16 3408; GFX12-NEXT: s_ashr_i32 s31, s12, 16 3409; GFX12-NEXT: s_sext_i32_i16 s13, s13 3410; GFX12-NEXT: s_sext_i32_i16 s12, s12 3411; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s34 3412; GFX12-NEXT: v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s33 3413; GFX12-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s31 3414; GFX12-NEXT: s_ashr_i32 s29, s10, 16 3415; GFX12-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s30 3416; GFX12-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s29 3417; GFX12-NEXT: s_ashr_i32 s28, s11, 16 3418; GFX12-NEXT: s_sext_i32_i16 s11, s11 3419; GFX12-NEXT: s_sext_i32_i16 s10, s10 3420; GFX12-NEXT: s_ashr_i32 s26, s9, 16 3421; GFX12-NEXT: s_ashr_i32 s27, s8, 16 3422; GFX12-NEXT: s_sext_i32_i16 s9, s9 3423; GFX12-NEXT: s_sext_i32_i16 s8, s8 3424; GFX12-NEXT: s_ashr_i32 s24, s7, 16 3425; GFX12-NEXT: s_ashr_i32 s25, s6, 16 3426; GFX12-NEXT: s_sext_i32_i16 s7, s7 3427; GFX12-NEXT: s_sext_i32_i16 s6, s6 3428; GFX12-NEXT: s_wait_alu 0xfffe 3429; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s28 3430; GFX12-NEXT: 
v_mov_b32_e32 v10, s11 3431; GFX12-NEXT: s_ashr_i32 s22, s5, 16 3432; GFX12-NEXT: s_ashr_i32 s23, s4, 16 3433; GFX12-NEXT: s_sext_i32_i16 s5, s5 3434; GFX12-NEXT: s_sext_i32_i16 s4, s4 3435; GFX12-NEXT: s_clause 0x1 3436; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:112 3437; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:96 3438; GFX12-NEXT: v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v0, s8 3439; GFX12-NEXT: v_dual_mov_b32 v3, s26 :: v_dual_mov_b32 v2, s9 3440; GFX12-NEXT: v_mov_b32_e32 v5, s25 3441; GFX12-NEXT: s_ashr_i32 s20, s3, 16 3442; GFX12-NEXT: s_ashr_i32 s21, s2, 16 3443; GFX12-NEXT: s_sext_i32_i16 s3, s3 3444; GFX12-NEXT: s_sext_i32_i16 s2, s2 3445; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s24 3446; GFX12-NEXT: v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v13, s23 3447; GFX12-NEXT: s_ashr_i32 s18, s1, 16 3448; GFX12-NEXT: s_ashr_i32 s19, s0, 16 3449; GFX12-NEXT: s_sext_i32_i16 s1, s1 3450; GFX12-NEXT: s_sext_i32_i16 s0, s0 3451; GFX12-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s22 3452; GFX12-NEXT: v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v17, s21 3453; GFX12-NEXT: v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s20 3454; GFX12-NEXT: v_dual_mov_b32 v18, s3 :: v_dual_mov_b32 v21, s19 3455; GFX12-NEXT: v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v23, s18 3456; GFX12-NEXT: v_mov_b32_e32 v22, s1 3457; GFX12-NEXT: s_clause 0x5 3458; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:80 3459; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:64 3460; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:48 3461; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:32 3462; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:16 3463; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] 3464; GFX12-NEXT: s_endpgm 3465 %load = load <32 x i16>, ptr addrspace(4) %in 3466 %ext = sext <32 x i16> %load to <32 x i32> 3467 store <32 x i32> %ext, ptr addrspace(1) %out 3468 ret void 3469} 3470 
3471define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 3472; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32: 3473; GCN-NOHSA-SI: ; %bb.0: 3474; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x9 3475; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3476; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x0 3477; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x10 3478; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3479; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s1, 16 3480; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s0, 16 3481; GCN-NOHSA-SI-NEXT: s_lshr_b32 s35, s3, 16 3482; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s2, 16 3483; GCN-NOHSA-SI-NEXT: s_lshr_b32 s41, s5, 16 3484; GCN-NOHSA-SI-NEXT: s_lshr_b32 s42, s4, 16 3485; GCN-NOHSA-SI-NEXT: s_lshr_b32 s45, s7, 16 3486; GCN-NOHSA-SI-NEXT: s_lshr_b32 s46, s6, 16 3487; GCN-NOHSA-SI-NEXT: s_lshr_b32 s47, s9, 16 3488; GCN-NOHSA-SI-NEXT: s_lshr_b32 s48, s8, 16 3489; GCN-NOHSA-SI-NEXT: s_lshr_b32 s49, s11, 16 3490; GCN-NOHSA-SI-NEXT: s_lshr_b32 s50, s10, 16 3491; GCN-NOHSA-SI-NEXT: s_lshr_b32 s51, s13, 16 3492; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s12, 16 3493; GCN-NOHSA-SI-NEXT: s_lshr_b32 s53, s15, 16 3494; GCN-NOHSA-SI-NEXT: s_lshr_b32 s54, s14, 16 3495; GCN-NOHSA-SI-NEXT: s_and_b32 s39, s1, 0xffff 3496; GCN-NOHSA-SI-NEXT: s_and_b32 s40, s0, 0xffff 3497; GCN-NOHSA-SI-NEXT: s_and_b32 s43, s3, 0xffff 3498; GCN-NOHSA-SI-NEXT: s_and_b32 s44, s2, 0xffff 3499; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 3500; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 3501; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 3502; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 3503; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 3504; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 3505; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 3506; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 3507; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 0xffff 3508; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, 0xffff 3509; 
GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, 0xffff 3510; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, 0xffff 3511; GCN-NOHSA-SI-NEXT: s_lshr_b32 s55, s17, 16 3512; GCN-NOHSA-SI-NEXT: s_lshr_b32 s56, s16, 16 3513; GCN-NOHSA-SI-NEXT: s_lshr_b32 s57, s19, 16 3514; GCN-NOHSA-SI-NEXT: s_lshr_b32 s58, s18, 16 3515; GCN-NOHSA-SI-NEXT: s_lshr_b32 s59, s21, 16 3516; GCN-NOHSA-SI-NEXT: s_lshr_b32 s60, s20, 16 3517; GCN-NOHSA-SI-NEXT: s_lshr_b32 s61, s23, 16 3518; GCN-NOHSA-SI-NEXT: s_lshr_b32 s62, s22, 16 3519; GCN-NOHSA-SI-NEXT: s_lshr_b32 s63, s25, 16 3520; GCN-NOHSA-SI-NEXT: s_lshr_b32 s64, s24, 16 3521; GCN-NOHSA-SI-NEXT: s_lshr_b32 s65, s27, 16 3522; GCN-NOHSA-SI-NEXT: s_lshr_b32 s66, s26, 16 3523; GCN-NOHSA-SI-NEXT: s_lshr_b32 s67, s29, 16 3524; GCN-NOHSA-SI-NEXT: s_lshr_b32 s68, s28, 16 3525; GCN-NOHSA-SI-NEXT: s_lshr_b32 s69, s31, 16 3526; GCN-NOHSA-SI-NEXT: s_lshr_b32 s70, s30, 16 3527; GCN-NOHSA-SI-NEXT: s_and_b32 s17, s17, 0xffff 3528; GCN-NOHSA-SI-NEXT: s_and_b32 s16, s16, 0xffff 3529; GCN-NOHSA-SI-NEXT: s_and_b32 s19, s19, 0xffff 3530; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s18, 0xffff 3531; GCN-NOHSA-SI-NEXT: s_and_b32 s20, s20, 0xffff 3532; GCN-NOHSA-SI-NEXT: s_and_b32 s23, s23, 0xffff 3533; GCN-NOHSA-SI-NEXT: s_and_b32 s22, s22, 0xffff 3534; GCN-NOHSA-SI-NEXT: s_and_b32 s25, s25, 0xffff 3535; GCN-NOHSA-SI-NEXT: s_and_b32 s24, s24, 0xffff 3536; GCN-NOHSA-SI-NEXT: s_and_b32 s27, s27, 0xffff 3537; GCN-NOHSA-SI-NEXT: s_and_b32 s26, s26, 0xffff 3538; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s29, 0xffff 3539; GCN-NOHSA-SI-NEXT: s_and_b32 s28, s28, 0xffff 3540; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s31, 0xffff 3541; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s30, 0xffff 3542; GCN-NOHSA-SI-NEXT: s_and_b32 s21, s21, 0xffff 3543; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s36 3544; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s37 3545; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3546; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3547; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 3548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s70 3549; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 3550; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 3551; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s28 3552; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s68 3553; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s29 3554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s67 3555; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s26 3556; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s66 3557; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s27 3558; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s65 3559; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s24 3560; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s64 3561; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s25 3562; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s63 3563; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s22 3564; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s62 3565; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s23 3566; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3567; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3568; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20 3569; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s61 3570; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s60 3571; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21 3572; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s59 3573; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 3574; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 3575; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 3576; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3577; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 3578; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3579; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 3580; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s58 3581; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 3582; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s57 3583; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3584; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3585; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 3586; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v1, s56 3587; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 3588; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55 3589; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3590; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3591; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 3592; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s54 3593; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 3594; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s53 3595; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3596; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3597; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 3598; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s52 3599; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 3600; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51 3601; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3602; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3603; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 3604; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50 3605; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 3606; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s49 3607; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3608; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3609; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 3610; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s48 3611; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 3612; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s47 3613; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3614; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3615; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3616; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s46 3617; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3618; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s45 3619; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3620; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3621; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3622; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s42 3623; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3624; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s41 3625; 
GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3626; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3627; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s44 3628; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 3629; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s43 3630; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35 3631; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3632; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3633; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s40 3634; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 3635; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s39 3636; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 3637; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3638; GCN-NOHSA-SI-NEXT: s_endpgm 3639; 3640; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32: 3641; GCN-HSA: ; %bb.0: 3642; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 3643; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3644; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3645; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3646; GCN-HSA-NEXT: s_lshr_b32 s20, s1, 16 3647; GCN-HSA-NEXT: s_lshr_b32 s21, s0, 16 3648; GCN-HSA-NEXT: s_lshr_b32 s22, s3, 16 3649; GCN-HSA-NEXT: s_lshr_b32 s23, s2, 16 3650; GCN-HSA-NEXT: s_lshr_b32 s24, s5, 16 3651; GCN-HSA-NEXT: s_lshr_b32 s26, s4, 16 3652; GCN-HSA-NEXT: s_lshr_b32 s28, s7, 16 3653; GCN-HSA-NEXT: s_lshr_b32 s30, s6, 16 3654; GCN-HSA-NEXT: s_lshr_b32 s33, s9, 16 3655; GCN-HSA-NEXT: s_lshr_b32 s35, s8, 16 3656; GCN-HSA-NEXT: s_lshr_b32 s37, s11, 16 3657; GCN-HSA-NEXT: s_lshr_b32 s39, s10, 16 3658; GCN-HSA-NEXT: s_lshr_b32 s42, s13, 16 3659; GCN-HSA-NEXT: s_lshr_b32 s44, s12, 16 3660; GCN-HSA-NEXT: s_lshr_b32 s45, s15, 16 3661; GCN-HSA-NEXT: s_lshr_b32 s46, s14, 16 3662; GCN-HSA-NEXT: s_and_b32 s25, s1, 0xffff 3663; GCN-HSA-NEXT: s_and_b32 s27, s0, 0xffff 3664; GCN-HSA-NEXT: s_and_b32 s29, s3, 0xffff 3665; GCN-HSA-NEXT: s_and_b32 s31, s2, 0xffff 3666; GCN-HSA-NEXT: s_and_b32 s34, s5, 0xffff 3667; GCN-HSA-NEXT: s_and_b32 s36, s4, 0xffff 3668; GCN-HSA-NEXT: 
s_and_b32 s38, s7, 0xffff 3669; GCN-HSA-NEXT: s_and_b32 s40, s6, 0xffff 3670; GCN-HSA-NEXT: s_and_b32 s41, s9, 0xffff 3671; GCN-HSA-NEXT: s_and_b32 s43, s8, 0xffff 3672; GCN-HSA-NEXT: s_and_b32 s47, s11, 0xffff 3673; GCN-HSA-NEXT: s_and_b32 s48, s10, 0xffff 3674; GCN-HSA-NEXT: s_and_b32 s49, s13, 0xffff 3675; GCN-HSA-NEXT: s_and_b32 s51, s12, 0xffff 3676; GCN-HSA-NEXT: s_and_b32 s50, s15, 0xffff 3677; GCN-HSA-NEXT: s_and_b32 s52, s14, 0xffff 3678; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x10 3679; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3680; GCN-HSA-NEXT: s_lshr_b32 s18, s1, 16 3681; GCN-HSA-NEXT: s_lshr_b32 s19, s0, 16 3682; GCN-HSA-NEXT: s_lshr_b32 s53, s3, 16 3683; GCN-HSA-NEXT: s_lshr_b32 s54, s2, 16 3684; GCN-HSA-NEXT: s_lshr_b32 s55, s5, 16 3685; GCN-HSA-NEXT: s_lshr_b32 s56, s4, 16 3686; GCN-HSA-NEXT: s_lshr_b32 s57, s7, 16 3687; GCN-HSA-NEXT: s_lshr_b32 s58, s6, 16 3688; GCN-HSA-NEXT: s_lshr_b32 s59, s9, 16 3689; GCN-HSA-NEXT: s_lshr_b32 s60, s8, 16 3690; GCN-HSA-NEXT: s_lshr_b32 s61, s11, 16 3691; GCN-HSA-NEXT: s_lshr_b32 s62, s10, 16 3692; GCN-HSA-NEXT: s_lshr_b32 s63, s13, 16 3693; GCN-HSA-NEXT: s_lshr_b32 s64, s12, 16 3694; GCN-HSA-NEXT: s_lshr_b32 s65, s15, 16 3695; GCN-HSA-NEXT: s_lshr_b32 s66, s14, 16 3696; GCN-HSA-NEXT: s_and_b32 s67, s1, 0xffff 3697; GCN-HSA-NEXT: s_and_b32 s68, s0, 0xffff 3698; GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff 3699; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 3700; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 3701; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 3702; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 3703; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 3704; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 3705; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 3706; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 3707; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 3708; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff 3709; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff 3710; GCN-HSA-NEXT: s_and_b32 s15, s15, 0xffff 3711; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff 3712; GCN-HSA-NEXT: 
s_add_u32 s0, s16, 0xf0 3713; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3714; GCN-HSA-NEXT: v_mov_b32_e32 v20, s1 3715; GCN-HSA-NEXT: v_mov_b32_e32 v19, s0 3716; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xe0 3717; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3718; GCN-HSA-NEXT: v_mov_b32_e32 v22, s1 3719; GCN-HSA-NEXT: v_mov_b32_e32 v21, s0 3720; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xd0 3721; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3722; GCN-HSA-NEXT: v_mov_b32_e32 v24, s1 3723; GCN-HSA-NEXT: v_mov_b32_e32 v23, s0 3724; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xc0 3725; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3726; GCN-HSA-NEXT: v_mov_b32_e32 v26, s1 3727; GCN-HSA-NEXT: v_mov_b32_e32 v25, s0 3728; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xb0 3729; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3730; GCN-HSA-NEXT: v_mov_b32_e32 v28, s1 3731; GCN-HSA-NEXT: v_mov_b32_e32 v27, s0 3732; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xa0 3733; GCN-HSA-NEXT: v_mov_b32_e32 v8, s10 3734; GCN-HSA-NEXT: v_mov_b32_e32 v9, s62 3735; GCN-HSA-NEXT: v_mov_b32_e32 v10, s11 3736; GCN-HSA-NEXT: v_mov_b32_e32 v11, s61 3737; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3738; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[8:11] 3739; GCN-HSA-NEXT: v_mov_b32_e32 v12, s8 3740; GCN-HSA-NEXT: v_mov_b32_e32 v10, s1 3741; GCN-HSA-NEXT: v_mov_b32_e32 v9, s0 3742; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x90 3743; GCN-HSA-NEXT: v_mov_b32_e32 v13, s60 3744; GCN-HSA-NEXT: v_mov_b32_e32 v14, s9 3745; GCN-HSA-NEXT: v_mov_b32_e32 v15, s59 3746; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3747; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[12:15] 3748; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 3749; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 3750; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 3751; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x80 3752; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3753; GCN-HSA-NEXT: v_mov_b32_e32 v15, s1 3754; GCN-HSA-NEXT: v_mov_b32_e32 v1, s66 3755; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 3756; GCN-HSA-NEXT: v_mov_b32_e32 v3, s65 3757; GCN-HSA-NEXT: v_mov_b32_e32 v14, s0 3758; GCN-HSA-NEXT: s_add_u32 s0, 
s16, 0x70 3759; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 3760; GCN-HSA-NEXT: v_mov_b32_e32 v17, s58 3761; GCN-HSA-NEXT: v_mov_b32_e32 v18, s7 3762; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[0:3] 3763; GCN-HSA-NEXT: v_mov_b32_e32 v19, s57 3764; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3765; GCN-HSA-NEXT: flat_store_dwordx4 v[27:28], v[16:19] 3766; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12 3767; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 3768; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 3769; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 3770; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3771; GCN-HSA-NEXT: v_mov_b32_e32 v19, s1 3772; GCN-HSA-NEXT: v_mov_b32_e32 v5, s64 3773; GCN-HSA-NEXT: v_mov_b32_e32 v6, s13 3774; GCN-HSA-NEXT: v_mov_b32_e32 v7, s63 3775; GCN-HSA-NEXT: v_mov_b32_e32 v18, s0 3776; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 3777; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3778; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 3779; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[4:7] 3780; GCN-HSA-NEXT: v_mov_b32_e32 v1, s56 3781; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 3782; GCN-HSA-NEXT: v_mov_b32_e32 v3, s55 3783; GCN-HSA-NEXT: v_mov_b32_e32 v21, s54 3784; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 3785; GCN-HSA-NEXT: v_mov_b32_e32 v4, s68 3786; GCN-HSA-NEXT: v_mov_b32_e32 v23, s53 3787; GCN-HSA-NEXT: v_mov_b32_e32 v5, s19 3788; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3789; GCN-HSA-NEXT: v_mov_b32_e32 v6, s67 3790; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52 3791; GCN-HSA-NEXT: v_mov_b32_e32 v7, s18 3792; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[0:3] 3793; GCN-HSA-NEXT: v_mov_b32_e32 v9, s46 3794; GCN-HSA-NEXT: v_mov_b32_e32 v0, s51 3795; GCN-HSA-NEXT: v_mov_b32_e32 v10, s50 3796; GCN-HSA-NEXT: v_mov_b32_e32 v11, s45 3797; GCN-HSA-NEXT: v_mov_b32_e32 v1, s44 3798; GCN-HSA-NEXT: v_mov_b32_e32 v2, s49 3799; GCN-HSA-NEXT: v_mov_b32_e32 v3, s42 3800; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[20:23] 3801; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 3802; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3803; GCN-HSA-NEXT: 
flat_store_dwordx4 v[18:19], v[0:3] 3804; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3805; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3806; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 3807; GCN-HSA-NEXT: v_mov_b32_e32 v0, s48 3808; GCN-HSA-NEXT: v_mov_b32_e32 v1, s39 3809; GCN-HSA-NEXT: v_mov_b32_e32 v2, s47 3810; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37 3811; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3812; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3813; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3814; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3815; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 3816; GCN-HSA-NEXT: v_mov_b32_e32 v0, s43 3817; GCN-HSA-NEXT: v_mov_b32_e32 v1, s35 3818; GCN-HSA-NEXT: v_mov_b32_e32 v2, s41 3819; GCN-HSA-NEXT: v_mov_b32_e32 v3, s33 3820; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3821; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3822; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3823; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3824; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 3825; GCN-HSA-NEXT: v_mov_b32_e32 v0, s40 3826; GCN-HSA-NEXT: v_mov_b32_e32 v1, s30 3827; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38 3828; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 3829; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3830; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3831; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3832; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3833; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 3834; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 3835; GCN-HSA-NEXT: v_mov_b32_e32 v1, s26 3836; GCN-HSA-NEXT: v_mov_b32_e32 v2, s34 3837; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3838; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 3839; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3840; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3841; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31 3842; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 3843; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29 3844; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 3845; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3846; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3847; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 3848; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27 3849; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s21 3850; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25 3851; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 3852; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 3853; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3854; GCN-HSA-NEXT: s_endpgm 3855; 3856; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32: 3857; GCN-NOHSA-VI: ; %bb.0: 3858; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 3859; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3860; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x0 3861; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x40 3862; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3863; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s17, 16 3864; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s16, 16 3865; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s19, 16 3866; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s18, 16 3867; GCN-NOHSA-VI-NEXT: s_lshr_b32 s41, s21, 16 3868; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s20, 16 3869; GCN-NOHSA-VI-NEXT: s_lshr_b32 s43, s23, 16 3870; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s22, 16 3871; GCN-NOHSA-VI-NEXT: s_lshr_b32 s45, s25, 16 3872; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s24, 16 3873; GCN-NOHSA-VI-NEXT: s_lshr_b32 s47, s27, 16 3874; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s26, 16 3875; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s29, 16 3876; GCN-NOHSA-VI-NEXT: s_lshr_b32 s39, s28, 16 3877; GCN-NOHSA-VI-NEXT: s_lshr_b32 s49, s31, 16 3878; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s30, 16 3879; GCN-NOHSA-VI-NEXT: s_lshr_b32 s51, s1, 16 3880; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s0, 16 3881; GCN-NOHSA-VI-NEXT: s_lshr_b32 s53, s3, 16 3882; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s2, 16 3883; GCN-NOHSA-VI-NEXT: s_lshr_b32 s55, s5, 16 3884; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s4, 16 3885; GCN-NOHSA-VI-NEXT: s_lshr_b32 s57, s7, 16 3886; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s6, 16 3887; GCN-NOHSA-VI-NEXT: s_lshr_b32 s59, s9, 16 3888; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s17, 0xffff 3889; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s16, 0xffff 3890; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s19, 0xffff 3891; 
GCN-NOHSA-VI-NEXT: s_and_b32 s18, s18, 0xffff 3892; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s21, 0xffff 3893; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s20, 0xffff 3894; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s23, 0xffff 3895; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s22, 0xffff 3896; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s25, 0xffff 3897; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s24, 0xffff 3898; GCN-NOHSA-VI-NEXT: s_and_b32 s27, s27, 0xffff 3899; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s26, 0xffff 3900; GCN-NOHSA-VI-NEXT: s_and_b32 s29, s29, 0xffff 3901; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s28, 0xffff 3902; GCN-NOHSA-VI-NEXT: s_and_b32 s31, s31, 0xffff 3903; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s30, 0xffff 3904; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s8, 16 3905; GCN-NOHSA-VI-NEXT: s_lshr_b32 s61, s11, 16 3906; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s10, 16 3907; GCN-NOHSA-VI-NEXT: s_lshr_b32 s63, s13, 16 3908; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s12, 16 3909; GCN-NOHSA-VI-NEXT: s_lshr_b32 s65, s15, 16 3910; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s14, 16 3911; GCN-NOHSA-VI-NEXT: s_and_b32 s67, s1, 0xffff 3912; GCN-NOHSA-VI-NEXT: s_and_b32 s68, s0, 0xffff 3913; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 3914; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 3915; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 3916; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 3917; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 3918; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 3919; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 3920; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 3921; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 3922; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 3923; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, 0xffff 3924; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, 0xffff 3925; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s15, 0xffff 3926; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s14, 0xffff 3927; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 3928; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xf0 3929; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 3930; GCN-NOHSA-VI-NEXT: 
s_addc_u32 s1, s37, 0 3931; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3932; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3933; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xe0 3934; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66 3935; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65 3936; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3937; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3938; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3939; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3940; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xd0 3941; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 3942; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64 3943; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 3944; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 3945; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3946; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3947; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3948; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3949; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xc0 3950; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 3951; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s62 3952; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 3953; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61 3954; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3955; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3956; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3957; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3958; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xb0 3959; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 3960; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s60 3961; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 3962; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 3963; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3964; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3965; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3966; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3967; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xa0 3968; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 3969; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s58 3970; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 3971; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 3972; 
GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3973; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3974; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3975; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3976; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x90 3977; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 3978; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56 3979; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 3980; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55 3981; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3982; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3983; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3984; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3985; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x80 3986; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 3987; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s54 3988; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 3989; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 3990; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 3991; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 3993; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 3994; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x70 3995; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s68 3996; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52 3997; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s67 3998; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 3999; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4000; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4002; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4003; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x60 4004; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 4005; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s50 4006; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 4007; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49 4008; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4009; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4010; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4011; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4012; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x50 4013; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 4014; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39 4015; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 4016; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s38 4017; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4018; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4019; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4020; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4021; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 64 4022; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 4023; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s48 4024; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 4025; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 4026; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4027; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4028; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4029; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4030; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 48 4031; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 4032; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46 4033; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 4034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 4035; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4036; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4037; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4038; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4039; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 32 4040; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 4041; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s44 4042; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 4043; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43 4044; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4045; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4046; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4047; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4048; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 16 4049; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 4050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42 4051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 4052; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 4053; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4054; GCN-NOHSA-VI-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 4055; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4056; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 4057; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40 4058; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 4059; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 4060; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4061; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4062; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s36 4063; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 4064; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 4065; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 4066; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 4067; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s37 4068; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4069; GCN-NOHSA-VI-NEXT: s_endpgm 4070; 4071; EG-LABEL: constant_zextload_v64i16_to_v64i32: 4072; EG: ; %bb.0: 4073; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 4074; EG-NEXT: TEX 3 @22 4075; EG-NEXT: ALU 55, @39, KC0[CB0:0-32], KC1[] 4076; EG-NEXT: TEX 3 @30 4077; EG-NEXT: ALU 87, @95, KC0[CB0:0-32], KC1[] 4078; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0 4079; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0 4080; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0 4081; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0 4082; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0 4083; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0 4084; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0 4085; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0 4086; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0 4087; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T39.X, 0 4088; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0 4089; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T40.X, 0 4090; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0 4091; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T41.X, 0 4092; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0 4093; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T35.XYZW, T38.X, 1 4094; EG-NEXT: CF_END 4095; EG-NEXT: Fetch clause starting at 22: 4096; EG-NEXT: VTX_READ_128 T38.XYZW, T37.X, 0, #1 4097; EG-NEXT: VTX_READ_128 T39.XYZW, T37.X, 48, #1 4098; EG-NEXT: VTX_READ_128 T40.XYZW, T37.X, 32, #1 4099; EG-NEXT: VTX_READ_128 T41.XYZW, T37.X, 16, #1 4100; EG-NEXT: Fetch clause starting at 30: 4101; EG-NEXT: VTX_READ_128 T49.XYZW, T37.X, 112, #1 4102; EG-NEXT: VTX_READ_128 T50.XYZW, T37.X, 96, #1 4103; EG-NEXT: VTX_READ_128 T51.XYZW, T37.X, 80, #1 4104; EG-NEXT: VTX_READ_128 T52.XYZW, T37.X, 64, #1 4105; EG-NEXT: ALU clause starting at 38: 4106; EG-NEXT: MOV * T37.X, KC0[2].Z, 4107; EG-NEXT: ALU clause starting at 39: 4108; EG-NEXT: LSHR * T35.W, T38.Y, literal.x, 4109; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4110; EG-NEXT: AND_INT * T35.Z, T38.Y, literal.x, 4111; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4112; EG-NEXT: LSHR T35.Y, T38.X, literal.x, 4113; EG-NEXT: LSHR * T36.W, T38.W, literal.x, 4114; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4115; EG-NEXT: AND_INT T35.X, T38.X, literal.x, 4116; EG-NEXT: AND_INT T36.Z, T38.W, literal.x, 4117; EG-NEXT: LSHR * T38.X, KC0[2].Y, literal.y, 4118; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 4119; EG-NEXT: LSHR T36.Y, T38.Z, literal.x, 4120; EG-NEXT: LSHR * T42.W, T41.Y, literal.x, 4121; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4122; EG-NEXT: AND_INT T36.X, T38.Z, literal.x, 4123; EG-NEXT: AND_INT T42.Z, T41.Y, literal.x, 4124; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4125; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4126; EG-NEXT: LSHR T43.X, PV.W, literal.x, 4127; EG-NEXT: LSHR T42.Y, T41.X, literal.y, 4128; EG-NEXT: LSHR T44.W, T41.W, literal.y, 4129; EG-NEXT: AND_INT * T42.X, T41.X, literal.z, 4130; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4131; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4132; EG-NEXT: AND_INT T44.Z, T41.W, literal.x, 4133; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4134; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 4135; EG-NEXT: 
LSHR T41.X, PV.W, literal.x, 4136; EG-NEXT: LSHR T44.Y, T41.Z, literal.y, 4137; EG-NEXT: LSHR T45.W, T40.Y, literal.y, 4138; EG-NEXT: AND_INT * T44.X, T41.Z, literal.z, 4139; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4140; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4141; EG-NEXT: AND_INT T45.Z, T40.Y, literal.x, 4142; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4143; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 4144; EG-NEXT: LSHR T46.X, PV.W, literal.x, 4145; EG-NEXT: LSHR T45.Y, T40.X, literal.y, 4146; EG-NEXT: LSHR T47.W, T40.W, literal.y, 4147; EG-NEXT: AND_INT * T45.X, T40.X, literal.z, 4148; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4149; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4150; EG-NEXT: AND_INT T47.Z, T40.W, literal.x, 4151; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4152; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 4153; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4154; EG-NEXT: LSHR T47.Y, T40.Z, literal.y, 4155; EG-NEXT: AND_INT * T47.X, T40.Z, literal.z, 4156; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4157; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4158; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 4159; EG-NEXT: LSHR * T37.W, T39.Y, literal.y, 4160; EG-NEXT: 80(1.121039e-43), 16(2.242078e-44) 4161; EG-NEXT: LSHR T48.X, PV.W, literal.x, 4162; EG-NEXT: AND_INT * T37.Z, T39.Y, literal.y, 4163; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 4164; EG-NEXT: ALU clause starting at 95: 4165; EG-NEXT: LSHR T37.Y, T39.X, literal.x, 4166; EG-NEXT: LSHR * T53.W, T39.W, literal.x, 4167; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4168; EG-NEXT: AND_INT T37.X, T39.X, literal.x, 4169; EG-NEXT: AND_INT T53.Z, T39.W, literal.x, 4170; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4171; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4172; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4173; EG-NEXT: LSHR T53.Y, T39.Z, literal.y, 4174; EG-NEXT: LSHR T54.W, T52.Y, literal.y, 4175; EG-NEXT: AND_INT * T53.X, T39.Z, literal.z, 4176; EG-NEXT: 2(2.802597e-45), 
16(2.242078e-44) 4177; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4178; EG-NEXT: AND_INT T54.Z, T52.Y, literal.x, 4179; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4180; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4181; EG-NEXT: LSHR T55.X, PV.W, literal.x, 4182; EG-NEXT: LSHR T54.Y, T52.X, literal.y, 4183; EG-NEXT: LSHR T56.W, T52.W, literal.y, 4184; EG-NEXT: AND_INT * T54.X, T52.X, literal.z, 4185; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4186; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4187; EG-NEXT: AND_INT T56.Z, T52.W, literal.x, 4188; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4189; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4190; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4191; EG-NEXT: LSHR T56.Y, T52.Z, literal.y, 4192; EG-NEXT: LSHR T57.W, T51.Y, literal.y, 4193; EG-NEXT: AND_INT * T56.X, T52.Z, literal.z, 4194; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4195; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4196; EG-NEXT: AND_INT T57.Z, T51.Y, literal.x, 4197; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4198; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4199; EG-NEXT: LSHR T58.X, PV.W, literal.x, 4200; EG-NEXT: LSHR T57.Y, T51.X, literal.y, 4201; EG-NEXT: LSHR T59.W, T51.W, literal.y, 4202; EG-NEXT: AND_INT * T57.X, T51.X, literal.z, 4203; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4204; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4205; EG-NEXT: AND_INT T59.Z, T51.W, literal.x, 4206; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4207; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4208; EG-NEXT: LSHR T51.X, PV.W, literal.x, 4209; EG-NEXT: LSHR T59.Y, T51.Z, literal.y, 4210; EG-NEXT: LSHR T60.W, T50.Y, literal.y, 4211; EG-NEXT: AND_INT * T59.X, T51.Z, literal.z, 4212; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4213; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4214; EG-NEXT: AND_INT T60.Z, T50.Y, literal.x, 4215; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4216; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 4217; EG-NEXT: LSHR 
T61.X, PV.W, literal.x, 4218; EG-NEXT: LSHR T60.Y, T50.X, literal.y, 4219; EG-NEXT: LSHR T62.W, T50.W, literal.y, 4220; EG-NEXT: AND_INT * T60.X, T50.X, literal.z, 4221; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4222; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4223; EG-NEXT: AND_INT T62.Z, T50.W, literal.x, 4224; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4225; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4226; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4227; EG-NEXT: LSHR T62.Y, T50.Z, literal.y, 4228; EG-NEXT: LSHR T63.W, T49.Y, literal.y, 4229; EG-NEXT: AND_INT * T62.X, T50.Z, literal.z, 4230; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4231; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4232; EG-NEXT: AND_INT T63.Z, T49.Y, literal.x, 4233; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4234; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4235; EG-NEXT: LSHR T64.X, PV.W, literal.x, 4236; EG-NEXT: LSHR T63.Y, T49.X, literal.y, 4237; EG-NEXT: LSHR T65.W, T49.W, literal.y, 4238; EG-NEXT: AND_INT * T63.X, T49.X, literal.z, 4239; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4240; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4241; EG-NEXT: AND_INT T65.Z, T49.W, literal.x, 4242; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4243; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 4244; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4245; EG-NEXT: LSHR T65.Y, T49.Z, literal.y, 4246; EG-NEXT: AND_INT * T65.X, T49.Z, literal.z, 4247; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4248; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4249; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4250; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 4251; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 4252; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4253; 4254; GFX12-LABEL: constant_zextload_v64i16_to_v64i32: 4255; GFX12: ; %bb.0: 4256; GFX12-NEXT: s_load_b128 s[36:39], s[4:5], 0x24 4257; GFX12-NEXT: s_wait_kmcnt 0x0 4258; GFX12-NEXT: s_clause 0x1 4259; GFX12-NEXT: s_load_b512 s[16:31], s[38:39], 0x0 4260; 
GFX12-NEXT: s_load_b512 s[0:15], s[38:39], 0x40 4261; GFX12-NEXT: s_wait_kmcnt 0x0 4262; GFX12-NEXT: s_lshr_b32 s49, s31, 16 4263; GFX12-NEXT: s_lshr_b32 s65, s15, 16 4264; GFX12-NEXT: s_lshr_b32 s66, s14, 16 4265; GFX12-NEXT: s_and_b32 s14, s14, 0xffff 4266; GFX12-NEXT: s_and_b32 s15, s15, 0xffff 4267; GFX12-NEXT: s_lshr_b32 s63, s13, 16 4268; GFX12-NEXT: s_lshr_b32 s64, s12, 16 4269; GFX12-NEXT: s_and_b32 s13, s13, 0xffff 4270; GFX12-NEXT: s_and_b32 s12, s12, 0xffff 4271; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66 4272; GFX12-NEXT: s_lshr_b32 s61, s11, 16 4273; GFX12-NEXT: s_lshr_b32 s62, s10, 16 4274; GFX12-NEXT: s_and_b32 s11, s11, 0xffff 4275; GFX12-NEXT: s_and_b32 s10, s10, 0xffff 4276; GFX12-NEXT: v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s65 4277; GFX12-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s64 4278; GFX12-NEXT: s_lshr_b32 s59, s9, 16 4279; GFX12-NEXT: s_lshr_b32 s60, s8, 16 4280; GFX12-NEXT: s_and_b32 s9, s9, 0xffff 4281; GFX12-NEXT: s_and_b32 s8, s8, 0xffff 4282; GFX12-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s63 4283; GFX12-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s62 4284; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s61 4285; GFX12-NEXT: v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v13, s60 4286; GFX12-NEXT: s_lshr_b32 s57, s7, 16 4287; GFX12-NEXT: s_lshr_b32 s58, s6, 16 4288; GFX12-NEXT: s_and_b32 s7, s7, 0xffff 4289; GFX12-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s59 4290; GFX12-NEXT: v_mov_b32_e32 v14, s9 4291; GFX12-NEXT: s_and_b32 s6, s6, 0xffff 4292; GFX12-NEXT: s_lshr_b32 s55, s5, 16 4293; GFX12-NEXT: s_lshr_b32 s56, s4, 16 4294; GFX12-NEXT: s_and_b32 s5, s5, 0xffff 4295; GFX12-NEXT: s_and_b32 s4, s4, 0xffff 4296; GFX12-NEXT: s_lshr_b32 s53, s3, 16 4297; GFX12-NEXT: s_lshr_b32 s54, s2, 16 4298; GFX12-NEXT: s_and_b32 s3, s3, 0xffff 4299; GFX12-NEXT: s_and_b32 s2, s2, 0xffff 4300; GFX12-NEXT: s_clause 0x3 4301; GFX12-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:240 
4302; GFX12-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:224 4303; GFX12-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:208 4304; GFX12-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:192 4305; GFX12-NEXT: v_dual_mov_b32 v1, s58 :: v_dual_mov_b32 v0, s6 4306; GFX12-NEXT: v_dual_mov_b32 v3, s57 :: v_dual_mov_b32 v2, s7 4307; GFX12-NEXT: v_mov_b32_e32 v5, s56 4308; GFX12-NEXT: s_lshr_b32 s51, s1, 16 4309; GFX12-NEXT: s_lshr_b32 s52, s0, 16 4310; GFX12-NEXT: s_and_b32 s1, s1, 0xffff 4311; GFX12-NEXT: s_and_b32 s0, s0, 0xffff 4312; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s55 4313; GFX12-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v9, s54 4314; GFX12-NEXT: s_lshr_b32 s50, s30, 16 4315; GFX12-NEXT: s_and_b32 s31, s31, 0xffff 4316; GFX12-NEXT: s_and_b32 s30, s30, 0xffff 4317; GFX12-NEXT: v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s53 4318; GFX12-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s52 4319; GFX12-NEXT: s_lshr_b32 s45, s27, 16 4320; GFX12-NEXT: s_lshr_b32 s46, s26, 16 4321; GFX12-NEXT: s_lshr_b32 s47, s29, 16 4322; GFX12-NEXT: s_lshr_b32 s48, s28, 16 4323; GFX12-NEXT: s_and_b32 s27, s27, 0xffff 4324; GFX12-NEXT: s_and_b32 s26, s26, 0xffff 4325; GFX12-NEXT: s_and_b32 s29, s29, 0xffff 4326; GFX12-NEXT: s_and_b32 s28, s28, 0xffff 4327; GFX12-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s51 4328; GFX12-NEXT: v_dual_mov_b32 v14, s1 :: v_dual_mov_b32 v17, s50 4329; GFX12-NEXT: s_lshr_b32 s43, s25, 16 4330; GFX12-NEXT: s_lshr_b32 s44, s24, 16 4331; GFX12-NEXT: s_and_b32 s25, s25, 0xffff 4332; GFX12-NEXT: s_and_b32 s24, s24, 0xffff 4333; GFX12-NEXT: v_dual_mov_b32 v16, s30 :: v_dual_mov_b32 v19, s49 4334; GFX12-NEXT: v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v21, s48 4335; GFX12-NEXT: s_lshr_b32 s41, s23, 16 4336; GFX12-NEXT: s_lshr_b32 s42, s22, 16 4337; GFX12-NEXT: s_and_b32 s23, s23, 0xffff 4338; GFX12-NEXT: s_and_b32 s22, s22, 0xffff 4339; GFX12-NEXT: v_dual_mov_b32 v20, s28 :: v_dual_mov_b32 v23, s47 4340; 
GFX12-NEXT: v_mov_b32_e32 v22, s29 4341; GFX12-NEXT: s_clause 0x5 4342; GFX12-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:176 4343; GFX12-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:160 4344; GFX12-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:144 4345; GFX12-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:128 4346; GFX12-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:112 4347; GFX12-NEXT: global_store_b128 v24, v[20:23], s[36:37] offset:96 4348; GFX12-NEXT: v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v0, s26 4349; GFX12-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v2, s27 4350; GFX12-NEXT: v_mov_b32_e32 v5, s44 4351; GFX12-NEXT: s_lshr_b32 s39, s21, 16 4352; GFX12-NEXT: s_lshr_b32 s40, s20, 16 4353; GFX12-NEXT: s_and_b32 s21, s21, 0xffff 4354; GFX12-NEXT: s_and_b32 s20, s20, 0xffff 4355; GFX12-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v7, s43 4356; GFX12-NEXT: v_dual_mov_b32 v6, s25 :: v_dual_mov_b32 v9, s42 4357; GFX12-NEXT: s_lshr_b32 s35, s19, 16 4358; GFX12-NEXT: s_lshr_b32 s38, s18, 16 4359; GFX12-NEXT: s_and_b32 s19, s19, 0xffff 4360; GFX12-NEXT: s_and_b32 s18, s18, 0xffff 4361; GFX12-NEXT: v_dual_mov_b32 v8, s22 :: v_dual_mov_b32 v11, s41 4362; GFX12-NEXT: v_dual_mov_b32 v10, s23 :: v_dual_mov_b32 v13, s40 4363; GFX12-NEXT: s_lshr_b32 s33, s17, 16 4364; GFX12-NEXT: s_lshr_b32 s34, s16, 16 4365; GFX12-NEXT: s_and_b32 s17, s17, 0xffff 4366; GFX12-NEXT: s_and_b32 s16, s16, 0xffff 4367; GFX12-NEXT: v_dual_mov_b32 v12, s20 :: v_dual_mov_b32 v15, s39 4368; GFX12-NEXT: v_dual_mov_b32 v14, s21 :: v_dual_mov_b32 v17, s38 4369; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s35 4370; GFX12-NEXT: v_dual_mov_b32 v18, s19 :: v_dual_mov_b32 v21, s34 4371; GFX12-NEXT: v_dual_mov_b32 v20, s16 :: v_dual_mov_b32 v23, s33 4372; GFX12-NEXT: v_mov_b32_e32 v22, s17 4373; GFX12-NEXT: s_clause 0x5 4374; GFX12-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:80 4375; GFX12-NEXT: global_store_b128 v24, v[4:7], s[36:37] 
offset:64 4376; GFX12-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:48 4377; GFX12-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:32 4378; GFX12-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:16 4379; GFX12-NEXT: global_store_b128 v24, v[20:23], s[36:37] 4380; GFX12-NEXT: s_endpgm 4381 %load = load <64 x i16>, ptr addrspace(4) %in 4382 %ext = zext <64 x i16> %load to <64 x i32> 4383 store <64 x i32> %ext, ptr addrspace(1) %out 4384 ret void 4385} 4386 4387define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 4388; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32: 4389; GCN-NOHSA-SI: ; %bb.0: 4390; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x9 4391; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4392; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x0 4393; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x10 4394; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4395; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s17, 16 4396; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s16, 16 4397; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s17, s17 4398; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s16, s16 4399; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s19, 16 4400; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s18, 16 4401; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s19, s19 4402; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s18, s18 4403; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s21, 16 4404; GCN-NOHSA-SI-NEXT: s_ashr_i32 s40, s20, 16 4405; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s21 4406; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s20 4407; GCN-NOHSA-SI-NEXT: s_ashr_i32 s41, s23, 16 4408; GCN-NOHSA-SI-NEXT: s_ashr_i32 s42, s22, 16 4409; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s23, s23 4410; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s22, s22 4411; GCN-NOHSA-SI-NEXT: s_ashr_i32 s43, s25, 16 4412; GCN-NOHSA-SI-NEXT: s_ashr_i32 s44, s24, 16 4413; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s25 4414; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s24 4415; GCN-NOHSA-SI-NEXT: s_ashr_i32 s45, s27, 16 
4416; GCN-NOHSA-SI-NEXT: s_ashr_i32 s46, s26, 16 4417; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s27, s27 4418; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s26, s26 4419; GCN-NOHSA-SI-NEXT: s_ashr_i32 s47, s29, 16 4420; GCN-NOHSA-SI-NEXT: s_ashr_i32 s48, s28, 16 4421; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s29, s29 4422; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s28, s28 4423; GCN-NOHSA-SI-NEXT: s_ashr_i32 s49, s31, 16 4424; GCN-NOHSA-SI-NEXT: s_ashr_i32 s50, s30, 16 4425; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s31, s31 4426; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s30, s30 4427; GCN-NOHSA-SI-NEXT: s_ashr_i32 s51, s1, 16 4428; GCN-NOHSA-SI-NEXT: s_ashr_i32 s52, s0, 16 4429; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s53, s1 4430; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s54, s0 4431; GCN-NOHSA-SI-NEXT: s_ashr_i32 s55, s3, 16 4432; GCN-NOHSA-SI-NEXT: s_ashr_i32 s56, s2, 16 4433; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s57, s3 4434; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s58, s2 4435; GCN-NOHSA-SI-NEXT: s_ashr_i32 s59, s5, 16 4436; GCN-NOHSA-SI-NEXT: s_ashr_i32 s60, s4, 16 4437; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 4438; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 4439; GCN-NOHSA-SI-NEXT: s_ashr_i32 s61, s6, 16 4440; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s62, s7 4441; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 4442; GCN-NOHSA-SI-NEXT: s_ashr_i32 s63, s9, 16 4443; GCN-NOHSA-SI-NEXT: s_ashr_i32 s64, s8, 16 4444; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 4445; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 4446; GCN-NOHSA-SI-NEXT: s_ashr_i32 s65, s11, 16 4447; GCN-NOHSA-SI-NEXT: s_ashr_i32 s66, s10, 16 4448; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 4449; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 4450; GCN-NOHSA-SI-NEXT: s_ashr_i32 s67, s13, 16 4451; GCN-NOHSA-SI-NEXT: s_ashr_i32 s68, s12, 16 4452; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 4453; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 4454; GCN-NOHSA-SI-NEXT: s_ashr_i32 s69, s15, 16 4455; GCN-NOHSA-SI-NEXT: s_ashr_i32 s70, s14, 16 4456; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 4457; 
GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 4458; GCN-NOHSA-SI-NEXT: s_ashr_i32 s7, s7, 16 4459; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s36 4460; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s37 4461; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4462; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4463; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 4464; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s70 4465; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 4466; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 4467; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s12 4468; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s68 4469; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s13 4470; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s67 4471; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s10 4472; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s66 4473; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s11 4474; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s65 4475; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s8 4476; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s64 4477; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s9 4478; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s63 4479; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s6 4480; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s61 4481; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s62 4482; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 4483; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4484; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 4485; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s7 4486; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s60 4487; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 4488; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s59 4489; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 4490; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 4491; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 4492; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 4493; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 4494; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4495; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v0, s58 4496; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s56 4497; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s57 4498; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55 4499; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 4500; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4501; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s54 4502; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s52 4503; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s53 4504; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51 4505; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 4506; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4507; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 4508; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50 4509; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 4510; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s49 4511; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 4512; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4513; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28 4514; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s48 4515; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s29 4516; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s47 4517; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 4518; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4519; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s26 4520; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s46 4521; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27 4522; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s45 4523; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 4524; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4525; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s24 4526; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s44 4527; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s25 4528; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s43 4529; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 4530; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4531; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s22 4532; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s42 4533; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s23 4534; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s41 4535; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 4536; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4537; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20 4538; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s40 4539; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21 4540; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39 4541; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 4542; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4543; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 4544; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 4545; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 4546; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35 4547; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4548; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4549; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 4550; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 4551; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 4552; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 4553; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4554; GCN-NOHSA-SI-NEXT: s_endpgm 4555; 4556; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32: 4557; GCN-HSA: ; %bb.0: 4558; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 4559; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4560; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 4561; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4562; GCN-HSA-NEXT: s_ashr_i32 s20, s1, 16 4563; GCN-HSA-NEXT: s_ashr_i32 s21, s0, 16 4564; GCN-HSA-NEXT: s_sext_i32_i16 s22, s1 4565; GCN-HSA-NEXT: s_sext_i32_i16 s23, s0 4566; GCN-HSA-NEXT: s_ashr_i32 s24, s3, 16 4567; GCN-HSA-NEXT: s_ashr_i32 s25, s2, 16 4568; GCN-HSA-NEXT: s_sext_i32_i16 s26, s3 4569; GCN-HSA-NEXT: s_sext_i32_i16 s27, s2 4570; GCN-HSA-NEXT: s_ashr_i32 s28, s5, 16 4571; GCN-HSA-NEXT: s_ashr_i32 s29, s4, 16 4572; GCN-HSA-NEXT: s_sext_i32_i16 s30, s5 4573; GCN-HSA-NEXT: s_sext_i32_i16 s31, s4 4574; GCN-HSA-NEXT: s_ashr_i32 s33, s7, 16 4575; GCN-HSA-NEXT: s_ashr_i32 s34, s6, 16 4576; GCN-HSA-NEXT: 
s_sext_i32_i16 s35, s7 4577; GCN-HSA-NEXT: s_sext_i32_i16 s36, s6 4578; GCN-HSA-NEXT: s_ashr_i32 s37, s9, 16 4579; GCN-HSA-NEXT: s_ashr_i32 s38, s8, 16 4580; GCN-HSA-NEXT: s_sext_i32_i16 s39, s9 4581; GCN-HSA-NEXT: s_sext_i32_i16 s40, s8 4582; GCN-HSA-NEXT: s_ashr_i32 s41, s11, 16 4583; GCN-HSA-NEXT: s_ashr_i32 s42, s10, 16 4584; GCN-HSA-NEXT: s_sext_i32_i16 s43, s11 4585; GCN-HSA-NEXT: s_sext_i32_i16 s44, s10 4586; GCN-HSA-NEXT: s_ashr_i32 s45, s13, 16 4587; GCN-HSA-NEXT: s_ashr_i32 s47, s12, 16 4588; GCN-HSA-NEXT: s_sext_i32_i16 s46, s13 4589; GCN-HSA-NEXT: s_sext_i32_i16 s49, s12 4590; GCN-HSA-NEXT: s_ashr_i32 s48, s15, 16 4591; GCN-HSA-NEXT: s_ashr_i32 s50, s14, 16 4592; GCN-HSA-NEXT: s_sext_i32_i16 s51, s15 4593; GCN-HSA-NEXT: s_sext_i32_i16 s52, s14 4594; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x10 4595; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4596; GCN-HSA-NEXT: s_ashr_i32 s18, s1, 16 4597; GCN-HSA-NEXT: s_ashr_i32 s19, s0, 16 4598; GCN-HSA-NEXT: s_ashr_i32 s53, s3, 16 4599; GCN-HSA-NEXT: s_ashr_i32 s54, s2, 16 4600; GCN-HSA-NEXT: s_ashr_i32 s57, s5, 16 4601; GCN-HSA-NEXT: s_ashr_i32 s58, s4, 16 4602; GCN-HSA-NEXT: s_ashr_i32 s59, s7, 16 4603; GCN-HSA-NEXT: s_ashr_i32 s60, s6, 16 4604; GCN-HSA-NEXT: s_ashr_i32 s61, s9, 16 4605; GCN-HSA-NEXT: s_ashr_i32 s62, s8, 16 4606; GCN-HSA-NEXT: s_ashr_i32 s63, s11, 16 4607; GCN-HSA-NEXT: s_ashr_i32 s64, s10, 16 4608; GCN-HSA-NEXT: s_ashr_i32 s65, s13, 16 4609; GCN-HSA-NEXT: s_ashr_i32 s66, s12, 16 4610; GCN-HSA-NEXT: s_ashr_i32 s67, s15, 16 4611; GCN-HSA-NEXT: s_ashr_i32 s68, s14, 16 4612; GCN-HSA-NEXT: s_sext_i32_i16 s56, s2 4613; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xf0 4614; GCN-HSA-NEXT: s_sext_i32_i16 s55, s3 4615; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4616; GCN-HSA-NEXT: v_mov_b32_e32 v20, s3 4617; GCN-HSA-NEXT: v_mov_b32_e32 v19, s2 4618; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xe0 4619; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4620; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 4621; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 4622; 
GCN-HSA-NEXT: s_add_u32 s2, s16, 0xd0 4623; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4624; GCN-HSA-NEXT: v_mov_b32_e32 v24, s3 4625; GCN-HSA-NEXT: v_mov_b32_e32 v23, s2 4626; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xc0 4627; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4628; GCN-HSA-NEXT: v_mov_b32_e32 v26, s3 4629; GCN-HSA-NEXT: v_mov_b32_e32 v25, s2 4630; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xb0 4631; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4632; GCN-HSA-NEXT: v_mov_b32_e32 v28, s3 4633; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 4634; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 4635; GCN-HSA-NEXT: v_mov_b32_e32 v27, s2 4636; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xa0 4637; GCN-HSA-NEXT: v_mov_b32_e32 v8, s10 4638; GCN-HSA-NEXT: v_mov_b32_e32 v9, s64 4639; GCN-HSA-NEXT: v_mov_b32_e32 v10, s11 4640; GCN-HSA-NEXT: v_mov_b32_e32 v11, s63 4641; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4642; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[8:11] 4643; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 4644; GCN-HSA-NEXT: v_mov_b32_e32 v10, s3 4645; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 4646; GCN-HSA-NEXT: v_mov_b32_e32 v9, s2 4647; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x90 4648; GCN-HSA-NEXT: v_mov_b32_e32 v12, s8 4649; GCN-HSA-NEXT: v_mov_b32_e32 v13, s62 4650; GCN-HSA-NEXT: v_mov_b32_e32 v14, s9 4651; GCN-HSA-NEXT: v_mov_b32_e32 v15, s61 4652; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4653; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[12:15] 4654; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13 4655; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4656; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12 4657; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4658; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x80 4659; GCN-HSA-NEXT: s_sext_i32_i16 s0, s0 4660; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15 4661; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14 4662; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12 4663; GCN-HSA-NEXT: v_mov_b32_e32 v5, s66 4664; GCN-HSA-NEXT: v_mov_b32_e32 v6, s13 4665; GCN-HSA-NEXT: v_mov_b32_e32 v7, s65 4666; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 4667; GCN-HSA-NEXT: s_sext_i32_i16 s1, s1 4668; 
GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 4669; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 4670; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 4671; GCN-HSA-NEXT: v_mov_b32_e32 v1, s68 4672; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 4673; GCN-HSA-NEXT: v_mov_b32_e32 v3, s67 4674; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[4:7] 4675; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 4676; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4677; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x70 4678; GCN-HSA-NEXT: v_mov_b32_e32 v17, s60 4679; GCN-HSA-NEXT: v_mov_b32_e32 v18, s7 4680; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[0:3] 4681; GCN-HSA-NEXT: v_mov_b32_e32 v19, s59 4682; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 4683; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4684; GCN-HSA-NEXT: flat_store_dwordx4 v[27:28], v[16:19] 4685; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 4686; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 4687; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 4688; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 4689; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4690; GCN-HSA-NEXT: v_mov_b32_e32 v19, s1 4691; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 4692; GCN-HSA-NEXT: v_mov_b32_e32 v18, s0 4693; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 4694; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4695; GCN-HSA-NEXT: v_mov_b32_e32 v20, s56 4696; GCN-HSA-NEXT: v_mov_b32_e32 v1, s58 4697; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 4698; GCN-HSA-NEXT: v_mov_b32_e32 v3, s57 4699; GCN-HSA-NEXT: v_mov_b32_e32 v21, s54 4700; GCN-HSA-NEXT: v_mov_b32_e32 v22, s55 4701; GCN-HSA-NEXT: v_mov_b32_e32 v23, s53 4702; GCN-HSA-NEXT: v_mov_b32_e32 v5, s19 4703; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 4704; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4705; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 4706; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52 4707; GCN-HSA-NEXT: v_mov_b32_e32 v7, s18 4708; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[0:3] 4709; GCN-HSA-NEXT: v_mov_b32_e32 v9, s50 4710; GCN-HSA-NEXT: v_mov_b32_e32 v0, s49 4711; GCN-HSA-NEXT: v_mov_b32_e32 v10, s51 4712; GCN-HSA-NEXT: v_mov_b32_e32 v11, s48 4713; GCN-HSA-NEXT: v_mov_b32_e32 v1, s47 4714; 
GCN-HSA-NEXT: v_mov_b32_e32 v2, s46 4715; GCN-HSA-NEXT: v_mov_b32_e32 v3, s45 4716; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[20:23] 4717; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 4718; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 4719; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 4720; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4721; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4722; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 4723; GCN-HSA-NEXT: v_mov_b32_e32 v0, s44 4724; GCN-HSA-NEXT: v_mov_b32_e32 v1, s42 4725; GCN-HSA-NEXT: v_mov_b32_e32 v2, s43 4726; GCN-HSA-NEXT: v_mov_b32_e32 v3, s41 4727; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4728; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4729; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4730; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4731; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 4732; GCN-HSA-NEXT: v_mov_b32_e32 v0, s40 4733; GCN-HSA-NEXT: v_mov_b32_e32 v1, s38 4734; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39 4735; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37 4736; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4737; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4738; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4739; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4740; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 4741; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 4742; GCN-HSA-NEXT: v_mov_b32_e32 v1, s34 4743; GCN-HSA-NEXT: v_mov_b32_e32 v2, s35 4744; GCN-HSA-NEXT: v_mov_b32_e32 v3, s33 4745; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4746; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4747; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4748; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4749; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 4750; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31 4751; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 4752; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30 4753; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 4754; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 4755; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4756; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4757; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27 4758; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 4759; 
GCN-HSA-NEXT: v_mov_b32_e32 v2, s26 4760; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 4761; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4762; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4763; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 4764; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23 4765; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 4766; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 4767; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 4768; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 4769; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4770; GCN-HSA-NEXT: s_endpgm 4771; 4772; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32: 4773; GCN-NOHSA-VI: ; %bb.0: 4774; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 4775; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4776; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x0 4777; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x40 4778; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4779; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s17, 16 4780; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s16, 16 4781; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s19, 16 4782; GCN-NOHSA-VI-NEXT: s_ashr_i32 s38, s18, 16 4783; GCN-NOHSA-VI-NEXT: s_ashr_i32 s39, s21, 16 4784; GCN-NOHSA-VI-NEXT: s_ashr_i32 s40, s20, 16 4785; GCN-NOHSA-VI-NEXT: s_ashr_i32 s41, s23, 16 4786; GCN-NOHSA-VI-NEXT: s_ashr_i32 s42, s22, 16 4787; GCN-NOHSA-VI-NEXT: s_ashr_i32 s43, s25, 16 4788; GCN-NOHSA-VI-NEXT: s_ashr_i32 s44, s24, 16 4789; GCN-NOHSA-VI-NEXT: s_ashr_i32 s45, s27, 16 4790; GCN-NOHSA-VI-NEXT: s_ashr_i32 s46, s26, 16 4791; GCN-NOHSA-VI-NEXT: s_ashr_i32 s47, s29, 16 4792; GCN-NOHSA-VI-NEXT: s_ashr_i32 s48, s28, 16 4793; GCN-NOHSA-VI-NEXT: s_ashr_i32 s49, s31, 16 4794; GCN-NOHSA-VI-NEXT: s_ashr_i32 s50, s30, 16 4795; GCN-NOHSA-VI-NEXT: s_ashr_i32 s51, s1, 16 4796; GCN-NOHSA-VI-NEXT: s_ashr_i32 s52, s0, 16 4797; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s53, s1 4798; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s54, s0 4799; GCN-NOHSA-VI-NEXT: s_ashr_i32 s55, s3, 16 4800; GCN-NOHSA-VI-NEXT: s_ashr_i32 s56, s2, 16 4801; GCN-NOHSA-VI-NEXT: s_ashr_i32 
s57, s5, 16 4802; GCN-NOHSA-VI-NEXT: s_ashr_i32 s58, s4, 16 4803; GCN-NOHSA-VI-NEXT: s_ashr_i32 s59, s7, 16 4804; GCN-NOHSA-VI-NEXT: s_ashr_i32 s60, s6, 16 4805; GCN-NOHSA-VI-NEXT: s_ashr_i32 s61, s9, 16 4806; GCN-NOHSA-VI-NEXT: s_ashr_i32 s62, s8, 16 4807; GCN-NOHSA-VI-NEXT: s_ashr_i32 s63, s11, 16 4808; GCN-NOHSA-VI-NEXT: s_ashr_i32 s64, s10, 16 4809; GCN-NOHSA-VI-NEXT: s_ashr_i32 s65, s13, 16 4810; GCN-NOHSA-VI-NEXT: s_ashr_i32 s66, s12, 16 4811; GCN-NOHSA-VI-NEXT: s_ashr_i32 s0, s15, 16 4812; GCN-NOHSA-VI-NEXT: s_ashr_i32 s1, s14, 16 4813; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 4814; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xf0 4815; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 4816; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4817; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4818; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 4819; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 4820; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4821; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xe0 4822; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 4823; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 4824; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4825; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4826; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4827; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 4828; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 4829; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4830; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xd0 4831; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 4832; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66 4833; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 4834; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65 4835; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4836; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4837; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4838; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 4839; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 4840; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4841; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xc0 4842; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 4843; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64 4844; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 4845; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 4846; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4847; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4848; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4849; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 4850; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 4851; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4852; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xb0 4853; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 4854; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s62 4855; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 4856; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61 4857; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4858; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4859; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4860; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 4861; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 4862; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4863; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0xa0 4864; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4865; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s60 4866; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 4867; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 4868; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4869; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4870; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4871; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 4872; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 4873; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4874; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x90 4875; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 4876; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s58 4877; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 4878; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 4879; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4880; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4881; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4882; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s3, s3 4883; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s2, s2 4884; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v4, s0 4885; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x80 4886; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 4887; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56 4888; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 4889; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55 4890; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4891; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4892; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4893; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4894; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x70 4895; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54 4896; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52 4897; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s53 4898; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 4899; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4900; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4901; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4902; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s31, s31 4903; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s30, s30 4904; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4905; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x60 4906; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 4907; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s50 4908; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 4909; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49 4910; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4911; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4912; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4913; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s29, s29 4914; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s28, s28 4915; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4916; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 0x50 4917; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 4918; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s48 4919; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 4920; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 4921; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4922; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4923; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4924; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s27, s27 4925; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 
s26, s26 4926; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4927; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 64 4928; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 4929; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46 4930; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 4931; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 4932; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4933; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4934; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4935; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s25, s25 4936; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s24, s24 4937; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4938; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 48 4939; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 4940; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s44 4941; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 4942; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43 4943; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4944; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4945; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4946; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s23, s23 4947; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s22, s22 4948; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4949; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 32 4950; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 4951; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42 4952; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 4953; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 4954; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4955; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4956; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4957; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s21, s21 4958; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s20, s20 4959; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4960; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s36, 16 4961; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 4962; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40 4963; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 4964; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39 4965; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s37, 0 4966; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19 4967; 
GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18 4968; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4969; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 4970; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 4971; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s38 4972; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 4973; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 4974; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 4975; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17 4976; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16 4977; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4978; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s36 4979; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 4980; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 4981; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 4982; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 4983; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s37 4984; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4985; GCN-NOHSA-VI-NEXT: s_endpgm 4986; 4987; EG-LABEL: constant_sextload_v64i16_to_v64i32: 4988; EG: ; %bb.0: 4989; EG-NEXT: ALU 17, @38, KC0[CB0:0-32], KC1[] 4990; EG-NEXT: TEX 7 @22 4991; EG-NEXT: ALU 75, @56, KC0[CB0:0-32], KC1[] 4992; EG-NEXT: ALU 71, @132, KC0[CB0:0-32], KC1[] 4993; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0 4994; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0 4995; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0 4996; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0 4997; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0 4998; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0 4999; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0 5000; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0 5001; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0 5002; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0 5003; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0 5004; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0 5005; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0 5006; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0 5007; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 5008; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 5009; EG-NEXT: CF_END 5010; EG-NEXT: PAD 5011; EG-NEXT: Fetch clause starting at 22: 5012; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 5013; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 5014; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 5015; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 5016; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 5017; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 5018; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 5019; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 5020; EG-NEXT: ALU clause starting at 38: 5021; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 5022; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5023; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5024; EG-NEXT: LSHR T36.X, PV.W, literal.x, 5025; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5026; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5027; EG-NEXT: LSHR T37.X, PV.W, literal.x, 5028; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5029; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5030; EG-NEXT: LSHR T38.X, PV.W, literal.x, 5031; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5032; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 5033; EG-NEXT: LSHR T39.X, PV.W, literal.x, 5034; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5035; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 5036; EG-NEXT: LSHR T40.X, PV.W, literal.x, 5037; EG-NEXT: MOV * T41.X, KC0[2].Z, 5038; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5039; EG-NEXT: ALU clause starting at 56: 5040; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5041; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 5042; EG-NEXT: LSHR T49.X, PV.W, literal.x, 5043; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5044; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 5045; EG-NEXT: LSHR T50.X, PV.W, literal.x, 5046; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5047; EG-NEXT: 2(2.802597e-45), 
128(1.793662e-43) 5048; EG-NEXT: LSHR T51.X, PV.W, literal.x, 5049; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5050; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 5051; EG-NEXT: LSHR T52.X, PV.W, literal.x, 5052; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5053; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 5054; EG-NEXT: LSHR T53.X, PV.W, literal.x, 5055; EG-NEXT: LSHR T0.Y, T41.W, literal.y, 5056; EG-NEXT: LSHR T0.Z, T41.Y, literal.y, 5057; EG-NEXT: LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212 5058; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5059; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5060; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) 5061; EG-NEXT: LSHR T54.X, PS, literal.x, 5062; EG-NEXT: LSHR T1.Y, T48.Y, literal.y, 5063; EG-NEXT: LSHR T1.Z, T47.W, literal.y, 5064; EG-NEXT: LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212 5065; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 5066; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5067; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 5068; EG-NEXT: LSHR T55.X, PS, literal.x, 5069; EG-NEXT: LSHR T2.Y, T46.W, literal.y, 5070; EG-NEXT: LSHR T2.Z, T46.Y, literal.y, 5071; EG-NEXT: LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212 5072; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 5073; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5074; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 5075; EG-NEXT: LSHR T56.X, PS, literal.x, 5076; EG-NEXT: LSHR T3.Y, T45.Y, literal.y, 5077; EG-NEXT: BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212 5078; EG-NEXT: LSHR T3.W, T43.W, literal.y, 5079; EG-NEXT: LSHR * T4.W, T43.Y, literal.y, 5080; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5081; EG-NEXT: BFE_INT T57.X, T44.X, 0.0, literal.x, 5082; EG-NEXT: LSHR T4.Y, T42.W, literal.x, 5083; EG-NEXT: BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212 5084; EG-NEXT: LSHR T5.W, T42.Y, literal.x, 5085; EG-NEXT: LSHR * T6.W, T44.Y, literal.x, 5086; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5087; EG-NEXT: BFE_INT T58.X, T44.Z, 0.0, 
literal.x, 5088; EG-NEXT: LSHR T5.Y, T44.W, literal.x, 5089; EG-NEXT: BFE_INT T59.Z, T42.Y, 0.0, literal.x, 5090; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 5091; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 5092; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5093; EG-NEXT: BFE_INT T59.X, T42.X, 0.0, literal.x, 5094; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 5095; EG-NEXT: BFE_INT T60.Z, T42.W, 0.0, literal.x, 5096; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 5097; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 5098; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5099; EG-NEXT: BFE_INT T60.X, T42.Z, 0.0, literal.x, 5100; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 5101; EG-NEXT: BFE_INT T44.Z, T43.Y, 0.0, literal.x, 5102; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, 5103; EG-NEXT: LSHR * T5.W, T42.X, literal.x, 5104; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5105; EG-NEXT: BFE_INT T44.X, T43.X, 0.0, literal.x, 5106; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 5107; EG-NEXT: BFE_INT T61.Z, T43.W, 0.0, literal.x, 5108; EG-NEXT: BFE_INT T60.W, T4.Y, 0.0, literal.x, 5109; EG-NEXT: LSHR * T5.W, T42.Z, literal.x, 5110; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5111; EG-NEXT: BFE_INT T61.X, T43.Z, 0.0, literal.x, 5112; EG-NEXT: BFE_INT T60.Y, PS, 0.0, literal.x, 5113; EG-NEXT: BFE_INT T42.Z, T45.Y, 0.0, literal.x, 5114; EG-NEXT: BFE_INT * T44.W, T4.W, 0.0, literal.x, 5115; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5116; EG-NEXT: ALU clause starting at 132: 5117; EG-NEXT: LSHR * T4.W, T43.X, literal.x, 5118; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5119; EG-NEXT: BFE_INT T42.X, T45.X, 0.0, literal.x, 5120; EG-NEXT: BFE_INT T44.Y, PV.W, 0.0, literal.x, 5121; EG-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 5122; EG-NEXT: BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 5123; EG-NEXT: LSHR * T3.W, T43.Z, literal.x, 5124; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5125; EG-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 5126; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 5127; 
EG-NEXT: BFE_INT T43.Z, T46.Y, 0.0, literal.x, 5128; EG-NEXT: BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5129; EG-NEXT: LSHR * T3.W, T45.X, literal.x, 5130; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5131; EG-NEXT: BFE_INT T43.X, T46.X, 0.0, literal.x, 5132; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 5133; EG-NEXT: BFE_INT T63.Z, T46.W, 0.0, literal.x, 5134; EG-NEXT: BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 5135; EG-NEXT: LSHR * T2.W, T45.Z, literal.x, 5136; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5137; EG-NEXT: BFE_INT T63.X, T46.Z, 0.0, literal.x, 5138; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 5139; EG-NEXT: BFE_INT T45.Z, T47.Y, 0.0, literal.x, 5140; EG-NEXT: BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5141; EG-NEXT: LSHR * T2.W, T46.X, literal.x, 5142; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5143; EG-NEXT: BFE_INT T45.X, T47.X, 0.0, literal.x, 5144; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 5145; EG-NEXT: BFE_INT T64.Z, T47.W, 0.0, literal.x, 5146; EG-NEXT: BFE_INT T63.W, T2.Y, 0.0, literal.x, 5147; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 5148; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5149; EG-NEXT: BFE_INT T64.X, T47.Z, 0.0, literal.x, 5150; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 5151; EG-NEXT: BFE_INT T46.Z, T48.Y, 0.0, literal.x, 5152; EG-NEXT: BFE_INT T45.W, T1.W, 0.0, literal.x, 5153; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 5154; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5155; EG-NEXT: BFE_INT T46.X, T48.X, 0.0, literal.x, 5156; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 5157; EG-NEXT: BFE_INT T65.Z, T48.W, 0.0, literal.x, 5158; EG-NEXT: BFE_INT T64.W, T1.Z, 0.0, literal.x, 5159; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 5160; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5161; EG-NEXT: BFE_INT T65.X, T48.Z, 0.0, literal.x, 5162; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 5163; EG-NEXT: BFE_INT T47.Z, T41.Y, 0.0, literal.x, 5164; EG-NEXT: BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5165; 
EG-NEXT: LSHR * T1.W, T48.X, literal.x, 5166; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5167; EG-NEXT: BFE_INT T47.X, T41.X, 0.0, literal.x, 5168; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 5169; EG-NEXT: BFE_INT T66.Z, T41.W, 0.0, literal.x, 5170; EG-NEXT: BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212 5171; EG-NEXT: LSHR * T0.W, T48.Z, literal.x, 5172; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5173; EG-NEXT: BFE_INT T66.X, T41.Z, 0.0, literal.x, 5174; EG-NEXT: BFE_INT T65.Y, PS, 0.0, literal.x, 5175; EG-NEXT: LSHR T1.Z, T41.X, literal.x, 5176; EG-NEXT: BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5177; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5178; EG-NEXT: 16(2.242078e-44), 224(3.138909e-43) 5179; EG-NEXT: LSHR T41.X, PS, literal.x, 5180; EG-NEXT: BFE_INT T47.Y, PV.Z, 0.0, literal.y, 5181; EG-NEXT: LSHR T0.Z, T41.Z, literal.y, 5182; EG-NEXT: BFE_INT T66.W, T0.Y, 0.0, literal.y, 5183; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 5184; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5185; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 5186; EG-NEXT: LSHR T48.X, PS, literal.x, 5187; EG-NEXT: BFE_INT * T66.Y, PV.Z, 0.0, literal.y, 5188; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5189; 5190; GFX12-LABEL: constant_sextload_v64i16_to_v64i32: 5191; GFX12: ; %bb.0: 5192; GFX12-NEXT: s_load_b128 s[36:39], s[4:5], 0x24 5193; GFX12-NEXT: s_wait_kmcnt 0x0 5194; GFX12-NEXT: s_clause 0x1 5195; GFX12-NEXT: s_load_b512 s[0:15], s[38:39], 0x40 5196; GFX12-NEXT: s_load_b512 s[16:31], s[38:39], 0x0 5197; GFX12-NEXT: s_wait_kmcnt 0x0 5198; GFX12-NEXT: s_ashr_i32 s65, s15, 16 5199; GFX12-NEXT: s_ashr_i32 s66, s14, 16 5200; GFX12-NEXT: s_sext_i32_i16 s14, s14 5201; GFX12-NEXT: s_sext_i32_i16 s15, s15 5202; GFX12-NEXT: s_ashr_i32 s63, s13, 16 5203; GFX12-NEXT: s_ashr_i32 s64, s12, 16 5204; GFX12-NEXT: s_sext_i32_i16 s13, s13 5205; GFX12-NEXT: s_sext_i32_i16 s12, s12 5206; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66 5207; GFX12-NEXT: s_ashr_i32 s61, 
s11, 16 5208; GFX12-NEXT: s_ashr_i32 s62, s10, 16 5209; GFX12-NEXT: s_sext_i32_i16 s11, s11 5210; GFX12-NEXT: s_sext_i32_i16 s10, s10 5211; GFX12-NEXT: v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s65 5212; GFX12-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s64 5213; GFX12-NEXT: s_ashr_i32 s59, s9, 16 5214; GFX12-NEXT: s_ashr_i32 s60, s8, 16 5215; GFX12-NEXT: s_sext_i32_i16 s9, s9 5216; GFX12-NEXT: s_sext_i32_i16 s8, s8 5217; GFX12-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s63 5218; GFX12-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s62 5219; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s61 5220; GFX12-NEXT: v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v13, s60 5221; GFX12-NEXT: s_ashr_i32 s57, s7, 16 5222; GFX12-NEXT: s_ashr_i32 s58, s6, 16 5223; GFX12-NEXT: s_sext_i32_i16 s7, s7 5224; GFX12-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s59 5225; GFX12-NEXT: v_mov_b32_e32 v14, s9 5226; GFX12-NEXT: s_sext_i32_i16 s6, s6 5227; GFX12-NEXT: s_ashr_i32 s55, s5, 16 5228; GFX12-NEXT: s_ashr_i32 s56, s4, 16 5229; GFX12-NEXT: s_sext_i32_i16 s5, s5 5230; GFX12-NEXT: s_sext_i32_i16 s4, s4 5231; GFX12-NEXT: s_ashr_i32 s53, s3, 16 5232; GFX12-NEXT: s_ashr_i32 s54, s2, 16 5233; GFX12-NEXT: s_sext_i32_i16 s3, s3 5234; GFX12-NEXT: s_sext_i32_i16 s2, s2 5235; GFX12-NEXT: s_clause 0x3 5236; GFX12-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:240 5237; GFX12-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:224 5238; GFX12-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:208 5239; GFX12-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:192 5240; GFX12-NEXT: v_dual_mov_b32 v1, s58 :: v_dual_mov_b32 v0, s6 5241; GFX12-NEXT: v_dual_mov_b32 v3, s57 :: v_dual_mov_b32 v2, s7 5242; GFX12-NEXT: v_mov_b32_e32 v5, s56 5243; GFX12-NEXT: s_ashr_i32 s51, s1, 16 5244; GFX12-NEXT: s_ashr_i32 s52, s0, 16 5245; GFX12-NEXT: s_sext_i32_i16 s1, s1 5246; GFX12-NEXT: s_sext_i32_i16 s0, s0 5247; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 
v7, s55 5248; GFX12-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v9, s54 5249; GFX12-NEXT: s_ashr_i32 s49, s31, 16 5250; GFX12-NEXT: s_ashr_i32 s50, s30, 16 5251; GFX12-NEXT: s_sext_i32_i16 s31, s31 5252; GFX12-NEXT: s_sext_i32_i16 s30, s30 5253; GFX12-NEXT: v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s53 5254; GFX12-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s52 5255; GFX12-NEXT: s_ashr_i32 s45, s27, 16 5256; GFX12-NEXT: s_ashr_i32 s46, s26, 16 5257; GFX12-NEXT: s_sext_i32_i16 s27, s27 5258; GFX12-NEXT: s_sext_i32_i16 s26, s26 5259; GFX12-NEXT: s_ashr_i32 s47, s29, 16 5260; GFX12-NEXT: s_ashr_i32 s48, s28, 16 5261; GFX12-NEXT: s_sext_i32_i16 s29, s29 5262; GFX12-NEXT: s_sext_i32_i16 s28, s28 5263; GFX12-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s51 5264; GFX12-NEXT: v_dual_mov_b32 v14, s1 :: v_dual_mov_b32 v17, s50 5265; GFX12-NEXT: s_ashr_i32 s43, s25, 16 5266; GFX12-NEXT: s_ashr_i32 s44, s24, 16 5267; GFX12-NEXT: s_sext_i32_i16 s25, s25 5268; GFX12-NEXT: s_sext_i32_i16 s24, s24 5269; GFX12-NEXT: v_dual_mov_b32 v16, s30 :: v_dual_mov_b32 v19, s49 5270; GFX12-NEXT: v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v21, s48 5271; GFX12-NEXT: s_ashr_i32 s41, s23, 16 5272; GFX12-NEXT: s_ashr_i32 s42, s22, 16 5273; GFX12-NEXT: s_sext_i32_i16 s23, s23 5274; GFX12-NEXT: s_sext_i32_i16 s22, s22 5275; GFX12-NEXT: v_dual_mov_b32 v20, s28 :: v_dual_mov_b32 v23, s47 5276; GFX12-NEXT: v_mov_b32_e32 v22, s29 5277; GFX12-NEXT: s_clause 0x5 5278; GFX12-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:176 5279; GFX12-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:160 5280; GFX12-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:144 5281; GFX12-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:128 5282; GFX12-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:112 5283; GFX12-NEXT: global_store_b128 v24, v[20:23], s[36:37] offset:96 5284; GFX12-NEXT: v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v0, s26 5285; GFX12-NEXT: v_dual_mov_b32 v3, s45 :: 
v_dual_mov_b32 v2, s27 5286; GFX12-NEXT: v_mov_b32_e32 v5, s44 5287; GFX12-NEXT: s_ashr_i32 s39, s21, 16 5288; GFX12-NEXT: s_ashr_i32 s40, s20, 16 5289; GFX12-NEXT: s_sext_i32_i16 s21, s21 5290; GFX12-NEXT: s_sext_i32_i16 s20, s20 5291; GFX12-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v7, s43 5292; GFX12-NEXT: v_dual_mov_b32 v6, s25 :: v_dual_mov_b32 v9, s42 5293; GFX12-NEXT: s_ashr_i32 s35, s19, 16 5294; GFX12-NEXT: s_ashr_i32 s38, s18, 16 5295; GFX12-NEXT: s_sext_i32_i16 s19, s19 5296; GFX12-NEXT: s_sext_i32_i16 s18, s18 5297; GFX12-NEXT: v_dual_mov_b32 v8, s22 :: v_dual_mov_b32 v11, s41 5298; GFX12-NEXT: v_dual_mov_b32 v10, s23 :: v_dual_mov_b32 v13, s40 5299; GFX12-NEXT: s_ashr_i32 s33, s17, 16 5300; GFX12-NEXT: s_ashr_i32 s34, s16, 16 5301; GFX12-NEXT: s_sext_i32_i16 s17, s17 5302; GFX12-NEXT: s_sext_i32_i16 s16, s16 5303; GFX12-NEXT: v_dual_mov_b32 v12, s20 :: v_dual_mov_b32 v15, s39 5304; GFX12-NEXT: v_dual_mov_b32 v14, s21 :: v_dual_mov_b32 v17, s38 5305; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s35 5306; GFX12-NEXT: v_dual_mov_b32 v18, s19 :: v_dual_mov_b32 v21, s34 5307; GFX12-NEXT: v_dual_mov_b32 v20, s16 :: v_dual_mov_b32 v23, s33 5308; GFX12-NEXT: v_mov_b32_e32 v22, s17 5309; GFX12-NEXT: s_clause 0x5 5310; GFX12-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:80 5311; GFX12-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:64 5312; GFX12-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:48 5313; GFX12-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:32 5314; GFX12-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:16 5315; GFX12-NEXT: global_store_b128 v24, v[20:23], s[36:37] 5316; GFX12-NEXT: s_endpgm 5317 %load = load <64 x i16>, ptr addrspace(4) %in 5318 %ext = sext <64 x i16> %load to <64 x i32> 5319 store <64 x i32> %ext, ptr addrspace(1) %out 5320 ret void 5321} 5322 5323define amdgpu_kernel void @constant_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5324; GCN-NOHSA-SI-LABEL: 
constant_zextload_i16_to_i64: 5325; GCN-NOHSA-SI: ; %bb.0: 5326; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5327; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5328; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5329; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5330; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5331; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5332; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5333; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5334; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5335; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5336; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5337; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5338; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5339; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5340; GCN-NOHSA-SI-NEXT: s_endpgm 5341; 5342; GCN-HSA-LABEL: constant_zextload_i16_to_i64: 5343; GCN-HSA: ; %bb.0: 5344; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5345; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5346; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5347; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5348; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 5349; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5350; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5351; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5352; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5353; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5354; GCN-HSA-NEXT: s_endpgm 5355; 5356; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64: 5357; GCN-NOHSA-VI: ; %bb.0: 5358; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5359; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, 0 5360; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5361; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 5362; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 5363; GCN-NOHSA-VI-NEXT: flat_load_ushort v2, v[0:1] 5364; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 5365; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 5366; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5367; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v2 5368; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5369; GCN-NOHSA-VI-NEXT: s_endpgm 5370; 5371; 
EG-LABEL: constant_zextload_i16_to_i64: 5372; EG: ; %bb.0: 5373; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5374; EG-NEXT: TEX 0 @6 5375; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5376; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5377; EG-NEXT: CF_END 5378; EG-NEXT: PAD 5379; EG-NEXT: Fetch clause starting at 6: 5380; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5381; EG-NEXT: ALU clause starting at 8: 5382; EG-NEXT: MOV * T0.X, KC0[2].Z, 5383; EG-NEXT: ALU clause starting at 9: 5384; EG-NEXT: MOV * T0.Y, 0.0, 5385; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5386; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5387; 5388; GFX12-LABEL: constant_zextload_i16_to_i64: 5389; GFX12: ; %bb.0: 5390; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5391; GFX12-NEXT: v_mov_b32_e32 v1, 0 5392; GFX12-NEXT: s_wait_kmcnt 0x0 5393; GFX12-NEXT: global_load_u16 v0, v1, s[2:3] 5394; GFX12-NEXT: s_wait_loadcnt 0x0 5395; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 5396; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1] 5397; GFX12-NEXT: s_endpgm 5398 %a = load i16, ptr addrspace(4) %in 5399 %ext = zext i16 %a to i64 5400 store i64 %ext, ptr addrspace(1) %out 5401 ret void 5402} 5403 5404; FIXME: Need to optimize this sequence to avoid extra bfe: 5405; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64 5406; t31: i64 = any_extend t28 5407; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16 5408; TODO: These could be expanded earlier using ASHR 15 5409define amdgpu_kernel void @constant_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5410; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64: 5411; GCN-NOHSA-SI: ; %bb.0: 5412; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5413; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5414; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5415; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5416; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5417; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5418; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5419; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5420; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5421; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5422; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5423; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5424; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5425; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5426; GCN-NOHSA-SI-NEXT: s_endpgm 5427; 5428; GCN-HSA-LABEL: constant_sextload_i16_to_i64: 5429; GCN-HSA: ; %bb.0: 5430; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5431; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5432; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5433; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5434; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 5435; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5436; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5437; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5438; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5439; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5440; GCN-HSA-NEXT: s_endpgm 5441; 5442; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64: 5443; GCN-NOHSA-VI: ; %bb.0: 5444; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5445; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5446; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 5447; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 5448; GCN-NOHSA-VI-NEXT: flat_load_ushort v2, v[0:1] 5449; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 5450; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 5451; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5452; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v2, 0, 16 5453; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5454; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5455; GCN-NOHSA-VI-NEXT: s_endpgm 5456; 5457; EG-LABEL: constant_sextload_i16_to_i64: 5458; EG: ; %bb.0: 5459; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5460; EG-NEXT: TEX 0 @6 5461; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5462; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5463; EG-NEXT: CF_END 5464; EG-NEXT: PAD 5465; EG-NEXT: Fetch clause starting at 6: 5466; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 
5467; EG-NEXT: ALU clause starting at 8: 5468; EG-NEXT: MOV * T0.X, KC0[2].Z, 5469; EG-NEXT: ALU clause starting at 9: 5470; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5471; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5472; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5473; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5474; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5475; 5476; GFX12-LABEL: constant_sextload_i16_to_i64: 5477; GFX12: ; %bb.0: 5478; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5479; GFX12-NEXT: v_mov_b32_e32 v2, 0 5480; GFX12-NEXT: s_wait_kmcnt 0x0 5481; GFX12-NEXT: global_load_u16 v0, v2, s[2:3] 5482; GFX12-NEXT: s_wait_loadcnt 0x0 5483; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16 5484; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 5485; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5486; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 5487; GFX12-NEXT: s_endpgm 5488 %a = load i16, ptr addrspace(4) %in 5489 %ext = sext i16 %a to i64 5490 store i64 %ext, ptr addrspace(1) %out 5491 ret void 5492} 5493 5494define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5495; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64: 5496; GCN-NOHSA-SI: ; %bb.0: 5497; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5498; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5499; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5500; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5501; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5502; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5503; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5504; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5505; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5506; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5507; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5508; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5509; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5510; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5511; GCN-NOHSA-SI-NEXT: s_endpgm 5512; 5513; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64: 5514; GCN-HSA: ; 
%bb.0: 5515; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5516; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5517; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5518; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5519; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 5520; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5521; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5522; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5523; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5524; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5525; GCN-HSA-NEXT: s_endpgm 5526; 5527; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64: 5528; GCN-NOHSA-VI: ; %bb.0: 5529; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5530; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, 0 5531; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5532; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 5533; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 5534; GCN-NOHSA-VI-NEXT: flat_load_ushort v2, v[0:1] 5535; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 5536; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 5537; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5538; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v2 5539; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5540; GCN-NOHSA-VI-NEXT: s_endpgm 5541; 5542; EG-LABEL: constant_zextload_v1i16_to_v1i64: 5543; EG: ; %bb.0: 5544; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5545; EG-NEXT: TEX 0 @6 5546; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5547; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5548; EG-NEXT: CF_END 5549; EG-NEXT: PAD 5550; EG-NEXT: Fetch clause starting at 6: 5551; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5552; EG-NEXT: ALU clause starting at 8: 5553; EG-NEXT: MOV * T0.X, KC0[2].Z, 5554; EG-NEXT: ALU clause starting at 9: 5555; EG-NEXT: MOV * T0.Y, 0.0, 5556; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5557; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5558; 5559; GFX12-LABEL: constant_zextload_v1i16_to_v1i64: 5560; GFX12: ; %bb.0: 5561; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5562; GFX12-NEXT: v_mov_b32_e32 v1, 0 5563; GFX12-NEXT: s_wait_kmcnt 0x0 5564; 
GFX12-NEXT: global_load_u16 v0, v1, s[2:3] 5565; GFX12-NEXT: s_wait_loadcnt 0x0 5566; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 5567; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1] 5568; GFX12-NEXT: s_endpgm 5569 %load = load <1 x i16>, ptr addrspace(4) %in 5570 %ext = zext <1 x i16> %load to <1 x i64> 5571 store <1 x i64> %ext, ptr addrspace(1) %out 5572 ret void 5573} 5574 5575define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5576; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64: 5577; GCN-NOHSA-SI: ; %bb.0: 5578; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5579; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5580; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5581; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5582; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5583; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5584; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5585; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5586; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5587; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5588; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5589; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5590; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5591; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5592; GCN-NOHSA-SI-NEXT: s_endpgm 5593; 5594; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64: 5595; GCN-HSA: ; %bb.0: 5596; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5597; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5598; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5599; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5600; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 5601; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5602; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5603; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5604; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5605; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5606; GCN-HSA-NEXT: s_endpgm 5607; 5608; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64: 5609; GCN-NOHSA-VI: ; %bb.0: 5610; GCN-NOHSA-VI-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x24 5611; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5612; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 5613; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 5614; GCN-NOHSA-VI-NEXT: flat_load_ushort v2, v[0:1] 5615; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 5616; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 5617; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5618; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v2, 0, 16 5619; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5620; GCN-NOHSA-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5621; GCN-NOHSA-VI-NEXT: s_endpgm 5622; 5623; EG-LABEL: constant_sextload_v1i16_to_v1i64: 5624; EG: ; %bb.0: 5625; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5626; EG-NEXT: TEX 0 @6 5627; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5628; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5629; EG-NEXT: CF_END 5630; EG-NEXT: PAD 5631; EG-NEXT: Fetch clause starting at 6: 5632; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5633; EG-NEXT: ALU clause starting at 8: 5634; EG-NEXT: MOV * T0.X, KC0[2].Z, 5635; EG-NEXT: ALU clause starting at 9: 5636; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5637; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5638; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5639; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5640; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5641; 5642; GFX12-LABEL: constant_sextload_v1i16_to_v1i64: 5643; GFX12: ; %bb.0: 5644; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5645; GFX12-NEXT: v_mov_b32_e32 v2, 0 5646; GFX12-NEXT: s_wait_kmcnt 0x0 5647; GFX12-NEXT: global_load_u16 v0, v2, s[2:3] 5648; GFX12-NEXT: s_wait_loadcnt 0x0 5649; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16 5650; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 5651; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5652; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 5653; GFX12-NEXT: s_endpgm 5654 %load = load <1 x i16>, ptr addrspace(4) %in 5655 %ext = sext <1 x i16> %load to <1 x i64> 5656 store <1 x i64> %ext, ptr addrspace(1) %out 5657 ret void 5658} 5659 5660define amdgpu_kernel void 
@constant_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5661; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64: 5662; GCN-NOHSA-SI: ; %bb.0: 5663; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5664; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5665; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 5666; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5667; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5668; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5669; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 5670; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 5671; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5672; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5673; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 5674; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5675; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5676; GCN-NOHSA-SI-NEXT: s_endpgm 5677; 5678; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64: 5679; GCN-HSA: ; %bb.0: 5680; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5681; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5682; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5683; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5684; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 5685; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5686; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5687; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5688; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 5689; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 5690; GCN-HSA-NEXT: v_mov_b32_e32 v0, s1 5691; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5692; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5693; GCN-HSA-NEXT: s_endpgm 5694; 5695; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64: 5696; GCN-NOHSA-VI: ; %bb.0: 5697; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5698; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5699; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5700; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5701; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 5702; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 5703; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 5704; GCN-NOHSA-VI-NEXT: 
s_waitcnt lgkmcnt(0) 5705; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s2, 16 5706; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s2, 0xffff 5707; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 5708; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 5709; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5710; GCN-NOHSA-VI-NEXT: s_endpgm 5711; 5712; EG-LABEL: constant_zextload_v2i16_to_v2i64: 5713; EG: ; %bb.0: 5714; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5715; EG-NEXT: TEX 0 @6 5716; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 5717; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5718; EG-NEXT: CF_END 5719; EG-NEXT: PAD 5720; EG-NEXT: Fetch clause starting at 6: 5721; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5722; EG-NEXT: ALU clause starting at 8: 5723; EG-NEXT: MOV * T4.X, KC0[2].Z, 5724; EG-NEXT: ALU clause starting at 9: 5725; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 5726; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5727; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 5728; EG-NEXT: MOV T4.Y, 0.0, 5729; EG-NEXT: MOV T4.W, 0.0, 5730; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5731; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 5732; 5733; GFX12-LABEL: constant_zextload_v2i16_to_v2i64: 5734; GFX12: ; %bb.0: 5735; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5736; GFX12-NEXT: s_wait_kmcnt 0x0 5737; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 5738; GFX12-NEXT: s_wait_kmcnt 0x0 5739; GFX12-NEXT: s_and_b32 s3, 0xffff, s2 5740; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 5741; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3 5742; GFX12-NEXT: s_pack_hl_b32_b16 s2, s2, 0 5743; GFX12-NEXT: s_wait_alu 0xfffe 5744; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1 5745; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 5746; GFX12-NEXT: s_endpgm 5747 %load = load <2 x i16>, ptr addrspace(4) %in 5748 %ext = zext <2 x i16> %load to <2 x i64> 5749 store <2 x i64> %ext, ptr addrspace(1) %out 5750 ret void 5751} 5752 5753define amdgpu_kernel void 
@constant_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5754; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64: 5755; GCN-NOHSA-SI: ; %bb.0: 5756; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5757; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5758; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 5759; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5760; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5761; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 5762; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000 5763; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5764; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5765; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 5766; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 5767; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5768; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5769; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5770; GCN-NOHSA-SI-NEXT: s_endpgm 5771; 5772; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64: 5773; GCN-HSA: ; %bb.0: 5774; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5775; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5776; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 5777; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5778; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5779; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5780; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 5781; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5782; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 5783; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5784; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5785; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5786; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5787; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5788; GCN-HSA-NEXT: s_endpgm 5789; 5790; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64: 5791; GCN-NOHSA-VI: ; %bb.0: 5792; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5793; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5794; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 5795; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 5796; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v5, s1 5797; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5798; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x100000 5799; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16 5800; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5801; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 5802; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 5803; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 5804; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 5805; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5806; GCN-NOHSA-VI-NEXT: s_endpgm 5807; 5808; EG-LABEL: constant_sextload_v2i16_to_v2i64: 5809; EG: ; %bb.0: 5810; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5811; EG-NEXT: TEX 0 @6 5812; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5813; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5814; EG-NEXT: CF_END 5815; EG-NEXT: PAD 5816; EG-NEXT: Fetch clause starting at 6: 5817; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5818; EG-NEXT: ALU clause starting at 8: 5819; EG-NEXT: MOV * T4.X, KC0[2].Z, 5820; EG-NEXT: ALU clause starting at 9: 5821; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 5822; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5823; EG-NEXT: ASHR * T4.Z, T4.X, literal.x, 5824; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5825; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 5826; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5827; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5828; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 5829; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5830; 5831; GFX12-LABEL: constant_sextload_v2i16_to_v2i64: 5832; GFX12: ; %bb.0: 5833; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5834; GFX12-NEXT: s_wait_kmcnt 0x0 5835; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 5836; GFX12-NEXT: s_wait_kmcnt 0x0 5837; GFX12-NEXT: s_lshr_b32 s4, s2, 16 5838; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5839; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5840; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s3 5841; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s5 5842; GFX12-NEXT: v_mov_b32_e32 
v2, s4 5843; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 5844; GFX12-NEXT: s_endpgm 5845 %load = load <2 x i16>, ptr addrspace(4) %in 5846 %ext = sext <2 x i16> %load to <2 x i64> 5847 store <2 x i64> %ext, ptr addrspace(1) %out 5848 ret void 5849} 5850 5851define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 5852; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64: 5853; GCN-NOHSA-SI: ; %bb.0: 5854; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5855; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5856; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 5857; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5858; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5859; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5860; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5861; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5862; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16 5863; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 5864; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 5865; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 5866; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 5867; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5868; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5869; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5870; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 5871; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 5872; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5873; GCN-NOHSA-SI-NEXT: s_endpgm 5874; 5875; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64: 5876; GCN-HSA: ; %bb.0: 5877; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5878; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5879; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5880; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5881; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 5882; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5883; GCN-HSA-NEXT: s_lshr_b32 s4, s3, 16 5884; GCN-HSA-NEXT: s_lshr_b32 s5, s2, 16 5885; GCN-HSA-NEXT: s_and_b32 s6, s2, 0xffff 5886; GCN-HSA-NEXT: s_and_b32 s2, s3, 0xffff 5887; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5888; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5889; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5890; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5891; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 5892; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5893; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5894; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5895; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5896; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 5897; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5898; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5899; GCN-HSA-NEXT: s_endpgm 5900; 5901; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64: 5902; GCN-NOHSA-VI: ; %bb.0: 5903; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5904; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5905; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5906; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5907; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 5908; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5909; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s2, 16 5910; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s2, 0xffff 5911; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s3, 16 5912; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 5913; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 5914; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 5915; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3 5916; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 5917; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 5918; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 5919; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5920; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 5921; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 5922; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5923; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 5924; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5925; GCN-NOHSA-VI-NEXT: s_endpgm 5926; 5927; EG-LABEL: constant_zextload_v4i16_to_v4i64: 5928; EG: ; %bb.0: 5929; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5930; EG-NEXT: TEX 0 @6 5931; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 5932; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T8.X, 0 
5933; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T7.X, 1 5934; EG-NEXT: CF_END 5935; EG-NEXT: Fetch clause starting at 6: 5936; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5937; EG-NEXT: ALU clause starting at 8: 5938; EG-NEXT: MOV * T5.X, KC0[2].Z, 5939; EG-NEXT: ALU clause starting at 9: 5940; EG-NEXT: LSHR * T6.Z, T5.Y, literal.x, 5941; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5942; EG-NEXT: AND_INT T6.X, T5.Y, literal.x, 5943; EG-NEXT: MOV T6.Y, 0.0, 5944; EG-NEXT: LSHR T5.Z, T5.X, literal.y, 5945; EG-NEXT: AND_INT * T5.X, T5.X, literal.x, 5946; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5947; EG-NEXT: MOV T5.Y, 0.0, 5948; EG-NEXT: MOV T6.W, 0.0, 5949; EG-NEXT: MOV * T5.W, 0.0, 5950; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 5951; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5952; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5953; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 5954; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5955; 5956; GFX12-LABEL: constant_zextload_v4i16_to_v4i64: 5957; GFX12: ; %bb.0: 5958; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5959; GFX12-NEXT: s_wait_kmcnt 0x0 5960; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 5961; GFX12-NEXT: s_wait_kmcnt 0x0 5962; GFX12-NEXT: s_and_b32 s4, 0xffff, s2 5963; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5964; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4 5965; GFX12-NEXT: s_pack_hl_b32_b16 s2, s2, 0 5966; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 5967; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1 5968; GFX12-NEXT: s_pack_hl_b32_b16 s2, s3, 0 5969; GFX12-NEXT: s_and_b32 s3, 0xffff, s3 5970; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 5971; GFX12-NEXT: s_wait_alu 0xfffe 5972; GFX12-NEXT: v_mov_b32_e32 v0, s3 5973; GFX12-NEXT: v_mov_b32_e32 v2, s2 5974; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 5975; GFX12-NEXT: s_endpgm 5976 %load = load <4 x i16>, ptr addrspace(4) %in 5977 %ext = zext <4 x i16> %load to <4 x i64> 5978 store <4 x i64> 
%ext, ptr addrspace(1) %out
  ret void
}

; Loads a <4 x i16> vector through the addrspace(4) pointer %in, sign-extends
; every element to i64 (sext) and stores the <4 x i64> result through the
; addrspace(1) pointer %out.
; The per-target assertions inside the function are autogenerated (see the note
; on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s4, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s4, s3
; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[2:3], 0x100000
; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 48
; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s3
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s3, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v4i16_to_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: ASHR * T5.W, T5.X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x,
; EG-NEXT: ASHR T5.Z, T5.X, literal.y,
; EG-NEXT: ASHR * T7.W, T5.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T5.X, T5.X, 0.0, literal.x,
; EG-NEXT: ASHR * T7.Z, T5.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T7.X, T5.Y, 0.0, literal.x,
; EG-NEXT: ASHR T5.Y, PV.X, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: LSHR T8.X, PV.W, literal.x,
; EG-NEXT: ASHR * T7.Y, PV.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
;
; GFX12-LABEL: constant_sextload_v4i16_to_v4i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s6, s3
; GFX12-NEXT: s_lshr_b32 s8, s3, 16
; GFX12-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x100000
; GFX12-NEXT: s_lshr_b32 s2, s2, 16
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s5
; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v5, s7
; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s9
; GFX12-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s2
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <4 x i16>, ptr addrspace(4) %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; Loads an <8 x i16> vector through the addrspace(4) pointer %in, zero-extends
; every element to i64 (zext) and stores the <8 x i64> result through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s5, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s7, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s6, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s4, 16
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_lshr_b32 s8, s5, 16
; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16
; GCN-HSA-NEXT: s_lshr_b32 s9, s6, 16
; GCN-HSA-NEXT: s_lshr_b32 s10, s4, 16
; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff
; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff
; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff
; GCN-HSA-NEXT: s_and_b32 s3, s7, 0xffff
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s4, 16
; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s5, 16
; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s6, 16
; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s7, 16
; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s7, 0xffff
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2
; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 48
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3
; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2
; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 32
; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2
; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v8i16_to_v8i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHR * T8.Z, T7.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT T8.X, T7.W, literal.x,
; EG-NEXT: MOV T8.Y, 0.0,
; EG-NEXT: LSHR T9.Z, T7.Z, literal.y,
; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: MOV T9.Y, 0.0,
; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT T10.X, T7.Y, literal.x,
; EG-NEXT: MOV T10.Y, 0.0,
; EG-NEXT: LSHR T7.Z, T7.X, literal.y,
; EG-NEXT: AND_INT * T7.X, T7.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: MOV T7.Y, 0.0,
; EG-NEXT: MOV T8.W, 0.0,
; EG-NEXT: MOV * T9.W, 0.0,
; EG-NEXT: MOV T10.W, 0.0,
; EG-NEXT: MOV * T7.W, 0.0,
; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T12.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T13.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR * T14.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v8i16_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_and_b32 s2, 0xffff, s7
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX12-NEXT: s_pack_hl_b32_b16 s3, s7, 0
; GFX12-NEXT: s_pack_hl_b32_b16 s2, s6, 0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT: s_and_b32 s3, 0xffff, s6
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s2
; GFX12-NEXT: s_pack_hl_b32_b16 s2, s5, 0
; GFX12-NEXT: s_and_b32 s3, 0xffff, s5
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s2
; GFX12-NEXT: s_pack_hl_b32_b16 s2, s4, 0
; GFX12-NEXT: s_and_b32 s3, 0xffff, s4
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s2
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <8 x i16>, ptr addrspace(4) %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; Loads an <8 x i16> vector through the addrspace(4) pointer %in, sign-extends
; every element to i64 (sext) and stores the <8 x i64> result through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s5
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[4:5], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s16
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s17
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s14
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s15
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s2, s7
; GCN-HSA-NEXT: s_mov_b32 s8, s5
; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16
; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48
; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48
; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s5
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 48
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 32
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 16
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v8i16_to_v8i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T9.X, PV.W, literal.x,
; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
; EG-NEXT: ASHR * T10.W, T7.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T11.X, PV.W, literal.x,
; EG-NEXT: ASHR T10.Z, T7.X, literal.y,
; EG-NEXT: ASHR * T12.W, T7.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x,
; EG-NEXT: ASHR T12.Z, T7.Y, literal.x,
; EG-NEXT: ASHR * T13.W, T7.Z, literal.y,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x,
; EG-NEXT: ASHR T10.Y, PV.X, literal.y,
; EG-NEXT: ASHR T13.Z, T7.Z, literal.x,
; EG-NEXT: ASHR * T14.W, T7.W, literal.y,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x,
; EG-NEXT: ASHR T12.Y, PV.X, literal.y,
; EG-NEXT: ASHR * T14.Z, T7.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x,
; EG-NEXT: ASHR T13.Y, PV.X, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T7.X, PV.W, literal.x,
; EG-NEXT: ASHR * T14.Y, PV.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
;
; GFX12-LABEL: constant_sextload_v8i16_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s14, s7
; GFX12-NEXT: s_lshr_b32 s16, s7, 16
; GFX12-NEXT: s_bfe_i64 s[12:13], s[6:7], 0x100000
; GFX12-NEXT: s_lshr_b32 s6, s6, 16
; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
; GFX12-NEXT: s_mov_b32 s8, s5
; GFX12-NEXT: s_lshr_b32 s10, s5, 16
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
; GFX12-NEXT: s_lshr_b32 s4, s4, 16
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s13
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GFX12-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v9, s15
; GFX12-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v11, s17
; GFX12-NEXT: v_dual_mov_b32 v10, s16 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GFX12-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v5, s3
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v13, s9
; GFX12-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s11
; GFX12-NEXT: v_dual_mov_b32 v14, s10 :: v_dual_mov_b32 v7, s5
; GFX12-NEXT: v_mov_b32_e32 v6, s4
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <8 x i16>, ptr addrspace(4) %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s1, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s3, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s7, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s5, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s4, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s6, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s2, 16
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s0, 16
; GCN-NOHSA-SI-NEXT: s_and_b32 s0, s0, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s2, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s1, s1, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s3, s3, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff
; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s14
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s3
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3],
off, s[8:11], 0 offset:48 6623; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6624; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s1 6625; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 6626; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 6627; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6628; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 6629; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16 6630; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64 6631; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6632; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 6633; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 6634; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96 6635; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6636; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2 6637; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 6638; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 6639; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6640; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0 6641; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 6642; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 6643; GCN-NOHSA-SI-NEXT: s_endpgm 6644; 6645; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64: 6646; GCN-HSA: ; %bb.0: 6647; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6648; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 6649; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 6650; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6651; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 6652; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6653; GCN-HSA-NEXT: s_lshr_b32 s12, s5, 16 6654; GCN-HSA-NEXT: s_lshr_b32 s13, s7, 16 6655; GCN-HSA-NEXT: s_lshr_b32 s14, s11, 16 6656; GCN-HSA-NEXT: s_lshr_b32 s2, s9, 16 6657; GCN-HSA-NEXT: s_lshr_b32 s15, s8, 16 6658; GCN-HSA-NEXT: s_lshr_b32 s16, s10, 16 6659; GCN-HSA-NEXT: s_lshr_b32 s17, s6, 16 6660; GCN-HSA-NEXT: s_lshr_b32 s18, s4, 16 6661; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 6662; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 6663; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 6664; 
GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 6665; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 6666; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 6667; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 6668; GCN-HSA-NEXT: s_and_b32 s3, s9, 0xffff 6669; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 6670; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 6671; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 6672; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6673; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6674; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6675; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 6676; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6677; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6678; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6679; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6680; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6681; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 6682; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 6683; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6684; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6685; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6686; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6687; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6688; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 6689; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 6690; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6691; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6692; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6693; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6694; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 6695; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 6696; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 6697; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6698; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6699; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6700; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6701; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 6702; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 6703; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 6704; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6705; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6706; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6707; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6708; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6709; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 6710; GCN-HSA-NEXT: 
v_mov_b32_e32 v2, s16 6711; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6712; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6713; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6714; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 6715; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17 6716; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6717; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6718; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6719; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 6720; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 6721; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6722; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6723; GCN-HSA-NEXT: s_endpgm 6724; 6725; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64: 6726; GCN-NOHSA-VI: ; %bb.0: 6727; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 6728; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 6729; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 6730; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6731; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 6732; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6733; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s4, 16 6734; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 6735; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s5, 16 6736; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 6737; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s6, 16 6738; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 6739; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s7, 16 6740; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 6741; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s10, 16 6742; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 6743; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s11, 16 6744; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 6745; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s8, 16 6746; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 6747; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s9, 16 6748; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s9, 0xffff 6749; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 6750; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 0x50 6751; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3 6752; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6753; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v5, s3 6754; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6755; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 64 6756; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6757; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6758; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 6759; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6760; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 0x70 6761; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 6762; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18 6763; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6764; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6765; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 6766; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6767; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 0x60 6768; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s11 6769; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 6770; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6771; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6772; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 6773; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6774; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 48 6775; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 6776; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 6777; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6778; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6779; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 6780; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6781; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 32 6782; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s7 6783; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 6784; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6785; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6786; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 6787; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6788; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16 6789; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 6790; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 6791; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0 6792; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6793; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 6794; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 6795; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 6796; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 6797; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6798; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 6799; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 6800; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 6801; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 6802; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6803; GCN-NOHSA-VI-NEXT: s_endpgm 6804; 6805; EG-LABEL: constant_zextload_v16i16_to_v16i64: 6806; EG: ; %bb.0: 6807; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6808; EG-NEXT: TEX 1 @12 6809; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 6810; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 6811; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 6812; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 6813; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 6814; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 6815; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 6816; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 6817; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 6818; EG-NEXT: CF_END 6819; EG-NEXT: Fetch clause starting at 12: 6820; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 6821; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 6822; EG-NEXT: ALU clause starting at 16: 6823; EG-NEXT: MOV * T11.X, KC0[2].Z, 6824; EG-NEXT: ALU clause starting at 17: 6825; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 6826; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6827; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 6828; EG-NEXT: MOV T13.Y, 0.0, 6829; EG-NEXT: LSHR T14.Z, T12.Z, literal.y, 6830; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x, 6831; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6832; EG-NEXT: MOV T14.Y, 0.0, 6833; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 6834; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6835; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 6836; EG-NEXT: MOV T15.Y, 0.0, 6837; EG-NEXT: LSHR T12.Z, T12.X, literal.y, 6838; EG-NEXT: 
AND_INT * T12.X, T12.X, literal.x, 6839; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6840; EG-NEXT: MOV T12.Y, 0.0, 6841; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 6842; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6843; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 6844; EG-NEXT: MOV T16.Y, 0.0, 6845; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 6846; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 6847; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6848; EG-NEXT: MOV T17.Y, 0.0, 6849; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 6850; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6851; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 6852; EG-NEXT: MOV T18.Y, 0.0, 6853; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 6854; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 6855; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6856; EG-NEXT: MOV T11.Y, 0.0, 6857; EG-NEXT: MOV T13.W, 0.0, 6858; EG-NEXT: MOV * T14.W, 0.0, 6859; EG-NEXT: MOV T15.W, 0.0, 6860; EG-NEXT: MOV * T12.W, 0.0, 6861; EG-NEXT: MOV T16.W, 0.0, 6862; EG-NEXT: MOV * T17.W, 0.0, 6863; EG-NEXT: MOV T18.W, 0.0, 6864; EG-NEXT: MOV * T11.W, 0.0, 6865; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 6866; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6867; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6868; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6869; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6870; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6871; EG-NEXT: LSHR T21.X, PV.W, literal.x, 6872; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6873; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6874; EG-NEXT: LSHR T22.X, PV.W, literal.x, 6875; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6876; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6877; EG-NEXT: LSHR T23.X, PV.W, literal.x, 6878; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6879; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6880; EG-NEXT: LSHR T24.X, PV.W, literal.x, 6881; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6882; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6883; EG-NEXT: LSHR T25.X, PV.W, 
literal.x, 6884; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6885; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6886; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 6887; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6888; 6889; GFX12-LABEL: constant_zextload_v16i16_to_v16i64: 6890; GFX12: ; %bb.0: 6891; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 6892; GFX12-NEXT: s_wait_kmcnt 0x0 6893; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 6894; GFX12-NEXT: s_wait_kmcnt 0x0 6895; GFX12-NEXT: s_and_b32 s10, s5, 0xffff 6896; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6897; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s10 6898; GFX12-NEXT: s_lshr_b32 s5, s5, 16 6899; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 6900; GFX12-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1 6901; GFX12-NEXT: s_lshr_b32 s5, s4, 16 6902; GFX12-NEXT: s_and_b32 s4, s4, 0xffff 6903; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:80 6904; GFX12-NEXT: s_wait_alu 0xfffe 6905; GFX12-NEXT: v_mov_b32_e32 v0, s4 6906; GFX12-NEXT: v_mov_b32_e32 v2, s5 6907; GFX12-NEXT: s_lshr_b32 s4, s7, 16 6908; GFX12-NEXT: s_and_b32 s5, s7, 0xffff 6909; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:64 6910; GFX12-NEXT: s_wait_alu 0xfffe 6911; GFX12-NEXT: v_mov_b32_e32 v0, s5 6912; GFX12-NEXT: v_mov_b32_e32 v2, s4 6913; GFX12-NEXT: s_lshr_b32 s4, s6, 16 6914; GFX12-NEXT: s_and_b32 s5, s6, 0xffff 6915; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:112 6916; GFX12-NEXT: s_wait_alu 0xfffe 6917; GFX12-NEXT: v_mov_b32_e32 v0, s5 6918; GFX12-NEXT: v_mov_b32_e32 v2, s4 6919; GFX12-NEXT: s_lshr_b32 s4, s3, 16 6920; GFX12-NEXT: s_and_b32 s3, s3, 0xffff 6921; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:96 6922; GFX12-NEXT: v_mov_b32_e32 v0, s3 6923; GFX12-NEXT: s_wait_alu 0xfffe 6924; GFX12-NEXT: v_mov_b32_e32 v2, s4 6925; GFX12-NEXT: s_lshr_b32 s3, s2, 16 6926; GFX12-NEXT: s_and_b32 s2, s2, 0xffff 6927; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] 
offset:48 6928; GFX12-NEXT: s_wait_alu 0xfffe 6929; GFX12-NEXT: v_mov_b32_e32 v0, s2 6930; GFX12-NEXT: v_mov_b32_e32 v2, s3 6931; GFX12-NEXT: s_lshr_b32 s2, s1, 16 6932; GFX12-NEXT: s_and_b32 s1, s1, 0xffff 6933; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:32 6934; GFX12-NEXT: v_mov_b32_e32 v0, s1 6935; GFX12-NEXT: s_wait_alu 0xfffe 6936; GFX12-NEXT: v_mov_b32_e32 v2, s2 6937; GFX12-NEXT: s_lshr_b32 s1, s0, 16 6938; GFX12-NEXT: s_and_b32 s0, s0, 0xffff 6939; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] offset:16 6940; GFX12-NEXT: s_wait_alu 0xfffe 6941; GFX12-NEXT: v_mov_b32_e32 v0, s0 6942; GFX12-NEXT: v_mov_b32_e32 v2, s1 6943; GFX12-NEXT: global_store_b128 v1, v[0:3], s[8:9] 6944; GFX12-NEXT: s_endpgm 6945 %load = load <16 x i16>, ptr addrspace(4) %in 6946 %ext = zext <16 x i16> %load to <16 x i64> 6947 store <16 x i64> %ext, ptr addrspace(1) %out 6948 ret void 6949} 6950 ; Test kernel - loads <16 x i16> through the addrspace(4) pointer %in, sign-extends every element to i64, and stores the <16 x i64> result through the addrspace(1) pointer %out. The per-target check lines that follow are autogenerated by utils/update_llc_test_checks.py and should be regenerated instead of edited by hand. 6951define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 6952; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64: 6953; GCN-NOHSA-SI: ; %bb.0: 6954; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 6955; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6956; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 6957; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6958; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6959; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6960; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s11 6961; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, s9 6962; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s7 6963; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s5 6964; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16 6965; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s8, 16 6966; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16 6967; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s4, 16 6968; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 6969; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 6970; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000 6971; GCN-NOHSA-SI-NEXT: s_bfe_i64 
s[36:37], s[10:11], 0x100000 6972; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 6973; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 6974; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 6975; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 6976; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 6977; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 6978; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 6979; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 6980; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 6981; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 6982; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 6983; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 6984; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 6985; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 6986; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 6987; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 6988; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6989; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6990; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 6991; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 6992; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 6993; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 6994; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6995; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6996; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 6997; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 6998; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 6999; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 7000; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 7001; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7002; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 7003; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 7004; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 7005; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 7006; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7007; GCN-NOHSA-SI-NEXT: s_waitcnt 
expcnt(0) 7008; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 7009; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s37 7010; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s34 7011; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35 7012; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30 7013; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31 7014; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28 7015; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29 7016; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 7017; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 7018; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 7019; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22 7020; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23 7021; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 7022; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s24 7023; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s25 7024; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 7025; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s26 7026; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27 7027; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 7028; GCN-NOHSA-SI-NEXT: s_endpgm 7029; 7030; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64: 7031; GCN-HSA: ; %bb.0: 7032; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 7033; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7034; GCN-HSA-NEXT: s_load_dwordx8 s[12:19], s[2:3], 0x0 7035; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7036; GCN-HSA-NEXT: s_mov_b32 s6, s19 7037; GCN-HSA-NEXT: s_mov_b32 s10, s17 7038; GCN-HSA-NEXT: s_mov_b32 s20, s15 7039; GCN-HSA-NEXT: s_mov_b32 s22, s13 7040; GCN-HSA-NEXT: s_lshr_b32 s24, s18, 16 7041; GCN-HSA-NEXT: s_lshr_b32 s26, s16, 16 7042; GCN-HSA-NEXT: s_lshr_b32 s28, s14, 16 7043; GCN-HSA-NEXT: s_lshr_b32 s30, s12, 16 7044; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[18:19], 0x100000 7045; GCN-HSA-NEXT: s_ashr_i64 s[18:19], s[18:19], 48 7046; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 7047; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[12:13], 0x100000 7048; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[14:15], 
0x100000 7049; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[16:17], 0x100000 7050; GCN-HSA-NEXT: s_ashr_i64 s[12:13], s[12:13], 48 7051; GCN-HSA-NEXT: s_ashr_i64 s[14:15], s[14:15], 48 7052; GCN-HSA-NEXT: s_ashr_i64 s[16:17], s[16:17], 48 7053; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 7054; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7 7055; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 7056; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19 7057; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[30:31], 0x100000 7058; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[28:29], 0x100000 7059; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 7060; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 7061; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 7062; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 7063; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 7064; GCN-HSA-NEXT: s_add_u32 s28, s0, 0x70 7065; GCN-HSA-NEXT: s_addc_u32 s29, s1, 0 7066; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 7067; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x50 7068; GCN-HSA-NEXT: v_mov_b32_e32 v8, s28 7069; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 7070; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7071; GCN-HSA-NEXT: v_mov_b32_e32 v10, s10 7072; GCN-HSA-NEXT: v_mov_b32_e32 v9, s29 7073; GCN-HSA-NEXT: v_mov_b32_e32 v11, s11 7074; GCN-HSA-NEXT: s_add_u32 s10, s0, 48 7075; GCN-HSA-NEXT: v_mov_b32_e32 v6, s16 7076; GCN-HSA-NEXT: v_mov_b32_e32 v7, s17 7077; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 7078; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 7079; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7080; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 7081; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 7082; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 7083; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 7084; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 7085; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 7086; GCN-HSA-NEXT: s_add_u32 s10, s0, 16 7087; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7088; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7089; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 7090; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 7091; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 7092; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 7093; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 7094; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 7095; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x60 7096; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7097; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7098; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 7099; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34 7100; GCN-HSA-NEXT: v_mov_b32_e32 v1, s35 7101; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 7102; GCN-HSA-NEXT: v_mov_b32_e32 v3, s25 7103; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 7104; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7105; GCN-HSA-NEXT: s_nop 0 7106; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 7107; GCN-HSA-NEXT: s_add_u32 s8, s0, 64 7108; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 7109; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 7110; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 7111; GCN-HSA-NEXT: v_mov_b32_e32 v2, s26 7112; GCN-HSA-NEXT: v_mov_b32_e32 v3, s27 7113; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 7114; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7115; GCN-HSA-NEXT: s_nop 0 7116; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 7117; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 7118; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 7119; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 7120; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7121; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 7122; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19 7123; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7124; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7125; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 7126; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 7127; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 7128; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 7129; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 7130; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 7131; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7132; GCN-HSA-NEXT: s_endpgm 7133; 7134; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64: 7135; GCN-NOHSA-VI: ; %bb.0: 7136; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 7137; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7138; GCN-NOHSA-VI-NEXT: 
s_load_dwordx8 s[0:7], s[10:11], 0x0 7139; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7140; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, s1 7141; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s1, 16 7142; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x100000 7143; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16 7144; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[6:7], 0x100000 7145; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 7146; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 7147; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 7148; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[12:13], 0x100000 7149; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[18:19], 0x100000 7150; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x100000 7151; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, s3 7152; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 7153; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 7154; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x100000 7155; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s5 7156; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000 7157; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s0, 16 7158; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 7159; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s3, 16 7160; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[6:7], 0x100000 7161; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 7162; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[4:5], 0x100000 7163; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s5, 16 7164; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 7165; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 7166; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 7167; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 7168; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 7169; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s8, 0x50 7170; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 7171; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s9, 0 7172; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7173; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34 7174; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35 7175; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7176; 
GCN-NOHSA-VI-NEXT: s_add_u32 s4, s8, 64 7177; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7178; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s9, 0 7179; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7180; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 7181; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 7182; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 7183; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s31 7184; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7185; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s8, 0x70 7186; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7187; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s9, 0 7188; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7189; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 7190; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 7191; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 7192; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 7193; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7194; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s8, 0x60 7195; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7196; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s9, 0 7197; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7198; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 7199; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 7200; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24 7201; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25 7202; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7203; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7204; GCN-NOHSA-VI-NEXT: s_nop 0 7205; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 7206; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s8, 48 7207; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 7208; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s9, 0 7209; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 7210; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 7211; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s8, 32 7212; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 7213; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 7214; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s9, 0 7215; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7216; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 7217; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 7218; 
GCN-NOHSA-VI-NEXT: s_add_u32 s2, s8, 16 7219; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 7220; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 7221; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18 7222; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19 7223; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s9, 0 7224; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7225; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 7226; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 7227; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 7228; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 7229; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 7230; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 7231; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7232; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s8 7233; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 7234; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 7235; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 7236; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s1 7237; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s9 7238; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7239; GCN-NOHSA-VI-NEXT: s_endpgm 7240; 7241; EG-LABEL: constant_sextload_v16i16_to_v16i64: 7242; EG: ; %bb.0: 7243; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7244; EG-NEXT: TEX 1 @12 7245; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7246; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 7247; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 7248; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 7249; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 7250; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 7251; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 7252; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 7253; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 7254; EG-NEXT: CF_END 7255; EG-NEXT: Fetch clause starting at 12: 7256; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 7257; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 7258; EG-NEXT: ALU clause starting at 16: 7259; EG-NEXT: MOV * T11.X, KC0[2].Z, 7260; 
EG-NEXT: ALU clause starting at 17: 7261; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 7262; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7263; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7264; EG-NEXT: LSHR T14.X, PV.W, literal.x, 7265; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7266; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7267; EG-NEXT: LSHR T15.X, PV.W, literal.x, 7268; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7269; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7270; EG-NEXT: LSHR T16.X, PV.W, literal.x, 7271; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7272; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7273; EG-NEXT: LSHR T17.X, PV.W, literal.x, 7274; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7275; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7276; EG-NEXT: LSHR T18.X, PV.W, literal.x, 7277; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 7278; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 7279; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7280; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7281; EG-NEXT: LSHR T20.X, PV.W, literal.x, 7282; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 7283; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 7284; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7285; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7286; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x, 7287; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 7288; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 7289; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7290; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 7291; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7292; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 7293; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 7294; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7295; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x, 7296; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 7297; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 7298; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 7299; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7300; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 7301; EG-NEXT: ASHR T22.Y, PV.X, 
literal.y, 7302; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 7303; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 7304; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7305; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 7306; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 7307; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 7308; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 7309; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7310; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 7311; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 7312; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 7313; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 7314; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7315; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 7316; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 7317; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 7318; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7319; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x, 7320; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 7321; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7322; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7323; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7324; EG-NEXT: LSHR T12.X, PV.W, literal.x, 7325; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 7326; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7327; 7328; GFX12-LABEL: constant_sextload_v16i16_to_v16i64: 7329; GFX12: ; %bb.0: 7330; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 7331; GFX12-NEXT: s_wait_kmcnt 0x0 7332; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 7333; GFX12-NEXT: s_wait_kmcnt 0x0 7334; GFX12-NEXT: s_mov_b32 s30, s5 7335; GFX12-NEXT: s_lshr_b32 s34, s5, 16 7336; GFX12-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 7337; GFX12-NEXT: s_lshr_b32 s4, s4, 16 7338; GFX12-NEXT: s_bfe_i64 s[22:23], s[6:7], 0x100000 7339; GFX12-NEXT: s_mov_b32 s24, s7 7340; GFX12-NEXT: s_lshr_b32 s26, s7, 16 7341; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000 7342; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000 7343; GFX12-NEXT: s_lshr_b32 s6, s6, 16 7344; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 7345; GFX12-NEXT: 
v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s29 7346; GFX12-NEXT: s_mov_b32 s18, s3 7347; GFX12-NEXT: s_lshr_b32 s20, s3, 16 7348; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 7349; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 7350; GFX12-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v9, s31 7351; GFX12-NEXT: v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s35 7352; GFX12-NEXT: v_dual_mov_b32 v10, s34 :: v_dual_mov_b32 v3, s5 7353; GFX12-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x100000 7354; GFX12-NEXT: s_lshr_b32 s2, s2, 16 7355; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 7356; GFX12-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v5, s23 7357; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v13, s25 7358; GFX12-NEXT: s_mov_b32 s12, s1 7359; GFX12-NEXT: s_lshr_b32 s14, s1, 16 7360; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 7361; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 7362; GFX12-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v15, s27 7363; GFX12-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v7, s7 7364; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000 7365; GFX12-NEXT: s_lshr_b32 s0, s0, 16 7366; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 7367; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v17, s19 7368; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 7369; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 7370; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s21 7371; GFX12-NEXT: v_mov_b32_e32 v18, s20 7372; GFX12-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 7373; GFX12-NEXT: s_clause 0x1 7374; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:80 7375; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:64 7376; GFX12-NEXT: v_dual_mov_b32 v1, s17 :: v_dual_mov_b32 v0, s16 7377; GFX12-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v2, s2 7378; GFX12-NEXT: v_dual_mov_b32 v9, s13 :: v_dual_mov_b32 v8, s12 7379; GFX12-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v10, s14 7380; GFX12-NEXT: 
v_dual_mov_b32 v21, s11 :: v_dual_mov_b32 v20, s10 7381; GFX12-NEXT: v_dual_mov_b32 v23, s1 :: v_dual_mov_b32 v22, s0 7382; GFX12-NEXT: s_clause 0x5 7383; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:112 7384; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:96 7385; GFX12-NEXT: global_store_b128 v24, v[16:19], s[8:9] offset:48 7386; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:32 7387; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:16 7388; GFX12-NEXT: global_store_b128 v24, v[20:23], s[8:9] 7389; GFX12-NEXT: s_endpgm 7390 %load = load <16 x i16>, ptr addrspace(4) %in 7391 %ext = sext <16 x i16> %load to <16 x i64> 7392 store <16 x i64> %ext, ptr addrspace(1) %out 7393 ret void 7394} 7395 7396define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 7397; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64: 7398; GCN-NOHSA-SI: ; %bb.0: 7399; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9 7400; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 7401; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 7402; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 7403; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s1, 16 7404; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s3, 16 7405; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s5, 16 7406; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s7, 16 7407; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s9, 16 7408; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s11, 16 7409; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s13, 16 7410; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s15, 16 7411; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s14, 16 7412; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s12, 16 7413; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s10, 16 7414; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s8, 16 7415; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s6, 16 7416; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s4, 16 7417; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s2, 16 7418; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s0, 16 7419; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s0, 0xffff 7420; 
GCN-NOHSA-SI-NEXT: s_and_b32 s36, s2, 0xffff 7421; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 7422; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 7423; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 7424; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 7425; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, 0xffff 7426; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, 0xffff 7427; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s1, 0xffff 7428; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s3, 0xffff 7429; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 7430; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 7431; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 7432; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 7433; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 0xffff 7434; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, 0xffff 7435; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 7436; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 7437; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 7438; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 7439; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 7440; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 7441; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s15 7442; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s25 7443; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 7444; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7445; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s13 7446; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 7447; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 7448; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7449; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 7450; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s23 7451; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 7452; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7453; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 7454; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22 7455; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 7456; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7457; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 7458; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21 7459; 
GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 7460; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7461; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 7462; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 7463; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 7464; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7465; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 7466; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 7467; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 7468; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7469; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s37 7470; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 7471; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7472; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7473; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 7474; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26 7475; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 7476; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7477; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 7478; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27 7479; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 7480; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7481; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 7482; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s28 7483; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 7484; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7485; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 7486; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s29 7487; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 7488; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7489; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 7490; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s30 7491; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 7492; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7493; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 7494; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 7495; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 7496; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7497; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 7498; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s33 7499; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 7500; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7501; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35 7502; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s34 7503; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 7504; GCN-NOHSA-SI-NEXT: s_endpgm 7505; 7506; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64: 7507; GCN-HSA: ; %bb.0: 7508; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 7509; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 7510; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 7511; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7512; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 7513; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7514; GCN-HSA-NEXT: s_lshr_b32 s20, s1, 16 7515; GCN-HSA-NEXT: s_lshr_b32 s21, s3, 16 7516; GCN-HSA-NEXT: s_lshr_b32 s22, s5, 16 7517; GCN-HSA-NEXT: s_lshr_b32 s23, s7, 16 7518; GCN-HSA-NEXT: s_lshr_b32 s24, s9, 16 7519; GCN-HSA-NEXT: s_lshr_b32 s25, s11, 16 7520; GCN-HSA-NEXT: s_lshr_b32 s26, s13, 16 7521; GCN-HSA-NEXT: s_lshr_b32 s27, s15, 16 7522; GCN-HSA-NEXT: s_lshr_b32 s28, s14, 16 7523; GCN-HSA-NEXT: s_lshr_b32 s29, s12, 16 7524; GCN-HSA-NEXT: s_lshr_b32 s30, s10, 16 7525; GCN-HSA-NEXT: s_lshr_b32 s31, s8, 16 7526; GCN-HSA-NEXT: s_lshr_b32 s33, s6, 16 7527; GCN-HSA-NEXT: s_lshr_b32 s34, s4, 16 7528; GCN-HSA-NEXT: s_lshr_b32 s19, s2, 16 7529; GCN-HSA-NEXT: s_lshr_b32 s18, s0, 16 7530; GCN-HSA-NEXT: s_and_b32 s0, s0, 0xffff 7531; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 7532; GCN-HSA-NEXT: s_and_b32 s35, s4, 0xffff 7533; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 7534; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 7535; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 7536; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff 7537; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff 7538; GCN-HSA-NEXT: s_and_b32 s1, s1, 0xffff 7539; 
GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff 7540; GCN-HSA-NEXT: s_and_b32 s36, s5, 0xffff 7541; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 7542; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 7543; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 7544; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff 7545; GCN-HSA-NEXT: s_and_b32 s15, s15, 0xffff 7546; GCN-HSA-NEXT: s_add_u32 s4, s16, 0xf0 7547; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7548; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7549; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7550; GCN-HSA-NEXT: s_add_u32 s4, s16, 0xd0 7551; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7552; GCN-HSA-NEXT: v_mov_b32_e32 v7, s5 7553; GCN-HSA-NEXT: v_mov_b32_e32 v6, s4 7554; GCN-HSA-NEXT: s_add_u32 s4, s16, 0xb0 7555; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7556; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 7557; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 7558; GCN-HSA-NEXT: s_add_u32 s4, s16, 0x90 7559; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7560; GCN-HSA-NEXT: v_mov_b32_e32 v0, s15 7561; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 7562; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 7563; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7564; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 7565; GCN-HSA-NEXT: v_mov_b32_e32 v0, s13 7566; GCN-HSA-NEXT: v_mov_b32_e32 v2, s26 7567; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 7568; GCN-HSA-NEXT: s_add_u32 s4, s16, 0x70 7569; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 7570; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25 7571; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 7572; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7573; GCN-HSA-NEXT: v_mov_b32_e32 v0, s9 7574; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 7575; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7576; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 7577; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7578; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 7579; GCN-HSA-NEXT: v_mov_b32_e32 v2, s23 7580; GCN-HSA-NEXT: s_add_u32 s4, s16, 0x50 7581; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7582; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7583; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7584; GCN-HSA-NEXT: 
v_mov_b32_e32 v0, s36 7585; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 7586; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7587; GCN-HSA-NEXT: s_add_u32 s4, s16, 48 7588; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7589; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7590; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7591; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 7592; GCN-HSA-NEXT: v_mov_b32_e32 v2, s21 7593; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7594; GCN-HSA-NEXT: s_add_u32 s4, s16, 16 7595; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7596; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7597; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7598; GCN-HSA-NEXT: v_mov_b32_e32 v0, s1 7599; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20 7600; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7601; GCN-HSA-NEXT: s_add_u32 s4, s16, 0xe0 7602; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7603; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7604; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7605; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 7606; GCN-HSA-NEXT: v_mov_b32_e32 v2, s28 7607; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7608; GCN-HSA-NEXT: s_add_u32 s4, s16, 0xc0 7609; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7610; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7611; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7612; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 7613; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29 7614; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7615; GCN-HSA-NEXT: s_add_u32 s4, s16, 0xa0 7616; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7617; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7618; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7619; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 7620; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30 7621; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7622; GCN-HSA-NEXT: s_add_u32 s4, s16, 0x80 7623; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7624; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7625; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7626; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 7627; GCN-HSA-NEXT: v_mov_b32_e32 v2, s31 7628; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7629; GCN-HSA-NEXT: s_add_u32 s4, s16, 0x60 7630; GCN-HSA-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 7631; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7632; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7633; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 7634; GCN-HSA-NEXT: v_mov_b32_e32 v2, s33 7635; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7636; GCN-HSA-NEXT: s_add_u32 s4, s16, 64 7637; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7638; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 7639; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 7640; GCN-HSA-NEXT: v_mov_b32_e32 v0, s35 7641; GCN-HSA-NEXT: v_mov_b32_e32 v2, s34 7642; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 7643; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7644; GCN-HSA-NEXT: s_nop 0 7645; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 7646; GCN-HSA-NEXT: s_add_u32 s2, s16, 32 7647; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 7648; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 7649; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 7650; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 7651; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7652; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 7653; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 7654; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 7655; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 7656; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7657; GCN-HSA-NEXT: s_endpgm 7658; 7659; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64: 7660; GCN-NOHSA-VI: ; %bb.0: 7661; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24 7662; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 7663; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 7664; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7665; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 7666; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7667; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s0, 16 7668; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s0, 0xffff 7669; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s1, 16 7670; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s1, 0xffff 7671; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s2, 16 7672; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 7673; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s3, 16 7674; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 7675; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s4, 16 7676; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s4, 0xffff 7677; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s5, 16 7678; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s5, 0xffff 7679; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s6, 16 7680; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 7681; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s7, 16 7682; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 7683; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s8, 16 7684; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 7685; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s9, 16 7686; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 7687; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s10, 16 7688; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 7689; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s11, 16 7690; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 7691; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s12, 16 7692; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, 0xffff 7693; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s13, 16 7694; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, 0xffff 7695; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s14, 16 7696; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, 0xffff 7697; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s15, 16 7698; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s15, 0xffff 7699; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 7700; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0xf0 7701; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 7702; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7703; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7704; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7705; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0xe0 7706; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7707; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7708; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7709; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 7710; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s35 7711; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7712; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0xd0 7713; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7714; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7715; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v4, s4 7716; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s13 7717; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 7718; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7719; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0xc0 7720; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7721; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7722; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7723; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 7724; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s33 7725; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7726; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0xb0 7727; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7728; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7729; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7730; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s11 7731; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 7732; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7733; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0xa0 7734; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7735; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7736; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7737; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 7738; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 7739; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7740; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0x90 7741; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7742; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7743; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7744; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s9 7745; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 7746; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7747; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0x80 7748; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7749; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7750; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7751; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 7752; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s28 7753; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7754; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0x70 7755; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7756; GCN-NOHSA-VI-NEXT: 
s_addc_u32 s5, s17, 0 7757; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7758; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s7 7759; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 7760; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7761; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0x60 7762; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7763; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7764; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7765; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 7766; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 7767; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7768; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 0x50 7769; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7770; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7771; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7772; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s25 7773; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24 7774; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7775; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 64 7776; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7777; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7778; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7779; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s23 7780; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22 7781; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7782; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s16, 48 7783; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7784; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s17, 0 7785; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4 7786; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3 7787; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 7788; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5 7789; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7790; GCN-NOHSA-VI-NEXT: s_nop 0 7791; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 7792; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s16, 32 7793; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s17, 0 7794; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 7795; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 7796; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s16, 16 7797; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 7798; 
GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s17, 0 7799; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7800; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 7801; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 7802; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 7803; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 7804; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7805; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s16 7806; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 7807; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18 7808; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s17 7809; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7810; GCN-NOHSA-VI-NEXT: s_endpgm 7811; 7812; EG-LABEL: constant_zextload_v32i16_to_v32i64: 7813; EG: ; %bb.0: 7814; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7815; EG-NEXT: TEX 2 @22 7816; EG-NEXT: ALU 33, @31, KC0[], KC1[] 7817; EG-NEXT: TEX 0 @28 7818; EG-NEXT: ALU 92, @65, KC0[CB0:0-32], KC1[] 7819; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 7820; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 7821; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 7822; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0 7823; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0 7824; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 7825; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 7826; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 7827; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 7828; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 7829; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 7830; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 7831; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 7832; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 7833; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 7834; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1 7835; EG-NEXT: CF_END 7836; EG-NEXT: Fetch clause starting at 22: 7837; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 
7838; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 7839; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 7840; EG-NEXT: Fetch clause starting at 28: 7841; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 7842; EG-NEXT: ALU clause starting at 30: 7843; EG-NEXT: MOV * T19.X, KC0[2].Z, 7844; EG-NEXT: ALU clause starting at 31: 7845; EG-NEXT: LSHR * T23.Z, T20.W, literal.x, 7846; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7847; EG-NEXT: AND_INT T23.X, T20.W, literal.x, 7848; EG-NEXT: MOV T23.Y, 0.0, 7849; EG-NEXT: LSHR T24.Z, T20.Z, literal.y, 7850; EG-NEXT: AND_INT * T24.X, T20.Z, literal.x, 7851; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7852; EG-NEXT: MOV T24.Y, 0.0, 7853; EG-NEXT: LSHR * T25.Z, T20.Y, literal.x, 7854; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7855; EG-NEXT: AND_INT T25.X, T20.Y, literal.x, 7856; EG-NEXT: MOV T25.Y, 0.0, 7857; EG-NEXT: LSHR T20.Z, T20.X, literal.y, 7858; EG-NEXT: AND_INT * T20.X, T20.X, literal.x, 7859; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7860; EG-NEXT: MOV T20.Y, 0.0, 7861; EG-NEXT: LSHR * T26.Z, T22.W, literal.x, 7862; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7863; EG-NEXT: AND_INT T26.X, T22.W, literal.x, 7864; EG-NEXT: MOV T26.Y, 0.0, 7865; EG-NEXT: LSHR T27.Z, T22.Z, literal.y, 7866; EG-NEXT: AND_INT * T27.X, T22.Z, literal.x, 7867; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7868; EG-NEXT: MOV T27.Y, 0.0, 7869; EG-NEXT: LSHR * T28.Z, T22.Y, literal.x, 7870; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7871; EG-NEXT: AND_INT T28.X, T22.Y, literal.x, 7872; EG-NEXT: MOV T28.Y, 0.0, 7873; EG-NEXT: LSHR T22.Z, T22.X, literal.y, 7874; EG-NEXT: AND_INT * T22.X, T22.X, literal.x, 7875; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7876; EG-NEXT: MOV T22.Y, 0.0, 7877; EG-NEXT: LSHR * T19.Z, T21.W, literal.x, 7878; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7879; EG-NEXT: ALU clause starting at 65: 7880; EG-NEXT: AND_INT T19.X, T21.W, literal.x, 7881; EG-NEXT: MOV T19.Y, 0.0, 7882; EG-NEXT: LSHR T30.Z, T21.Z, literal.y, 
7883; EG-NEXT: AND_INT * T30.X, T21.Z, literal.x, 7884; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7885; EG-NEXT: MOV T30.Y, 0.0, 7886; EG-NEXT: LSHR * T31.Z, T21.Y, literal.x, 7887; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7888; EG-NEXT: AND_INT T31.X, T21.Y, literal.x, 7889; EG-NEXT: MOV T31.Y, 0.0, 7890; EG-NEXT: LSHR T21.Z, T21.X, literal.y, 7891; EG-NEXT: AND_INT * T21.X, T21.X, literal.x, 7892; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7893; EG-NEXT: MOV T21.Y, 0.0, 7894; EG-NEXT: LSHR * T32.Z, T29.W, literal.x, 7895; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7896; EG-NEXT: AND_INT T32.X, T29.W, literal.x, 7897; EG-NEXT: MOV T32.Y, 0.0, 7898; EG-NEXT: LSHR T33.Z, T29.Z, literal.y, 7899; EG-NEXT: AND_INT * T33.X, T29.Z, literal.x, 7900; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7901; EG-NEXT: MOV T33.Y, 0.0, 7902; EG-NEXT: LSHR * T34.Z, T29.Y, literal.x, 7903; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7904; EG-NEXT: AND_INT T34.X, T29.Y, literal.x, 7905; EG-NEXT: MOV T34.Y, 0.0, 7906; EG-NEXT: LSHR T29.Z, T29.X, literal.y, 7907; EG-NEXT: AND_INT * T29.X, T29.X, literal.x, 7908; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7909; EG-NEXT: MOV T29.Y, 0.0, 7910; EG-NEXT: MOV T23.W, 0.0, 7911; EG-NEXT: MOV * T24.W, 0.0, 7912; EG-NEXT: MOV T25.W, 0.0, 7913; EG-NEXT: MOV * T20.W, 0.0, 7914; EG-NEXT: MOV T26.W, 0.0, 7915; EG-NEXT: MOV * T27.W, 0.0, 7916; EG-NEXT: MOV T28.W, 0.0, 7917; EG-NEXT: MOV * T22.W, 0.0, 7918; EG-NEXT: MOV T19.W, 0.0, 7919; EG-NEXT: MOV * T30.W, 0.0, 7920; EG-NEXT: MOV T31.W, 0.0, 7921; EG-NEXT: MOV * T21.W, 0.0, 7922; EG-NEXT: MOV T32.W, 0.0, 7923; EG-NEXT: MOV * T33.W, 0.0, 7924; EG-NEXT: MOV T34.W, 0.0, 7925; EG-NEXT: MOV * T29.W, 0.0, 7926; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 7927; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7928; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7929; EG-NEXT: LSHR T36.X, PV.W, literal.x, 7930; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7931; EG-NEXT: 2(2.802597e-45), 
32(4.484155e-44) 7932; EG-NEXT: LSHR T37.X, PV.W, literal.x, 7933; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7934; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7935; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7936; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7937; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7938; EG-NEXT: LSHR T39.X, PV.W, literal.x, 7939; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7940; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7941; EG-NEXT: LSHR T40.X, PV.W, literal.x, 7942; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7943; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7944; EG-NEXT: LSHR T41.X, PV.W, literal.x, 7945; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7946; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7947; EG-NEXT: LSHR T42.X, PV.W, literal.x, 7948; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7949; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7950; EG-NEXT: LSHR T43.X, PV.W, literal.x, 7951; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7952; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7953; EG-NEXT: LSHR T44.X, PV.W, literal.x, 7954; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7955; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7956; EG-NEXT: LSHR T45.X, PV.W, literal.x, 7957; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7958; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7959; EG-NEXT: LSHR T46.X, PV.W, literal.x, 7960; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7961; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7962; EG-NEXT: LSHR T47.X, PV.W, literal.x, 7963; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7964; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7965; EG-NEXT: LSHR T48.X, PV.W, literal.x, 7966; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7967; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7968; EG-NEXT: LSHR T49.X, PV.W, literal.x, 7969; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7970; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7971; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 7972; EG-NEXT: 2(2.802597e-45), 
0(0.000000e+00) 7973; 7974; GFX12-LABEL: constant_zextload_v32i16_to_v32i64: 7975; GFX12: ; %bb.0: 7976; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24 7977; GFX12-NEXT: s_wait_kmcnt 0x0 7978; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 7979; GFX12-NEXT: s_wait_kmcnt 0x0 7980; GFX12-NEXT: s_and_b32 s18, s15, 0xffff 7981; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 7982; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s18 7983; GFX12-NEXT: s_lshr_b32 s15, s15, 16 7984; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 7985; GFX12-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v3, v1 7986; GFX12-NEXT: s_lshr_b32 s15, s14, 16 7987; GFX12-NEXT: s_and_b32 s14, s14, 0xffff 7988; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:240 7989; GFX12-NEXT: s_wait_alu 0xfffe 7990; GFX12-NEXT: v_mov_b32_e32 v0, s14 7991; GFX12-NEXT: v_mov_b32_e32 v2, s15 7992; GFX12-NEXT: s_lshr_b32 s14, s13, 16 7993; GFX12-NEXT: s_and_b32 s13, s13, 0xffff 7994; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:224 7995; GFX12-NEXT: v_mov_b32_e32 v0, s13 7996; GFX12-NEXT: s_wait_alu 0xfffe 7997; GFX12-NEXT: v_mov_b32_e32 v2, s14 7998; GFX12-NEXT: s_lshr_b32 s13, s12, 16 7999; GFX12-NEXT: s_and_b32 s12, s12, 0xffff 8000; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:208 8001; GFX12-NEXT: s_wait_alu 0xfffe 8002; GFX12-NEXT: v_mov_b32_e32 v0, s12 8003; GFX12-NEXT: v_mov_b32_e32 v2, s13 8004; GFX12-NEXT: s_lshr_b32 s12, s11, 16 8005; GFX12-NEXT: s_and_b32 s11, s11, 0xffff 8006; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:192 8007; GFX12-NEXT: v_mov_b32_e32 v0, s11 8008; GFX12-NEXT: s_wait_alu 0xfffe 8009; GFX12-NEXT: v_mov_b32_e32 v2, s12 8010; GFX12-NEXT: s_lshr_b32 s11, s10, 16 8011; GFX12-NEXT: s_and_b32 s10, s10, 0xffff 8012; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:176 8013; GFX12-NEXT: s_wait_alu 0xfffe 8014; GFX12-NEXT: v_mov_b32_e32 v0, s10 8015; GFX12-NEXT: v_mov_b32_e32 v2, s11 8016; GFX12-NEXT: 
s_lshr_b32 s10, s9, 16 8017; GFX12-NEXT: s_and_b32 s9, s9, 0xffff 8018; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:160 8019; GFX12-NEXT: v_mov_b32_e32 v0, s9 8020; GFX12-NEXT: s_wait_alu 0xfffe 8021; GFX12-NEXT: v_mov_b32_e32 v2, s10 8022; GFX12-NEXT: s_lshr_b32 s9, s8, 16 8023; GFX12-NEXT: s_and_b32 s8, s8, 0xffff 8024; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:144 8025; GFX12-NEXT: s_wait_alu 0xfffe 8026; GFX12-NEXT: v_mov_b32_e32 v0, s8 8027; GFX12-NEXT: v_mov_b32_e32 v2, s9 8028; GFX12-NEXT: s_lshr_b32 s8, s7, 16 8029; GFX12-NEXT: s_and_b32 s7, s7, 0xffff 8030; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:128 8031; GFX12-NEXT: v_mov_b32_e32 v0, s7 8032; GFX12-NEXT: s_wait_alu 0xfffe 8033; GFX12-NEXT: v_mov_b32_e32 v2, s8 8034; GFX12-NEXT: s_lshr_b32 s7, s6, 16 8035; GFX12-NEXT: s_and_b32 s6, s6, 0xffff 8036; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:112 8037; GFX12-NEXT: s_wait_alu 0xfffe 8038; GFX12-NEXT: v_mov_b32_e32 v0, s6 8039; GFX12-NEXT: v_mov_b32_e32 v2, s7 8040; GFX12-NEXT: s_lshr_b32 s6, s5, 16 8041; GFX12-NEXT: s_and_b32 s5, s5, 0xffff 8042; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:96 8043; GFX12-NEXT: v_mov_b32_e32 v0, s5 8044; GFX12-NEXT: s_wait_alu 0xfffe 8045; GFX12-NEXT: v_mov_b32_e32 v2, s6 8046; GFX12-NEXT: s_lshr_b32 s5, s4, 16 8047; GFX12-NEXT: s_and_b32 s4, s4, 0xffff 8048; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:80 8049; GFX12-NEXT: s_wait_alu 0xfffe 8050; GFX12-NEXT: v_mov_b32_e32 v0, s4 8051; GFX12-NEXT: v_mov_b32_e32 v2, s5 8052; GFX12-NEXT: s_lshr_b32 s4, s3, 16 8053; GFX12-NEXT: s_and_b32 s3, s3, 0xffff 8054; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:64 8055; GFX12-NEXT: v_mov_b32_e32 v0, s3 8056; GFX12-NEXT: s_wait_alu 0xfffe 8057; GFX12-NEXT: v_mov_b32_e32 v2, s4 8058; GFX12-NEXT: s_lshr_b32 s3, s2, 16 8059; GFX12-NEXT: s_and_b32 s2, s2, 0xffff 8060; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:48 8061; 
GFX12-NEXT: s_wait_alu 0xfffe 8062; GFX12-NEXT: v_mov_b32_e32 v0, s2 8063; GFX12-NEXT: v_mov_b32_e32 v2, s3 8064; GFX12-NEXT: s_lshr_b32 s2, s1, 16 8065; GFX12-NEXT: s_and_b32 s1, s1, 0xffff 8066; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:32 8067; GFX12-NEXT: v_mov_b32_e32 v0, s1 8068; GFX12-NEXT: s_wait_alu 0xfffe 8069; GFX12-NEXT: v_mov_b32_e32 v2, s2 8070; GFX12-NEXT: s_lshr_b32 s1, s0, 16 8071; GFX12-NEXT: s_and_b32 s0, s0, 0xffff 8072; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] offset:16 8073; GFX12-NEXT: s_wait_alu 0xfffe 8074; GFX12-NEXT: v_mov_b32_e32 v0, s0 8075; GFX12-NEXT: v_mov_b32_e32 v2, s1 8076; GFX12-NEXT: global_store_b128 v1, v[0:3], s[16:17] 8077; GFX12-NEXT: s_endpgm 8078 %load = load <32 x i16>, ptr addrspace(4) %in 8079 %ext = zext <32 x i16> %load to <32 x i64> 8080 store <32 x i64> %ext, ptr addrspace(1) %out 8081 ret void 8082} 8083 8084define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8085; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64: 8086; GCN-NOHSA-SI: ; %bb.0: 8087; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9 8088; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 8089; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 8090; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 8091; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s15 8092; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, s13 8093; GCN-NOHSA-SI-NEXT: s_mov_b32 s50, s11 8094; GCN-NOHSA-SI-NEXT: s_mov_b32 s52, s9 8095; GCN-NOHSA-SI-NEXT: s_mov_b32 s56, s7 8096; GCN-NOHSA-SI-NEXT: s_mov_b32 s54, s5 8097; GCN-NOHSA-SI-NEXT: s_mov_b32 s42, s3 8098; GCN-NOHSA-SI-NEXT: s_mov_b32 s44, s1 8099; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s14, 16 8100; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s12, 16 8101; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s10, 16 8102; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s8, 16 8103; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[68:69], s[20:21], 0x100000 8104; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[70:71], s[18:19], 0x100000 8105; 
GCN-NOHSA-SI-NEXT: s_lshr_b32 s60, s6, 16 8106; GCN-NOHSA-SI-NEXT: s_lshr_b32 s62, s4, 16 8107; GCN-NOHSA-SI-NEXT: s_lshr_b32 s64, s2, 16 8108; GCN-NOHSA-SI-NEXT: s_lshr_b32 s66, s0, 16 8109; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 8110; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 8111; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 8112; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 8113; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[8:9], 0x100000 8114; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[38:39], s[10:11], 0x100000 8115; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[40:41], s[12:13], 0x100000 8116; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[46:47], s[14:15], 0x100000 8117; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[48:49], s[0:1], 48 8118; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[58:59], s[2:3], 48 8119; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 8120; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 8121; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 8122; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[2:3], s[12:13], 48 8123; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[12:13], s[14:15], 48 8124; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 8125; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 8126; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 8127; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s70 8128; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s71 8129; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 8130; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 8131; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s68 8132; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s69 8133; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s2 8134; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s3 8135; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 8136; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 8137; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[56:57], 0x100000 8138; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[52:53], 0x100000 8139; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[50:51], 0x100000 8140; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[50:51], s[54:55], 0x100000 8141; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[44:45], 
s[44:45], 0x100000 8142; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000 8143; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s16 8144; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s17 8145; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s10 8146; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s11 8147; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s14 8148; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s15 8149; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s8 8150; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s9 8151; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s12 8152; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s13 8153; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s6 8154; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s7 8155; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s50 8156; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 8157; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s51 8158; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s4 8159; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s5 8160; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208 8161; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[66:67], 0x100000 8162; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[64:65], 0x100000 8163; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[62:63], 0x100000 8164; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[60:61], 0x100000 8165; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[36:37], 0x100000 8166; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[34:35], 0x100000 8167; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[30:31], 0x100000 8168; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 8169; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176 8170; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 8171; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 8172; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 8173; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 8174; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s42 8175; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s43 8176; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v2, s58 8177; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s59 8178; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 8179; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8180; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s44 8181; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s45 8182; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s48 8183; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s49 8184; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 8185; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8186; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s46 8187; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s47 8188; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s40 8189; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s41 8190; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s38 8191; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s39 8192; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28 8193; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29 8194; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s24 8195; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s25 8196; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s22 8197; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s23 8198; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, s20 8199; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, s21 8200; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26 8201; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s27 8202; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 8203; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8204; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 8205; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 8206; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s16 8207; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s17 8208; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 8209; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s14 8210; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s15 8211; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 8212; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s12 8213; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s13 8214; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 
0 offset:128 8215; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s10 8216; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s11 8217; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96 8218; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s8 8219; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s9 8220; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 8221; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, s6 8222; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, s7 8223; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 8224; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 8225; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 8226; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 8227; GCN-NOHSA-SI-NEXT: s_endpgm 8228; 8229; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64: 8230; GCN-HSA: ; %bb.0: 8231; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0 8232; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 8233; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 8234; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 8235; GCN-HSA-NEXT: s_mov_b32 s40, s15 8236; GCN-HSA-NEXT: s_mov_b32 s48, s13 8237; GCN-HSA-NEXT: s_mov_b32 s50, s11 8238; GCN-HSA-NEXT: s_mov_b32 s52, s9 8239; GCN-HSA-NEXT: s_mov_b32 s54, s7 8240; GCN-HSA-NEXT: s_mov_b32 s56, s5 8241; GCN-HSA-NEXT: s_mov_b32 s44, s3 8242; GCN-HSA-NEXT: s_mov_b32 s58, s1 8243; GCN-HSA-NEXT: s_lshr_b32 s60, s14, 16 8244; GCN-HSA-NEXT: s_lshr_b32 s62, s12, 16 8245; GCN-HSA-NEXT: s_lshr_b32 s64, s10, 16 8246; GCN-HSA-NEXT: s_lshr_b32 s66, s8, 16 8247; GCN-HSA-NEXT: s_lshr_b32 s68, s6, 16 8248; GCN-HSA-NEXT: s_lshr_b32 s70, s4, 16 8249; GCN-HSA-NEXT: s_lshr_b32 s72, s2, 16 8250; GCN-HSA-NEXT: s_lshr_b32 s74, s0, 16 8251; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 8252; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 8253; GCN-HSA-NEXT: s_ashr_i64 s[36:37], s[0:1], 48 8254; GCN-HSA-NEXT: s_ashr_i64 s[38:39], s[2:3], 48 8255; GCN-HSA-NEXT: s_ashr_i64 s[0:1], s[14:15], 48 8256; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[40:41], 
0x100000 8257; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 8258; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 8259; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 8260; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[10:11], 0x100000 8261; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 8262; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[14:15], 0x100000 8263; GCN-HSA-NEXT: s_ashr_i64 s[42:43], s[4:5], 48 8264; GCN-HSA-NEXT: s_ashr_i64 s[46:47], s[6:7], 48 8265; GCN-HSA-NEXT: s_ashr_i64 s[76:77], s[8:9], 48 8266; GCN-HSA-NEXT: s_ashr_i64 s[78:79], s[10:11], 48 8267; GCN-HSA-NEXT: s_ashr_i64 s[80:81], s[12:13], 48 8268; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8269; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8270; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 8271; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 8272; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[74:75], 0x100000 8273; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[72:73], 0x100000 8274; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[70:71], 0x100000 8275; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[68:69], 0x100000 8276; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[66:67], 0x100000 8277; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[64:65], 0x100000 8278; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[62:63], 0x100000 8279; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[60:61], 0x100000 8280; GCN-HSA-NEXT: s_bfe_i64 s[40:41], s[58:59], 0x100000 8281; GCN-HSA-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 8282; GCN-HSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x100000 8283; GCN-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 8284; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 8285; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 8286; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000 8287; GCN-HSA-NEXT: s_add_u32 s58, s16, 0xf0 8288; GCN-HSA-NEXT: s_addc_u32 s59, s17, 0 8289; GCN-HSA-NEXT: v_mov_b32_e32 v4, s48 8290; GCN-HSA-NEXT: s_add_u32 s48, s16, 0xd0 8291; GCN-HSA-NEXT: v_mov_b32_e32 v5, s49 8292; GCN-HSA-NEXT: s_addc_u32 s49, s17, 0 8293; GCN-HSA-NEXT: v_mov_b32_e32 v24, s48 8294; GCN-HSA-NEXT: v_mov_b32_e32 
v25, s49 8295; GCN-HSA-NEXT: s_add_u32 s48, s16, 0xb0 8296; GCN-HSA-NEXT: s_addc_u32 s49, s17, 0 8297; GCN-HSA-NEXT: v_mov_b32_e32 v22, s58 8298; GCN-HSA-NEXT: v_mov_b32_e32 v18, s46 8299; GCN-HSA-NEXT: s_add_u32 s46, s16, 0x90 8300; GCN-HSA-NEXT: v_mov_b32_e32 v23, s59 8301; GCN-HSA-NEXT: v_mov_b32_e32 v19, s47 8302; GCN-HSA-NEXT: s_addc_u32 s47, s17, 0 8303; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 8304; GCN-HSA-NEXT: v_mov_b32_e32 v22, s42 8305; GCN-HSA-NEXT: s_add_u32 s42, s16, 0x70 8306; GCN-HSA-NEXT: v_mov_b32_e32 v23, s43 8307; GCN-HSA-NEXT: s_addc_u32 s43, s17, 0 8308; GCN-HSA-NEXT: v_mov_b32_e32 v30, s42 8309; GCN-HSA-NEXT: v_mov_b32_e32 v31, s43 8310; GCN-HSA-NEXT: s_add_u32 s42, s16, 0x50 8311; GCN-HSA-NEXT: s_addc_u32 s43, s17, 0 8312; GCN-HSA-NEXT: v_mov_b32_e32 v6, s80 8313; GCN-HSA-NEXT: v_mov_b32_e32 v7, s81 8314; GCN-HSA-NEXT: v_mov_b32_e32 v26, s48 8315; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38 8316; GCN-HSA-NEXT: s_add_u32 s38, s16, 48 8317; GCN-HSA-NEXT: v_mov_b32_e32 v8, s50 8318; GCN-HSA-NEXT: v_mov_b32_e32 v9, s51 8319; GCN-HSA-NEXT: v_mov_b32_e32 v10, s78 8320; GCN-HSA-NEXT: v_mov_b32_e32 v11, s79 8321; GCN-HSA-NEXT: v_mov_b32_e32 v27, s49 8322; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 8323; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 8324; GCN-HSA-NEXT: v_mov_b32_e32 v3, s39 8325; GCN-HSA-NEXT: s_addc_u32 s39, s17, 0 8326; GCN-HSA-NEXT: v_mov_b32_e32 v24, s38 8327; GCN-HSA-NEXT: v_mov_b32_e32 v10, s42 8328; GCN-HSA-NEXT: v_mov_b32_e32 v25, s39 8329; GCN-HSA-NEXT: s_add_u32 s38, s16, 16 8330; GCN-HSA-NEXT: v_mov_b32_e32 v20, s56 8331; GCN-HSA-NEXT: v_mov_b32_e32 v21, s57 8332; GCN-HSA-NEXT: v_mov_b32_e32 v11, s43 8333; GCN-HSA-NEXT: s_addc_u32 s39, s17, 0 8334; GCN-HSA-NEXT: v_mov_b32_e32 v28, s46 8335; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[20:23] 8336; GCN-HSA-NEXT: v_mov_b32_e32 v10, s14 8337; GCN-HSA-NEXT: s_add_u32 s14, s16, 0xe0 8338; GCN-HSA-NEXT: v_mov_b32_e32 v12, s52 8339; GCN-HSA-NEXT: v_mov_b32_e32 v13, 
s53 8340; GCN-HSA-NEXT: v_mov_b32_e32 v14, s76 8341; GCN-HSA-NEXT: v_mov_b32_e32 v15, s77 8342; GCN-HSA-NEXT: v_mov_b32_e32 v29, s47 8343; GCN-HSA-NEXT: v_mov_b32_e32 v11, s15 8344; GCN-HSA-NEXT: s_addc_u32 s15, s17, 0 8345; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 8346; GCN-HSA-NEXT: v_mov_b32_e32 v16, s54 8347; GCN-HSA-NEXT: v_mov_b32_e32 v14, s12 8348; GCN-HSA-NEXT: s_add_u32 s12, s16, 0xc0 8349; GCN-HSA-NEXT: v_mov_b32_e32 v17, s55 8350; GCN-HSA-NEXT: v_mov_b32_e32 v15, s13 8351; GCN-HSA-NEXT: s_addc_u32 s13, s17, 0 8352; GCN-HSA-NEXT: v_mov_b32_e32 v0, s44 8353; GCN-HSA-NEXT: v_mov_b32_e32 v1, s45 8354; GCN-HSA-NEXT: v_mov_b32_e32 v26, s38 8355; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 8356; GCN-HSA-NEXT: v_mov_b32_e32 v4, s40 8357; GCN-HSA-NEXT: v_mov_b32_e32 v17, s15 8358; GCN-HSA-NEXT: v_mov_b32_e32 v19, s13 8359; GCN-HSA-NEXT: v_mov_b32_e32 v5, s41 8360; GCN-HSA-NEXT: v_mov_b32_e32 v27, s39 8361; GCN-HSA-NEXT: v_mov_b32_e32 v6, s36 8362; GCN-HSA-NEXT: v_mov_b32_e32 v7, s37 8363; GCN-HSA-NEXT: v_mov_b32_e32 v8, s34 8364; GCN-HSA-NEXT: v_mov_b32_e32 v9, s35 8365; GCN-HSA-NEXT: v_mov_b32_e32 v12, s30 8366; GCN-HSA-NEXT: v_mov_b32_e32 v13, s31 8367; GCN-HSA-NEXT: v_mov_b32_e32 v16, s14 8368; GCN-HSA-NEXT: v_mov_b32_e32 v18, s12 8369; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 8370; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[4:7] 8371; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 8372; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 8373; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 8374; GCN-HSA-NEXT: s_add_u32 s10, s16, 0xa0 8375; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 8376; GCN-HSA-NEXT: s_addc_u32 s11, s17, 0 8377; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 8378; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 8379; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 8380; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 8381; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8382; GCN-HSA-NEXT: s_nop 0 8383; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 8384; GCN-HSA-NEXT: s_add_u32 s8, 
s16, 0x80 8385; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 8386; GCN-HSA-NEXT: s_addc_u32 s9, s17, 0 8387; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 8388; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 8389; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 8390; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 8391; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8392; GCN-HSA-NEXT: s_nop 0 8393; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 8394; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x60 8395; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 8396; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 8397; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 8398; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 8399; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 8400; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 8401; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8402; GCN-HSA-NEXT: s_nop 0 8403; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 8404; GCN-HSA-NEXT: s_add_u32 s4, s16, 64 8405; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 8406; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 8407; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 8408; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 8409; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 8410; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 8411; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8412; GCN-HSA-NEXT: s_nop 0 8413; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 8414; GCN-HSA-NEXT: s_add_u32 s2, s16, 32 8415; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 8416; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 8417; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 8418; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 8419; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 8420; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 8421; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8422; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 8423; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 8424; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 8425; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 8426; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 8427; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 8428; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8429; GCN-HSA-NEXT: s_endpgm 8430; 8431; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64: 8432; GCN-NOHSA-VI: ; %bb.0: 8433; 
GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24 8434; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 8435; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 8436; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 8437; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s0, 16 8438; GCN-NOHSA-VI-NEXT: s_mov_b32 s26, s1 8439; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s1, 16 8440; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s2, 16 8441; GCN-NOHSA-VI-NEXT: s_mov_b32 s34, s3 8442; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s3, 16 8443; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s4, 16 8444; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s5 8445; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s5, 16 8446; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s6, 16 8447; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s7 8448; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s7, 16 8449; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s8, 16 8450; GCN-NOHSA-VI-NEXT: s_mov_b32 s54, s9 8451; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s9, 16 8452; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s10, 16 8453; GCN-NOHSA-VI-NEXT: s_mov_b32 s60, s11 8454; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s11, 16 8455; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s12, 16 8456; GCN-NOHSA-VI-NEXT: s_mov_b32 s68, s13 8457; GCN-NOHSA-VI-NEXT: s_lshr_b32 s70, s13, 16 8458; GCN-NOHSA-VI-NEXT: s_lshr_b32 s74, s14, 16 8459; GCN-NOHSA-VI-NEXT: s_mov_b32 s76, s15 8460; GCN-NOHSA-VI-NEXT: s_lshr_b32 s78, s15, 16 8461; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 8462; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x100000 8463; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000 8464; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x100000 8465; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[8:9], 0x100000 8466; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[10:11], 0x100000 8467; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[12:13], 0x100000 8468; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[72:73], s[14:15], 0x100000 8469; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[22:23], 0x100000 8470; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[26:27], 0x100000 8471; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], 
s[28:29], 0x100000 8472; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[30:31], 0x100000 8473; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[34:35], 0x100000 8474; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[36:37], 0x100000 8475; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[40:41], 0x100000 8476; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[42:43], 0x100000 8477; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[44:45], 0x100000 8478; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[46:47], 0x100000 8479; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[48:49], 0x100000 8480; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[50:51], 0x100000 8481; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x100000 8482; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[54:55], 0x100000 8483; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[56:57], 0x100000 8484; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[58:59], 0x100000 8485; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[60:61], 0x100000 8486; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[62:63], 0x100000 8487; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[66:67], 0x100000 8488; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[68:69], 0x100000 8489; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[70:71], 0x100000 8490; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[74:75], 0x100000 8491; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[76:77], 0x100000 8492; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[78:79], 0x100000 8493; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s60 8494; GCN-NOHSA-VI-NEXT: s_add_u32 s60, s16, 0xf0 8495; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61 8496; GCN-NOHSA-VI-NEXT: s_addc_u32 s61, s17, 0 8497; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s60 8498; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62 8499; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 8500; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s61 8501; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8502; GCN-NOHSA-VI-NEXT: s_nop 0 8503; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s58 8504; GCN-NOHSA-VI-NEXT: s_add_u32 s58, s16, 0xe0 8505; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 8506; 
GCN-NOHSA-VI-NEXT: s_addc_u32 s59, s17, 0 8507; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s58 8508; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s72 8509; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s73 8510; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s59 8511; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8512; GCN-NOHSA-VI-NEXT: s_nop 0 8513; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54 8514; GCN-NOHSA-VI-NEXT: s_add_u32 s54, s16, 0xd0 8515; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55 8516; GCN-NOHSA-VI-NEXT: s_addc_u32 s55, s17, 0 8517; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s54 8518; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s56 8519; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 8520; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s55 8521; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8522; GCN-NOHSA-VI-NEXT: s_nop 0 8523; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s52 8524; GCN-NOHSA-VI-NEXT: s_add_u32 s52, s16, 0xc0 8525; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 8526; GCN-NOHSA-VI-NEXT: s_addc_u32 s53, s17, 0 8527; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s52 8528; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s64 8529; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s65 8530; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s53 8531; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8532; GCN-NOHSA-VI-NEXT: s_nop 0 8533; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 8534; GCN-NOHSA-VI-NEXT: s_add_u32 s48, s16, 0xb0 8535; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 8536; GCN-NOHSA-VI-NEXT: s_addc_u32 s49, s17, 0 8537; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s48 8538; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s50 8539; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 8540; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s49 8541; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8542; GCN-NOHSA-VI-NEXT: s_nop 0 8543; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s38 8544; GCN-NOHSA-VI-NEXT: s_add_u32 s38, s16, 0xa0 8545; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39 8546; GCN-NOHSA-VI-NEXT: s_addc_u32 s39, s17, 0 8547; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s38 8548; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s46 8549; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 8550; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s39 8551; GCN-NOHSA-VI-NEXT: s_add_u32 s38, s16, 0x90 8552; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8553; GCN-NOHSA-VI-NEXT: s_addc_u32 s39, s17, 0 8554; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s38 8555; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44 8556; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s45 8557; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s42 8558; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43 8559; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s39 8560; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8561; GCN-NOHSA-VI-NEXT: s_nop 0 8562; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 8563; GCN-NOHSA-VI-NEXT: s_add_u32 s24, s16, 0x80 8564; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 8565; GCN-NOHSA-VI-NEXT: s_addc_u32 s25, s17, 0 8566; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s24 8567; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s40 8568; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 8569; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s25 8570; GCN-NOHSA-VI-NEXT: s_add_u32 s24, s16, 0x70 8571; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8572; GCN-NOHSA-VI-NEXT: s_addc_u32 s25, s17, 0 8573; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s24 8574; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 8575; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 8576; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 8577; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 8578; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s25 8579; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8580; GCN-NOHSA-VI-NEXT: s_nop 0 8581; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 8582; GCN-NOHSA-VI-NEXT: s_add_u32 s20, s16, 0x60 8583; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 8584; GCN-NOHSA-VI-NEXT: s_addc_u32 s21, s17, 0 8585; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s20 8586; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 8587; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s31 8588; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s21 8589; GCN-NOHSA-VI-NEXT: s_add_u32 s20, 
s16, 0x50 8590; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8591; GCN-NOHSA-VI-NEXT: s_addc_u32 s21, s17, 0 8592; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s20 8593; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 8594; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 8595; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 8596; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 8597; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s21 8598; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8599; GCN-NOHSA-VI-NEXT: s_nop 0 8600; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 8601; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s16, 64 8602; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 8603; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s17, 0 8604; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 8605; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 8606; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s16, 48 8607; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22 8608; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23 8609; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s17, 0 8610; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8611; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3 8612; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 8613; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 8614; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 8615; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 8616; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2 8617; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8618; GCN-NOHSA-VI-NEXT: s_nop 0 8619; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 8620; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 32 8621; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 8622; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 8623; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 8624; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 8625; GCN-NOHSA-VI-NEXT: s_add_u32 s0, s16, 16 8626; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 8627; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 8628; GCN-NOHSA-VI-NEXT: s_addc_u32 s1, s17, 0 8629; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8630; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 8631; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v0, s8 8632; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 8633; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 8634; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 8635; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 8636; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8637; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s16 8638; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 8639; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 8640; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 8641; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 8642; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s17 8643; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8644; GCN-NOHSA-VI-NEXT: s_endpgm 8645; 8646; EG-LABEL: constant_sextload_v32i16_to_v32i64: 8647; EG: ; %bb.0: 8648; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8649; EG-NEXT: TEX 0 @22 8650; EG-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 8651; EG-NEXT: TEX 2 @24 8652; EG-NEXT: ALU 74, @87, KC0[CB0:0-32], KC1[] 8653; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 8654; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0 8655; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0 8656; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0 8657; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 8658; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 8659; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0 8660; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0 8661; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 8662; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 8663; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 8664; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0 8665; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 8666; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 8667; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 8668; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 8669; EG-NEXT: CF_END 8670; EG-NEXT: Fetch clause starting at 22: 8671; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 
0, #1 8672; EG-NEXT: Fetch clause starting at 24: 8673; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 8674; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 8675; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 8676; EG-NEXT: ALU clause starting at 30: 8677; EG-NEXT: MOV * T19.X, KC0[2].Z, 8678; EG-NEXT: ALU clause starting at 31: 8679; EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 8680; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8681; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8682; EG-NEXT: LSHR T22.X, PV.W, literal.x, 8683; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8684; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8685; EG-NEXT: LSHR T23.X, PV.W, literal.x, 8686; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8687; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8688; EG-NEXT: LSHR T24.X, PV.W, literal.x, 8689; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8690; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8691; EG-NEXT: LSHR T25.X, PV.W, literal.x, 8692; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8693; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8694; EG-NEXT: LSHR T26.X, PV.W, literal.x, 8695; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8696; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8697; EG-NEXT: LSHR T27.X, PV.W, literal.x, 8698; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8699; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8700; EG-NEXT: LSHR T28.X, PV.W, literal.x, 8701; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8702; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8703; EG-NEXT: LSHR T29.X, PV.W, literal.x, 8704; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8705; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8706; EG-NEXT: LSHR T30.X, PV.W, literal.x, 8707; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8708; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8709; EG-NEXT: LSHR T31.X, PV.W, literal.x, 8710; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8711; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8712; EG-NEXT: LSHR T32.X, PV.W, literal.x, 8713; EG-NEXT: ADD_INT 
* T0.W, KC0[2].Y, literal.y, 8714; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8715; EG-NEXT: LSHR T33.X, PV.W, literal.x, 8716; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8717; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8718; EG-NEXT: LSHR T34.X, PV.W, literal.x, 8719; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 8720; EG-NEXT: ASHR * T35.W, T20.X, literal.z, 8721; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 8722; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8723; EG-NEXT: LSHR T36.X, PV.W, literal.x, 8724; EG-NEXT: ASHR T35.Z, T20.X, literal.y, 8725; EG-NEXT: ASHR * T37.W, T20.Y, literal.z, 8726; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8727; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8728; EG-NEXT: BFE_INT T35.X, T20.X, 0.0, literal.x, 8729; EG-NEXT: ASHR * T37.Z, T20.Y, literal.x, 8730; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8731; EG-NEXT: BFE_INT T37.X, T20.Y, 0.0, literal.x, 8732; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 8733; EG-NEXT: ASHR * T19.W, T20.Z, literal.y, 8734; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8735; EG-NEXT: ALU clause starting at 87: 8736; EG-NEXT: ASHR T19.Z, T20.Z, literal.x, 8737; EG-NEXT: ASHR * T41.W, T20.W, literal.y, 8738; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8739; EG-NEXT: BFE_INT T19.X, T20.Z, 0.0, literal.x, 8740; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 8741; EG-NEXT: ASHR T41.Z, T20.W, literal.x, 8742; EG-NEXT: ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212 8743; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8744; EG-NEXT: BFE_INT T41.X, T20.W, 0.0, literal.x, 8745; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 8746; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 8747; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 8748; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8749; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 8750; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 8751; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 8752; EG-NEXT: ASHR * T43.W, T40.Z, literal.y, 8753; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8754; EG-NEXT: BFE_INT T20.X, T40.Y, 
0.0, literal.x, 8755; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 8756; EG-NEXT: ASHR T43.Z, T40.Z, literal.x, 8757; EG-NEXT: ASHR * T44.W, T40.W, literal.y, 8758; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8759; EG-NEXT: BFE_INT T43.X, T40.Z, 0.0, literal.x, 8760; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 8761; EG-NEXT: ASHR T44.Z, T40.W, literal.x, 8762; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 8763; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8764; EG-NEXT: BFE_INT T44.X, T40.W, 0.0, literal.x, 8765; EG-NEXT: ASHR T43.Y, PV.X, literal.y, 8766; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 8767; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 8768; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8769; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 8770; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 8771; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 8772; EG-NEXT: ASHR * T46.W, T39.Z, literal.y, 8773; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8774; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 8775; EG-NEXT: ASHR T45.Y, PV.X, literal.y, 8776; EG-NEXT: ASHR T46.Z, T39.Z, literal.x, 8777; EG-NEXT: ASHR * T47.W, T39.W, literal.y, 8778; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8779; EG-NEXT: BFE_INT T46.X, T39.Z, 0.0, literal.x, 8780; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 8781; EG-NEXT: ASHR T47.Z, T39.W, literal.x, 8782; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 8783; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8784; EG-NEXT: BFE_INT T47.X, T39.W, 0.0, literal.x, 8785; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 8786; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 8787; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 8788; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8789; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 8790; EG-NEXT: ASHR T47.Y, PV.X, literal.y, 8791; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 8792; EG-NEXT: ASHR * T49.W, T38.Z, literal.y, 8793; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8794; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x, 8795; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 8796; EG-NEXT: ASHR T49.Z, 
T38.Z, literal.x, 8797; EG-NEXT: ASHR * T50.W, T38.W, literal.y, 8798; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8799; EG-NEXT: BFE_INT T49.X, T38.Z, 0.0, literal.x, 8800; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 8801; EG-NEXT: ASHR * T50.Z, T38.W, literal.x, 8802; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8803; EG-NEXT: BFE_INT T50.X, T38.W, 0.0, literal.x, 8804; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 8805; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 8806; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8807; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 8808; EG-NEXT: LSHR T38.X, PV.W, literal.x, 8809; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 8810; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8811; 8812; GFX12-LABEL: constant_sextload_v32i16_to_v32i64: 8813; GFX12: ; %bb.0: 8814; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24 8815; GFX12-NEXT: s_wait_kmcnt 0x0 8816; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 8817; GFX12-NEXT: s_wait_kmcnt 0x0 8818; GFX12-NEXT: s_lshr_b32 s28, s2, 16 8819; GFX12-NEXT: s_lshr_b32 s42, s5, 16 8820; GFX12-NEXT: s_lshr_b32 s52, s8, 16 8821; GFX12-NEXT: s_mov_b32 s60, s11 8822; GFX12-NEXT: s_lshr_b32 s22, s0, 16 8823; GFX12-NEXT: s_mov_b32 s24, s1 8824; GFX12-NEXT: s_lshr_b32 s26, s1, 16 8825; GFX12-NEXT: s_mov_b32 s30, s3 8826; GFX12-NEXT: s_lshr_b32 s36, s3, 16 8827; GFX12-NEXT: s_lshr_b32 s38, s4, 16 8828; GFX12-NEXT: s_mov_b32 s40, s5 8829; GFX12-NEXT: s_lshr_b32 s46, s6, 16 8830; GFX12-NEXT: s_mov_b32 s48, s7 8831; GFX12-NEXT: s_lshr_b32 s50, s7, 16 8832; GFX12-NEXT: s_mov_b32 s54, s9 8833; GFX12-NEXT: s_lshr_b32 s56, s9, 16 8834; GFX12-NEXT: s_bfe_i64 s[44:45], s[10:11], 0x100000 8835; GFX12-NEXT: s_lshr_b32 s58, s10, 16 8836; GFX12-NEXT: s_lshr_b32 s62, s11, 16 8837; GFX12-NEXT: s_bfe_i64 s[10:11], s[28:29], 0x100000 8838; GFX12-NEXT: s_bfe_i64 s[28:29], s[42:43], 0x100000 8839; GFX12-NEXT: s_bfe_i64 s[42:43], s[52:53], 0x100000 8840; GFX12-NEXT: s_bfe_i64 s[52:53], s[60:61], 0x100000 8841; GFX12-NEXT: s_lshr_b32 s60, s14, 16 8842; 
GFX12-NEXT: s_bfe_i64 s[64:65], s[14:15], 0x100000 8843; GFX12-NEXT: s_mov_b32 s14, s15 8844; GFX12-NEXT: s_lshr_b32 s66, s15, 16 8845; GFX12-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 8846; GFX12-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x100000 8847; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000 8848; GFX12-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x100000 8849; GFX12-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000 8850; GFX12-NEXT: s_bfe_i64 s[4:5], s[22:23], 0x100000 8851; GFX12-NEXT: s_bfe_i64 s[8:9], s[24:25], 0x100000 8852; GFX12-NEXT: s_bfe_i64 s[6:7], s[26:27], 0x100000 8853; GFX12-NEXT: s_bfe_i64 s[24:25], s[30:31], 0x100000 8854; GFX12-NEXT: s_bfe_i64 s[22:23], s[36:37], 0x100000 8855; GFX12-NEXT: s_bfe_i64 s[26:27], s[38:39], 0x100000 8856; GFX12-NEXT: s_bfe_i64 s[30:31], s[40:41], 0x100000 8857; GFX12-NEXT: s_bfe_i64 s[36:37], s[46:47], 0x100000 8858; GFX12-NEXT: s_bfe_i64 s[40:41], s[48:49], 0x100000 8859; GFX12-NEXT: s_bfe_i64 s[38:39], s[50:51], 0x100000 8860; GFX12-NEXT: s_bfe_i64 s[46:47], s[54:55], 0x100000 8861; GFX12-NEXT: s_bfe_i64 s[48:49], s[56:57], 0x100000 8862; GFX12-NEXT: s_bfe_i64 s[50:51], s[58:59], 0x100000 8863; GFX12-NEXT: s_lshr_b32 s54, s12, 16 8864; GFX12-NEXT: s_bfe_i64 s[56:57], s[12:13], 0x100000 8865; GFX12-NEXT: s_mov_b32 s12, s13 8866; GFX12-NEXT: s_lshr_b32 s58, s13, 16 8867; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 8868; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x100000 8869; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x100000 8870; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s15 8871; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 8872; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x100000 8873; GFX12-NEXT: v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s67 8874; GFX12-NEXT: v_dual_mov_b32 v2, s66 :: v_dual_mov_b32 v5, s65 8875; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 8876; GFX12-NEXT: v_dual_mov_b32 v4, s64 :: v_dual_mov_b32 v7, s61 8877; GFX12-NEXT: v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s13 8878; 
GFX12-NEXT: s_wait_alu 0xfffe 8879; GFX12-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v11, s59 8880; GFX12-NEXT: v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s57 8881; GFX12-NEXT: v_dual_mov_b32 v12, s56 :: v_dual_mov_b32 v15, s55 8882; GFX12-NEXT: v_mov_b32_e32 v14, s54 8883; GFX12-NEXT: s_bfe_i64 s[12:13], s[62:63], 0x100000 8884; GFX12-NEXT: s_clause 0x3 8885; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:240 8886; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:224 8887; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:208 8888; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:192 8889; GFX12-NEXT: v_dual_mov_b32 v1, s53 :: v_dual_mov_b32 v0, s52 8890; GFX12-NEXT: s_wait_alu 0xfffe 8891; GFX12-NEXT: v_dual_mov_b32 v3, s13 :: v_dual_mov_b32 v2, s12 8892; GFX12-NEXT: v_dual_mov_b32 v5, s45 :: v_dual_mov_b32 v4, s44 8893; GFX12-NEXT: v_dual_mov_b32 v7, s51 :: v_dual_mov_b32 v6, s50 8894; GFX12-NEXT: v_dual_mov_b32 v9, s47 :: v_dual_mov_b32 v8, s46 8895; GFX12-NEXT: v_dual_mov_b32 v11, s49 :: v_dual_mov_b32 v10, s48 8896; GFX12-NEXT: v_dual_mov_b32 v13, s35 :: v_dual_mov_b32 v12, s34 8897; GFX12-NEXT: v_dual_mov_b32 v15, s43 :: v_dual_mov_b32 v14, s42 8898; GFX12-NEXT: v_dual_mov_b32 v17, s41 :: v_dual_mov_b32 v16, s40 8899; GFX12-NEXT: v_dual_mov_b32 v19, s39 :: v_dual_mov_b32 v18, s38 8900; GFX12-NEXT: v_dual_mov_b32 v21, s21 :: v_dual_mov_b32 v20, s20 8901; GFX12-NEXT: v_dual_mov_b32 v23, s37 :: v_dual_mov_b32 v22, s36 8902; GFX12-NEXT: s_clause 0x5 8903; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:176 8904; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:160 8905; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:144 8906; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:128 8907; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:112 8908; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] offset:96 8909; GFX12-NEXT: v_dual_mov_b32 v1, s31 :: 
v_dual_mov_b32 v0, s30 8910; GFX12-NEXT: v_dual_mov_b32 v3, s29 :: v_dual_mov_b32 v2, s28 8911; GFX12-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2 8912; GFX12-NEXT: v_dual_mov_b32 v7, s27 :: v_dual_mov_b32 v6, s26 8913; GFX12-NEXT: v_dual_mov_b32 v9, s25 :: v_dual_mov_b32 v8, s24 8914; GFX12-NEXT: v_dual_mov_b32 v11, s23 :: v_dual_mov_b32 v10, s22 8915; GFX12-NEXT: v_dual_mov_b32 v13, s1 :: v_dual_mov_b32 v12, s0 8916; GFX12-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v14, s10 8917; GFX12-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v16, s8 8918; GFX12-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6 8919; GFX12-NEXT: v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18 8920; GFX12-NEXT: v_dual_mov_b32 v23, s5 :: v_dual_mov_b32 v22, s4 8921; GFX12-NEXT: s_clause 0x5 8922; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:80 8923; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:64 8924; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:48 8925; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:32 8926; GFX12-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:16 8927; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] 8928; GFX12-NEXT: s_endpgm 8929 %load = load <32 x i16>, ptr addrspace(4) %in 8930 %ext = sext <32 x i16> %load to <32 x i64> 8931 store <32 x i64> %ext, ptr addrspace(1) %out 8932 ret void 8933} 8934 8935; These trigger undefined register machine verifier errors 8936 8937; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8938; %load = load <64 x i16>, ptr addrspace(4) %in 8939; %ext = zext <64 x i16> %load to <64 x i64> 8940; store <64 x i64> %ext, ptr addrspace(1) %out 8941; ret void 8942; } 8943 8944; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 8945; %load = load <64 x i16>, ptr addrspace(4) %in 8946; %ext = sext <64 x i16> %load to <64 x i64> 8947; store 
<64 x i64> %ext, ptr addrspace(1) %out 8948; ret void 8949; } 8950 8951attributes #0 = { nounwind } 8952