1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s 3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s 4; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s 5; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s 6; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s 7 8; FIXME: r600 is broken because the bigger testcases spill and it's not implemented 9; Copies a single i16 from %in to %out. The checks expect a 16-bit load on every target (ushort loads on GCN, VTX_READ_16 on r600). 10define amdgpu_kernel void @global_load_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 11; GCN-NOHSA-SI-LABEL: global_load_i16: 12; GCN-NOHSA-SI: ; %bb.0: ; %entry 13; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 14; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 15; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 16; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 17; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 18; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 19; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 20; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 21; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 22; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 23; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 24; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 25; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0 26; GCN-NOHSA-SI-NEXT: s_endpgm 27; 28; GCN-HSA-LABEL: global_load_i16: 29; GCN-HSA: ; %bb.0: ; %entry 30; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 31; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 32; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 33; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 34; GCN-HSA-NEXT: 
flat_load_ushort v2, v[0:1] 35; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 36; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 37; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 38; GCN-HSA-NEXT: flat_store_short v[0:1], v2 39; GCN-HSA-NEXT: s_endpgm 40; 41; GCN-NOHSA-VI-LABEL: global_load_i16: 42; GCN-NOHSA-VI: ; %bb.0: ; %entry 43; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 44; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 45; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 46; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 47; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 48; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 49; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 50; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 51; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 52; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 53; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 54; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 55; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[4:7], 0 56; GCN-NOHSA-VI-NEXT: s_endpgm 57; 58; EG-LABEL: global_load_i16: 59; EG: ; %bb.0: ; %entry 60; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 61; EG-NEXT: TEX 0 @6 62; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 63; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 64; EG-NEXT: CF_END 65; EG-NEXT: PAD 66; EG-NEXT: Fetch clause starting at 6: 67; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 68; EG-NEXT: ALU clause starting at 8: 69; EG-NEXT: MOV * T0.X, KC0[2].Z, 70; EG-NEXT: ALU clause starting at 9: 71; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 72; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 73; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 74; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 75; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 76; EG-NEXT: LSHL T0.X, T1.W, PV.W, 77; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 78; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 79; EG-NEXT: MOV T0.Y, 0.0, 80; EG-NEXT: MOV * T0.Z, 0.0, 81; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 82; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 83; 84; CM-LABEL: global_load_i16: 85; CM: ; %bb.0: ; %entry 86; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 87; 
CM-NEXT: TEX 0 @6 88; CM-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 89; CM-NEXT: MEM_RAT MSKOR T0.XW, T1.X 90; CM-NEXT: CF_END 91; CM-NEXT: PAD 92; CM-NEXT: Fetch clause starting at 6: 93; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 94; CM-NEXT: ALU clause starting at 8: 95; CM-NEXT: MOV * T0.X, KC0[2].Z, 96; CM-NEXT: ALU clause starting at 9: 97; CM-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x, 98; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00) 99; CM-NEXT: AND_INT T0.Z, T0.X, literal.x, 100; CM-NEXT: LSHL * T0.W, PV.W, literal.y, 101; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 102; CM-NEXT: LSHL T0.X, PV.Z, PV.W, 103; CM-NEXT: LSHL * T0.W, literal.x, PV.W, 104; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 105; CM-NEXT: MOV T0.Y, 0.0, 106; CM-NEXT: MOV * T0.Z, 0.0, 107; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 108; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 109entry: 110 %ld = load i16, ptr addrspace(1) %in 111 store i16 %ld, ptr addrspace(1) %out 112 ret void 113} 114; Copies a <2 x i16> from %in to %out. The checks expect it to move as one 32-bit value (dword load and store on GCN, VTX_READ_32 on r600). 115define amdgpu_kernel void @global_load_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 116; GCN-NOHSA-SI-LABEL: global_load_v2i16: 117; GCN-NOHSA-SI: ; %bb.0: ; %entry 118; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 119; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 120; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 121; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 122; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 123; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 124; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 125; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 126; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 127; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 128; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 129; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 130; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 131; GCN-NOHSA-SI-NEXT: s_endpgm 132; 133; GCN-HSA-LABEL: global_load_v2i16: 134; GCN-HSA: ; %bb.0: ; %entry 135; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 136; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 137; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 
138; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 139; GCN-HSA-NEXT: flat_load_dword v2, v[0:1] 140; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 141; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 142; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 143; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 144; GCN-HSA-NEXT: s_endpgm 145; 146; GCN-NOHSA-VI-LABEL: global_load_v2i16: 147; GCN-NOHSA-VI: ; %bb.0: ; %entry 148; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 149; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 150; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 151; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 152; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 153; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 154; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 155; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 156; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 157; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 158; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 159; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 160; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 161; GCN-NOHSA-VI-NEXT: s_endpgm 162; 163; EG-LABEL: global_load_v2i16: 164; EG: ; %bb.0: ; %entry 165; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 166; EG-NEXT: TEX 0 @6 167; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 168; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 169; EG-NEXT: CF_END 170; EG-NEXT: PAD 171; EG-NEXT: Fetch clause starting at 6: 172; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 173; EG-NEXT: ALU clause starting at 8: 174; EG-NEXT: MOV * T0.X, KC0[2].Z, 175; EG-NEXT: ALU clause starting at 9: 176; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 177; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 178; 179; CM-LABEL: global_load_v2i16: 180; CM: ; %bb.0: ; %entry 181; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 182; CM-NEXT: TEX 0 @6 183; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 184; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 185; CM-NEXT: CF_END 186; CM-NEXT: PAD 187; CM-NEXT: Fetch clause starting at 6: 188; CM-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 189; CM-NEXT: ALU clause starting at 8: 190; CM-NEXT: MOV * 
T0.X, KC0[2].Z, 191; CM-NEXT: ALU clause starting at 9: 192; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 193; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 194entry: 195 %ld = load <2 x i16>, ptr addrspace(1) %in 196 store <2 x i16> %ld, ptr addrspace(1) %out 197 ret void 198} 199; Copies a <3 x i16> from %in to %out. The GCN checks expect one dwordx2 load followed by a dword store and a short store at byte offset 4, the r600 checks expect three VTX_READ_16 fetches. 200define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 201; GCN-NOHSA-SI-LABEL: global_load_v3i16: 202; GCN-NOHSA-SI: ; %bb.0: ; %entry 203; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 204; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 205; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 206; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 207; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 208; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 209; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 210; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 211; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 212; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 213; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 214; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 215; GCN-NOHSA-SI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 216; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 217; GCN-NOHSA-SI-NEXT: s_endpgm 218; 219; GCN-HSA-LABEL: global_load_v3i16: 220; GCN-HSA: ; %bb.0: ; %entry 221; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 222; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 223; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 224; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 225; GCN-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 226; GCN-HSA-NEXT: s_add_u32 s2, s0, 4 227; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 228; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 229; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 230; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 231; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 232; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 233; GCN-HSA-NEXT: flat_store_short v[4:5], v1 234; GCN-HSA-NEXT: flat_store_dword v[2:3], v0 235; GCN-HSA-NEXT: s_endpgm 236; 237; GCN-NOHSA-VI-LABEL: global_load_v3i16: 238; GCN-NOHSA-VI: ; %bb.0: ; %entry 239; GCN-NOHSA-VI-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x24 240; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 241; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 242; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 243; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 244; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 245; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 246; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 247; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 248; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 249; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 250; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 251; GCN-NOHSA-VI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 252; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 253; GCN-NOHSA-VI-NEXT: s_endpgm 254; 255; EG-LABEL: global_load_v3i16: 256; EG: ; %bb.0: ; %entry 257; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 258; EG-NEXT: TEX 2 @6 259; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 260; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0 261; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X 262; EG-NEXT: CF_END 263; EG-NEXT: Fetch clause starting at 6: 264; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 265; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 266; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 267; EG-NEXT: ALU clause starting at 12: 268; EG-NEXT: MOV * T5.X, KC0[2].Z, 269; EG-NEXT: ALU clause starting at 13: 270; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 271; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 272; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 273; EG-NEXT: AND_INT * T2.W, T5.X, literal.y, 274; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 275; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 276; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 277; EG-NEXT: LSHL T5.X, T2.W, PV.W, 278; EG-NEXT: LSHL * T5.W, literal.x, PV.W, 279; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 280; EG-NEXT: MOV T5.Y, 0.0, 281; EG-NEXT: MOV * T5.Z, 0.0, 282; EG-NEXT: LSHR T8.X, T0.W, literal.x, 283; EG-NEXT: LSHL T0.W, T7.X, literal.y, 284; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, 285; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 286; EG-NEXT: 
65535(9.183409e-41), 0(0.000000e+00) 287; EG-NEXT: OR_INT T6.X, PV.W, PS, 288; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 289; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 290; 291; CM-LABEL: global_load_v3i16: 292; CM: ; %bb.0: ; %entry 293; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 294; CM-NEXT: TEX 2 @6 295; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 296; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X 297; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X 298; CM-NEXT: CF_END 299; CM-NEXT: Fetch clause starting at 6: 300; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 301; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 302; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 303; CM-NEXT: ALU clause starting at 12: 304; CM-NEXT: MOV * T5.X, KC0[2].Z, 305; CM-NEXT: ALU clause starting at 13: 306; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 307; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00) 308; CM-NEXT: AND_INT * T1.W, PV.W, literal.x, 309; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00) 310; CM-NEXT: AND_INT T0.Z, T5.X, literal.x, 311; CM-NEXT: LSHL * T1.W, PV.W, literal.y, 312; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 313; CM-NEXT: LSHL T5.X, PV.Z, PV.W, 314; CM-NEXT: LSHL * T5.W, literal.x, PV.W, 315; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 316; CM-NEXT: MOV T5.Y, 0.0, 317; CM-NEXT: MOV * T5.Z, 0.0, 318; CM-NEXT: LSHL T0.Z, T7.X, literal.x, 319; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212 320; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41) 321; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W, 322; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 323; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 324; CM-NEXT: LSHR * T8.X, T0.W, literal.x, 325; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 326entry: 327 %ld = load <3 x i16>, ptr addrspace(1) %in 328 store <3 x i16> %ld, ptr addrspace(1) %out 329 ret void 330} 331; Copies a <4 x i16> from %in to %out. The checks expect it to move as one 64-bit value (dwordx2 load and store on GCN, VTX_READ_64 on r600). 332define amdgpu_kernel void @global_load_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 333; GCN-NOHSA-SI-LABEL: global_load_v4i16: 334; GCN-NOHSA-SI: ; %bb.0: ; %entry 335; GCN-NOHSA-SI-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x9 336; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 337; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 338; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 339; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 340; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 341; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 342; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 343; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 344; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 345; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 346; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 347; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 348; GCN-NOHSA-SI-NEXT: s_endpgm 349; 350; GCN-HSA-LABEL: global_load_v4i16: 351; GCN-HSA: ; %bb.0: ; %entry 352; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 353; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 354; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 355; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 356; GCN-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 357; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 358; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 359; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 360; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 361; GCN-HSA-NEXT: s_endpgm 362; 363; GCN-NOHSA-VI-LABEL: global_load_v4i16: 364; GCN-NOHSA-VI: ; %bb.0: ; %entry 365; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 366; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 367; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 368; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 369; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 370; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 371; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 372; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 373; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 374; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 375; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 376; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 377; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 378; GCN-NOHSA-VI-NEXT: s_endpgm 379; 380; EG-LABEL: global_load_v4i16: 381; EG: ; %bb.0: ; %entry 382; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 383; EG-NEXT: TEX 0 @6 
384; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 385; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 386; EG-NEXT: CF_END 387; EG-NEXT: PAD 388; EG-NEXT: Fetch clause starting at 6: 389; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 390; EG-NEXT: ALU clause starting at 8: 391; EG-NEXT: MOV * T0.X, KC0[2].Z, 392; EG-NEXT: ALU clause starting at 9: 393; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 394; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 395; 396; CM-LABEL: global_load_v4i16: 397; CM: ; %bb.0: ; %entry 398; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 399; CM-NEXT: TEX 0 @6 400; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 401; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 402; CM-NEXT: CF_END 403; CM-NEXT: PAD 404; CM-NEXT: Fetch clause starting at 6: 405; CM-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 406; CM-NEXT: ALU clause starting at 8: 407; CM-NEXT: MOV * T0.X, KC0[2].Z, 408; CM-NEXT: ALU clause starting at 9: 409; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 410; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 411entry: 412 %ld = load <4 x i16>, ptr addrspace(1) %in 413 store <4 x i16> %ld, ptr addrspace(1) %out 414 ret void 415} 416; Copies an <8 x i16> from %in to %out. The checks expect it to move as one 128-bit value (dwordx4 load and store on GCN, VTX_READ_128 on r600). 417define amdgpu_kernel void @global_load_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 418; GCN-NOHSA-SI-LABEL: global_load_v8i16: 419; GCN-NOHSA-SI: ; %bb.0: ; %entry 420; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 421; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 422; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 423; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 424; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 425; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 426; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 427; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 428; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 429; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 430; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 431; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 432; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 433; GCN-NOHSA-SI-NEXT: s_endpgm 434; 435; GCN-HSA-LABEL: 
global_load_v8i16: 436; GCN-HSA: ; %bb.0: ; %entry 437; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 438; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 439; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 440; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 441; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 442; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 443; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 444; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 445; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 446; GCN-HSA-NEXT: s_endpgm 447; 448; GCN-NOHSA-VI-LABEL: global_load_v8i16: 449; GCN-NOHSA-VI: ; %bb.0: ; %entry 450; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 451; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 452; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 453; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 454; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 455; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 456; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 457; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 458; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 459; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 460; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 461; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 462; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 463; GCN-NOHSA-VI-NEXT: s_endpgm 464; 465; EG-LABEL: global_load_v8i16: 466; EG: ; %bb.0: ; %entry 467; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 468; EG-NEXT: TEX 0 @6 469; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 470; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 471; EG-NEXT: CF_END 472; EG-NEXT: PAD 473; EG-NEXT: Fetch clause starting at 6: 474; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 475; EG-NEXT: ALU clause starting at 8: 476; EG-NEXT: MOV * T0.X, KC0[2].Z, 477; EG-NEXT: ALU clause starting at 9: 478; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 479; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 480; 481; CM-LABEL: global_load_v8i16: 482; CM: ; %bb.0: ; %entry 483; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 484; CM-NEXT: TEX 0 @6 485; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 486; CM-NEXT: 
MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 487; CM-NEXT: CF_END 488; CM-NEXT: PAD 489; CM-NEXT: Fetch clause starting at 6: 490; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 491; CM-NEXT: ALU clause starting at 8: 492; CM-NEXT: MOV * T0.X, KC0[2].Z, 493; CM-NEXT: ALU clause starting at 9: 494; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 495; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 496entry: 497 %ld = load <8 x i16>, ptr addrspace(1) %in 498 store <8 x i16> %ld, ptr addrspace(1) %out 499 ret void 500} 501; Copies a <16 x i16> from %in to %out. The checks expect two 128-bit loads and two 128-bit stores, at byte offsets 0 and 16. 502define amdgpu_kernel void @global_load_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 503; GCN-NOHSA-SI-LABEL: global_load_v16i16: 504; GCN-NOHSA-SI: ; %bb.0: ; %entry 505; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 506; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 507; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 508; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 509; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 510; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 511; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 512; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 513; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 514; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 515; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 516; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 517; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 518; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 519; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 520; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 521; GCN-NOHSA-SI-NEXT: s_endpgm 522; 523; GCN-HSA-LABEL: global_load_v16i16: 524; GCN-HSA: ; %bb.0: ; %entry 525; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 526; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 527; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 528; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 529; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 530; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 531; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 532; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 533; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 534; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 535; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 536; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 537; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 538; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 539; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 540; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 541; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 542; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 543; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 544; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 545; GCN-HSA-NEXT: s_endpgm 546; 547; GCN-NOHSA-VI-LABEL: global_load_v16i16: 548; GCN-NOHSA-VI: ; %bb.0: ; %entry 549; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 550; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 551; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 552; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 553; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 554; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 555; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 556; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 557; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 558; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 559; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 560; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 561; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 562; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 563; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 564; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 565; GCN-NOHSA-VI-NEXT: s_endpgm 566; 567; EG-LABEL: global_load_v16i16: 568; EG: ; %bb.0: ; %entry 569; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 570; EG-NEXT: TEX 0 @8 571; EG-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[] 572; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 573; EG-NEXT: TEX 0 @10 574; EG-NEXT: ALU 3, @15, KC0[CB0:0-32], KC1[] 575; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 576; EG-NEXT: CF_END 577; EG-NEXT: Fetch clause starting at 8: 578; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 579; EG-NEXT: Fetch clause starting at 10: 580; EG-NEXT: 
VTX_READ_128 T0.XYZW, T0.X, 16, #1 581; EG-NEXT: ALU clause starting at 12: 582; EG-NEXT: MOV * T0.X, KC0[2].Z, 583; EG-NEXT: ALU clause starting at 13: 584; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 585; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 586; EG-NEXT: ALU clause starting at 15: 587; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 588; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 589; EG-NEXT: LSHR * T1.X, PV.W, literal.x, 590; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 591; 592; CM-LABEL: global_load_v16i16: 593; CM: ; %bb.0: ; %entry 594; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 595; CM-NEXT: TEX 0 @8 596; CM-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[] 597; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X 598; CM-NEXT: TEX 0 @10 599; CM-NEXT: ALU 3, @15, KC0[CB0:0-32], KC1[] 600; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 601; CM-NEXT: CF_END 602; CM-NEXT: Fetch clause starting at 8: 603; CM-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 604; CM-NEXT: Fetch clause starting at 10: 605; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 606; CM-NEXT: ALU clause starting at 12: 607; CM-NEXT: MOV * T0.X, KC0[2].Z, 608; CM-NEXT: ALU clause starting at 13: 609; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 610; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 611; CM-NEXT: ALU clause starting at 15: 612; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 613; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 614; CM-NEXT: LSHR * T1.X, PV.W, literal.x, 615; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 616entry: 617 %ld = load <16 x i16>, ptr addrspace(1) %in 618 store <16 x i16> %ld, ptr addrspace(1) %out 619 ret void 620} 621; Same <16 x i16> copy, but the load is only 2-byte aligned (the store is align 32) and the arguments are declared as (%in, %out). The SI and VI checks expect sixteen ushort loads merged with shift and or, the HSA and r600 checks still expect two 128-bit loads. 622define amdgpu_kernel void @global_load_v16i16_align2(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 { 623; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2: 624; GCN-NOHSA-SI: ; %bb.0: ; %entry 625; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 626; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000 627; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 628; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, s10 
629; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, s11 630; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 631; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s4 632; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s5 633; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s6 634; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s7 635; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 636; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2 637; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[8:11], 0 offset:4 638; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:6 639; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[8:11], 0 offset:8 640; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[8:11], 0 offset:10 641; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[8:11], 0 offset:12 642; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[8:11], 0 offset:14 643; GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[8:11], 0 offset:16 644; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[8:11], 0 offset:18 645; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[8:11], 0 offset:20 646; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[8:11], 0 offset:22 647; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[8:11], 0 offset:24 648; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[8:11], 0 offset:26 649; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[8:11], 0 offset:28 650; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[8:11], 0 offset:30 651; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8) 652; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 653; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3 654; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2 655; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1 656; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 657; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15 658; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13 659; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11 660; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 661; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6 662; 
GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5 663; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4 664; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0 665; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14 666; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12 667; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10 668; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8 669; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 670; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 671; GCN-NOHSA-SI-NEXT: s_endpgm 672; 673; GCN-HSA-LABEL: global_load_v16i16_align2: 674; GCN-HSA: ; %bb.0: ; %entry 675; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 676; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 677; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 678; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 679; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 680; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 681; GCN-HSA-NEXT: s_add_u32 s0, s0, 16 682; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 683; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 684; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 685; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 686; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 687; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 688; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 689; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 690; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 691; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 692; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 693; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 694; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 695; GCN-HSA-NEXT: s_endpgm 696; 697; GCN-NOHSA-VI-LABEL: global_load_v16i16_align2: 698; GCN-NOHSA-VI: ; %bb.0: ; %entry 699; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 700; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 701; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 702; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 703; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 704; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 705; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 offset:14 706; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[4:7], 0 
offset:10 707; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:6 708; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[4:7], 0 offset:2 709; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[4:7], 0 offset:30 710; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[4:7], 0 offset:26 711; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[4:7], 0 offset:22 712; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[4:7], 0 offset:18 713; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[4:7], 0 offset:12 714; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[4:7], 0 offset:8 715; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[4:7], 0 offset:4 716; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[4:7], 0 717; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[4:7], 0 offset:28 718; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[4:7], 0 offset:24 719; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[4:7], 0 offset:20 720; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[4:7], 0 offset:16 721; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s2 722; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s3 723; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 724; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 725; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 726; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(13) 727; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v2 728; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 729; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v3 730; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(11) 731; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 732; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10) 733; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 734; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(9) 735; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v6 736; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 737; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v19, 16, v7 738; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) 739; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v8, v0 740; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) 741; GCN-NOHSA-VI-NEXT: v_or_b32_e32 
v2, v9, v1 742; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5) 743; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v10, v16 744; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 745; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v11, v17 746; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 747; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v12, v4 748; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 749; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v13, v5 750; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 751; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v14, v18 752; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 753; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v15, v19 754; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 755; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 756; GCN-NOHSA-VI-NEXT: s_endpgm 757; 758; EG-LABEL: global_load_v16i16_align2: 759; EG: ; %bb.0: ; %entry 760; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 761; EG-NEXT: TEX 1 @6 762; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] 763; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0 764; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 765; EG-NEXT: CF_END 766; EG-NEXT: Fetch clause starting at 6: 767; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 768; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 769; EG-NEXT: ALU clause starting at 10: 770; EG-NEXT: MOV * T0.X, KC0[2].Y, 771; EG-NEXT: ALU clause starting at 11: 772; EG-NEXT: LSHR T2.X, KC0[2].Z, literal.x, 773; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.y, 774; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 775; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 776; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 777; 778; CM-LABEL: global_load_v16i16_align2: 779; CM: ; %bb.0: ; %entry 780; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 781; CM-NEXT: TEX 1 @6 782; CM-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] 783; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X 784; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X 785; CM-NEXT: CF_END 786; CM-NEXT: Fetch clause starting at 6: 787; CM-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 788; CM-NEXT: 
VTX_READ_128 T0.XYZW, T0.X, 0, #1 789; CM-NEXT: ALU clause starting at 10: 790; CM-NEXT: MOV * T0.X, KC0[2].Y, 791; CM-NEXT: ALU clause starting at 11: 792; CM-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, 793; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 794; CM-NEXT: LSHR * T2.X, PV.W, literal.x, 795; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 796; CM-NEXT: LSHR * T3.X, KC0[2].Z, literal.x, 797; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 798entry: 799 %ld = load <16 x i16>, ptr addrspace(1) %in, align 2 800 store <16 x i16> %ld, ptr addrspace(1) %out, align 32 801 ret void 802} 803 804define amdgpu_kernel void @global_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 805; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32: 806; GCN-NOHSA-SI: ; %bb.0: 807; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 808; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 809; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 810; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 811; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 812; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 813; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 814; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 815; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 816; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 817; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 818; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 819; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 820; GCN-NOHSA-SI-NEXT: s_endpgm 821; 822; GCN-HSA-LABEL: global_zextload_i16_to_i32: 823; GCN-HSA: ; %bb.0: 824; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 825; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 826; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 827; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 828; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 829; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 830; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 831; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 832; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 833; GCN-HSA-NEXT: s_endpgm 834; 835; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i32: 836; GCN-NOHSA-VI: ; %bb.0: 837; 
GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 838; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 839; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 840; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 841; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 842; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 843; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 844; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 845; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 846; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 847; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 848; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 849; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 850; GCN-NOHSA-VI-NEXT: s_endpgm 851; 852; EG-LABEL: global_zextload_i16_to_i32: 853; EG: ; %bb.0: 854; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 855; EG-NEXT: TEX 0 @6 856; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 857; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 858; EG-NEXT: CF_END 859; EG-NEXT: PAD 860; EG-NEXT: Fetch clause starting at 6: 861; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 862; EG-NEXT: ALU clause starting at 8: 863; EG-NEXT: MOV * T0.X, KC0[2].Z, 864; EG-NEXT: ALU clause starting at 9: 865; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 866; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 867; 868; CM-LABEL: global_zextload_i16_to_i32: 869; CM: ; %bb.0: 870; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 871; CM-NEXT: TEX 0 @6 872; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 873; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 874; CM-NEXT: CF_END 875; CM-NEXT: PAD 876; CM-NEXT: Fetch clause starting at 6: 877; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 878; CM-NEXT: ALU clause starting at 8: 879; CM-NEXT: MOV * T0.X, KC0[2].Z, 880; CM-NEXT: ALU clause starting at 9: 881; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 882; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 883 %a = load i16, ptr addrspace(1) %in 884 %ext = zext i16 %a to i32 885 store i32 %ext, ptr addrspace(1) %out 886 ret void 887} 888 889define amdgpu_kernel void @global_sextload_i16_to_i32(ptr addrspace(1) %out, ptr 
addrspace(1) %in) #0 { 890; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32: 891; GCN-NOHSA-SI: ; %bb.0: 892; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 893; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 894; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 895; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 896; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 897; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 898; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 899; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 900; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 901; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 902; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 903; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 904; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 905; GCN-NOHSA-SI-NEXT: s_endpgm 906; 907; GCN-HSA-LABEL: global_sextload_i16_to_i32: 908; GCN-HSA: ; %bb.0: 909; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 910; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 911; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 912; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 913; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 914; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 915; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 916; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 917; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 918; GCN-HSA-NEXT: s_endpgm 919; 920; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i32: 921; GCN-NOHSA-VI: ; %bb.0: 922; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 923; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 924; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 925; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 926; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 927; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 928; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 929; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 930; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 931; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 932; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 933; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 934; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 935; GCN-NOHSA-VI-NEXT: s_endpgm 936; 937; EG-LABEL: 
global_sextload_i16_to_i32: 938; EG: ; %bb.0: 939; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 940; EG-NEXT: TEX 0 @6 941; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 942; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 943; EG-NEXT: CF_END 944; EG-NEXT: PAD 945; EG-NEXT: Fetch clause starting at 6: 946; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 947; EG-NEXT: ALU clause starting at 8: 948; EG-NEXT: MOV * T0.X, KC0[2].Z, 949; EG-NEXT: ALU clause starting at 9: 950; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 951; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 952; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 953; 954; CM-LABEL: global_sextload_i16_to_i32: 955; CM: ; %bb.0: 956; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 957; CM-NEXT: TEX 0 @6 958; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 959; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 960; CM-NEXT: CF_END 961; CM-NEXT: PAD 962; CM-NEXT: Fetch clause starting at 6: 963; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 964; CM-NEXT: ALU clause starting at 8: 965; CM-NEXT: MOV * T0.X, KC0[2].Z, 966; CM-NEXT: ALU clause starting at 9: 967; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 968; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 969; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 970; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 971 %a = load i16, ptr addrspace(1) %in 972 %ext = sext i16 %a to i32 973 store i32 %ext, ptr addrspace(1) %out 974 ret void 975} 976 977define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 978; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32: 979; GCN-NOHSA-SI: ; %bb.0: 980; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 981; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 982; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 983; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 984; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 985; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 986; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 987; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 988; GCN-NOHSA-SI-NEXT: 
buffer_load_ushort v0, off, s[8:11], 0 989; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 990; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 991; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 992; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 993; GCN-NOHSA-SI-NEXT: s_endpgm 994; 995; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i32: 996; GCN-HSA: ; %bb.0: 997; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 998; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 999; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1000; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1001; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 1002; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1003; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1004; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1005; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 1006; GCN-HSA-NEXT: s_endpgm 1007; 1008; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i32: 1009; GCN-NOHSA-VI: ; %bb.0: 1010; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1011; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1012; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1013; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1014; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1015; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1016; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1017; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1018; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1019; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1020; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1021; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1022; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1023; GCN-NOHSA-VI-NEXT: s_endpgm 1024; 1025; EG-LABEL: global_zextload_v1i16_to_v1i32: 1026; EG: ; %bb.0: 1027; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1028; EG-NEXT: TEX 0 @6 1029; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 1030; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1031; EG-NEXT: CF_END 1032; EG-NEXT: PAD 1033; EG-NEXT: Fetch clause starting at 6: 1034; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1035; EG-NEXT: ALU clause starting at 8: 1036; EG-NEXT: MOV * T0.X, KC0[2].Z, 1037; EG-NEXT: ALU clause starting at 
9: 1038; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1039; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1040; 1041; CM-LABEL: global_zextload_v1i16_to_v1i32: 1042; CM: ; %bb.0: 1043; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1044; CM-NEXT: TEX 0 @6 1045; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 1046; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 1047; CM-NEXT: CF_END 1048; CM-NEXT: PAD 1049; CM-NEXT: Fetch clause starting at 6: 1050; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1051; CM-NEXT: ALU clause starting at 8: 1052; CM-NEXT: MOV * T0.X, KC0[2].Z, 1053; CM-NEXT: ALU clause starting at 9: 1054; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1055; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1056 %load = load <1 x i16>, ptr addrspace(1) %in 1057 %ext = zext <1 x i16> %load to <1 x i32> 1058 store <1 x i32> %ext, ptr addrspace(1) %out 1059 ret void 1060} 1061 1062define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1063; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32: 1064; GCN-NOHSA-SI: ; %bb.0: 1065; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1066; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1067; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1068; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1069; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1070; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1071; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1072; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1073; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 1074; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1075; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1076; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1077; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1078; GCN-NOHSA-SI-NEXT: s_endpgm 1079; 1080; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i32: 1081; GCN-HSA: ; %bb.0: 1082; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1083; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1084; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1085; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1086; GCN-HSA-NEXT: 
flat_load_sshort v2, v[0:1] 1087; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1088; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1089; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1090; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 1091; GCN-HSA-NEXT: s_endpgm 1092; 1093; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i32: 1094; GCN-NOHSA-VI: ; %bb.0: 1095; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1096; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1097; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1098; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1099; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1100; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1101; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1102; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1103; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 1104; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1105; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1106; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1107; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1108; GCN-NOHSA-VI-NEXT: s_endpgm 1109; 1110; EG-LABEL: global_sextload_v1i16_to_v1i32: 1111; EG: ; %bb.0: 1112; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1113; EG-NEXT: TEX 0 @6 1114; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 1115; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1116; EG-NEXT: CF_END 1117; EG-NEXT: PAD 1118; EG-NEXT: Fetch clause starting at 6: 1119; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1120; EG-NEXT: ALU clause starting at 8: 1121; EG-NEXT: MOV * T0.X, KC0[2].Z, 1122; EG-NEXT: ALU clause starting at 9: 1123; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1124; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1125; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1126; 1127; CM-LABEL: global_sextload_v1i16_to_v1i32: 1128; CM: ; %bb.0: 1129; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1130; CM-NEXT: TEX 0 @6 1131; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 1132; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 1133; CM-NEXT: CF_END 1134; CM-NEXT: PAD 1135; CM-NEXT: Fetch clause starting at 6: 1136; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1137; 
CM-NEXT: ALU clause starting at 8: 1138; CM-NEXT: MOV * T0.X, KC0[2].Z, 1139; CM-NEXT: ALU clause starting at 9: 1140; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 1141; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1142; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1143; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1144 %load = load <1 x i16>, ptr addrspace(1) %in 1145 %ext = sext <1 x i16> %load to <1 x i32> 1146 store <1 x i32> %ext, ptr addrspace(1) %out 1147 ret void 1148} 1149 1150define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1151; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32: 1152; GCN-NOHSA-SI: ; %bb.0: 1153; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1154; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1155; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1156; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1157; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1158; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1159; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1160; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1161; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1162; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1163; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1164; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1165; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1166; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1167; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1168; GCN-NOHSA-SI-NEXT: s_endpgm 1169; 1170; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i32: 1171; GCN-HSA: ; %bb.0: 1172; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1173; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1174; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1175; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1176; GCN-HSA-NEXT: flat_load_dword v2, v[0:1] 1177; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1178; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1179; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1180; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1181; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v2 1182; GCN-HSA-NEXT: 
flat_store_dwordx2 v[0:1], v[2:3] 1183; GCN-HSA-NEXT: s_endpgm 1184; 1185; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i32: 1186; GCN-NOHSA-VI: ; %bb.0: 1187; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1188; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1189; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1190; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1191; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1192; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1193; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1194; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1195; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1196; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1197; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1198; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1199; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1200; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1201; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1202; GCN-NOHSA-VI-NEXT: s_endpgm 1203; 1204; EG-LABEL: global_zextload_v2i16_to_v2i32: 1205; EG: ; %bb.0: 1206; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1207; EG-NEXT: TEX 0 @6 1208; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 1209; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 1210; EG-NEXT: CF_END 1211; EG-NEXT: PAD 1212; EG-NEXT: Fetch clause starting at 6: 1213; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1214; EG-NEXT: ALU clause starting at 8: 1215; EG-NEXT: MOV * T4.X, KC0[2].Z, 1216; EG-NEXT: ALU clause starting at 9: 1217; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 1218; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1219; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 1220; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 1221; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1222; 1223; CM-LABEL: global_zextload_v2i16_to_v2i32: 1224; CM: ; %bb.0: 1225; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1226; CM-NEXT: TEX 0 @6 1227; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1228; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 1229; CM-NEXT: CF_END 1230; CM-NEXT: PAD 1231; CM-NEXT: Fetch clause starting at 6: 1232; 
CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1233; CM-NEXT: ALU clause starting at 8: 1234; CM-NEXT: MOV * T4.X, KC0[2].Z, 1235; CM-NEXT: ALU clause starting at 9: 1236; CM-NEXT: LSHR * T4.Y, T4.X, literal.x, 1237; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1238; CM-NEXT: AND_INT * T4.X, T4.X, literal.x, 1239; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1240; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, 1241; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1242 %load = load <2 x i16>, ptr addrspace(1) %in 1243 %ext = zext <2 x i16> %load to <2 x i32> 1244 store <2 x i32> %ext, ptr addrspace(1) %out 1245 ret void 1246} 1247 1248; TODO: On r600 (EG/CM) this should use ASHR instead of LSHR + BFE 1249define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1250; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32: 1251; GCN-NOHSA-SI: ; %bb.0: 1252; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1253; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1254; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1255; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1256; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1257; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1258; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1259; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1260; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1261; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1262; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1263; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1264; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1265; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16 1266; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1267; GCN-NOHSA-SI-NEXT: s_endpgm 1268; 1269; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i32: 1270; GCN-HSA: ; %bb.0: 1271; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1272; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1273; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1274; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1275; GCN-HSA-NEXT: flat_load_dword v2, v[0:1] 1276; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1277; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1278; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1279; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v2 1280; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 1281; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1282; GCN-HSA-NEXT: s_endpgm 1283; 1284; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i32: 1285; GCN-NOHSA-VI: ; %bb.0: 1286; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1287; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1288; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1289; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1290; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1291; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1292; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1293; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1294; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1295; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1296; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1297; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1298; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1299; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 1300; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1301; GCN-NOHSA-VI-NEXT: s_endpgm 1302; 1303; EG-LABEL: global_sextload_v2i16_to_v2i32: 1304; EG: ; %bb.0: 1305; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1306; EG-NEXT: TEX 0 @6 1307; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1308; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1309; EG-NEXT: CF_END 1310; EG-NEXT: PAD 1311; EG-NEXT: Fetch clause starting at 6: 1312; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1313; EG-NEXT: ALU clause starting at 8: 1314; EG-NEXT: MOV * T4.X, KC0[2].Z, 1315; EG-NEXT: ALU clause starting at 9: 1316; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1317; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1318; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y, 1319; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1320; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1321; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1322; 1323; CM-LABEL: global_sextload_v2i16_to_v2i32: 1324; CM: ; %bb.0: 1325; CM-NEXT: ALU 0, @8, 
KC0[CB0:0-32], KC1[] 1326; CM-NEXT: TEX 0 @6 1327; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1328; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T4.X 1329; CM-NEXT: CF_END 1330; CM-NEXT: PAD 1331; CM-NEXT: Fetch clause starting at 6: 1332; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1333; CM-NEXT: ALU clause starting at 8: 1334; CM-NEXT: MOV * T4.X, KC0[2].Z, 1335; CM-NEXT: ALU clause starting at 9: 1336; CM-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1337; CM-NEXT: LSHR * T0.W, T4.X, literal.x, 1338; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1339; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1340; CM-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y, 1341; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1342 %load = load <2 x i16>, ptr addrspace(1) %in 1343 %ext = sext <2 x i16> %load to <2 x i32> 1344 store <2 x i32> %ext, ptr addrspace(1) %out 1345 ret void 1346} 1347 1348define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { 1349; GCN-NOHSA-SI-LABEL: global_zextload_v3i16_to_v3i32: 1350; GCN-NOHSA-SI: ; %bb.0: ; %entry 1351; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1352; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1353; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1354; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1355; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1356; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1357; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1358; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1359; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1360; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1361; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1362; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1363; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1364; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v0 1365; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v1 1366; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1367; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0 1368; GCN-NOHSA-SI-NEXT: s_endpgm 1369; 1370; GCN-HSA-LABEL: 
global_zextload_v3i16_to_v3i32: 1371; GCN-HSA: ; %bb.0: ; %entry 1372; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1373; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1374; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1375; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1376; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1377; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1378; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1379; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1380; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v3 1381; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v4 1382; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v3 1383; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2] 1384; GCN-HSA-NEXT: s_endpgm 1385; 1386; GCN-NOHSA-VI-LABEL: global_zextload_v3i16_to_v3i32: 1387; GCN-NOHSA-VI: ; %bb.0: ; %entry 1388; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1389; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1390; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1391; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1392; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1393; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1394; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1395; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1396; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1397; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1398; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1399; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1400; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1 1401; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1402; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1403; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 1404; GCN-NOHSA-VI-NEXT: s_endpgm 1405; 1406; EG-LABEL: global_zextload_v3i16_to_v3i32: 1407; EG: ; %bb.0: ; %entry 1408; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1409; EG-NEXT: TEX 2 @6 1410; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1411; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1412; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1413; EG-NEXT: CF_END 1414; EG-NEXT: Fetch clause starting at 6: 1415; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 
1416; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1417; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1418; EG-NEXT: ALU clause starting at 12: 1419; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1420; EG-NEXT: MOV * T1.X, KC0[2].Z, 1421; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1422; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1423; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1424; EG-NEXT: ALU clause starting at 17: 1425; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1426; EG-NEXT: MOV * T3.Y, T1.X, 1427; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1428; 1429; CM-LABEL: global_zextload_v3i16_to_v3i32: 1430; CM: ; %bb.0: ; %entry 1431; CM-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1432; CM-NEXT: TEX 2 @6 1433; CM-NEXT: ALU 2, @17, KC0[CB0:0-32], KC1[] 1434; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T4.X 1435; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X 1436; CM-NEXT: CF_END 1437; CM-NEXT: Fetch clause starting at 6: 1438; CM-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1439; CM-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1440; CM-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1441; CM-NEXT: ALU clause starting at 12: 1442; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1443; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1444; CM-NEXT: LSHR * T0.X, PV.W, literal.x, 1445; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1446; CM-NEXT: MOV * T1.X, KC0[2].Z, 1447; CM-NEXT: ALU clause starting at 17: 1448; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1449; CM-NEXT: MOV * T3.Y, T1.X, 1450; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1451entry: 1452 %ld = load <3 x i16>, ptr addrspace(1) %in 1453 %ext = zext <3 x i16> %ld to <3 x i32> 1454 store <3 x i32> %ext, ptr addrspace(1) %out 1455 ret void 1456} 1457 1458define amdgpu_kernel void @global_sextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { 1459; GCN-NOHSA-SI-LABEL: global_sextload_v3i16_to_v3i32: 1460; GCN-NOHSA-SI: ; %bb.0: ; %entry 1461; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1462; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1463; GCN-NOHSA-SI-NEXT: 
s_mov_b32 s6, -1 1464; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1465; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1466; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1467; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1468; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1469; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1470; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1471; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1472; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1473; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v0 1474; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v0, 0, 16 1475; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16 1476; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1477; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0 1478; GCN-NOHSA-SI-NEXT: s_endpgm 1479; 1480; GCN-HSA-LABEL: global_sextload_v3i16_to_v3i32: 1481; GCN-HSA: ; %bb.0: ; %entry 1482; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1483; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1484; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1485; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1486; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1487; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1488; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1489; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1490; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1491; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 1492; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16 1493; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2] 1494; GCN-HSA-NEXT: s_endpgm 1495; 1496; GCN-NOHSA-VI-LABEL: global_sextload_v3i16_to_v3i32: 1497; GCN-NOHSA-VI: ; %bb.0: ; %entry 1498; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1499; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1500; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1501; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1502; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1503; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1504; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1505; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1506; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0 1507; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 
1508; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1509; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1510; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1511; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v4, 0, 16 1512; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v3, 0, 16 1513; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 1514; GCN-NOHSA-VI-NEXT: s_endpgm 1515; 1516; EG-LABEL: global_sextload_v3i16_to_v3i32: 1517; EG: ; %bb.0: ; %entry 1518; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1519; EG-NEXT: TEX 2 @6 1520; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1521; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1522; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1523; EG-NEXT: CF_END 1524; EG-NEXT: Fetch clause starting at 6: 1525; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1526; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1527; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1528; EG-NEXT: ALU clause starting at 12: 1529; EG-NEXT: MOV * T0.X, KC0[2].Z, 1530; EG-NEXT: ALU clause starting at 13: 1531; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1532; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1533; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1534; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1535; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1536; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1537; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1538; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1539; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1540; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1541; 1542; CM-LABEL: global_sextload_v3i16_to_v3i32: 1543; CM: ; %bb.0: ; %entry 1544; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1545; CM-NEXT: TEX 2 @6 1546; CM-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1547; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X 1548; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T3.X 1549; CM-NEXT: CF_END 1550; CM-NEXT: Fetch clause starting at 6: 1551; CM-NEXT: VTX_READ_16 T1.X, T0.X, 4, #1 1552; CM-NEXT: VTX_READ_16 T2.X, T0.X, 0, #1 1553; CM-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1 1554; 
CM-NEXT: ALU clause starting at 12: 1555; CM-NEXT: MOV * T0.X, KC0[2].Z, 1556; CM-NEXT: ALU clause starting at 13: 1557; CM-NEXT: BFE_INT T1.X, T1.X, 0.0, literal.x, 1558; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1559; CM-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1560; CM-NEXT: LSHR T3.X, PV.W, literal.x, 1561; CM-NEXT: BFE_INT * T0.Y, T0.X, 0.0, literal.y, 1562; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1563; CM-NEXT: BFE_INT * T0.X, T2.X, 0.0, literal.x, 1564; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1565; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 1566; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1567entry: 1568 %ld = load <3 x i16>, ptr addrspace(1) %in 1569 %ext = sext <3 x i16> %ld to <3 x i32> 1570 store <3 x i32> %ext, ptr addrspace(1) %out 1571 ret void 1572} 1573 1574; TODO: This should use DST, but for some reason there are redundant MOVs 1575define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1576; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32: 1577; GCN-NOHSA-SI: ; %bb.0: 1578; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1579; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1580; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1581; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1582; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1583; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1584; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1585; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1586; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 1587; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1588; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1589; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1590; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 1591; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1592; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v5 1593; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v4 1594; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1595; GCN-NOHSA-SI-NEXT: s_endpgm 1596; 1597; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i32: 
1598; GCN-HSA: ; %bb.0: 1599; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1600; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1601; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1602; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1603; GCN-HSA-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 1604; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0 1605; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1 1606; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1607; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5 1608; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1609; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v5 1610; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v4 1611; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 1612; GCN-HSA-NEXT: s_endpgm 1613; 1614; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i32: 1615; GCN-NOHSA-VI: ; %bb.0: 1616; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1617; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1618; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1619; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1620; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1621; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1622; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1623; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1624; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1625; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1626; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1627; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1628; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1629; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1 1630; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1631; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1632; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1633; GCN-NOHSA-VI-NEXT: s_endpgm 1634; 1635; EG-LABEL: global_zextload_v4i16_to_v4i32: 1636; EG: ; %bb.0: 1637; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1638; EG-NEXT: TEX 0 @6 1639; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 1640; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1641; EG-NEXT: CF_END 1642; EG-NEXT: PAD 1643; EG-NEXT: Fetch clause starting at 6: 1644; EG-NEXT: VTX_READ_64 T5.XY, 
T5.X, 0, #1 1645; EG-NEXT: ALU clause starting at 8: 1646; EG-NEXT: MOV * T5.X, KC0[2].Z, 1647; EG-NEXT: ALU clause starting at 9: 1648; EG-NEXT: LSHR * T5.W, T5.Y, literal.x, 1649; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1650; EG-NEXT: AND_INT * T5.Z, T5.Y, literal.x, 1651; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1652; EG-NEXT: LSHR * T5.Y, T5.X, literal.x, 1653; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1654; EG-NEXT: AND_INT T5.X, T5.X, literal.x, 1655; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1656; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1657; 1658; CM-LABEL: global_zextload_v4i16_to_v4i32: 1659; CM: ; %bb.0: 1660; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1661; CM-NEXT: TEX 0 @6 1662; CM-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[] 1663; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 1664; CM-NEXT: CF_END 1665; CM-NEXT: PAD 1666; CM-NEXT: Fetch clause starting at 6: 1667; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1668; CM-NEXT: ALU clause starting at 8: 1669; CM-NEXT: MOV * T5.X, KC0[2].Z, 1670; CM-NEXT: ALU clause starting at 9: 1671; CM-NEXT: LSHR * T5.W, T5.Y, literal.x, 1672; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1673; CM-NEXT: AND_INT * T5.Z, T5.Y, literal.x, 1674; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1675; CM-NEXT: LSHR * T5.Y, T5.X, literal.x, 1676; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1677; CM-NEXT: AND_INT * T5.X, T5.X, literal.x, 1678; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1679; CM-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, 1680; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1681 %load = load <4 x i16>, ptr addrspace(1) %in 1682 %ext = zext <4 x i16> %load to <4 x i32> 1683 store <4 x i32> %ext, ptr addrspace(1) %out 1684 ret void 1685} 1686 1687; TODO: We should use ASHR instead of LSHR + BFE 1688; TODO: This should use DST, but for some reason there are redundant MOVs 1689define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1690; GCN-NOHSA-SI-LABEL: 
global_sextload_v4i16_to_v4i32: 1691; GCN-NOHSA-SI: ; %bb.0: 1692; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1693; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1694; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1695; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1696; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1697; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1698; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1699; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1700; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0 1701; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1702; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1703; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1704; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1705; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[5:6], v[3:4], 48 1706; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v4, 0, 16 1707; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v3, 0, 16 1708; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v5 1709; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1710; GCN-NOHSA-SI-NEXT: s_endpgm 1711; 1712; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i32: 1713; GCN-HSA: ; %bb.0: 1714; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1715; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1716; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1717; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1718; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1719; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1720; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1721; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1722; GCN-HSA-NEXT: v_ashr_i64 v[7:8], v[3:4], 48 1723; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1724; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 1725; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16 1726; GCN-HSA-NEXT: v_mov_b32_e32 v3, v7 1727; GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[0:3] 1728; GCN-HSA-NEXT: s_endpgm 1729; 1730; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i32: 1731; GCN-NOHSA-VI: ; %bb.0: 1732; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1733; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1734; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1735; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s10, s6 1736; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1737; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1738; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1739; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1740; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 1741; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1742; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1743; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1744; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 1745; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4 1746; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16 1747; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16 1748; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1749; GCN-NOHSA-VI-NEXT: s_endpgm 1750; 1751; EG-LABEL: global_sextload_v4i16_to_v4i32: 1752; EG: ; %bb.0: 1753; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1754; EG-NEXT: TEX 0 @6 1755; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1756; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1 1757; EG-NEXT: CF_END 1758; EG-NEXT: PAD 1759; EG-NEXT: Fetch clause starting at 6: 1760; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1761; EG-NEXT: ALU clause starting at 8: 1762; EG-NEXT: MOV * T5.X, KC0[2].Z, 1763; EG-NEXT: ALU clause starting at 9: 1764; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x, 1765; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1766; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x, 1767; EG-NEXT: LSHR * T0.W, T5.Y, literal.x, 1768; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1769; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x, 1770; EG-NEXT: LSHR * T0.W, T5.X, literal.x, 1771; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1772; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 1773; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y, 1774; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1775; 1776; CM-LABEL: global_sextload_v4i16_to_v4i32: 1777; CM: ; %bb.0: 1778; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1779; CM-NEXT: TEX 0 @6 1780; CM-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1781; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T5.X 1782; CM-NEXT: 
CF_END 1783; CM-NEXT: PAD 1784; CM-NEXT: Fetch clause starting at 6: 1785; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1786; CM-NEXT: ALU clause starting at 8: 1787; CM-NEXT: MOV * T5.X, KC0[2].Z, 1788; CM-NEXT: ALU clause starting at 9: 1789; CM-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x, 1790; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1791; CM-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x, 1792; CM-NEXT: LSHR * T0.W, T5.Y, literal.x, 1793; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1794; CM-NEXT: LSHR T0.Z, T5.X, literal.x, 1795; CM-NEXT: BFE_INT * T6.W, PV.W, 0.0, literal.x, 1796; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1797; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 1798; CM-NEXT: BFE_INT * T6.Y, PV.Z, 0.0, literal.y, 1799; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1800 %load = load <4 x i16>, ptr addrspace(1) %in 1801 %ext = sext <4 x i16> %load to <4 x i32> 1802 store <4 x i32> %ext, ptr addrspace(1) %out 1803 ret void 1804} 1805 1806; TODO: These should use LSHR instead of BFE_UINT 1807define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1808; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32: 1809; GCN-NOHSA-SI: ; %bb.0: 1810; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1811; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1812; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1813; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1814; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1815; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1816; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1817; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1818; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1819; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1820; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1821; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1822; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1823; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1824; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1825; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1826; 
GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v1 1827; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v0 1828; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v3 1829; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v2 1830; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1831; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1832; GCN-NOHSA-SI-NEXT: s_endpgm 1833; 1834; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i32: 1835; GCN-HSA: ; %bb.0: 1836; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1837; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1838; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1839; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1840; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 1841; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1842; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1843; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 1844; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 1845; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 1846; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 1847; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1848; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1849; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1850; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v3 1851; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v2 1852; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1853; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1854; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v1 1855; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v0 1856; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 1857; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 1858; GCN-HSA-NEXT: s_endpgm 1859; 1860; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i32: 1861; GCN-NOHSA-VI: ; %bb.0: 1862; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1863; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1864; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1865; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1866; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1867; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1868; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1869; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1870; 
GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1871; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1872; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1873; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1874; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1875; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v3 1876; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1877; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v2 1878; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1879; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v1 1880; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1881; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v0 1882; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1883; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1884; GCN-NOHSA-VI-NEXT: s_endpgm 1885; 1886; EG-LABEL: global_zextload_v8i16_to_v8i32: 1887; EG: ; %bb.0: 1888; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1889; EG-NEXT: TEX 0 @6 1890; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1891; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1892; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1893; EG-NEXT: CF_END 1894; EG-NEXT: Fetch clause starting at 6: 1895; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1896; EG-NEXT: ALU clause starting at 8: 1897; EG-NEXT: MOV * T7.X, KC0[2].Z, 1898; EG-NEXT: ALU clause starting at 9: 1899; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1900; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1901; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1902; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1903; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1904; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1905; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1906; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1907; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1908; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1909; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1910; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1911; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1912; EG-NEXT: AND_INT 
T9.X, T7.Z, literal.x, 1913; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1914; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1915; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1916; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1917; 1918; CM-LABEL: global_zextload_v8i16_to_v8i32: 1919; CM: ; %bb.0: 1920; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1921; CM-NEXT: TEX 0 @6 1922; CM-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1923; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T10.X 1924; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T9.X 1925; CM-NEXT: CF_END 1926; CM-NEXT: Fetch clause starting at 6: 1927; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1928; CM-NEXT: ALU clause starting at 8: 1929; CM-NEXT: MOV * T7.X, KC0[2].Z, 1930; CM-NEXT: ALU clause starting at 9: 1931; CM-NEXT: LSHR * T8.W, T7.W, literal.x, 1932; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1933; CM-NEXT: AND_INT * T8.Z, T7.W, literal.x, 1934; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1935; CM-NEXT: LSHR T8.Y, T7.Z, literal.x, 1936; CM-NEXT: LSHR * T7.W, T7.Y, literal.x, 1937; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1938; CM-NEXT: AND_INT T8.X, T7.Z, literal.x, 1939; CM-NEXT: AND_INT T7.Z, T7.Y, literal.x, 1940; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1941; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1942; CM-NEXT: LSHR T9.X, PV.W, literal.x, 1943; CM-NEXT: LSHR * T7.Y, T7.X, literal.y, 1944; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1945; CM-NEXT: AND_INT * T7.X, T7.X, literal.x, 1946; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1947; CM-NEXT: LSHR * T10.X, KC0[2].Y, literal.x, 1948; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1949 %load = load <8 x i16>, ptr addrspace(1) %in 1950 %ext = zext <8 x i16> %load to <8 x i32> 1951 store <8 x i32> %ext, ptr addrspace(1) %out 1952 ret void 1953} 1954 1955; TODO: These should use ASHR instead of LSHR + BFE_INT 1956define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1957; GCN-NOHSA-SI-LABEL: 
global_sextload_v8i16_to_v8i32: 1958; GCN-NOHSA-SI: ; %bb.0: 1959; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1960; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1961; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1962; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1963; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1964; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1965; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1966; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1967; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1968; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1969; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1970; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1971; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 1972; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 1973; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v1, 0, 16 1974; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16 1975; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 1976; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 1977; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v3, 0, 16 1978; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v2, 0, 16 1979; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1980; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1981; GCN-NOHSA-SI-NEXT: s_endpgm 1982; 1983; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i32: 1984; GCN-HSA: ; %bb.0: 1985; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1986; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1987; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1988; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1989; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 1990; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1991; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1992; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 1993; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 1994; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 1995; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 1996; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1997; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v3 1998; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v2 1999; GCN-HSA-NEXT: v_bfe_i32 v10, v3, 0, 16 2000; GCN-HSA-NEXT: v_bfe_i32 v8, 
v2, 0, 16 2001; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1 2002; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0 2003; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 2004; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 2005; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 2006; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 2007; GCN-HSA-NEXT: s_endpgm 2008; 2009; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i32: 2010; GCN-NOHSA-VI: ; %bb.0: 2011; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2012; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 2013; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 2014; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 2015; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 2016; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2017; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 2018; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 2019; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2020; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 2021; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 2022; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2023; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 2024; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 2025; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v3, 0, 16 2026; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 2027; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 2028; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 2029; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16 2030; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16 2031; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 2032; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2033; GCN-NOHSA-VI-NEXT: s_endpgm 2034; 2035; EG-LABEL: global_sextload_v8i16_to_v8i32: 2036; EG: ; %bb.0: 2037; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 2038; EG-NEXT: TEX 0 @6 2039; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 2040; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 2041; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 2042; EG-NEXT: CF_END 2043; EG-NEXT: Fetch clause starting at 6: 2044; EG-NEXT: VTX_READ_128 
T7.XYZW, T7.X, 0, #1 2045; EG-NEXT: ALU clause starting at 8: 2046; EG-NEXT: MOV * T7.X, KC0[2].Z, 2047; EG-NEXT: ALU clause starting at 9: 2048; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 2049; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2050; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 2051; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 2052; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 2053; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2054; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 2055; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 2056; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 2057; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 2058; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2059; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 2060; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 2061; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 2062; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 2063; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2064; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2065; EG-NEXT: LSHR T10.X, PS, literal.x, 2066; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 2067; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2068; 2069; CM-LABEL: global_sextload_v8i16_to_v8i32: 2070; CM: ; %bb.0: 2071; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 2072; CM-NEXT: TEX 0 @6 2073; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 2074; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X 2075; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X 2076; CM-NEXT: CF_END 2077; CM-NEXT: Fetch clause starting at 6: 2078; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 2079; CM-NEXT: ALU clause starting at 8: 2080; CM-NEXT: MOV * T7.X, KC0[2].Z, 2081; CM-NEXT: ALU clause starting at 9: 2082; CM-NEXT: BFE_INT * T8.Z, T7.W, 0.0, literal.x, 2083; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2084; CM-NEXT: BFE_INT T8.X, T7.Z, 0.0, literal.x, 2085; CM-NEXT: LSHR T0.Y, T7.Y, literal.x, 2086; CM-NEXT: BFE_INT T9.Z, T7.Y, 0.0, literal.x, 2087; CM-NEXT: LSHR * T0.W, T7.W, literal.x, 2088; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2089; 
CM-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x, 2090; CM-NEXT: LSHR T1.Y, T7.Z, literal.x, 2091; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 2092; CM-NEXT: BFE_INT * T8.W, PV.W, 0.0, literal.x, 2093; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2094; CM-NEXT: LSHR T10.X, PV.Z, literal.x, 2095; CM-NEXT: BFE_INT T8.Y, PV.Y, 0.0, literal.y, 2096; CM-NEXT: LSHR T0.Z, T7.X, literal.y, 2097; CM-NEXT: BFE_INT * T9.W, T0.Y, 0.0, literal.y, 2098; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2099; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 2100; CM-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 2101; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2102 %load = load <8 x i16>, ptr addrspace(1) %in 2103 %ext = sext <8 x i16> %load to <8 x i32> 2104 store <8 x i32> %ext, ptr addrspace(1) %out 2105 ret void 2106} 2107 2108define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2109; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32: 2110; GCN-NOHSA-SI: ; %bb.0: 2111; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 2112; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2113; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2114; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2115; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2116; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2117; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2118; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2119; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2120; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2121; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 2122; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2123; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2124; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2125; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2126; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2127; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2128; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2129; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2130; GCN-NOHSA-SI-NEXT: 
v_lshrrev_b32_e32 v17, 16, v4 2131; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v7 2132; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v6 2133; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v1 2134; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v0 2135; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v3 2136; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2 2137; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v5 2138; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v4 2139; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v7 2140; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v6 2141; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48 2142; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 2143; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2144; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2145; GCN-NOHSA-SI-NEXT: s_endpgm 2146; 2147; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i32: 2148; GCN-HSA: ; %bb.0: 2149; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2150; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2151; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 2152; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2153; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2154; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2155; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2156; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2157; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2158; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2159; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2160; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2161; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 2162; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 2163; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2164; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1 2165; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2166; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0 2167; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 2168; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2169; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 2170; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1 2171; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 
2172; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0 2173; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 2174; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2175; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2176; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2177; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2178; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v1 2179; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v0 2180; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v3 2181; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v2 2182; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 2183; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v7 2184; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v6 2185; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v7 2186; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v6 2187; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2188; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4 2189; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5 2190; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v4 2191; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 2192; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 2193; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[12:15] 2194; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 2195; GCN-HSA-NEXT: s_endpgm 2196; 2197; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i32: 2198; GCN-NOHSA-VI: ; %bb.0: 2199; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 2200; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2201; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2202; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2203; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2204; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2205; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2206; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2207; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2208; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2209; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2210; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2211; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 2212; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2213; GCN-NOHSA-VI-NEXT: s_waitcnt 
vmcnt(0) 2214; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v7 2215; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v7 2216; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6 2217; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v6 2218; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v1 2219; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2220; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v0 2221; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2222; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v3 2223; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2224; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v2 2225; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2226; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v5 2227; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2228; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v4 2229; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 2230; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2231; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2232; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2233; GCN-NOHSA-VI-NEXT: s_endpgm 2234; 2235; EG-LABEL: global_zextload_v16i16_to_v16i32: 2236; EG: ; %bb.0: 2237; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2238; EG-NEXT: TEX 1 @8 2239; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 2240; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 2241; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 2242; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 2243; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 2244; EG-NEXT: CF_END 2245; EG-NEXT: Fetch clause starting at 8: 2246; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 2247; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 2248; EG-NEXT: ALU clause starting at 12: 2249; EG-NEXT: MOV * T11.X, KC0[2].Z, 2250; EG-NEXT: ALU clause starting at 13: 2251; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 2252; EG-NEXT: 16(2.242078e-44), 
0(0.000000e+00) 2253; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 2254; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2255; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 2256; EG-NEXT: LSHR * T14.W, T12.W, literal.x, 2257; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2258; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 2259; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 2260; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y, 2261; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2262; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 2263; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 2264; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2265; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 2266; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 2267; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2268; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2269; EG-NEXT: LSHR T16.X, PV.W, literal.x, 2270; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 2271; EG-NEXT: LSHR T17.W, T11.W, literal.y, 2272; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 2273; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2274; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2275; EG-NEXT: AND_INT T17.Z, T11.W, literal.x, 2276; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2277; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2278; EG-NEXT: LSHR T11.X, PV.W, literal.x, 2279; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 2280; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 2281; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2282; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2283; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2284; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2285; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 2286; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2287; 2288; CM-LABEL: global_zextload_v16i16_to_v16i32: 2289; CM: ; %bb.0: 2290; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2291; CM-NEXT: TEX 1 @8 2292; CM-NEXT: ALU 33, @13, KC0[CB0:0-32], KC1[] 2293; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T18.X 2294; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T17.X 2295; CM-NEXT: 
MEM_RAT_CACHELESS STORE_DWORD T12, T16.X 2296; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T14.X 2297; CM-NEXT: CF_END 2298; CM-NEXT: Fetch clause starting at 8: 2299; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2300; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2301; CM-NEXT: ALU clause starting at 12: 2302; CM-NEXT: MOV * T11.X, KC0[2].Z, 2303; CM-NEXT: ALU clause starting at 13: 2304; CM-NEXT: LSHR * T13.W, T12.W, literal.x, 2305; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2306; CM-NEXT: AND_INT * T13.Z, T12.W, literal.x, 2307; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2308; CM-NEXT: LSHR T13.Y, T12.Z, literal.x, 2309; CM-NEXT: LSHR * T12.W, T12.Y, literal.x, 2310; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2311; CM-NEXT: AND_INT T13.X, T12.Z, literal.x, 2312; CM-NEXT: AND_INT T12.Z, T12.Y, literal.x, 2313; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2314; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2315; CM-NEXT: LSHR T14.X, PV.W, literal.x, 2316; CM-NEXT: LSHR T12.Y, T12.X, literal.y, 2317; CM-NEXT: LSHR * T15.W, T11.W, literal.y, 2318; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2319; CM-NEXT: AND_INT T12.X, T12.X, literal.x, 2320; CM-NEXT: AND_INT T15.Z, T11.W, literal.x, 2321; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2322; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2323; CM-NEXT: LSHR T16.X, PV.W, literal.x, 2324; CM-NEXT: LSHR T15.Y, T11.Z, literal.y, 2325; CM-NEXT: LSHR * T11.W, T11.Y, literal.y, 2326; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2327; CM-NEXT: AND_INT T15.X, T11.Z, literal.x, 2328; CM-NEXT: AND_INT T11.Z, T11.Y, literal.x, 2329; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2330; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2331; CM-NEXT: LSHR T17.X, PV.W, literal.x, 2332; CM-NEXT: LSHR * T11.Y, T11.X, literal.y, 2333; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2334; CM-NEXT: AND_INT * T11.X, T11.X, literal.x, 2335; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2336; CM-NEXT: LSHR * T18.X, KC0[2].Y, literal.x, 2337; 
CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2338 %load = load <16 x i16>, ptr addrspace(1) %in 2339 %ext = zext <16 x i16> %load to <16 x i32> 2340 store <16 x i32> %ext, ptr addrspace(1) %out 2341 ret void 2342} 2343 2344define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2345; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32: 2346; GCN-NOHSA-SI: ; %bb.0: 2347; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 2348; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2349; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2350; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2351; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2352; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2353; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2354; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2355; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2356; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2357; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 2358; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2359; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2360; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v1 2361; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v0 2362; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v1, 0, 16 2363; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v0, 0, 16 2364; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v3 2365; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v2 2366; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v3, 0, 16 2367; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v2, 0, 16 2368; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2369; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 2370; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v4 2371; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16 2372; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v4, 0, 16 2373; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v7 2374; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v6 2375; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v7, 0, 16 2376; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v6, 0, 16 2377; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, 
s[0:3], 0 offset:48 2378; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2379; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2380; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2381; GCN-NOHSA-SI-NEXT: s_endpgm 2382; 2383; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i32: 2384; GCN-HSA: ; %bb.0: 2385; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2386; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2387; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2388; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2389; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 2390; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 2391; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2392; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2393; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2394; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2395; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2396; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2397; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 2398; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 2399; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2400; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 2401; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2402; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 2403; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 2404; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 2405; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2406; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 2407; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1 2408; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0 2409; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 2410; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v0 2411; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16 2412; GCN-HSA-NEXT: v_bfe_i32 v8, v0, 0, 16 2413; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v3 2414; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v2 2415; GCN-HSA-NEXT: v_bfe_i32 v14, v3, 0, 16 2416; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16 2417; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v1 2418; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 2419; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 2420; GCN-HSA-NEXT: s_waitcnt vmcnt(2) 2421; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v3, 16, v5 2422; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7 2423; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6 2424; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16 2425; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16 2426; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4 2427; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16 2428; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 2429; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[7:10] 2430; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 2431; GCN-HSA-NEXT: s_endpgm 2432; 2433; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i32: 2434; GCN-NOHSA-VI: ; %bb.0: 2435; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 2436; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2437; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2438; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2439; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2440; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2441; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2442; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2443; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2444; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2445; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2446; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2447; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 2448; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v1 2449; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2450; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v7 2451; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v6 2452; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v7, 0, 16 2453; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v6, 0, 16 2454; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v0 2455; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v1, 0, 16 2456; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16 2457; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v3 2458; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v2 2459; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16 2460; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16 2461; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 2462; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 
v1, 16, v4 2463; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16 2464; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16 2465; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 2466; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2467; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2468; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2469; GCN-NOHSA-VI-NEXT: s_endpgm 2470; 2471; EG-LABEL: global_sextload_v16i16_to_v16i32: 2472; EG: ; %bb.0: 2473; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2474; EG-NEXT: TEX 1 @8 2475; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[] 2476; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0 2477; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0 2478; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0 2479; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1 2480; EG-NEXT: CF_END 2481; EG-NEXT: Fetch clause starting at 8: 2482; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2483; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2484; EG-NEXT: ALU clause starting at 12: 2485; EG-NEXT: MOV * T11.X, KC0[2].Z, 2486; EG-NEXT: ALU clause starting at 13: 2487; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2488; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2489; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2490; EG-NEXT: LSHR T14.X, PV.W, literal.x, 2491; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y, 2492; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2493; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x, 2494; EG-NEXT: LSHR T0.Y, T12.W, literal.x, 2495; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212 2496; EG-NEXT: LSHR T0.W, T12.Y, literal.x, 2497; EG-NEXT: LSHR * T1.W, T11.Y, literal.x, 2498; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2499; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x, 2500; EG-NEXT: LSHR T1.Y, T11.W, literal.x, 2501; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x, 2502; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x, 2503; 
EG-NEXT: LSHR * T1.W, T11.X, literal.x, 2504; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2505; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x, 2506; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x, 2507; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x, 2508; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x, 2509; EG-NEXT: LSHR * T1.W, T11.Z, literal.x, 2510; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2511; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x, 2512; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x, 2513; EG-NEXT: LSHR T0.Z, T12.X, literal.x, 2514; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x, 2515; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2516; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 2517; EG-NEXT: LSHR T11.X, PS, literal.x, 2518; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y, 2519; EG-NEXT: LSHR T0.Z, T12.Z, literal.y, 2520; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y, 2521; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2522; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2523; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2524; EG-NEXT: LSHR T12.X, PS, literal.x, 2525; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y, 2526; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2527; 2528; CM-LABEL: global_sextload_v16i16_to_v16i32: 2529; CM: ; %bb.0: 2530; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2531; CM-NEXT: TEX 1 @8 2532; CM-NEXT: ALU 40, @13, KC0[CB0:0-32], KC1[] 2533; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T11.X 2534; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T18.X 2535; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T14.X 2536; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T13.X 2537; CM-NEXT: CF_END 2538; CM-NEXT: Fetch clause starting at 8: 2539; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2540; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2541; CM-NEXT: ALU clause starting at 12: 2542; CM-NEXT: MOV * T11.X, KC0[2].Z, 2543; CM-NEXT: ALU clause starting at 13: 2544; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2545; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2546; 
CM-NEXT: LSHR T13.X, PV.W, literal.x, 2547; CM-NEXT: LSHR T0.Y, T11.Y, literal.y, 2548; CM-NEXT: LSHR T0.Z, T11.Z, literal.y, 2549; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2550; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2551; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 2552; CM-NEXT: LSHR T14.X, PV.W, literal.x, 2553; CM-NEXT: LSHR T1.Y, T11.W, literal.y, 2554; CM-NEXT: BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212 2555; CM-NEXT: LSHR * T0.W, T12.X, literal.y, 2556; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2557; CM-NEXT: BFE_INT T15.X, T12.Z, 0.0, literal.x, 2558; CM-NEXT: LSHR T2.Y, T12.Y, literal.x, 2559; CM-NEXT: BFE_INT T16.Z, T12.Y, 0.0, literal.x, 2560; CM-NEXT: LSHR * T1.W, T12.W, literal.x, 2561; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2562; CM-NEXT: BFE_INT T16.X, T12.X, 0.0, literal.x, 2563; CM-NEXT: LSHR T3.Y, T12.Z, literal.x, 2564; CM-NEXT: BFE_INT T12.Z, T11.W, 0.0, literal.x, 2565; CM-NEXT: BFE_INT * T15.W, PV.W, 0.0, literal.x, 2566; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2567; CM-NEXT: BFE_INT T12.X, T11.Z, 0.0, literal.x, 2568; CM-NEXT: BFE_INT T15.Y, PV.Y, 0.0, literal.x, 2569; CM-NEXT: BFE_INT T17.Z, T11.Y, 0.0, literal.x, 2570; CM-NEXT: BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 2571; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2572; CM-NEXT: BFE_INT T17.X, T11.X, 0.0, literal.x, 2573; CM-NEXT: BFE_INT T16.Y, T0.W, 0.0, literal.x, 2574; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.x, 2575; CM-NEXT: BFE_INT * T12.W, T1.Y, 0.0, literal.x, 2576; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2577; CM-NEXT: LSHR T18.X, PV.Z, literal.x, 2578; CM-NEXT: BFE_INT T12.Y, T0.Z, 0.0, literal.y, 2579; CM-NEXT: LSHR T0.Z, T11.X, literal.y, 2580; CM-NEXT: BFE_INT * T17.W, T0.Y, 0.0, literal.y, 2581; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2582; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 2583; CM-NEXT: BFE_INT * T17.Y, PV.Z, 0.0, literal.y, 2584; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2585 %load = load <16 x i16>, ptr 
addrspace(1) %in 2586 %ext = sext <16 x i16> %load to <16 x i32> 2587 store <16 x i32> %ext, ptr addrspace(1) %out 2588 ret void 2589} 2590 2591define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2592; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32: 2593; GCN-NOHSA-SI: ; %bb.0: 2594; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 2595; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2596; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2597; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2598; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2599; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2600; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2601; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2602; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2603; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2604; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 2605; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 2606; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 2607; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v3 2608; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v2 2609; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v1 2610; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v0 2611; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 2612; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v7 2613; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v6 2614; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v3 2615; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v2 2616; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v1 2617; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v0 2618; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2619; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2620; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v7 2621; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v6 2622; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v5 2623; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v4 2624; 
GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2625; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 2626; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 2627; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v31, 16, v9 2628; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v29, 16, v8 2629; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v11 2630; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v10 2631; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v9 2632; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, 0xffff, v8 2633; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2634; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 2635; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 2636; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v13 2637; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v12 2638; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v15 2639; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v14 2640; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v13 2641; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, 0xffff, v12 2642; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2643; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 2644; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 2645; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 2646; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 2647; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 2648; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2649; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48 2650; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 2651; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 2652; GCN-NOHSA-SI-NEXT: s_endpgm 2653; 2654; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i32: 2655; GCN-HSA: ; %bb.0: 2656; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2657; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2658; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 2659; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2660; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2661; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2662; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 2663; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] 2664; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2665; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2666; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2667; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] 2668; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2669; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2670; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 2671; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 2672; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2673; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2674; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2675; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 2676; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2677; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2678; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 2679; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 2680; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 2681; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2682; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 2683; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 2684; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 2685; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2686; GCN-HSA-NEXT: s_add_u32 s4, s0, 64 2687; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 2688; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x50 2689; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 2690; GCN-HSA-NEXT: s_add_u32 s8, s0, 32 2691; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 2692; GCN-HSA-NEXT: v_mov_b32_e32 v23, s9 2693; GCN-HSA-NEXT: v_mov_b32_e32 v22, s8 2694; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1 2695; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0 2696; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 2697; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2698; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 2699; GCN-HSA-NEXT: v_mov_b32_e32 v31, s1 2700; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 2701; GCN-HSA-NEXT: v_mov_b32_e32 v30, s0 2702; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2703; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2704; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4 2705; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5 2706; GCN-HSA-NEXT: 
v_and_b32_e32 v16, 0xffff, v4 2707; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 2708; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19] 2709; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 2710; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2711; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v13 2712; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v12 2713; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v13 2714; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v12 2715; GCN-HSA-NEXT: v_mov_b32_e32 v13, s7 2716; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[16:19] 2717; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v14 2718; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v7 2719; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6 2720; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v7 2721; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v6 2722; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v15 2723; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v15 2724; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v14 2725; GCN-HSA-NEXT: v_mov_b32_e32 v12, s6 2726; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 2727; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 2728; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v3 2729; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1 2730; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0 2731; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v1 2732; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v0 2733; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[4:7] 2734; GCN-HSA-NEXT: v_lshrrev_b32_e32 v21, 16, v2 2735; GCN-HSA-NEXT: v_and_b32_e32 v22, 0xffff, v3 2736; GCN-HSA-NEXT: v_and_b32_e32 v20, 0xffff, v2 2737; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 2738; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v11 2739; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v10 2740; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v9 2741; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v8 2742; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v11 2743; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v10 2744; GCN-HSA-NEXT: v_and_b32_e32 v13, 0xffff, v9 2745; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v8 2746; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[20:23] 
2747; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[11:14] 2748; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[4:7] 2749; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 2750; GCN-HSA-NEXT: s_endpgm 2751; 2752; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i32: 2753; GCN-NOHSA-VI: ; %bb.0: 2754; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 2755; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2756; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2757; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2758; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2759; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2760; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2761; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2762; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2763; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2764; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 2765; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 2766; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2767; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2768; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 2769; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v3 2770; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v3 2771; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v2 2772; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2773; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v15 2774; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, 0xffff, v15 2775; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v14 2776; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v14 2777; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v13 2778; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v13 2779; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v12 2780; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v12 2781; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v2 2782; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 2783; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1 2784; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 2785; GCN-NOHSA-VI-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 2786; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v7 2787; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, 0xffff, v7 2788; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v6 2789; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v6 2790; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v5 2791; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v5 2792; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 2793; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v4 2794; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v11 2795; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, 0xffff, v11 2796; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v10 2797; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v10 2798; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v9 2799; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v9 2800; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v8 2801; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v8 2802; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:96 2803; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112 2804; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64 2805; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80 2806; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 2807; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48 2808; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2809; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 2810; GCN-NOHSA-VI-NEXT: s_endpgm 2811; 2812; EG-LABEL: global_zextload_v32i16_to_v32i32: 2813; EG: ; %bb.0: 2814; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2815; EG-NEXT: TEX 3 @12 2816; EG-NEXT: ALU 72, @21, KC0[CB0:0-32], KC1[] 2817; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0 2818; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0 2819; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0 2820; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T28.XYZW, T30.X, 0 2821; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0 2822; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 2823; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 2824; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1 2825; EG-NEXT: CF_END 2826; EG-NEXT: Fetch clause starting at 12: 2827; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2828; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2829; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2830; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2831; EG-NEXT: ALU clause starting at 20: 2832; EG-NEXT: MOV * T19.X, KC0[2].Z, 2833; EG-NEXT: ALU clause starting at 21: 2834; EG-NEXT: LSHR * T23.W, T20.W, literal.x, 2835; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2836; EG-NEXT: AND_INT * T23.Z, T20.W, literal.x, 2837; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2838; EG-NEXT: LSHR T23.Y, T20.Z, literal.x, 2839; EG-NEXT: LSHR * T20.W, T20.Y, literal.x, 2840; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2841; EG-NEXT: AND_INT T23.X, T20.Z, literal.x, 2842; EG-NEXT: AND_INT T20.Z, T20.Y, literal.x, 2843; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2844; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2845; EG-NEXT: LSHR T24.X, PV.W, literal.x, 2846; EG-NEXT: LSHR T20.Y, T20.X, literal.y, 2847; EG-NEXT: LSHR T25.W, T19.W, literal.y, 2848; EG-NEXT: AND_INT * T20.X, T20.X, literal.z, 2849; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2850; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2851; EG-NEXT: AND_INT * T25.Z, T19.W, literal.x, 2852; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2853; EG-NEXT: LSHR T26.X, KC0[2].Y, literal.x, 2854; EG-NEXT: LSHR T25.Y, T19.Z, literal.y, 2855; EG-NEXT: LSHR T19.W, T19.Y, literal.y, 2856; EG-NEXT: AND_INT * T25.X, T19.Z, literal.z, 2857; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2858; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2859; EG-NEXT: AND_INT T19.Z, T19.Y, literal.x, 2860; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2861; 
EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2862; EG-NEXT: LSHR T27.X, PV.W, literal.x, 2863; EG-NEXT: LSHR T19.Y, T19.X, literal.y, 2864; EG-NEXT: LSHR T28.W, T22.W, literal.y, 2865; EG-NEXT: AND_INT * T19.X, T19.X, literal.z, 2866; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2867; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2868; EG-NEXT: AND_INT T28.Z, T22.W, literal.x, 2869; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2870; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2871; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2872; EG-NEXT: LSHR T28.Y, T22.Z, literal.y, 2873; EG-NEXT: LSHR T22.W, T22.Y, literal.y, 2874; EG-NEXT: AND_INT * T28.X, T22.Z, literal.z, 2875; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2876; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2877; EG-NEXT: AND_INT T22.Z, T22.Y, literal.x, 2878; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2879; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2880; EG-NEXT: LSHR T30.X, PV.W, literal.x, 2881; EG-NEXT: LSHR T22.Y, T22.X, literal.y, 2882; EG-NEXT: LSHR T31.W, T21.W, literal.y, 2883; EG-NEXT: AND_INT * T22.X, T22.X, literal.z, 2884; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2885; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2886; EG-NEXT: AND_INT T31.Z, T21.W, literal.x, 2887; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2888; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2889; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2890; EG-NEXT: LSHR T31.Y, T21.Z, literal.y, 2891; EG-NEXT: LSHR T21.W, T21.Y, literal.y, 2892; EG-NEXT: AND_INT * T31.X, T21.Z, literal.z, 2893; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2894; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2895; EG-NEXT: AND_INT T21.Z, T21.Y, literal.x, 2896; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2897; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 2898; EG-NEXT: LSHR T33.X, PV.W, literal.x, 2899; EG-NEXT: LSHR T21.Y, T21.X, literal.y, 2900; EG-NEXT: AND_INT * T21.X, T21.X, literal.z, 2901; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2902; 
EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2903; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2904; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 2905; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2906; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2907; 2908; CM-LABEL: global_zextload_v32i16_to_v32i32: 2909; CM: ; %bb.0: 2910; CM-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2911; CM-NEXT: TEX 3 @12 2912; CM-NEXT: ALU 65, @21, KC0[CB0:0-32], KC1[] 2913; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X 2914; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T21.X 2915; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T32.X 2916; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T22.X 2917; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T29.X 2918; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T19.X 2919; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T26.X 2920; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T20.X 2921; CM-NEXT: CF_END 2922; CM-NEXT: Fetch clause starting at 12: 2923; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 2924; CM-NEXT: VTX_READ_128 T21.XYZW, T19.X, 0, #1 2925; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 16, #1 2926; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 32, #1 2927; CM-NEXT: ALU clause starting at 20: 2928; CM-NEXT: MOV * T19.X, KC0[2].Z, 2929; CM-NEXT: ALU clause starting at 21: 2930; CM-NEXT: LSHR * T23.W, T20.Y, literal.x, 2931; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2932; CM-NEXT: AND_INT * T23.Z, T20.Y, literal.x, 2933; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2934; CM-NEXT: LSHR T23.Y, T20.X, literal.x, 2935; CM-NEXT: LSHR * T24.W, T20.W, literal.x, 2936; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2937; CM-NEXT: AND_INT T23.X, T20.X, literal.x, 2938; CM-NEXT: AND_INT T24.Z, T20.W, literal.x, 2939; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2940; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2941; CM-NEXT: LSHR T20.X, PV.W, literal.x, 2942; CM-NEXT: LSHR T24.Y, T20.Z, literal.y, 2943; CM-NEXT: LSHR * T25.W, T19.Y, literal.y, 2944; CM-NEXT: 2(2.802597e-45), 
16(2.242078e-44) 2945; CM-NEXT: AND_INT T24.X, T20.Z, literal.x, 2946; CM-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2947; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2948; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 2949; CM-NEXT: LSHR T26.X, PV.W, literal.x, 2950; CM-NEXT: LSHR T25.Y, T19.X, literal.y, 2951; CM-NEXT: LSHR * T27.W, T19.W, literal.y, 2952; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2953; CM-NEXT: AND_INT T25.X, T19.X, literal.x, 2954; CM-NEXT: AND_INT T27.Z, T19.W, literal.x, 2955; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2956; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2957; CM-NEXT: LSHR T19.X, PV.W, literal.x, 2958; CM-NEXT: LSHR T27.Y, T19.Z, literal.y, 2959; CM-NEXT: LSHR * T28.W, T22.Y, literal.y, 2960; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2961; CM-NEXT: AND_INT T27.X, T19.Z, literal.x, 2962; CM-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2963; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2964; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2965; CM-NEXT: LSHR T29.X, PV.W, literal.x, 2966; CM-NEXT: LSHR T28.Y, T22.X, literal.y, 2967; CM-NEXT: LSHR * T30.W, T22.W, literal.y, 2968; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2969; CM-NEXT: AND_INT T28.X, T22.X, literal.x, 2970; CM-NEXT: AND_INT T30.Z, T22.W, literal.x, 2971; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2972; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2973; CM-NEXT: LSHR T22.X, PV.W, literal.x, 2974; CM-NEXT: LSHR T30.Y, T22.Z, literal.y, 2975; CM-NEXT: LSHR * T31.W, T21.Y, literal.y, 2976; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2977; CM-NEXT: AND_INT T30.X, T22.Z, literal.x, 2978; CM-NEXT: AND_INT T31.Z, T21.Y, literal.x, 2979; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2980; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2981; CM-NEXT: LSHR T32.X, PV.W, literal.x, 2982; CM-NEXT: LSHR T31.Y, T21.X, literal.y, 2983; CM-NEXT: LSHR * T33.W, T21.W, literal.y, 2984; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2985; CM-NEXT: AND_INT T31.X, T21.X, literal.x, 
2986; CM-NEXT: AND_INT * T33.Z, T21.W, literal.x, 2987; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2988; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 2989; CM-NEXT: LSHR * T33.Y, T21.Z, literal.y, 2990; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2991; CM-NEXT: AND_INT T33.X, T21.Z, literal.x, 2992; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2993; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2994; CM-NEXT: LSHR * T34.X, PV.W, literal.x, 2995; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2996 %load = load <32 x i16>, ptr addrspace(1) %in 2997 %ext = zext <32 x i16> %load to <32 x i32> 2998 store <32 x i32> %ext, ptr addrspace(1) %out 2999 ret void 3000} 3001
; Test kernel @global_sextload_v32i16_to_v32i32: loads one <32 x i16> vector
; from %in, sign-extends every element to i32 (sext), and stores the
; <32 x i32> result to %out; both pointers are in addrspace(1).
; The per-target assertion lines that follow are autogenerated (the note at
; the top of the file names utils/update_llc_test_checks.py); regenerate them
; with that script instead of editing them by hand.
; In the checked GCN output each loaded dword is expanded with
; v_ashrrev_i32 ..., 16 and v_bfe_i32 ..., 0, 16; the checked r600 output uses
; BFE_INT, applied either to the loaded value or to an LSHR-by-16 result.
; NOTE(review): attribute group #0 is defined outside this chunk.
3002define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3003; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32: 3004; GCN-NOHSA-SI: ; %bb.0: 3005; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 3006; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3007; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3008; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 3009; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 3010; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3011; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 3012; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 3013; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 3014; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3015; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3016; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3017; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 3018; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3019; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3020; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v3, 0, 16 3021; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v2, 0, 16 3022; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 16, v1 3023; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 3024; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v1, 0, 16 3025; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, 
v0, 0, 16 3026; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 3027; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v7 3028; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3029; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v7, 0, 16 3030; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v6, 0, 16 3031; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 16, v5 3032; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v4 3033; GCN-NOHSA-SI-NEXT: v_bfe_i32 v26, v5, 0, 16 3034; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v4, 0, 16 3035; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3036; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 3037; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 3038; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v11, 0, 16 3039; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v10, 0, 16 3040; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 16, v9 3041; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 16, v8 3042; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v9, 0, 16 3043; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v8, 0, 16 3044; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3045; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v15 3046; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v14 3047; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v15, 0, 16 3048; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v14, 0, 16 3049; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 3050; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 3051; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v13, 0, 16 3052; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v12, 0, 16 3053; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 3054; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 3055; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 3056; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3057; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 3058; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3059; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 3060; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3061; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[20:23], off, s[0:3], 0 3062; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 3063; GCN-NOHSA-SI-NEXT: s_endpgm 3064; 3065; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i32: 3066; GCN-HSA: ; %bb.0: 3067; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3068; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3069; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3070; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3071; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] 3072; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 3073; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3074; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3075; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 3076; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 3077; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] 3078; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3079; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3080; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 3081; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 3082; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] 3083; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 3084; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3085; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3086; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3087; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3088; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3089; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 3090; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 3091; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3092; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3093; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3094; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3095; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3096; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3097; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 3098; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1 3099; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 3100; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3101; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0 3102; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3103; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 3104; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v13 3105; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v12 3106; GCN-HSA-NEXT: v_bfe_i32 v18, v13, 0, 16 3107; 
GCN-HSA-NEXT: v_bfe_i32 v16, v12, 0, 16 3108; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 3109; GCN-HSA-NEXT: v_mov_b32_e32 v20, s3 3110; GCN-HSA-NEXT: v_mov_b32_e32 v19, s2 3111; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3112; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3113; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 3114; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 3115; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3116; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v15 3117; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v14 3118; GCN-HSA-NEXT: v_bfe_i32 v17, v15, 0, 16 3119; GCN-HSA-NEXT: v_bfe_i32 v15, v14, 0, 16 3120; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3121; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[15:18] 3122; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 3123; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v11 3124; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v10 3125; GCN-HSA-NEXT: v_bfe_i32 v13, v11, 0, 16 3126; GCN-HSA-NEXT: v_bfe_i32 v11, v10, 0, 16 3127; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v9 3128; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v8 3129; GCN-HSA-NEXT: v_bfe_i32 v17, v9, 0, 16 3130; GCN-HSA-NEXT: v_bfe_i32 v15, v8, 0, 16 3131; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 3132; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 3133; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 3134; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[15:18] 3135; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[11:14] 3136; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 3137; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7 3138; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6 3139; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16 3140; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16 3141; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v5 3142; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v4 3143; GCN-HSA-NEXT: v_bfe_i32 v13, v5, 0, 16 3144; GCN-HSA-NEXT: v_bfe_i32 v11, v4, 0, 16 3145; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 3146; GCN-HSA-NEXT: v_mov_b32_e32 v31, s1 3147; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[11:14] 3148; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[7:10] 3149; GCN-HSA-NEXT: v_mov_b32_e32 
v30, s0 3150; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 3151; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v1 3152; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v0 3153; GCN-HSA-NEXT: v_bfe_i32 v9, v1, 0, 16 3154; GCN-HSA-NEXT: v_bfe_i32 v7, v0, 0, 16 3155; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v3 3156; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 16, v2 3157; GCN-HSA-NEXT: v_bfe_i32 v5, v3, 0, 16 3158; GCN-HSA-NEXT: v_bfe_i32 v3, v2, 0, 16 3159; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[7:10] 3160; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[3:6] 3161; GCN-HSA-NEXT: s_endpgm 3162; 3163; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i32: 3164; GCN-NOHSA-VI: ; %bb.0: 3165; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 3166; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3167; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3168; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 3169; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 3170; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3171; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 3172; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 3173; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 3174; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3175; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3176; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3177; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 3178; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 3179; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 3180; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3181; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3182; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v3, 0, 16 3183; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3184; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 3185; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 3186; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v13, 0, 16 3187; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v12, 0, 16 3188; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v2, 0, 16 3189; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 16, v1 3190; 
GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 3191; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v1, 0, 16 3192; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v0, 0, 16 3193; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v7 3194; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3195; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16 3196; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16 3197; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v5 3198; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v4 3199; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v5, 0, 16 3200; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v4, 0, 16 3201; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 3202; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 3203; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v11, 0, 16 3204; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v10, 0, 16 3205; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v9 3206; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v8 3207; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v9, 0, 16 3208; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v8, 0, 16 3209; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v15 3210; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v14 3211; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v15, 0, 16 3212; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v14, 0, 16 3213; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 3214; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3215; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 3216; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3217; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 3218; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3219; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 3220; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 3221; GCN-NOHSA-VI-NEXT: s_endpgm 3222; 3223; EG-LABEL: global_sextload_v32i16_to_v32i32: 3224; EG: ; %bb.0: 3225; EG-NEXT: ALU 9, @20, KC0[CB0:0-32], KC1[] 3226; 
EG-NEXT: TEX 3 @12 3227; EG-NEXT: ALU 73, @30, KC0[CB0:0-32], KC1[] 3228; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T22.X, 0 3229; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 3230; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T28.X, 0 3231; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T27.X, 0 3232; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T26.X, 0 3233; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 3234; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 3235; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 3236; EG-NEXT: CF_END 3237; EG-NEXT: Fetch clause starting at 12: 3238; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 3239; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1 3240; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 3241; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 3242; EG-NEXT: ALU clause starting at 20: 3243; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3244; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3245; EG-NEXT: LSHR T19.X, PV.W, literal.x, 3246; EG-NEXT: LSHR * T20.X, KC0[2].Y, literal.x, 3247; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3248; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3249; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3250; EG-NEXT: LSHR T21.X, PV.W, literal.x, 3251; EG-NEXT: MOV * T22.X, KC0[2].Z, 3252; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3253; EG-NEXT: ALU clause starting at 30: 3254; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3255; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3256; EG-NEXT: LSHR T26.X, PV.W, literal.x, 3257; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3258; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 3259; EG-NEXT: LSHR T27.X, PV.W, literal.x, 3260; EG-NEXT: LSHR T0.W, T22.Y, literal.y, 3261; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3262; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3263; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3264; EG-NEXT: LSHR T28.X, PS, literal.x, 3265; EG-NEXT: LSHR T0.Y, T22.W, literal.y, 3266; EG-NEXT: BFE_INT T29.Z, 
T25.W, 0.0, literal.y, BS:VEC_120/SCL_212 3267; EG-NEXT: LSHR T1.W, T24.Y, literal.y, 3268; EG-NEXT: LSHR * T2.W, T24.W, literal.y, 3269; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3270; EG-NEXT: BFE_INT T29.X, T25.Z, 0.0, literal.x, 3271; EG-NEXT: LSHR T1.Y, T23.Y, literal.x, 3272; EG-NEXT: BFE_INT T30.Z, T25.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3273; EG-NEXT: LSHR T3.W, T23.W, literal.x, 3274; EG-NEXT: LSHR * T4.W, T25.W, literal.x, 3275; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3276; EG-NEXT: BFE_INT T30.X, T25.X, 0.0, literal.x, 3277; EG-NEXT: LSHR T2.Y, T25.Y, literal.x, 3278; EG-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x, 3279; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x, 3280; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 3281; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3282; EG-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x, 3283; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 3284; EG-NEXT: BFE_INT T25.Z, T23.Y, 0.0, literal.x, 3285; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 3286; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 3287; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3288; EG-NEXT: BFE_INT T25.X, T23.X, 0.0, literal.x, 3289; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 3290; EG-NEXT: BFE_INT T32.Z, T24.W, 0.0, literal.x, 3291; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 3292; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 3293; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3294; EG-NEXT: BFE_INT T32.X, T24.Z, 0.0, literal.x, 3295; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 3296; EG-NEXT: BFE_INT T23.Z, T24.Y, 0.0, literal.x, 3297; EG-NEXT: BFE_INT T25.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3298; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 3299; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3300; EG-NEXT: BFE_INT T23.X, T24.X, 0.0, literal.x, 3301; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 3302; EG-NEXT: BFE_INT T33.Z, T22.W, 0.0, literal.x, 3303; EG-NEXT: BFE_INT T32.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 3304; EG-NEXT: LSHR * T2.W, T24.Z, 
literal.x, 3305; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3306; EG-NEXT: BFE_INT T33.X, T22.Z, 0.0, literal.x, 3307; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 3308; EG-NEXT: BFE_INT T24.Z, T22.Y, 0.0, literal.x, 3309; EG-NEXT: BFE_INT T23.W, T1.W, 0.0, literal.x, 3310; EG-NEXT: LSHR * T1.W, T24.X, literal.x, 3311; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3312; EG-NEXT: BFE_INT T24.X, T22.X, 0.0, literal.x, 3313; EG-NEXT: BFE_INT T23.Y, PS, 0.0, literal.x, 3314; EG-NEXT: LSHR T0.Z, T22.Z, literal.x, 3315; EG-NEXT: BFE_INT T33.W, T0.Y, 0.0, literal.x, 3316; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3317; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43) 3318; EG-NEXT: LSHR T34.X, PS, literal.x, 3319; EG-NEXT: BFE_INT T33.Y, PV.Z, 0.0, literal.y, 3320; EG-NEXT: LSHR T0.Z, T22.X, literal.y, 3321; EG-NEXT: BFE_INT T24.W, T0.W, 0.0, literal.y, 3322; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3323; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3324; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3325; EG-NEXT: LSHR T22.X, PS, literal.x, 3326; EG-NEXT: BFE_INT * T24.Y, PV.Z, 0.0, literal.y, 3327; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3328; 3329; CM-LABEL: global_sextload_v32i16_to_v32i32: 3330; CM: ; %bb.0: 3331; CM-NEXT: ALU 0, @22, KC0[CB0:0-32], KC1[] 3332; CM-NEXT: TEX 0 @14 3333; CM-NEXT: ALU 7, @23, KC0[CB0:0-32], KC1[] 3334; CM-NEXT: TEX 2 @16 3335; CM-NEXT: ALU 76, @31, KC0[CB0:0-32], KC1[] 3336; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X 3337; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T20.X 3338; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T28.X 3339; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T27.X 3340; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T26.X 3341; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T25.X 3342; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T24.X 3343; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T21.X 3344; CM-NEXT: CF_END 3345; CM-NEXT: Fetch clause starting at 14: 3346; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 3347; CM-NEXT: 
Fetch clause starting at 16: 3348; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 48, #1 3349; CM-NEXT: VTX_READ_128 T23.XYZW, T19.X, 32, #1 3350; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 3351; CM-NEXT: ALU clause starting at 22: 3352; CM-NEXT: MOV * T19.X, KC0[2].Z, 3353; CM-NEXT: ALU clause starting at 23: 3354; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3355; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3356; CM-NEXT: LSHR T21.X, PV.W, literal.x, 3357; CM-NEXT: LSHR T0.Y, T20.Z, literal.y, 3358; CM-NEXT: LSHR T0.Z, T20.W, literal.y, 3359; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3360; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3361; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 3362; CM-NEXT: ALU clause starting at 31: 3363; CM-NEXT: LSHR T24.X, T0.W, literal.x, 3364; CM-NEXT: LSHR T1.Y, T20.Y, literal.y, 3365; CM-NEXT: LSHR T1.Z, T19.Z, literal.y, 3366; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3367; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3368; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3369; CM-NEXT: LSHR T25.X, PV.W, literal.x, 3370; CM-NEXT: LSHR T2.Y, T19.W, literal.y, 3371; CM-NEXT: LSHR T2.Z, T19.X, literal.y, 3372; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3373; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3374; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00) 3375; CM-NEXT: LSHR T26.X, PV.W, literal.x, 3376; CM-NEXT: LSHR T3.Y, T19.Y, literal.y, 3377; CM-NEXT: LSHR T3.Z, T23.Z, literal.y, 3378; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3379; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3380; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3381; CM-NEXT: LSHR T27.X, PV.W, literal.x, 3382; CM-NEXT: LSHR T4.Y, T23.W, literal.y, 3383; CM-NEXT: LSHR T4.Z, T23.X, literal.y, 3384; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3385; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3386; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3387; CM-NEXT: LSHR T28.X, PV.W, literal.x, 3388; CM-NEXT: LSHR T5.Y, T23.Y, literal.y, 3389; CM-NEXT: BFE_INT T29.Z, T22.Y, 0.0, 
literal.y, BS:VEC_120/SCL_212 3390; CM-NEXT: LSHR * T0.W, T22.Z, literal.y, 3391; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3392; CM-NEXT: BFE_INT T29.X, T22.X, 0.0, literal.x, 3393; CM-NEXT: LSHR T6.Y, T22.W, literal.x, 3394; CM-NEXT: BFE_INT T30.Z, T22.W, 0.0, literal.x, 3395; CM-NEXT: LSHR * T1.W, T22.Y, literal.x, 3396; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3397; CM-NEXT: BFE_INT T30.X, T22.Z, 0.0, literal.x, 3398; CM-NEXT: LSHR T7.Y, T22.X, literal.x, 3399; CM-NEXT: BFE_INT T22.Z, T23.Y, 0.0, literal.x, 3400; CM-NEXT: BFE_INT * T29.W, PV.W, 0.0, literal.x, 3401; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3402; CM-NEXT: BFE_INT T22.X, T23.X, 0.0, literal.x, 3403; CM-NEXT: BFE_INT T29.Y, PV.Y, 0.0, literal.x, 3404; CM-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x, 3405; CM-NEXT: BFE_INT * T30.W, T6.Y, 0.0, literal.x, 3406; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3407; CM-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x, 3408; CM-NEXT: BFE_INT T30.Y, T0.W, 0.0, literal.x, 3409; CM-NEXT: BFE_INT T23.Z, T19.Y, 0.0, literal.x, 3410; CM-NEXT: BFE_INT * T22.W, T5.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3411; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3412; CM-NEXT: BFE_INT T23.X, T19.X, 0.0, literal.x, 3413; CM-NEXT: BFE_INT T22.Y, T4.Z, 0.0, literal.x, 3414; CM-NEXT: BFE_INT T32.Z, T19.W, 0.0, literal.x, 3415; CM-NEXT: BFE_INT * T31.W, T4.Y, 0.0, literal.x, 3416; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3417; CM-NEXT: BFE_INT T32.X, T19.Z, 0.0, literal.x, 3418; CM-NEXT: BFE_INT T31.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3419; CM-NEXT: BFE_INT T19.Z, T20.Y, 0.0, literal.x, 3420; CM-NEXT: BFE_INT * T23.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3421; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3422; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x, 3423; CM-NEXT: BFE_INT T23.Y, T2.Z, 0.0, literal.x, 3424; CM-NEXT: BFE_INT T33.Z, T20.W, 0.0, literal.x, 3425; CM-NEXT: BFE_INT * T32.W, T2.Y, 0.0, literal.x, 3426; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3427; CM-NEXT: 
BFE_INT T33.X, T20.Z, 0.0, literal.x, 3428; CM-NEXT: BFE_INT T32.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3429; CM-NEXT: LSHR T1.Z, T20.X, literal.x, 3430; CM-NEXT: BFE_INT * T19.W, T1.Y, 0.0, literal.x, 3431; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3432; CM-NEXT: LSHR T20.X, KC0[2].Y, literal.x, 3433; CM-NEXT: BFE_INT T19.Y, PV.Z, 0.0, literal.y, 3434; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, 3435; CM-NEXT: BFE_INT * T33.W, T0.Z, 0.0, literal.y, 3436; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3437; CM-NEXT: LSHR T34.X, PV.Z, literal.x, 3438; CM-NEXT: BFE_INT * T33.Y, T0.Y, 0.0, literal.y, 3439; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3440 %load = load <32 x i16>, ptr addrspace(1) %in 3441 %ext = sext <32 x i16> %load to <32 x i32> 3442 store <32 x i32> %ext, ptr addrspace(1) %out 3443 ret void 3444} 3445 3446define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3447; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32: 3448; GCN-NOHSA-SI: ; %bb.0: 3449; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 3450; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 3451; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 3452; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 3453; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s11 3454; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 3455; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 3456; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3457; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3458; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 3459; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 3460; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3461; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 3462; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 3463; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 3464; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 3465; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32 3466; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, 
s[8:11], 0 offset:48 3467; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:64 3468; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:80 3469; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:96 3470; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112 3471; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(7) 3472; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v15 3473; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v16, 16, v14 3474; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v13 3475; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v12 3476; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(6) 3477; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v26, 16, v11 3478; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v24, 16, v10 3479; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v17, 0xffff, v15 3480; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v15, 0xffff, v14 3481; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 ; 4-byte Folded Spill 3482; GCN-NOHSA-SI-NEXT: buffer_store_dword v16, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 3483; GCN-NOHSA-SI-NEXT: buffer_store_dword v17, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 3484; GCN-NOHSA-SI-NEXT: buffer_store_dword v18, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 3485; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v13 3486; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3487; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v12 3488; GCN-NOHSA-SI-NEXT: buffer_store_dword v18, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 3489; GCN-NOHSA-SI-NEXT: buffer_store_dword v19, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill 3490; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill 3491; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill 3492; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) 3493; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v9 3494; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v8 3495; GCN-NOHSA-SI-NEXT: 
v_and_b32_e32 v25, 0xffff, v11 3496; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v23, 0xffff, v10 3497; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v9 3498; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v8 3499; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(13) 3500; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v7 3501; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v6 3502; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v46, 16, v5 3503; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v4 3504; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v7 3505; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v6 3506; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v45, 0xffff, v5 3507; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v43, 0xffff, v4 3508; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(12) 3509; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 3510; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 3511; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v50, 16, v1 3512; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v0 3513; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v3 3514; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v2 3515; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v49, 0xffff, v1 3516; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v47, 0xffff, v0 3517; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(11) 3518; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v54, 16, v30 3519; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v29 3520; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v58, 16, v28 3521; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v27 3522; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v53, 0xffff, v30 3523; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v51, 0xffff, v29 3524; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v57, 0xffff, v28 3525; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v55, 0xffff, v27 3526; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(10) 3527; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v30, 16, v34 3528; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v33 3529; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v62, 16, v32 3530; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v60, 16, v31 3531; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v29, 0xffff, v34 3532; 
GCN-NOHSA-SI-NEXT: v_and_b32_e32 v27, 0xffff, v33 3533; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v61, 0xffff, v32 3534; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v59, 0xffff, v31 3535; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(9) 3536; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v34, 16, v38 3537; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v37 3538; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v36 3539; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v35 3540; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v33, 0xffff, v38 3541; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v31, 0xffff, v37 3542; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v36 3543; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v35 3544; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8) 3545; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v38, 16, v42 3546; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v41 3547; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v40 3548; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v39 3549; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v37, 0xffff, v42 3550; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v35, 0xffff, v41 3551; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v40 3552; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v39 3553; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 3554; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 3555; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3556; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:240 3557; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 3558; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:208 3559; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160 3560; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176 3561; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128 3562; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144 3563; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96 3564; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3565; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64 3566; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80 3567; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 3568; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48 3569; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 3570; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload 3571; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload 3572; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload 3573; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3574; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3575; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3576; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload 3577; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 3578; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 3579; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 3580; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3581; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3582; GCN-NOHSA-SI-NEXT: s_endpgm 3583; 3584; GCN-HSA-LABEL: global_zextload_v64i16_to_v64i32: 3585; GCN-HSA: ; %bb.0: 3586; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3587; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3588; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3589; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3590; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[0:1] 3591; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 3592; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3593; GCN-HSA-NEXT: s_add_u32 s6, s2, 32 3594; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 3595; GCN-HSA-NEXT: s_add_u32 s8, s2, 48 3596; 
GCN-HSA-NEXT: s_addc_u32 s9, s3, 0 3597; GCN-HSA-NEXT: v_mov_b32_e32 v17, s9 3598; GCN-HSA-NEXT: s_add_u32 s10, s2, 64 3599; GCN-HSA-NEXT: v_mov_b32_e32 v16, s8 3600; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[16:17] 3601; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0 3602; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 3603; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11 3604; GCN-HSA-NEXT: s_add_u32 s10, s2, 0x50 3605; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0 3606; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 3607; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 3608; GCN-HSA-NEXT: s_add_u32 s10, s2, 0x60 3609; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3610; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[4:5] 3611; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0 3612; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 3613; GCN-HSA-NEXT: s_add_u32 s2, s2, 0x70 3614; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 3615; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 3616; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 3617; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3618; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3619; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[12:13] 3620; GCN-HSA-NEXT: v_mov_b32_e32 v13, s5 3621; GCN-HSA-NEXT: v_mov_b32_e32 v15, s7 3622; GCN-HSA-NEXT: v_mov_b32_e32 v12, s4 3623; GCN-HSA-NEXT: v_mov_b32_e32 v14, s6 3624; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[12:13] 3625; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[14:15] 3626; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3627; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3628; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1 3629; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0 3630; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 3631; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v25 3632; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v24 3633; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v25 3634; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v24 3635; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3636; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3637; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3638; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3639; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xf0 3640; GCN-HSA-NEXT: 
flat_store_dwordx4 v[36:37], v[32:35] 3641; GCN-HSA-NEXT: v_mov_b32_e32 v37, s3 3642; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 3643; GCN-HSA-NEXT: v_mov_b32_e32 v36, s2 3644; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 3645; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3646; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xd0 3647; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 3648; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xa0 3649; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 3650; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xb0 3651; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 3652; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x80 3653; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v27 3654; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v26 3655; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v27 3656; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v26 3657; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 3658; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[32:35] 3659; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 3660; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v1 3661; GCN-HSA-NEXT: v_mov_b32_e32 v33, s13 3662; GCN-HSA-NEXT: v_mov_b32_e32 v32, s12 3663; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v0 3664; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v1 3665; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v0 3666; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 3667; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27] 3668; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 3669; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 3670; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v9 3671; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v8 3672; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v9 3673; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v8 3674; GCN-HSA-NEXT: v_mov_b32_e32 v8, s10 3675; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 3676; GCN-HSA-NEXT: v_mov_b32_e32 v9, s11 3677; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v11 3678; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v10 3679; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v11 3680; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v10 3681; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[24:27] 3682; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 3683; 
GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v5 3684; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v4 3685; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v5 3686; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v4 3687; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3688; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3689; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3690; GCN-HSA-NEXT: v_mov_b32_e32 v33, s7 3691; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 3692; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v7 3693; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v6 3694; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v7 3695; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v6 3696; GCN-HSA-NEXT: v_mov_b32_e32 v32, s6 3697; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[8:11] 3698; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 3699; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v28 3700; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v29 3701; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v28 3702; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v29 3703; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3704; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27] 3705; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3706; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v31 3707; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v30 3708; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v31 3709; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v30 3710; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[7:10] 3711; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 3712; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3713; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3714; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3715; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v3 3716; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v2 3717; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v3 3718; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v2 3719; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3720; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 3721; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v20 3722; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3723; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3724; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3725; 
GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3726; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3727; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v21 3728; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v21 3729; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v20 3730; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 3731; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3732; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3733; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v23 3734; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v22 3735; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v23 3736; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v22 3737; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3738; GCN-HSA-NEXT: s_waitcnt vmcnt(12) 3739; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v18 3740; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3741; GCN-HSA-NEXT: s_waitcnt vmcnt(12) 3742; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v15 3743; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v17 3744; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v16 3745; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v18 3746; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v17 3747; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v16 3748; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v13 3749; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v12 3750; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v15 3751; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v13 3752; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v12 3753; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3754; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3755; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3756; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3757; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[15:18] 3758; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3759; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3760; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3761; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v14 3762; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v14 3763; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3764; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 3765; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 3766; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3767; GCN-HSA-NEXT: 
v_mov_b32_e32 v8, s2 3768; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 3769; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3770; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v19 3771; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3772; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v19 3773; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3774; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3775; GCN-HSA-NEXT: s_endpgm 3776; 3777; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32: 3778; GCN-NOHSA-VI: ; %bb.0: 3779; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 3780; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 3781; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 3782; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1 3783; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000 3784; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3785; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3786; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s11 3787; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 3788; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3789; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 3790; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 3791; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 3792; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 3793; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 3794; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32 3795; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48 3796; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:64 3797; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:80 3798; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:96 3799; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112 3800; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 3801; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 3802; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 3803; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) 3804; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v15 3805; GCN-NOHSA-VI-NEXT: 
v_lshrrev_b32_e32 v16, 16, v14 3806; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v17, 0xffff, v15 3807; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v15, 0xffff, v14 3808; GCN-NOHSA-VI-NEXT: buffer_store_dword v15, off, s[88:91], 0 ; 4-byte Folded Spill 3809; GCN-NOHSA-VI-NEXT: buffer_store_dword v16, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill 3810; GCN-NOHSA-VI-NEXT: buffer_store_dword v17, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill 3811; GCN-NOHSA-VI-NEXT: buffer_store_dword v18, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill 3812; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v13 3813; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v12 3814; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v13 3815; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v12 3816; GCN-NOHSA-VI-NEXT: buffer_store_dword v18, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill 3817; GCN-NOHSA-VI-NEXT: buffer_store_dword v19, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill 3818; GCN-NOHSA-VI-NEXT: buffer_store_dword v20, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill 3819; GCN-NOHSA-VI-NEXT: buffer_store_dword v21, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill 3820; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 3821; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v11 3822; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v24, 16, v10 3823; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v9 3824; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v8 3825; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v25, 0xffff, v11 3826; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v23, 0xffff, v10 3827; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v9 3828; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v8 3829; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 3830; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 3831; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 3832; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v50, 16, v1 3833; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v48, 16, v0 3834; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v3 3835; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, 
v2 3836; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v49, 0xffff, v1 3837; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v47, 0xffff, v0 3838; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 3839; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v40 3840; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v39 3841; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v40 3842; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v39 3843; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v7 3844; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v6 3845; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v46, 16, v5 3846; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v44, 16, v4 3847; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v7 3848; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v6 3849; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v45, 0xffff, v5 3850; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v43, 0xffff, v4 3851; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v54, 16, v30 3852; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v52, 16, v29 3853; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v58, 16, v28 3854; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v56, 16, v27 3855; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v53, 0xffff, v30 3856; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v51, 0xffff, v29 3857; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v57, 0xffff, v28 3858; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v55, 0xffff, v27 3859; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v34 3860; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v28, 16, v33 3861; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v62, 16, v32 3862; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v60, 16, v31 3863; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v29, 0xffff, v34 3864; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v27, 0xffff, v33 3865; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v61, 0xffff, v32 3866; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v59, 0xffff, v31 3867; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v34, 16, v38 3868; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v32, 16, v37 3869; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v36 3870; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v35 3871; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v33, 0xffff, v38 3872; 
GCN-NOHSA-VI-NEXT: v_and_b32_e32 v31, 0xffff, v37 3873; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v36 3874; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v35 3875; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v38, 16, v42 3876; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v36, 16, v41 3877; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v37, 0xffff, v42 3878; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v35, 0xffff, v41 3879; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3880; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:240 3881; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 3882; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:208 3883; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160 3884; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176 3885; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128 3886; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144 3887; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96 3888; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3889; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64 3890; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80 3891; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 3892; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48 3893; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload 3894; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload 3895; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload 3896; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload 3897; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3898; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 
v[0:3], off, s[0:3], 0 3899; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload 3900; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload 3901; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload 3902; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload 3903; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3904; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3905; GCN-NOHSA-VI-NEXT: s_endpgm 3906; 3907; EG-LABEL: global_zextload_v64i16_to_v64i32: 3908; EG: ; %bb.0: 3909; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 3910; EG-NEXT: TEX 3 @22 3911; EG-NEXT: ALU 56, @39, KC0[CB0:0-32], KC1[] 3912; EG-NEXT: TEX 3 @30 3913; EG-NEXT: ALU 87, @96, KC0[CB0:0-32], KC1[] 3914; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0 3915; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0 3916; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0 3917; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0 3918; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0 3919; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0 3920; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0 3921; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0 3922; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T55.X, 0 3923; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T53.X, 0 3924; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T48.X, 0 3925; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0 3926; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T46.X, 0 3927; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0 3928; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0 3929; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T41.X, 1 3930; EG-NEXT: CF_END 3931; EG-NEXT: Fetch clause starting at 22: 3932; EG-NEXT: VTX_READ_128 T36.XYZW, T37.X, 0, #1 3933; EG-NEXT: VTX_READ_128 T38.XYZW, T37.X, 48, #1 3934; 
EG-NEXT: VTX_READ_128 T39.XYZW, T37.X, 32, #1 3935; EG-NEXT: VTX_READ_128 T40.XYZW, T37.X, 16, #1 3936; EG-NEXT: Fetch clause starting at 30: 3937; EG-NEXT: VTX_READ_128 T49.XYZW, T37.X, 112, #1 3938; EG-NEXT: VTX_READ_128 T50.XYZW, T37.X, 96, #1 3939; EG-NEXT: VTX_READ_128 T51.XYZW, T37.X, 80, #1 3940; EG-NEXT: VTX_READ_128 T52.XYZW, T37.X, 64, #1 3941; EG-NEXT: ALU clause starting at 38: 3942; EG-NEXT: MOV * T37.X, KC0[2].Z, 3943; EG-NEXT: ALU clause starting at 39: 3944; EG-NEXT: LSHR * T35.W, T36.W, literal.x, 3945; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3946; EG-NEXT: AND_INT * T35.Z, T36.W, literal.x, 3947; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3948; EG-NEXT: LSHR T35.Y, T36.Z, literal.x, 3949; EG-NEXT: LSHR * T36.W, T36.Y, literal.x, 3950; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3951; EG-NEXT: AND_INT T35.X, T36.Z, literal.x, 3952; EG-NEXT: AND_INT T36.Z, T36.Y, literal.x, 3953; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3954; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3955; EG-NEXT: LSHR T41.X, PV.W, literal.x, 3956; EG-NEXT: LSHR T36.Y, T36.X, literal.y, 3957; EG-NEXT: LSHR T42.W, T40.W, literal.y, 3958; EG-NEXT: AND_INT * T36.X, T36.X, literal.z, 3959; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3960; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3961; EG-NEXT: AND_INT * T42.Z, T40.W, literal.x, 3962; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3963; EG-NEXT: LSHR T43.X, KC0[2].Y, literal.x, 3964; EG-NEXT: LSHR T42.Y, T40.Z, literal.y, 3965; EG-NEXT: LSHR T40.W, T40.Y, literal.y, 3966; EG-NEXT: AND_INT * T42.X, T40.Z, literal.z, 3967; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3968; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3969; EG-NEXT: AND_INT T40.Z, T40.Y, literal.x, 3970; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3971; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 3972; EG-NEXT: LSHR T44.X, PV.W, literal.x, 3973; EG-NEXT: LSHR T40.Y, T40.X, literal.y, 3974; EG-NEXT: LSHR T45.W, T39.W, literal.y, 3975; EG-NEXT: AND_INT 
* T40.X, T40.X, literal.z, 3976; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3977; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3978; EG-NEXT: AND_INT T45.Z, T39.W, literal.x, 3979; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3980; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 3981; EG-NEXT: LSHR T46.X, PV.W, literal.x, 3982; EG-NEXT: LSHR T45.Y, T39.Z, literal.y, 3983; EG-NEXT: LSHR T39.W, T39.Y, literal.y, 3984; EG-NEXT: AND_INT * T45.X, T39.Z, literal.z, 3985; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3986; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3987; EG-NEXT: AND_INT T39.Z, T39.Y, literal.x, 3988; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3989; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 3990; EG-NEXT: LSHR T47.X, PV.W, literal.x, 3991; EG-NEXT: LSHR T39.Y, T39.X, literal.y, 3992; EG-NEXT: AND_INT * T39.X, T39.X, literal.z, 3993; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3994; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3995; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 3996; EG-NEXT: LSHR * T37.W, T38.W, literal.y, 3997; EG-NEXT: 64(8.968310e-44), 16(2.242078e-44) 3998; EG-NEXT: LSHR T48.X, PV.W, literal.x, 3999; EG-NEXT: AND_INT * T37.Z, T38.W, literal.y, 4000; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 4001; EG-NEXT: ALU clause starting at 96: 4002; EG-NEXT: LSHR T37.Y, T38.Z, literal.x, 4003; EG-NEXT: LSHR * T38.W, T38.Y, literal.x, 4004; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4005; EG-NEXT: AND_INT T37.X, T38.Z, literal.x, 4006; EG-NEXT: AND_INT T38.Z, T38.Y, literal.x, 4007; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4008; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4009; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4010; EG-NEXT: LSHR T38.Y, T38.X, literal.y, 4011; EG-NEXT: LSHR T54.W, T52.W, literal.y, 4012; EG-NEXT: AND_INT * T38.X, T38.X, literal.z, 4013; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4014; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4015; EG-NEXT: AND_INT T54.Z, T52.W, literal.x, 4016; EG-NEXT: ADD_INT * 
T0.W, KC0[2].Y, literal.y, 4017; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4018; EG-NEXT: LSHR T55.X, PV.W, literal.x, 4019; EG-NEXT: LSHR T54.Y, T52.Z, literal.y, 4020; EG-NEXT: LSHR T52.W, T52.Y, literal.y, 4021; EG-NEXT: AND_INT * T54.X, T52.Z, literal.z, 4022; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4023; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4024; EG-NEXT: AND_INT T52.Z, T52.Y, literal.x, 4025; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4026; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4027; EG-NEXT: LSHR T56.X, PV.W, literal.x, 4028; EG-NEXT: LSHR T52.Y, T52.X, literal.y, 4029; EG-NEXT: LSHR T57.W, T51.W, literal.y, 4030; EG-NEXT: AND_INT * T52.X, T52.X, literal.z, 4031; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4032; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4033; EG-NEXT: AND_INT T57.Z, T51.W, literal.x, 4034; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4035; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4036; EG-NEXT: LSHR T58.X, PV.W, literal.x, 4037; EG-NEXT: LSHR T57.Y, T51.Z, literal.y, 4038; EG-NEXT: LSHR T51.W, T51.Y, literal.y, 4039; EG-NEXT: AND_INT * T57.X, T51.Z, literal.z, 4040; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4041; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4042; EG-NEXT: AND_INT T51.Z, T51.Y, literal.x, 4043; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4044; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 4045; EG-NEXT: LSHR T59.X, PV.W, literal.x, 4046; EG-NEXT: LSHR T51.Y, T51.X, literal.y, 4047; EG-NEXT: LSHR T60.W, T50.W, literal.y, 4048; EG-NEXT: AND_INT * T51.X, T51.X, literal.z, 4049; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4050; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4051; EG-NEXT: AND_INT T60.Z, T50.W, literal.x, 4052; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4053; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4054; EG-NEXT: LSHR T61.X, PV.W, literal.x, 4055; EG-NEXT: LSHR T60.Y, T50.Z, literal.y, 4056; EG-NEXT: LSHR T50.W, T50.Y, literal.y, 4057; EG-NEXT: AND_INT * 
T60.X, T50.Z, literal.z, 4058; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4059; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4060; EG-NEXT: AND_INT T50.Z, T50.Y, literal.x, 4061; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4062; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4063; EG-NEXT: LSHR T62.X, PV.W, literal.x, 4064; EG-NEXT: LSHR T50.Y, T50.X, literal.y, 4065; EG-NEXT: LSHR T63.W, T49.W, literal.y, 4066; EG-NEXT: AND_INT * T50.X, T50.X, literal.z, 4067; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4068; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4069; EG-NEXT: AND_INT T63.Z, T49.W, literal.x, 4070; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4071; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4072; EG-NEXT: LSHR T64.X, PV.W, literal.x, 4073; EG-NEXT: LSHR T63.Y, T49.Z, literal.y, 4074; EG-NEXT: LSHR T49.W, T49.Y, literal.y, 4075; EG-NEXT: AND_INT * T63.X, T49.Z, literal.z, 4076; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4077; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4078; EG-NEXT: AND_INT T49.Z, T49.Y, literal.x, 4079; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4080; EG-NEXT: 65535(9.183409e-41), 240(3.363116e-43) 4081; EG-NEXT: LSHR T65.X, PV.W, literal.x, 4082; EG-NEXT: LSHR T49.Y, T49.X, literal.y, 4083; EG-NEXT: AND_INT * T49.X, T49.X, literal.z, 4084; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4085; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4086; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4087; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4088; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 4089; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4090; 4091; CM-LABEL: global_zextload_v64i16_to_v64i32: 4092; CM: ; %bb.0: 4093; CM-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 4094; CM-NEXT: TEX 3 @22 4095; CM-NEXT: ALU 50, @39, KC0[CB0:0-32], KC1[] 4096; CM-NEXT: TEX 3 @30 4097; CM-NEXT: ALU 78, @90, KC0[CB0:0-32], KC1[] 4098; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X 4099; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T48.X 4100; CM-NEXT: 
MEM_RAT_CACHELESS STORE_DWORD T62, T64.X 4101; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T49.X 4102; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T61.X 4103; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T50.X 4104; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T56, T58.X 4105; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T54, T51.X 4106; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T53, T55.X 4107; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T37.X 4108; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T52.X 4109; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T38.X 4110; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T46.X 4111; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T39.X 4112; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T43.X 4113; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T36.X 4114; CM-NEXT: CF_END 4115; CM-NEXT: Fetch clause starting at 22: 4116; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 112, #1 4117; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 64, #1 4118; CM-NEXT: VTX_READ_128 T38.XYZW, T35.X, 80, #1 4119; CM-NEXT: VTX_READ_128 T39.XYZW, T35.X, 96, #1 4120; CM-NEXT: Fetch clause starting at 30: 4121; CM-NEXT: VTX_READ_128 T48.XYZW, T35.X, 0, #1 4122; CM-NEXT: VTX_READ_128 T49.XYZW, T35.X, 16, #1 4123; CM-NEXT: VTX_READ_128 T50.XYZW, T35.X, 32, #1 4124; CM-NEXT: VTX_READ_128 T51.XYZW, T35.X, 48, #1 4125; CM-NEXT: ALU clause starting at 38: 4126; CM-NEXT: MOV * T35.X, KC0[2].Z, 4127; CM-NEXT: ALU clause starting at 39: 4128; CM-NEXT: LSHR * T40.W, T36.Y, literal.x, 4129; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4130; CM-NEXT: AND_INT * T40.Z, T36.Y, literal.x, 4131; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4132; CM-NEXT: LSHR T40.Y, T36.X, literal.x, 4133; CM-NEXT: LSHR * T41.W, T36.W, literal.x, 4134; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4135; CM-NEXT: AND_INT T40.X, T36.X, literal.x, 4136; CM-NEXT: AND_INT T41.Z, T36.W, literal.x, 4137; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4138; CM-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 4139; CM-NEXT: LSHR T36.X, PV.W, 
literal.x, 4140; CM-NEXT: LSHR T41.Y, T36.Z, literal.y, 4141; CM-NEXT: LSHR * T42.W, T39.Y, literal.y, 4142; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4143; CM-NEXT: AND_INT T41.X, T36.Z, literal.x, 4144; CM-NEXT: AND_INT T42.Z, T39.Y, literal.x, 4145; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4146; CM-NEXT: 65535(9.183409e-41), 240(3.363116e-43) 4147; CM-NEXT: LSHR T43.X, PV.W, literal.x, 4148; CM-NEXT: LSHR T42.Y, T39.X, literal.y, 4149; CM-NEXT: LSHR * T44.W, T39.W, literal.y, 4150; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4151; CM-NEXT: AND_INT T42.X, T39.X, literal.x, 4152; CM-NEXT: AND_INT T44.Z, T39.W, literal.x, 4153; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4154; CM-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4155; CM-NEXT: LSHR T39.X, PV.W, literal.x, 4156; CM-NEXT: LSHR T44.Y, T39.Z, literal.y, 4157; CM-NEXT: LSHR * T45.W, T38.Y, literal.y, 4158; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4159; CM-NEXT: AND_INT T44.X, T39.Z, literal.x, 4160; CM-NEXT: AND_INT T45.Z, T38.Y, literal.x, 4161; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4162; CM-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4163; CM-NEXT: LSHR T46.X, PV.W, literal.x, 4164; CM-NEXT: LSHR T45.Y, T38.X, literal.y, 4165; CM-NEXT: LSHR * T47.W, T38.W, literal.y, 4166; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4167; CM-NEXT: AND_INT T45.X, T38.X, literal.x, 4168; CM-NEXT: AND_INT T47.Z, T38.W, literal.x, 4169; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4170; CM-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4171; CM-NEXT: LSHR T38.X, PV.W, literal.x, 4172; CM-NEXT: LSHR T47.Y, T38.Z, literal.y, 4173; CM-NEXT: LSHR * T35.W, T37.Y, literal.y, 4174; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4175; CM-NEXT: AND_INT T47.X, T38.Z, literal.x, 4176; CM-NEXT: AND_INT T35.Z, T37.Y, literal.x, 4177; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4178; CM-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 4179; CM-NEXT: ALU clause starting at 90: 4180; CM-NEXT: LSHR T52.X, T0.W, literal.x, 4181; 
CM-NEXT: LSHR T35.Y, T37.X, literal.y, 4182; CM-NEXT: LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212 4183; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4184; CM-NEXT: AND_INT T35.X, T37.X, literal.x, 4185; CM-NEXT: AND_INT T53.Z, T37.W, literal.x, 4186; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4187; CM-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4188; CM-NEXT: LSHR T37.X, PV.W, literal.x, 4189; CM-NEXT: LSHR T53.Y, T37.Z, literal.y, 4190; CM-NEXT: LSHR * T54.W, T51.Y, literal.y, 4191; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4192; CM-NEXT: AND_INT T53.X, T37.Z, literal.x, 4193; CM-NEXT: AND_INT T54.Z, T51.Y, literal.x, 4194; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4195; CM-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4196; CM-NEXT: LSHR T55.X, PV.W, literal.x, 4197; CM-NEXT: LSHR T54.Y, T51.X, literal.y, 4198; CM-NEXT: LSHR * T56.W, T51.W, literal.y, 4199; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4200; CM-NEXT: AND_INT T54.X, T51.X, literal.x, 4201; CM-NEXT: AND_INT T56.Z, T51.W, literal.x, 4202; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4203; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4204; CM-NEXT: LSHR T51.X, PV.W, literal.x, 4205; CM-NEXT: LSHR T56.Y, T51.Z, literal.y, 4206; CM-NEXT: LSHR * T57.W, T50.Y, literal.y, 4207; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4208; CM-NEXT: AND_INT T56.X, T51.Z, literal.x, 4209; CM-NEXT: AND_INT T57.Z, T50.Y, literal.x, 4210; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4211; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4212; CM-NEXT: LSHR T58.X, PV.W, literal.x, 4213; CM-NEXT: LSHR T57.Y, T50.X, literal.y, 4214; CM-NEXT: LSHR * T59.W, T50.W, literal.y, 4215; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4216; CM-NEXT: AND_INT T57.X, T50.X, literal.x, 4217; CM-NEXT: AND_INT T59.Z, T50.W, literal.x, 4218; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4219; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 4220; CM-NEXT: LSHR T50.X, PV.W, literal.x, 4221; CM-NEXT: LSHR T59.Y, T50.Z, literal.y, 4222; 
CM-NEXT: LSHR * T60.W, T49.Y, literal.y, 4223; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4224; CM-NEXT: AND_INT T59.X, T50.Z, literal.x, 4225; CM-NEXT: AND_INT T60.Z, T49.Y, literal.x, 4226; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4227; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 4228; CM-NEXT: LSHR T61.X, PV.W, literal.x, 4229; CM-NEXT: LSHR T60.Y, T49.X, literal.y, 4230; CM-NEXT: LSHR * T62.W, T49.W, literal.y, 4231; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4232; CM-NEXT: AND_INT T60.X, T49.X, literal.x, 4233; CM-NEXT: AND_INT T62.Z, T49.W, literal.x, 4234; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4235; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 4236; CM-NEXT: LSHR T49.X, PV.W, literal.x, 4237; CM-NEXT: LSHR T62.Y, T49.Z, literal.y, 4238; CM-NEXT: LSHR * T63.W, T48.Y, literal.y, 4239; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4240; CM-NEXT: AND_INT T62.X, T49.Z, literal.x, 4241; CM-NEXT: AND_INT T63.Z, T48.Y, literal.x, 4242; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4243; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 4244; CM-NEXT: LSHR T64.X, PV.W, literal.x, 4245; CM-NEXT: LSHR T63.Y, T48.X, literal.y, 4246; CM-NEXT: LSHR * T65.W, T48.W, literal.y, 4247; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4248; CM-NEXT: AND_INT T63.X, T48.X, literal.x, 4249; CM-NEXT: AND_INT * T65.Z, T48.W, literal.x, 4250; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4251; CM-NEXT: LSHR T48.X, KC0[2].Y, literal.x, 4252; CM-NEXT: LSHR * T65.Y, T48.Z, literal.y, 4253; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4254; CM-NEXT: AND_INT T65.X, T48.Z, literal.x, 4255; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4256; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4257; CM-NEXT: LSHR * T66.X, PV.W, literal.x, 4258; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4259 %load = load <64 x i16>, ptr addrspace(1) %in 4260 %ext = zext <64 x i16> %load to <64 x i32> 4261 store <64 x i32> %ext, ptr addrspace(1) %out 4262 ret void 4263} 4264 4265define amdgpu_kernel void 
@global_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 4266; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32: 4267; GCN-NOHSA-SI: ; %bb.0: 4268; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 4269; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 4270; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 4271; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 4272; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s11 4273; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 4274; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 4275; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4276; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4277; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4278; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 4279; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 4280; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s6 4281; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s7 4282; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s2 4283; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, s3 4284; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:112 4285; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:96 4286; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:80 4287; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:64 4288; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 4289; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:16 4290; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:32 4291; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[40:43], off, s[4:7], 0 offset:48 4292; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 4293; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v11 4294; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v10 4295; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v11, 0, 16 4296; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v10, 0, 16 4297; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill 4298; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 
4299; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 4300; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 4301; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v9 4302; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v8 4303; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v9, 0, 16 4304; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v8, 0, 16 4305; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(6) 4306; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v35 4307; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v34 4308; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v35, 0, 16 4309; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v34, 0, 16 4310; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v33 4311; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v32 4312; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v33, 0, 16 4313; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v32, 0, 16 4314; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(5) 4315; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v39 4316; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v38 4317; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v39, 0, 16 4318; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v38, 0, 16 4319; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v47, 16, v37 4320; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v45, 16, v36 4321; GCN-NOHSA-SI-NEXT: v_bfe_i32 v46, v37, 0, 16 4322; GCN-NOHSA-SI-NEXT: v_bfe_i32 v44, v36, 0, 16 4323; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(4) 4324; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v39, 16, v43 4325; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v37, 16, v42 4326; GCN-NOHSA-SI-NEXT: v_bfe_i32 v38, v43, 0, 16 4327; GCN-NOHSA-SI-NEXT: v_bfe_i32 v36, v42, 0, 16 4328; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v51, 16, v41 4329; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v49, 16, v40 4330; GCN-NOHSA-SI-NEXT: v_bfe_i32 v50, v41, 0, 16 4331; GCN-NOHSA-SI-NEXT: v_bfe_i32 v48, v40, 0, 16 4332; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v43, 16, v31 4333; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v41, 16, v30 4334; GCN-NOHSA-SI-NEXT: v_bfe_i32 v42, v31, 0, 16 4335; 
GCN-NOHSA-SI-NEXT: v_bfe_i32 v40, v30, 0, 16 4336; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v55, 16, v29 4337; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v53, 16, v28 4338; GCN-NOHSA-SI-NEXT: v_bfe_i32 v54, v29, 0, 16 4339; GCN-NOHSA-SI-NEXT: v_bfe_i32 v52, v28, 0, 16 4340; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v30, 16, v27 4341; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v28, 16, v26 4342; GCN-NOHSA-SI-NEXT: v_bfe_i32 v29, v27, 0, 16 4343; GCN-NOHSA-SI-NEXT: v_bfe_i32 v27, v26, 0, 16 4344; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v59, 16, v25 4345; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v57, 16, v24 4346; GCN-NOHSA-SI-NEXT: v_bfe_i32 v58, v25, 0, 16 4347; GCN-NOHSA-SI-NEXT: v_bfe_i32 v56, v24, 0, 16 4348; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 16, v23 4349; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 16, v22 4350; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v23, 0, 16 4351; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v22, 0, 16 4352; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v63, 16, v21 4353; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v61, 16, v20 4354; GCN-NOHSA-SI-NEXT: v_bfe_i32 v62, v21, 0, 16 4355; GCN-NOHSA-SI-NEXT: v_bfe_i32 v60, v20, 0, 16 4356; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 16, v19 4357; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 16, v18 4358; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v19, 0, 16 4359; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v18, 0, 16 4360; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4361; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v17 4362; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v16 4363; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v17, 0, 16 4364; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v16, 0, 16 4365; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4366; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:240 4367; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 4368; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:208 4369; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160 4370; 
GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176 4371; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 4372; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:144 4373; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 4374; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:112 4375; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 4376; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:80 4377; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 4378; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 4379; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 4380; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload 4381; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 4382; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 4383; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 4384; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4385; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4386; GCN-NOHSA-SI-NEXT: s_endpgm 4387; 4388; GCN-HSA-LABEL: global_sextload_v64i16_to_v64i32: 4389; GCN-HSA: ; %bb.0: 4390; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 4391; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4392; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4393; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4394; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[0:1] 4395; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x70 4396; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4397; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4398; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4399; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1] 4400; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x60 4401; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4402; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4403; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4404; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x50 4405; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] 4406; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4407; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4408; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4409; GCN-HSA-NEXT: s_add_u32 s4, s2, 64 4410; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] 4411; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4412; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4413; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4414; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 4415; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4416; GCN-HSA-NEXT: s_add_u32 s6, s2, 32 4417; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 4418; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 4419; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4420; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 4421; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4422; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4423; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[8:9] 4424; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 4425; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 4426; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 4427; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7 4428; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 4429; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] 4430; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 4431; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4432; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1 4433; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0 4434; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 4435; GCN-HSA-NEXT: v_ashrrev_i32_e32 v35, 16, v29 4436; GCN-HSA-NEXT: v_ashrrev_i32_e32 v33, 16, v28 4437; GCN-HSA-NEXT: v_bfe_i32 v34, v29, 0, 16 4438; GCN-HSA-NEXT: v_bfe_i32 v32, v28, 0, 16 4439; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 4440; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 4441; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 4442; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4443; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[32:35] 4444; GCN-HSA-NEXT: v_mov_b32_e32 v36, s3 4445; GCN-HSA-NEXT: v_mov_b32_e32 v35, s2 4446; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 4447; GCN-HSA-NEXT: v_ashrrev_i32_e32 v34, 16, v31 4448; 
GCN-HSA-NEXT: v_ashrrev_i32_e32 v32, 16, v30 4449; GCN-HSA-NEXT: v_bfe_i32 v33, v31, 0, 16 4450; GCN-HSA-NEXT: v_bfe_i32 v31, v30, 0, 16 4451; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4452; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[31:34] 4453; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 4454; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v20 4455; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 4456; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 4457; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 4458; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4459; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v21 4460; GCN-HSA-NEXT: v_bfe_i32 v30, v21, 0, 16 4461; GCN-HSA-NEXT: v_bfe_i32 v28, v20, 0, 16 4462; GCN-HSA-NEXT: flat_store_dwordx4 v[35:36], v[28:31] 4463; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3 4464; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2 4465; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 4466; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4467; GCN-HSA-NEXT: v_mov_b32_e32 v37, s3 4468; GCN-HSA-NEXT: v_mov_b32_e32 v36, s2 4469; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 4470; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v23 4471; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v22 4472; GCN-HSA-NEXT: v_bfe_i32 v30, v23, 0, 16 4473; GCN-HSA-NEXT: v_bfe_i32 v28, v22, 0, 16 4474; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4475; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[28:31] 4476; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 4477; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 4478; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 4479; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4480; GCN-HSA-NEXT: v_mov_b32_e32 v39, s3 4481; GCN-HSA-NEXT: s_waitcnt vmcnt(9) 4482; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v13 4483; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v12 4484; GCN-HSA-NEXT: v_bfe_i32 v22, v13, 0, 16 4485; GCN-HSA-NEXT: v_bfe_i32 v20, v12, 0, 16 4486; GCN-HSA-NEXT: v_mov_b32_e32 v38, s2 4487; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 4488; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v15 4489; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v14 4490; GCN-HSA-NEXT: v_bfe_i32 v30, v15, 0, 16 4491; GCN-HSA-NEXT: v_bfe_i32 v28, v14, 
0, 16 4492; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[20:23] 4493; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[28:31] 4494; GCN-HSA-NEXT: s_waitcnt vmcnt(10) 4495; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v5 4496; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v4 4497; GCN-HSA-NEXT: v_bfe_i32 v14, v5, 0, 16 4498; GCN-HSA-NEXT: v_bfe_i32 v12, v4, 0, 16 4499; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v7 4500; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v6 4501; GCN-HSA-NEXT: v_bfe_i32 v22, v7, 0, 16 4502; GCN-HSA-NEXT: v_bfe_i32 v20, v6, 0, 16 4503; GCN-HSA-NEXT: s_waitcnt vmcnt(9) 4504; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1 4505; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0 4506; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 4507; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 4508; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4509; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4510; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4511; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 4512; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[12:15] 4513; GCN-HSA-NEXT: flat_store_dwordx4 v[38:39], v[20:23] 4514; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 4515; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4516; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4517; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4518; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 4519; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v3 4520; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v2 4521; GCN-HSA-NEXT: v_bfe_i32 v22, v3, 0, 16 4522; GCN-HSA-NEXT: v_bfe_i32 v20, v2, 0, 16 4523; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4524; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[20:23] 4525; GCN-HSA-NEXT: s_waitcnt vmcnt(11) 4526; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v9 4527; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v8 4528; GCN-HSA-NEXT: v_bfe_i32 v2, v9, 0, 16 4529; GCN-HSA-NEXT: v_bfe_i32 v0, v8, 0, 16 4530; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4531; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4532; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 4533; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4534; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], 
v[0:3] 4535; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4536; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4537; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 4538; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v24 4539; GCN-HSA-NEXT: v_bfe_i32 v12, v24, 0, 16 4540; GCN-HSA-NEXT: v_ashrrev_i32_e32 v24, 16, v11 4541; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v10 4542; GCN-HSA-NEXT: v_bfe_i32 v23, v11, 0, 16 4543; GCN-HSA-NEXT: v_bfe_i32 v21, v10, 0, 16 4544; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4545; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[21:24] 4546; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4547; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4548; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 4549; GCN-HSA-NEXT: s_waitcnt vmcnt(12) 4550; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v19 4551; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v18 4552; GCN-HSA-NEXT: v_bfe_i32 v2, v19, 0, 16 4553; GCN-HSA-NEXT: v_bfe_i32 v0, v18, 0, 16 4554; GCN-HSA-NEXT: v_ashrrev_i32_e32 v20, 16, v17 4555; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v16 4556; GCN-HSA-NEXT: v_bfe_i32 v19, v17, 0, 16 4557; GCN-HSA-NEXT: v_bfe_i32 v17, v16, 0, 16 4558; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4559; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[17:20] 4560; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4561; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4562; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 4563; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 4564; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4565; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4566; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v25 4567; GCN-HSA-NEXT: v_bfe_i32 v14, v25, 0, 16 4568; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4569; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 4570; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15] 4571; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 4572; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 4573; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v27 4574; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v26 4575; GCN-HSA-NEXT: v_bfe_i32 v6, v27, 0, 16 4576; GCN-HSA-NEXT: v_bfe_i32 v4, v26, 0, 16 4577; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 4578; GCN-HSA-NEXT: 
flat_store_dwordx4 v[0:1], v[4:7] 4579; GCN-HSA-NEXT: s_endpgm 4580; 4581; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32: 4582; GCN-NOHSA-VI: ; %bb.0: 4583; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 4584; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 4585; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 4586; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1 4587; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000 4588; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4589; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4590; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s11 4591; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 4592; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4593; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 4594; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 4595; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 4596; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 4597; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 4598; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32 4599; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48 4600; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[23:26], off, s[8:11], 0 offset:64 4601; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:80 4602; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:96 4603; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:112 4604; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 4605; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4606; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4607; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) 4608; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 16, v15 4609; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 16, v14 4610; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v15, 0, 16 4611; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v14, 0, 16 4612; GCN-NOHSA-VI-NEXT: buffer_store_dword v15, off, s[88:91], 0 ; 4-byte Folded Spill 4613; GCN-NOHSA-VI-NEXT: buffer_store_dword v16, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill 4614; 
GCN-NOHSA-VI-NEXT: buffer_store_dword v17, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill 4615; GCN-NOHSA-VI-NEXT: buffer_store_dword v18, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill 4616; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 16, v13 4617; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 16, v12 4618; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v13, 0, 16 4619; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16 4620; GCN-NOHSA-VI-NEXT: buffer_store_dword v13, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill 4621; GCN-NOHSA-VI-NEXT: buffer_store_dword v14, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill 4622; GCN-NOHSA-VI-NEXT: buffer_store_dword v15, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill 4623; GCN-NOHSA-VI-NEXT: buffer_store_dword v16, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill 4624; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 4625; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v11 4626; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v10 4627; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v11, 0, 16 4628; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v10, 0, 16 4629; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v42, 16, v9 4630; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v40, 16, v8 4631; GCN-NOHSA-VI-NEXT: v_bfe_i32 v41, v9, 0, 16 4632; GCN-NOHSA-VI-NEXT: v_bfe_i32 v39, v8, 0, 16 4633; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 4634; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 4635; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 4636; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v3, 0, 16 4637; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 4638; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v50, 16, v1 4639; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v48, 16, v0 4640; GCN-NOHSA-VI-NEXT: v_bfe_i32 v49, v1, 0, 16 4641; GCN-NOHSA-VI-NEXT: v_bfe_i32 v47, v0, 0, 16 4642; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 4643; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v36 4644; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v35 4645; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v36, 0, 16 4646; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v35, 0, 16 4647; 
GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v7 4648; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v6 4649; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v7, 0, 16 4650; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v6, 0, 16 4651; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v46, 16, v5 4652; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v44, 16, v4 4653; GCN-NOHSA-VI-NEXT: v_bfe_i32 v45, v5, 0, 16 4654; GCN-NOHSA-VI-NEXT: v_bfe_i32 v43, v4, 0, 16 4655; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v54, 16, v26 4656; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v52, 16, v25 4657; GCN-NOHSA-VI-NEXT: v_bfe_i32 v53, v26, 0, 16 4658; GCN-NOHSA-VI-NEXT: v_bfe_i32 v51, v25, 0, 16 4659; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v58, 16, v24 4660; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v56, 16, v23 4661; GCN-NOHSA-VI-NEXT: v_bfe_i32 v57, v24, 0, 16 4662; GCN-NOHSA-VI-NEXT: v_bfe_i32 v55, v23, 0, 16 4663; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 16, v30 4664; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 16, v29 4665; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v30, 0, 16 4666; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v29, 0, 16 4667; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v62, 16, v28 4668; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v60, 16, v27 4669; GCN-NOHSA-VI-NEXT: v_bfe_i32 v61, v28, 0, 16 4670; GCN-NOHSA-VI-NEXT: v_bfe_i32 v59, v27, 0, 16 4671; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 16, v34 4672; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 16, v33 4673; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v34, 0, 16 4674; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v33, 0, 16 4675; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v32 4676; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v31 4677; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v32, 0, 16 4678; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v31, 0, 16 4679; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v34, 16, v38 4680; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v32, 16, v37 4681; GCN-NOHSA-VI-NEXT: v_bfe_i32 v33, v38, 0, 16 4682; GCN-NOHSA-VI-NEXT: v_bfe_i32 v31, v37, 0, 16 4683; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4684; 
GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:240 4685; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 4686; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:208 4687; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160 4688; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:176 4689; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128 4690; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144 4691; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96 4692; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 4693; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64 4694; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80 4695; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:32 4696; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 4697; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload 4698; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload 4699; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload 4700; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload 4701; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4702; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4703; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload 4704; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload 4705; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload 4706; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload 4707; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4708; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4709; GCN-NOHSA-VI-NEXT: s_endpgm 4710; 4711; EG-LABEL: global_sextload_v64i16_to_v64i32: 4712; EG: ; %bb.0: 4713; EG-NEXT: ALU 18, @38, KC0[CB0:0-32], KC1[] 4714; EG-NEXT: TEX 7 @22 4715; EG-NEXT: ALU 75, @57, KC0[CB0:0-32], KC1[] 4716; EG-NEXT: ALU 71, @133, KC0[CB0:0-32], KC1[] 4717; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T41.X, 0 4718; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0 4719; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T56.X, 0 4720; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T55.X, 0 4721; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T54.X, 0 4722; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T53.X, 0 4723; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T52.X, 0 4724; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T51.X, 0 4725; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T50.X, 0 4726; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T49.X, 0 4727; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T40.X, 0 4728; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T39.X, 0 4729; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T38.X, 0 4730; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0 4731; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 4732; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 4733; EG-NEXT: CF_END 4734; EG-NEXT: PAD 4735; EG-NEXT: Fetch clause starting at 22: 4736; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 4737; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 4738; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 4739; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 4740; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 4741; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 4742; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 4743; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 4744; EG-NEXT: ALU clause starting at 38: 4745; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4746; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4747; EG-NEXT: LSHR T35.X, 
PV.W, literal.x, 4748; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x, 4749; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4750; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4751; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 4752; EG-NEXT: LSHR T37.X, PV.W, literal.x, 4753; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4754; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4755; EG-NEXT: LSHR T38.X, PV.W, literal.x, 4756; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4757; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4758; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4759; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4760; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4761; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4762; EG-NEXT: MOV * T41.X, KC0[2].Z, 4763; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4764; EG-NEXT: ALU clause starting at 57: 4765; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4766; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 4767; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4768; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4769; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 4770; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4771; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4772; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 4773; EG-NEXT: LSHR T51.X, PV.W, literal.x, 4774; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4775; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 4776; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4777; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4778; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 4779; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4780; EG-NEXT: LSHR T0.Y, T41.Y, literal.y, 4781; EG-NEXT: LSHR T0.Z, T41.W, literal.y, 4782; EG-NEXT: LSHR T0.W, T48.Y, literal.y, BS:VEC_120/SCL_212 4783; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4784; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4785; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) 4786; EG-NEXT: LSHR T54.X, PS, literal.x, 4787; EG-NEXT: LSHR T1.Y, T48.W, literal.y, 4788; EG-NEXT: LSHR T1.Z, T47.Y, literal.y, 4789; EG-NEXT: 
LSHR T1.W, T47.W, literal.y, BS:VEC_120/SCL_212 4790; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 4791; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4792; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 4793; EG-NEXT: LSHR T55.X, PS, literal.x, 4794; EG-NEXT: LSHR T2.Y, T46.Y, literal.y, 4795; EG-NEXT: LSHR T2.Z, T46.W, literal.y, 4796; EG-NEXT: LSHR T2.W, T45.Y, literal.y, BS:VEC_120/SCL_212 4797; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 4798; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4799; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4800; EG-NEXT: LSHR T56.X, PS, literal.x, 4801; EG-NEXT: LSHR T3.Y, T45.W, literal.y, 4802; EG-NEXT: BFE_INT T57.Z, T44.W, 0.0, literal.y, BS:VEC_120/SCL_212 4803; EG-NEXT: LSHR T3.W, T43.Y, literal.y, 4804; EG-NEXT: LSHR * T4.W, T43.W, literal.y, 4805; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4806; EG-NEXT: BFE_INT T57.X, T44.Z, 0.0, literal.x, 4807; EG-NEXT: LSHR T4.Y, T42.Y, literal.x, 4808; EG-NEXT: BFE_INT T58.Z, T44.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4809; EG-NEXT: LSHR T5.W, T42.W, literal.x, 4810; EG-NEXT: LSHR * T6.W, T44.W, literal.x, 4811; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4812; EG-NEXT: BFE_INT T58.X, T44.X, 0.0, literal.x, 4813; EG-NEXT: LSHR T5.Y, T44.Y, literal.x, 4814; EG-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x, 4815; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 4816; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 4817; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4818; EG-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x, 4819; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 4820; EG-NEXT: BFE_INT T44.Z, T42.Y, 0.0, literal.x, 4821; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 4822; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 4823; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4824; EG-NEXT: BFE_INT T44.X, T42.X, 0.0, literal.x, 4825; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 4826; EG-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x, 4827; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212 4828; EG-NEXT: LSHR * 
T5.W, T42.Z, literal.x, 4829; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4830; EG-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x, 4831; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 4832; EG-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x, 4833; EG-NEXT: BFE_INT T44.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4834; EG-NEXT: LSHR * T5.W, T42.X, literal.x, 4835; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4836; EG-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x, 4837; EG-NEXT: BFE_INT T44.Y, PS, 0.0, literal.x, 4838; EG-NEXT: BFE_INT T61.Z, T45.W, 0.0, literal.x, 4839; EG-NEXT: BFE_INT * T60.W, T4.W, 0.0, literal.x, BS:VEC_120/SCL_212 4840; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4841; EG-NEXT: ALU clause starting at 133: 4842; EG-NEXT: LSHR * T4.W, T43.Z, literal.x, 4843; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4844; EG-NEXT: BFE_INT T61.X, T45.Z, 0.0, literal.x, 4845; EG-NEXT: BFE_INT T60.Y, PV.W, 0.0, literal.x, 4846; EG-NEXT: BFE_INT T43.Z, T45.Y, 0.0, literal.x, 4847; EG-NEXT: BFE_INT T42.W, T3.W, 0.0, literal.x, 4848; EG-NEXT: LSHR * T3.W, T43.X, literal.x, 4849; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4850; EG-NEXT: BFE_INT T43.X, T45.X, 0.0, literal.x, 4851; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4852; EG-NEXT: BFE_INT T62.Z, T46.W, 0.0, literal.x, 4853; EG-NEXT: BFE_INT T61.W, T3.Y, 0.0, literal.x, 4854; EG-NEXT: LSHR * T3.W, T45.Z, literal.x, 4855; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4856; EG-NEXT: BFE_INT T62.X, T46.Z, 0.0, literal.x, 4857; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 4858; EG-NEXT: BFE_INT T45.Z, T46.Y, 0.0, literal.x, 4859; EG-NEXT: BFE_INT T43.W, T2.W, 0.0, literal.x, 4860; EG-NEXT: LSHR * T2.W, T45.X, literal.x, 4861; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4862; EG-NEXT: BFE_INT T45.X, T46.X, 0.0, literal.x, 4863; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4864; EG-NEXT: BFE_INT T63.Z, T47.W, 0.0, literal.x, 4865; EG-NEXT: BFE_INT T62.W, T2.Z, 0.0, literal.x, 4866; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 4867; EG-NEXT: 
16(2.242078e-44), 0(0.000000e+00) 4868; EG-NEXT: BFE_INT T63.X, T47.Z, 0.0, literal.x, 4869; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 4870; EG-NEXT: BFE_INT T46.Z, T47.Y, 0.0, literal.x, 4871; EG-NEXT: BFE_INT T45.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4872; EG-NEXT: LSHR * T2.W, T46.X, literal.x, 4873; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4874; EG-NEXT: BFE_INT T46.X, T47.X, 0.0, literal.x, 4875; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4876; EG-NEXT: BFE_INT T64.Z, T48.W, 0.0, literal.x, 4877; EG-NEXT: BFE_INT T63.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 4878; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 4879; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4880; EG-NEXT: BFE_INT T64.X, T48.Z, 0.0, literal.x, 4881; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 4882; EG-NEXT: BFE_INT T47.Z, T48.Y, 0.0, literal.x, 4883; EG-NEXT: BFE_INT T46.W, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4884; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 4885; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4886; EG-NEXT: BFE_INT T47.X, T48.X, 0.0, literal.x, 4887; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 4888; EG-NEXT: BFE_INT T65.Z, T41.W, 0.0, literal.x, 4889; EG-NEXT: BFE_INT T64.W, T1.Y, 0.0, literal.x, 4890; EG-NEXT: LSHR * T1.W, T48.Z, literal.x, 4891; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4892; EG-NEXT: BFE_INT T65.X, T41.Z, 0.0, literal.x, 4893; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 4894; EG-NEXT: BFE_INT T48.Z, T41.Y, 0.0, literal.x, 4895; EG-NEXT: BFE_INT T47.W, T0.W, 0.0, literal.x, 4896; EG-NEXT: LSHR * T0.W, T48.X, literal.x, 4897; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4898; EG-NEXT: BFE_INT T48.X, T41.X, 0.0, literal.x, 4899; EG-NEXT: BFE_INT T47.Y, PS, 0.0, literal.x, 4900; EG-NEXT: LSHR T1.Z, T41.Z, literal.x, 4901; EG-NEXT: BFE_INT T65.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4902; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4903; EG-NEXT: 16(2.242078e-44), 240(3.363116e-43) 4904; EG-NEXT: LSHR T66.X, PS, literal.x, 4905; EG-NEXT: BFE_INT T65.Y, 
PV.Z, 0.0, literal.y, 4906; EG-NEXT: LSHR T0.Z, T41.X, literal.y, 4907; EG-NEXT: BFE_INT T48.W, T0.Y, 0.0, literal.y, 4908; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4909; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4910; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4911; EG-NEXT: LSHR T41.X, PS, literal.x, 4912; EG-NEXT: BFE_INT * T48.Y, PV.Z, 0.0, literal.y, 4913; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4914; 4915; CM-LABEL: global_sextload_v64i16_to_v64i32: 4916; CM: ; %bb.0: 4917; CM-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[] 4918; CM-NEXT: TEX 1 @24 4919; CM-NEXT: ALU 15, @41, KC0[CB0:0-32], KC1[] 4920; CM-NEXT: TEX 5 @28 4921; CM-NEXT: ALU 82, @57, KC0[CB0:0-32], KC1[] 4922; CM-NEXT: ALU 72, @140, KC0[CB0:0-32], KC1[] 4923; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X 4924; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T35.X 4925; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T64, T56.X 4926; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T55.X 4927; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T54.X 4928; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T53.X 4929; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T52.X 4930; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T51.X 4931; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T61, T50.X 4932; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T49.X 4933; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T48.X 4934; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T47.X 4935; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T46.X 4936; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T40.X 4937; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T58, T39.X 4938; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T38.X 4939; CM-NEXT: CF_END 4940; CM-NEXT: PAD 4941; CM-NEXT: Fetch clause starting at 24: 4942; CM-NEXT: VTX_READ_128 T36.XYZW, T37.X, 16, #1 4943; CM-NEXT: VTX_READ_128 T35.XYZW, T37.X, 0, #1 4944; CM-NEXT: Fetch clause starting at 28: 4945; CM-NEXT: VTX_READ_128 T41.XYZW, T37.X, 112, #1 4946; CM-NEXT: VTX_READ_128 T42.XYZW, T37.X, 96, #1 4947; CM-NEXT: VTX_READ_128 
T43.XYZW, T37.X, 80, #1 4948; CM-NEXT: VTX_READ_128 T44.XYZW, T37.X, 64, #1 4949; CM-NEXT: VTX_READ_128 T45.XYZW, T37.X, 48, #1 4950; CM-NEXT: VTX_READ_128 T37.XYZW, T37.X, 32, #1 4951; CM-NEXT: ALU clause starting at 40: 4952; CM-NEXT: MOV * T37.X, KC0[2].Z, 4953; CM-NEXT: ALU clause starting at 41: 4954; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4955; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4956; CM-NEXT: LSHR T38.X, PV.W, literal.x, 4957; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4958; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 4959; CM-NEXT: LSHR T39.X, PV.W, literal.x, 4960; CM-NEXT: LSHR T0.Y, T35.Z, literal.y, 4961; CM-NEXT: LSHR T0.Z, T35.W, literal.y, 4962; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4963; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4964; CM-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4965; CM-NEXT: LSHR T40.X, PV.W, literal.x, 4966; CM-NEXT: LSHR T1.Y, T35.Y, literal.y, 4967; CM-NEXT: LSHR T1.Z, T36.Z, literal.y, 4968; CM-NEXT: LSHR * T0.W, T36.W, literal.y, 4969; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4970; CM-NEXT: ALU clause starting at 57: 4971; CM-NEXT: LSHR T2.Z, T36.X, literal.x, 4972; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4973; CM-NEXT: 16(2.242078e-44), 208(2.914701e-43) 4974; CM-NEXT: LSHR T46.X, PV.W, literal.x, 4975; CM-NEXT: LSHR T2.Y, T36.Y, literal.y, 4976; CM-NEXT: LSHR T3.Z, T37.Z, literal.y, 4977; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4978; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4979; CM-NEXT: 160(2.242078e-43), 0(0.000000e+00) 4980; CM-NEXT: LSHR T47.X, PV.W, literal.x, 4981; CM-NEXT: LSHR T3.Y, T37.W, literal.y, 4982; CM-NEXT: LSHR T4.Z, T37.X, literal.y, 4983; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4984; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4985; CM-NEXT: 176(2.466285e-43), 0(0.000000e+00) 4986; CM-NEXT: LSHR T48.X, PV.W, literal.x, 4987; CM-NEXT: LSHR T4.Y, T37.Y, literal.y, 4988; CM-NEXT: LSHR T5.Z, T45.Z, literal.y, 4989; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 
4990; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4991; CM-NEXT: 128(1.793662e-43), 0(0.000000e+00) 4992; CM-NEXT: LSHR T49.X, PV.W, literal.x, 4993; CM-NEXT: LSHR T5.Y, T45.W, literal.y, 4994; CM-NEXT: LSHR T6.Z, T45.X, literal.y, 4995; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4996; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4997; CM-NEXT: 144(2.017870e-43), 0(0.000000e+00) 4998; CM-NEXT: LSHR T50.X, PV.W, literal.x, 4999; CM-NEXT: LSHR T6.Y, T45.Y, literal.y, 5000; CM-NEXT: LSHR T7.Z, T44.Z, literal.y, 5001; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5002; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5003; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00) 5004; CM-NEXT: LSHR T51.X, PV.W, literal.x, 5005; CM-NEXT: LSHR T7.Y, T44.W, literal.y, 5006; CM-NEXT: LSHR T8.Z, T44.X, literal.y, 5007; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5008; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5009; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 5010; CM-NEXT: LSHR T52.X, PV.W, literal.x, 5011; CM-NEXT: LSHR T8.Y, T44.Y, literal.y, 5012; CM-NEXT: LSHR T9.Z, T43.Z, literal.y, 5013; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5014; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5015; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00) 5016; CM-NEXT: LSHR T53.X, PV.W, literal.x, 5017; CM-NEXT: LSHR T9.Y, T43.W, literal.y, 5018; CM-NEXT: LSHR T10.Z, T43.X, literal.y, 5019; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5020; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5021; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00) 5022; CM-NEXT: LSHR T54.X, PV.W, literal.x, 5023; CM-NEXT: LSHR T10.Y, T43.Y, literal.y, 5024; CM-NEXT: LSHR T11.Z, T42.Z, literal.y, 5025; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5026; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5027; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 5028; CM-NEXT: LSHR T55.X, PV.W, literal.x, 5029; CM-NEXT: LSHR T11.Y, T42.W, literal.y, 5030; CM-NEXT: LSHR T12.Z, T42.X, literal.y, 5031; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5032; CM-NEXT: 
2(2.802597e-45), 16(2.242078e-44) 5033; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 5034; CM-NEXT: LSHR T56.X, PV.W, literal.x, 5035; CM-NEXT: LSHR T12.Y, T42.Y, literal.y, 5036; CM-NEXT: BFE_INT T57.Z, T41.Y, 0.0, literal.y, BS:VEC_120/SCL_212 5037; CM-NEXT: LSHR * T1.W, T41.Z, literal.y, 5038; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5039; CM-NEXT: BFE_INT T57.X, T41.X, 0.0, literal.x, 5040; CM-NEXT: LSHR T13.Y, T41.W, literal.x, 5041; CM-NEXT: BFE_INT T58.Z, T41.W, 0.0, literal.x, 5042; CM-NEXT: LSHR * T2.W, T41.Y, literal.x, 5043; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5044; CM-NEXT: BFE_INT T58.X, T41.Z, 0.0, literal.x, 5045; CM-NEXT: LSHR T14.Y, T41.X, literal.x, 5046; CM-NEXT: BFE_INT T41.Z, T42.Y, 0.0, literal.x, 5047; CM-NEXT: BFE_INT * T57.W, PV.W, 0.0, literal.x, 5048; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5049; CM-NEXT: BFE_INT T41.X, T42.X, 0.0, literal.x, 5050; CM-NEXT: BFE_INT T57.Y, PV.Y, 0.0, literal.x, 5051; CM-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x, 5052; CM-NEXT: BFE_INT * T58.W, T13.Y, 0.0, literal.x, 5053; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5054; CM-NEXT: ALU clause starting at 140: 5055; CM-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x, 5056; CM-NEXT: BFE_INT T58.Y, T1.W, 0.0, literal.x, 5057; CM-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x, 5058; CM-NEXT: BFE_INT * T41.W, T12.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5059; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5060; CM-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x, 5061; CM-NEXT: BFE_INT T41.Y, T12.Z, 0.0, literal.x, 5062; CM-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x, 5063; CM-NEXT: BFE_INT * T59.W, T11.Y, 0.0, literal.x, 5064; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5065; CM-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x, 5066; CM-NEXT: BFE_INT T59.Y, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5067; CM-NEXT: BFE_INT T43.Z, T44.Y, 0.0, literal.x, 5068; CM-NEXT: BFE_INT * T42.W, T10.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5069; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5070; 
CM-NEXT: BFE_INT T43.X, T44.X, 0.0, literal.x, 5071; CM-NEXT: BFE_INT T42.Y, T10.Z, 0.0, literal.x, 5072; CM-NEXT: BFE_INT T61.Z, T44.W, 0.0, literal.x, 5073; CM-NEXT: BFE_INT * T60.W, T9.Y, 0.0, literal.x, 5074; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5075; CM-NEXT: BFE_INT T61.X, T44.Z, 0.0, literal.x, 5076; CM-NEXT: BFE_INT T60.Y, T9.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5077; CM-NEXT: BFE_INT T44.Z, T45.Y, 0.0, literal.x, 5078; CM-NEXT: BFE_INT * T43.W, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5079; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5080; CM-NEXT: BFE_INT T44.X, T45.X, 0.0, literal.x, 5081; CM-NEXT: BFE_INT T43.Y, T8.Z, 0.0, literal.x, 5082; CM-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 5083; CM-NEXT: BFE_INT * T61.W, T7.Y, 0.0, literal.x, 5084; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5085; CM-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 5086; CM-NEXT: BFE_INT T61.Y, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5087; CM-NEXT: BFE_INT T45.Z, T37.Y, 0.0, literal.x, 5088; CM-NEXT: BFE_INT * T44.W, T6.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5089; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5090; CM-NEXT: BFE_INT T45.X, T37.X, 0.0, literal.x, 5091; CM-NEXT: BFE_INT T44.Y, T6.Z, 0.0, literal.x, 5092; CM-NEXT: BFE_INT T63.Z, T37.W, 0.0, literal.x, 5093; CM-NEXT: BFE_INT * T62.W, T5.Y, 0.0, literal.x, 5094; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5095; CM-NEXT: BFE_INT T63.X, T37.Z, 0.0, literal.x, 5096; CM-NEXT: BFE_INT T62.Y, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5097; CM-NEXT: BFE_INT T37.Z, T36.Y, 0.0, literal.x, 5098; CM-NEXT: BFE_INT * T45.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5099; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5100; CM-NEXT: BFE_INT T37.X, T36.X, 0.0, literal.x, 5101; CM-NEXT: BFE_INT T45.Y, T4.Z, 0.0, literal.x, 5102; CM-NEXT: BFE_INT T64.Z, T36.W, 0.0, literal.x, 5103; CM-NEXT: BFE_INT * T63.W, T3.Y, 0.0, literal.x, 5104; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5105; CM-NEXT: BFE_INT T64.X, T36.Z, 0.0, literal.x, 5106; 
CM-NEXT: BFE_INT T63.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5107; CM-NEXT: BFE_INT T36.Z, T35.Y, 0.0, literal.x, 5108; CM-NEXT: BFE_INT * T37.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5109; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5110; CM-NEXT: BFE_INT T36.X, T35.X, 0.0, literal.x, 5111; CM-NEXT: BFE_INT T37.Y, T2.Z, 0.0, literal.x, 5112; CM-NEXT: BFE_INT T65.Z, T35.W, 0.0, literal.x, 5113; CM-NEXT: BFE_INT * T64.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212 5114; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5115; CM-NEXT: BFE_INT T65.X, T35.Z, 0.0, literal.x, 5116; CM-NEXT: BFE_INT T64.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5117; CM-NEXT: LSHR T1.Z, T35.X, literal.x, 5118; CM-NEXT: BFE_INT * T36.W, T1.Y, 0.0, literal.x, 5119; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5120; CM-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 5121; CM-NEXT: BFE_INT T36.Y, PV.Z, 0.0, literal.y, 5122; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, 5123; CM-NEXT: BFE_INT * T65.W, T0.Z, 0.0, literal.y, 5124; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5125; CM-NEXT: LSHR T66.X, PV.Z, literal.x, 5126; CM-NEXT: BFE_INT * T65.Y, T0.Y, 0.0, literal.y, 5127; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5128 %load = load <64 x i16>, ptr addrspace(1) %in 5129 %ext = sext <64 x i16> %load to <64 x i32> 5130 store <64 x i32> %ext, ptr addrspace(1) %out 5131 ret void 5132} 5133 5134define amdgpu_kernel void @global_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5135; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64: 5136; GCN-NOHSA-SI: ; %bb.0: 5137; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5138; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5139; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5140; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5141; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5142; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5143; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5144; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5145; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5146; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5147; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5148; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5149; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5150; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5151; GCN-NOHSA-SI-NEXT: s_endpgm 5152; 5153; GCN-HSA-LABEL: global_zextload_i16_to_i64: 5154; GCN-HSA: ; %bb.0: 5155; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5156; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5157; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5158; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5159; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 5160; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5161; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5162; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5163; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5164; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5165; GCN-HSA-NEXT: s_endpgm 5166; 5167; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i64: 5168; GCN-NOHSA-VI: ; %bb.0: 5169; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5170; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5171; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5172; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5173; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5174; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5175; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5176; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5177; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5178; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5179; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5180; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5181; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5182; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5183; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5184; GCN-NOHSA-VI-NEXT: s_endpgm 5185; 5186; EG-LABEL: global_zextload_i16_to_i64: 5187; EG: ; %bb.0: 5188; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5189; EG-NEXT: TEX 0 @6 5190; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5191; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5192; EG-NEXT: CF_END 5193; EG-NEXT: PAD 5194; EG-NEXT: Fetch clause starting at 6: 
5195; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5196; EG-NEXT: ALU clause starting at 8: 5197; EG-NEXT: MOV * T0.X, KC0[2].Z, 5198; EG-NEXT: ALU clause starting at 9: 5199; EG-NEXT: MOV * T0.Y, 0.0, 5200; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5201; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5202; 5203; CM-LABEL: global_zextload_i16_to_i64: 5204; CM: ; %bb.0: 5205; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5206; CM-NEXT: TEX 0 @6 5207; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5208; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5209; CM-NEXT: CF_END 5210; CM-NEXT: PAD 5211; CM-NEXT: Fetch clause starting at 6: 5212; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5213; CM-NEXT: ALU clause starting at 8: 5214; CM-NEXT: MOV * T0.X, KC0[2].Z, 5215; CM-NEXT: ALU clause starting at 9: 5216; CM-NEXT: MOV * T0.Y, 0.0, 5217; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5218; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5219 %a = load i16, ptr addrspace(1) %in 5220 %ext = zext i16 %a to i64 5221 store i64 %ext, ptr addrspace(1) %out 5222 ret void 5223} 5224 5225; FIXME: Need to optimize this sequence to avoid extra bfe: 5226; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64 5227; t31: i64 = any_extend t28 5228; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16 5229 5230; TODO: These could be expanded earlier using ASHR 15 5231define amdgpu_kernel void @global_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5232; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64: 5233; GCN-NOHSA-SI: ; %bb.0: 5234; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5235; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5236; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5237; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5238; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5239; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5240; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5241; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5242; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5243; GCN-NOHSA-SI-NEXT: 
s_mov_b32 s4, s0 5244; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5245; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5246; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5247; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5248; GCN-NOHSA-SI-NEXT: s_endpgm 5249; 5250; GCN-HSA-LABEL: global_sextload_i16_to_i64: 5251; GCN-HSA: ; %bb.0: 5252; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5253; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5254; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5255; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5256; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 5257; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5258; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5259; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5260; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5261; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5262; GCN-HSA-NEXT: s_endpgm 5263; 5264; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i64: 5265; GCN-NOHSA-VI: ; %bb.0: 5266; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5267; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5268; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5269; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5270; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5271; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5272; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5273; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5274; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5275; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5276; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5277; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5278; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 5279; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5280; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5281; GCN-NOHSA-VI-NEXT: s_endpgm 5282; 5283; EG-LABEL: global_sextload_i16_to_i64: 5284; EG: ; %bb.0: 5285; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5286; EG-NEXT: TEX 0 @6 5287; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5288; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5289; EG-NEXT: CF_END 5290; EG-NEXT: PAD 5291; EG-NEXT: Fetch clause starting at 6: 
5292; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5293; EG-NEXT: ALU clause starting at 8: 5294; EG-NEXT: MOV * T0.X, KC0[2].Z, 5295; EG-NEXT: ALU clause starting at 9: 5296; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5297; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5298; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5299; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5300; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5301; 5302; CM-LABEL: global_sextload_i16_to_i64: 5303; CM: ; %bb.0: 5304; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5305; CM-NEXT: TEX 0 @6 5306; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5307; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5308; CM-NEXT: CF_END 5309; CM-NEXT: PAD 5310; CM-NEXT: Fetch clause starting at 6: 5311; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5312; CM-NEXT: ALU clause starting at 8: 5313; CM-NEXT: MOV * T0.X, KC0[2].Z, 5314; CM-NEXT: ALU clause starting at 9: 5315; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 5316; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5317; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 5318; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, 5319; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5320 %a = load i16, ptr addrspace(1) %in 5321 %ext = sext i16 %a to i64 5322 store i64 %ext, ptr addrspace(1) %out 5323 ret void 5324} 5325 5326define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5327; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64: 5328; GCN-NOHSA-SI: ; %bb.0: 5329; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5330; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5331; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5332; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5333; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5334; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5335; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5336; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5337; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5338; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5339; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5340; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5341; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5342; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5343; GCN-NOHSA-SI-NEXT: s_endpgm 5344; 5345; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i64: 5346; GCN-HSA: ; %bb.0: 5347; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5348; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5349; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5350; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5351; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 5352; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5353; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5354; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5355; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5356; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5357; GCN-HSA-NEXT: s_endpgm 5358; 5359; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i64: 5360; GCN-NOHSA-VI: ; %bb.0: 5361; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5362; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5363; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5364; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5365; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5366; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5367; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5368; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5369; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5370; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5371; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5372; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5373; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5374; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5375; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5376; GCN-NOHSA-VI-NEXT: s_endpgm 5377; 5378; EG-LABEL: global_zextload_v1i16_to_v1i64: 5379; EG: ; %bb.0: 5380; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5381; EG-NEXT: TEX 0 @6 5382; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5383; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5384; EG-NEXT: CF_END 5385; EG-NEXT: PAD 5386; EG-NEXT: Fetch clause starting at 6: 5387; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5388; EG-NEXT: ALU clause 
starting at 8: 5389; EG-NEXT: MOV * T0.X, KC0[2].Z, 5390; EG-NEXT: ALU clause starting at 9: 5391; EG-NEXT: MOV * T0.Y, 0.0, 5392; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5393; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5394; 5395; CM-LABEL: global_zextload_v1i16_to_v1i64: 5396; CM: ; %bb.0: 5397; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5398; CM-NEXT: TEX 0 @6 5399; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5400; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5401; CM-NEXT: CF_END 5402; CM-NEXT: PAD 5403; CM-NEXT: Fetch clause starting at 6: 5404; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5405; CM-NEXT: ALU clause starting at 8: 5406; CM-NEXT: MOV * T0.X, KC0[2].Z, 5407; CM-NEXT: ALU clause starting at 9: 5408; CM-NEXT: MOV * T0.Y, 0.0, 5409; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5410; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5411 %load = load <1 x i16>, ptr addrspace(1) %in 5412 %ext = zext <1 x i16> %load to <1 x i64> 5413 store <1 x i64> %ext, ptr addrspace(1) %out 5414 ret void 5415} 5416 5417; TODO: These could be expanded earlier using ASHR 15 5418define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5419; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64: 5420; GCN-NOHSA-SI: ; %bb.0: 5421; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5422; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5423; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5424; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5425; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5426; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5427; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5428; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5429; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5430; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5431; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5432; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5433; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5434; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5435; GCN-NOHSA-SI-NEXT: s_endpgm 5436; 5437; 
GCN-HSA-LABEL: global_sextload_v1i16_to_v1i64: 5438; GCN-HSA: ; %bb.0: 5439; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5440; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5441; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5442; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5443; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 5444; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5445; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5446; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5447; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5448; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5449; GCN-HSA-NEXT: s_endpgm 5450; 5451; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i64: 5452; GCN-NOHSA-VI: ; %bb.0: 5453; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5454; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5455; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5456; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5457; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5458; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5459; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5460; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5461; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5462; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5463; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5464; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5465; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 5466; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5467; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5468; GCN-NOHSA-VI-NEXT: s_endpgm 5469; 5470; EG-LABEL: global_sextload_v1i16_to_v1i64: 5471; EG: ; %bb.0: 5472; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5473; EG-NEXT: TEX 0 @6 5474; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5475; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5476; EG-NEXT: CF_END 5477; EG-NEXT: PAD 5478; EG-NEXT: Fetch clause starting at 6: 5479; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5480; EG-NEXT: ALU clause starting at 8: 5481; EG-NEXT: MOV * T0.X, KC0[2].Z, 5482; EG-NEXT: ALU clause starting at 9: 5483; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5484; EG-NEXT: LSHR * T1.X, KC0[2].Y, 
literal.y, 5485; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5486; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5487; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5488; 5489; CM-LABEL: global_sextload_v1i16_to_v1i64: 5490; CM: ; %bb.0: 5491; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5492; CM-NEXT: TEX 0 @6 5493; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5494; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5495; CM-NEXT: CF_END 5496; CM-NEXT: PAD 5497; CM-NEXT: Fetch clause starting at 6: 5498; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5499; CM-NEXT: ALU clause starting at 8: 5500; CM-NEXT: MOV * T0.X, KC0[2].Z, 5501; CM-NEXT: ALU clause starting at 9: 5502; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 5503; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5504; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 5505; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, 5506; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5507 %load = load <1 x i16>, ptr addrspace(1) %in 5508 %ext = sext <1 x i16> %load to <1 x i64> 5509 store <1 x i64> %ext, ptr addrspace(1) %out 5510 ret void 5511} 5512 5513define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5514; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64: 5515; GCN-NOHSA-SI: ; %bb.0: 5516; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5517; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5518; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5519; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5520; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5521; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5522; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5523; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5524; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5525; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5526; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5527; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5528; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5529; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5530; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5531; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v3, v1 5532; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5533; GCN-NOHSA-SI-NEXT: s_endpgm 5534; 5535; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i64: 5536; GCN-HSA: ; %bb.0: 5537; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5538; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5539; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5540; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5541; GCN-HSA-NEXT: flat_load_dword v0, v[0:1] 5542; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5543; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5544; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5545; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5546; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5547; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5548; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v0 5549; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5550; GCN-HSA-NEXT: s_endpgm 5551; 5552; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i64: 5553; GCN-NOHSA-VI: ; %bb.0: 5554; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5555; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5556; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5557; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5558; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5559; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5560; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5561; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5562; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5563; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5564; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5565; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5566; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5567; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5568; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5569; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5570; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5571; GCN-NOHSA-VI-NEXT: s_endpgm 5572; 5573; EG-LABEL: global_zextload_v2i16_to_v2i64: 5574; EG: ; %bb.0: 5575; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5576; EG-NEXT: TEX 0 @6 5577; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 5578; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T4.XYZW, T5.X, 1 5579; EG-NEXT: CF_END 5580; EG-NEXT: PAD 5581; EG-NEXT: Fetch clause starting at 6: 5582; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5583; EG-NEXT: ALU clause starting at 8: 5584; EG-NEXT: MOV * T4.X, KC0[2].Z, 5585; EG-NEXT: ALU clause starting at 9: 5586; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 5587; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5588; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 5589; EG-NEXT: MOV T4.Y, 0.0, 5590; EG-NEXT: MOV T4.W, 0.0, 5591; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5592; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 5593; 5594; CM-LABEL: global_zextload_v2i16_to_v2i64: 5595; CM: ; %bb.0: 5596; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5597; CM-NEXT: TEX 0 @6 5598; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[] 5599; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 5600; CM-NEXT: CF_END 5601; CM-NEXT: PAD 5602; CM-NEXT: Fetch clause starting at 6: 5603; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5604; CM-NEXT: ALU clause starting at 8: 5605; CM-NEXT: MOV * T4.X, KC0[2].Z, 5606; CM-NEXT: ALU clause starting at 9: 5607; CM-NEXT: LSHR * T4.Z, T4.X, literal.x, 5608; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5609; CM-NEXT: AND_INT T4.X, T4.X, literal.x, 5610; CM-NEXT: MOV T4.Y, 0.0, 5611; CM-NEXT: MOV * T4.W, 0.0, 5612; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 5613; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, 5614; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5615 %load = load <2 x i16>, ptr addrspace(1) %in 5616 %ext = zext <2 x i16> %load to <2 x i64> 5617 store <2 x i64> %ext, ptr addrspace(1) %out 5618 ret void 5619} 5620 5621define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5622; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64: 5623; GCN-NOHSA-SI: ; %bb.0: 5624; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5625; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5626; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5627; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5628; GCN-NOHSA-SI-NEXT: 
s_mov_b32 s11, s7 5629; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5630; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5631; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5632; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5633; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5634; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5635; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5636; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5637; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16 5638; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5639; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v2, 0, 16 5640; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5641; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5642; GCN-NOHSA-SI-NEXT: s_endpgm 5643; 5644; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i64: 5645; GCN-HSA: ; %bb.0: 5646; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5647; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5648; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5649; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5650; GCN-HSA-NEXT: flat_load_dword v0, v[0:1] 5651; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5652; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5653; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5654; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5655; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 5656; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 5657; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5658; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5659; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5660; GCN-HSA-NEXT: s_endpgm 5661; 5662; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i64: 5663; GCN-NOHSA-VI: ; %bb.0: 5664; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5665; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5666; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5667; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5668; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5669; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5670; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5671; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5672; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[8:11], 0 5673; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s4, s0 5674; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5675; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5676; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v1 5677; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 5678; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v2, 0, 16 5679; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5680; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5681; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5682; GCN-NOHSA-VI-NEXT: s_endpgm 5683; 5684; EG-LABEL: global_sextload_v2i16_to_v2i64: 5685; EG: ; %bb.0: 5686; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5687; EG-NEXT: TEX 0 @6 5688; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5689; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5690; EG-NEXT: CF_END 5691; EG-NEXT: PAD 5692; EG-NEXT: Fetch clause starting at 6: 5693; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5694; EG-NEXT: ALU clause starting at 8: 5695; EG-NEXT: MOV * T4.X, KC0[2].Z, 5696; EG-NEXT: ALU clause starting at 9: 5697; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 5698; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5699; EG-NEXT: ASHR * T4.Z, T4.X, literal.x, 5700; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5701; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 5702; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5703; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5704; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 5705; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5706; 5707; CM-LABEL: global_sextload_v2i16_to_v2i64: 5708; CM: ; %bb.0: 5709; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5710; CM-NEXT: TEX 0 @6 5711; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5712; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 5713; CM-NEXT: CF_END 5714; CM-NEXT: PAD 5715; CM-NEXT: Fetch clause starting at 6: 5716; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5717; CM-NEXT: ALU clause starting at 8: 5718; CM-NEXT: MOV * T4.X, KC0[2].Z, 5719; CM-NEXT: ALU clause starting at 9: 5720; CM-NEXT: ASHR * T4.W, T4.X, literal.x, 5721; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5722; CM-NEXT: 
ASHR * T4.Z, T4.X, literal.x, 5723; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5724; CM-NEXT: BFE_INT * T4.X, T4.X, 0.0, literal.x, 5725; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5726; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 5727; CM-NEXT: ASHR * T4.Y, PV.X, literal.y, 5728; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5729 %load = load <2 x i16>, ptr addrspace(1) %in 5730 %ext = sext <2 x i16> %load to <2 x i64> 5731 store <2 x i64> %ext, ptr addrspace(1) %out 5732 ret void 5733} 5734 5735define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5736; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64: 5737; GCN-NOHSA-SI: ; %bb.0: 5738; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5739; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5740; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5741; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5742; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5743; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5744; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5745; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5746; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 5747; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5749; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 5750; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 5751; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5752; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5753; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5754; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5755; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5756; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v8 5757; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v9 5758; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 5759; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 5760; GCN-NOHSA-SI-NEXT: s_endpgm 5761; 5762; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i64: 5763; GCN-HSA: ; %bb.0: 5764; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5765; GCN-HSA-NEXT: 
s_waitcnt lgkmcnt(0) 5766; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5767; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5768; GCN-HSA-NEXT: flat_load_dwordx2 v[8:9], v[0:1] 5769; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5770; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5771; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5772; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 5773; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5774; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 5775; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 5776; GCN-HSA-NEXT: v_mov_b32_e32 v5, v1 5777; GCN-HSA-NEXT: v_mov_b32_e32 v7, v1 5778; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 5779; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5780; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5781; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v9 5782; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5783; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v8 5784; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 5785; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5786; GCN-HSA-NEXT: s_endpgm 5787; 5788; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i64: 5789; GCN-NOHSA-VI: ; %bb.0: 5790; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5791; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5792; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5793; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5794; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5795; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5796; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5797; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5798; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 5799; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5800; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5801; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5802; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5803; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v1 5804; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v1 5805; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5806; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5807; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v9 5808; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5809; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v8 
5810; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 5811; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 5812; GCN-NOHSA-VI-NEXT: s_endpgm 5813; 5814; EG-LABEL: global_zextload_v4i16_to_v4i64: 5815; EG: ; %bb.0: 5816; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5817; EG-NEXT: TEX 0 @6 5818; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 5819; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T8.X, 0 5820; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T7.X, 1 5821; EG-NEXT: CF_END 5822; EG-NEXT: Fetch clause starting at 6: 5823; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5824; EG-NEXT: ALU clause starting at 8: 5825; EG-NEXT: MOV * T5.X, KC0[2].Z, 5826; EG-NEXT: ALU clause starting at 9: 5827; EG-NEXT: LSHR * T6.Z, T5.Y, literal.x, 5828; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5829; EG-NEXT: AND_INT T6.X, T5.Y, literal.x, 5830; EG-NEXT: MOV T6.Y, 0.0, 5831; EG-NEXT: LSHR T5.Z, T5.X, literal.y, 5832; EG-NEXT: AND_INT * T5.X, T5.X, literal.x, 5833; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5834; EG-NEXT: MOV T5.Y, 0.0, 5835; EG-NEXT: MOV T6.W, 0.0, 5836; EG-NEXT: MOV * T5.W, 0.0, 5837; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 5838; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5839; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5840; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 5841; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5842; 5843; CM-LABEL: global_zextload_v4i16_to_v4i64: 5844; CM: ; %bb.0: 5845; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5846; CM-NEXT: TEX 0 @6 5847; CM-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[] 5848; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T8.X 5849; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T7.X 5850; CM-NEXT: CF_END 5851; CM-NEXT: Fetch clause starting at 6: 5852; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5853; CM-NEXT: ALU clause starting at 8: 5854; CM-NEXT: MOV * T5.X, KC0[2].Z, 5855; CM-NEXT: ALU clause starting at 9: 5856; CM-NEXT: LSHR * T6.Z, T5.X, literal.x, 5857; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 
5858; CM-NEXT: AND_INT T6.X, T5.X, literal.x, 5859; CM-NEXT: MOV T6.Y, 0.0, 5860; CM-NEXT: LSHR * T5.Z, T5.Y, literal.y, 5861; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5862; CM-NEXT: AND_INT T5.X, T5.Y, literal.x, 5863; CM-NEXT: MOV T5.Y, 0.0, 5864; CM-NEXT: MOV * T6.W, 0.0, 5865; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 5866; CM-NEXT: MOV * T5.W, 0.0, 5867; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5868; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5869; CM-NEXT: LSHR * T7.X, PV.W, literal.x, 5870; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5871; CM-NEXT: LSHR * T8.X, KC0[2].Y, literal.x, 5872; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5873 %load = load <4 x i16>, ptr addrspace(1) %in 5874 %ext = zext <4 x i16> %load to <4 x i64> 5875 store <4 x i64> %ext, ptr addrspace(1) %out 5876 ret void 5877} 5878 5879define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 5880; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64: 5881; GCN-NOHSA-SI: ; %bb.0: 5882; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 5883; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5884; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5885; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5886; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5887; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5888; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5889; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5890; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0 5891; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5892; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5893; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5894; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v2 5895; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 5896; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16 5897; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[6:7], v[1:2], 48 5898; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v3, 0, 16 5899; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5900; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16 5901; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 
5902; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5903; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 5904; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5905; GCN-NOHSA-SI-NEXT: s_endpgm 5906; 5907; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i64: 5908; GCN-HSA: ; %bb.0: 5909; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 5910; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5911; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5912; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5913; GCN-HSA-NEXT: flat_load_dwordx2 v[1:2], v[0:1] 5914; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5915; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5916; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 5917; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 5918; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 5919; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 5920; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5921; GCN-HSA-NEXT: v_mov_b32_e32 v3, v2 5922; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v1 5923; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[1:2], 48 5924; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 5925; GCN-HSA-NEXT: v_bfe_i32 v4, v3, 0, 16 5926; GCN-HSA-NEXT: v_bfe_i32 v0, v1, 0, 16 5927; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5928; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5929; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5930; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5931; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 5932; GCN-HSA-NEXT: s_endpgm 5933; 5934; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i64: 5935; GCN-NOHSA-VI: ; %bb.0: 5936; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5937; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5938; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5939; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5940; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5941; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5942; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5943; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5944; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0 5945; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5946; GCN-NOHSA-VI-NEXT: s_mov_b32 
s5, s1 5947; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5948; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v2 5949; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2 5950; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 5951; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16 5952; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16 5953; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 5954; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v3, 0, 16 5955; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5956; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 5957; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5958; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5959; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 5960; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5961; GCN-NOHSA-VI-NEXT: s_endpgm 5962; 5963; EG-LABEL: global_sextload_v4i16_to_v4i64: 5964; EG: ; %bb.0: 5965; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5966; EG-NEXT: TEX 0 @6 5967; EG-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[] 5968; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 5969; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 5970; EG-NEXT: CF_END 5971; EG-NEXT: Fetch clause starting at 6: 5972; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5973; EG-NEXT: ALU clause starting at 8: 5974; EG-NEXT: MOV * T5.X, KC0[2].Z, 5975; EG-NEXT: ALU clause starting at 9: 5976; EG-NEXT: ASHR * T5.W, T5.X, literal.x, 5977; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5978; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5979; EG-NEXT: ASHR T5.Z, T5.X, literal.y, 5980; EG-NEXT: ASHR * T7.W, T5.Y, literal.z, 5981; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5982; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5983; EG-NEXT: BFE_INT T5.X, T5.X, 0.0, literal.x, 5984; EG-NEXT: ASHR * T7.Z, T5.Y, literal.x, 5985; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5986; EG-NEXT: BFE_INT T7.X, T5.Y, 0.0, literal.x, 5987; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 5988; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5989; EG-NEXT: 16(2.242078e-44), 
31(4.344025e-44) 5990; EG-NEXT: LSHR T8.X, PV.W, literal.x, 5991; EG-NEXT: ASHR * T7.Y, PV.X, literal.y, 5992; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5993; 5994; CM-LABEL: global_sextload_v4i16_to_v4i64: 5995; CM: ; %bb.0: 5996; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5997; CM-NEXT: TEX 0 @6 5998; CM-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[] 5999; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T8.X 6000; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T7.X 6001; CM-NEXT: CF_END 6002; CM-NEXT: Fetch clause starting at 6: 6003; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 6004; CM-NEXT: ALU clause starting at 8: 6005; CM-NEXT: MOV * T5.X, KC0[2].Z, 6006; CM-NEXT: ALU clause starting at 9: 6007; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 6008; CM-NEXT: ASHR * T6.W, T5.Y, literal.y, 6009; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6010; CM-NEXT: LSHR T7.X, PV.Z, literal.x, 6011; CM-NEXT: ASHR T6.Z, T5.Y, literal.y, 6012; CM-NEXT: ASHR * T5.W, T5.X, literal.z, 6013; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6014; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6015; CM-NEXT: BFE_INT T6.X, T5.Y, 0.0, literal.x, 6016; CM-NEXT: ASHR * T5.Z, T5.X, literal.x, 6017; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6018; CM-NEXT: BFE_INT T5.X, T5.X, 0.0, literal.x, 6019; CM-NEXT: ASHR * T6.Y, PV.X, literal.y, 6020; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6021; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 6022; CM-NEXT: ASHR * T5.Y, PV.X, literal.y, 6023; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6024 %load = load <4 x i16>, ptr addrspace(1) %in 6025 %ext = sext <4 x i16> %load to <4 x i64> 6026 store <4 x i64> %ext, ptr addrspace(1) %out 6027 ret void 6028} 6029 6030define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 6031; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64: 6032; GCN-NOHSA-SI: ; %bb.0: 6033; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 6034; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6035; GCN-NOHSA-SI-NEXT: s_mov_b32 
s2, -1 6036; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6037; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6038; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6039; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6040; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6041; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6042; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, 0 6043; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v4 6044; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, v4 6045; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, v4 6046; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, v4 6047; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, v4 6048; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, v4 6049; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, v4 6050; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6051; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6052; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6053; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v1 6054; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 6055; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 6056; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6057; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v15, 0xffff, v0 6058; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v11, 0xffff, v2 6059; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v7, 0xffff, v1 6060; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v3, 0xffff, v3 6061; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:48 6062; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:16 6063; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:32 6064; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 6065; GCN-NOHSA-SI-NEXT: s_endpgm 6066; 6067; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i64: 6068; GCN-HSA: ; %bb.0: 6069; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6070; GCN-HSA-NEXT: v_mov_b32_e32 v4, 0 6071; GCN-HSA-NEXT: v_mov_b32_e32 v6, v4 6072; GCN-HSA-NEXT: v_mov_b32_e32 v8, v4 6073; GCN-HSA-NEXT: v_mov_b32_e32 v10, v4 6074; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6075; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6076; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 
6077; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6078; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6079; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6080; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 6081; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 6082; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6083; GCN-HSA-NEXT: v_mov_b32_e32 v20, s1 6084; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6085; GCN-HSA-NEXT: v_mov_b32_e32 v19, s0 6086; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 6087; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6088; GCN-HSA-NEXT: v_mov_b32_e32 v24, s3 6089; GCN-HSA-NEXT: v_mov_b32_e32 v26, s1 6090; GCN-HSA-NEXT: v_mov_b32_e32 v12, v4 6091; GCN-HSA-NEXT: v_mov_b32_e32 v14, v4 6092; GCN-HSA-NEXT: v_mov_b32_e32 v16, v4 6093; GCN-HSA-NEXT: v_mov_b32_e32 v18, v4 6094; GCN-HSA-NEXT: v_mov_b32_e32 v23, s2 6095; GCN-HSA-NEXT: v_mov_b32_e32 v25, s0 6096; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6097; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v3 6098; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v3 6099; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v1 6100; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v2 6101; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6102; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v0 6103; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v2 6104; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v1 6105; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[3:6] 6106; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[7:10] 6107; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[11:14] 6108; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[15:18] 6109; GCN-HSA-NEXT: s_endpgm 6110; 6111; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i64: 6112; GCN-NOHSA-VI: ; %bb.0: 6113; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 6114; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6115; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6116; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6117; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6118; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6119; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6120; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6121; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 
v[0:3], off, s[8:11], 0 6122; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, 0 6123; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v4 6124; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6125; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6126; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, v4 6127; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, v4 6128; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v12, v4 6129; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, v4 6130; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, v4 6131; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v4 6132; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6133; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 6134; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, 0xffff, v3 6135; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6136; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v15, 0xffff, v0 6137; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v1 6138; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v11, 0xffff, v1 6139; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 6140; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v7, 0xffff, v2 6141; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:48 6142; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:32 6143; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:16 6144; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 6145; GCN-NOHSA-VI-NEXT: s_endpgm 6146; 6147; EG-LABEL: global_zextload_v8i16_to_v8i64: 6148; EG: ; %bb.0: 6149; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6150; EG-NEXT: TEX 0 @8 6151; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 6152; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0 6153; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0 6154; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0 6155; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1 6156; EG-NEXT: CF_END 6157; EG-NEXT: Fetch clause starting at 8: 6158; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6159; EG-NEXT: ALU clause starting at 10: 6160; EG-NEXT: MOV * T7.X, KC0[2].Z, 6161; EG-NEXT: ALU clause starting at 11: 6162; EG-NEXT: LSHR 
* T8.Z, T7.W, literal.x, 6163; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6164; EG-NEXT: AND_INT T8.X, T7.W, literal.x, 6165; EG-NEXT: MOV T8.Y, 0.0, 6166; EG-NEXT: LSHR T9.Z, T7.Z, literal.y, 6167; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x, 6168; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6169; EG-NEXT: MOV T9.Y, 0.0, 6170; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x, 6171; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6172; EG-NEXT: AND_INT T10.X, T7.Y, literal.x, 6173; EG-NEXT: MOV T10.Y, 0.0, 6174; EG-NEXT: LSHR T7.Z, T7.X, literal.y, 6175; EG-NEXT: AND_INT * T7.X, T7.X, literal.x, 6176; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6177; EG-NEXT: MOV T7.Y, 0.0, 6178; EG-NEXT: MOV T8.W, 0.0, 6179; EG-NEXT: MOV * T9.W, 0.0, 6180; EG-NEXT: MOV T10.W, 0.0, 6181; EG-NEXT: MOV * T7.W, 0.0, 6182; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 6183; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6184; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6185; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6186; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6187; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6188; EG-NEXT: LSHR T13.X, PV.W, literal.x, 6189; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6190; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6191; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 6192; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6193; 6194; CM-LABEL: global_zextload_v8i16_to_v8i64: 6195; CM: ; %bb.0: 6196; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6197; CM-NEXT: TEX 0 @8 6198; CM-NEXT: ALU 32, @11, KC0[CB0:0-32], KC1[] 6199; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T14.X 6200; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T13.X 6201; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T12.X 6202; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T11.X 6203; CM-NEXT: CF_END 6204; CM-NEXT: Fetch clause starting at 8: 6205; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6206; CM-NEXT: ALU clause starting at 10: 6207; CM-NEXT: MOV * T7.X, KC0[2].Z, 6208; CM-NEXT: ALU clause starting at 11: 6209; CM-NEXT: LSHR 
* T8.Z, T7.X, literal.x, 6210; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6211; CM-NEXT: AND_INT T8.X, T7.X, literal.x, 6212; CM-NEXT: MOV T8.Y, 0.0, 6213; CM-NEXT: LSHR * T9.Z, T7.Y, literal.y, 6214; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6215; CM-NEXT: AND_INT T9.X, T7.Y, literal.x, 6216; CM-NEXT: MOV T9.Y, 0.0, 6217; CM-NEXT: LSHR * T10.Z, T7.Z, literal.y, 6218; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6219; CM-NEXT: AND_INT T10.X, T7.Z, literal.x, 6220; CM-NEXT: MOV T10.Y, 0.0, 6221; CM-NEXT: LSHR * T7.Z, T7.W, literal.y, 6222; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6223; CM-NEXT: AND_INT T7.X, T7.W, literal.x, 6224; CM-NEXT: MOV T7.Y, 0.0, 6225; CM-NEXT: MOV * T8.W, 0.0, 6226; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 6227; CM-NEXT: MOV * T9.W, 0.0, 6228; CM-NEXT: MOV * T10.W, 0.0, 6229; CM-NEXT: MOV * T7.W, 0.0, 6230; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6231; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6232; CM-NEXT: LSHR T11.X, PV.W, literal.x, 6233; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6234; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6235; CM-NEXT: LSHR T12.X, PV.W, literal.x, 6236; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6237; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6238; CM-NEXT: LSHR * T13.X, PV.W, literal.x, 6239; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6240; CM-NEXT: LSHR * T14.X, KC0[2].Y, literal.x, 6241; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6242 %load = load <8 x i16>, ptr addrspace(1) %in 6243 %ext = zext <8 x i16> %load to <8 x i64> 6244 store <8 x i64> %ext, ptr addrspace(1) %out 6245 ret void 6246} 6247 6248define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 6249; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64: 6250; GCN-NOHSA-SI: ; %bb.0: 6251; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 6252; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6253; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6254; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 
6255; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6256; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6257; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6258; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6259; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6260; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6261; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6262; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6263; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v3 6264; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v2 6265; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6266; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16 6267; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v2, 0, 16 6268; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[14:15], v[0:1], 48 6269; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[2:3], v[2:3], 48 6270; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v1, 0, 16 6271; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v5, 0, 16 6272; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6273; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6274; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v6, 0, 16 6275; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v7, 0, 16 6276; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6277; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6278; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6279; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6280; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6281; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 6282; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 6283; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6284; GCN-NOHSA-SI-NEXT: s_endpgm 6285; 6286; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i64: 6287; GCN-HSA: ; %bb.0: 6288; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6289; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6290; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6291; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6292; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6293; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6294; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 
6295; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6296; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6297; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6298; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 6299; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6300; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 6301; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 6302; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 6303; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6304; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 6305; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1 6306; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0 6307; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6308; GCN-HSA-NEXT: v_mov_b32_e32 v7, v3 6309; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6310; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6311; GCN-HSA-NEXT: v_ashr_i64 v[14:15], v[0:1], 48 6312; GCN-HSA-NEXT: v_bfe_i32 v12, v1, 0, 16 6313; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 6314; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 6315; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[2:3], 48 6316; GCN-HSA-NEXT: v_bfe_i32 v6, v6, 0, 16 6317; GCN-HSA-NEXT: v_bfe_i32 v10, v10, 0, 16 6318; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6319; GCN-HSA-NEXT: v_bfe_i32 v0, v7, 0, 16 6320; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6321; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6322; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6323; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6324; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6325; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15] 6326; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 6327; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[8:11] 6328; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[4:7] 6329; GCN-HSA-NEXT: s_endpgm 6330; 6331; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i64: 6332; GCN-NOHSA-VI: ; %bb.0: 6333; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 6334; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6335; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6336; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6337; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6338; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6339; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6340; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6341; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6342; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6343; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6344; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6345; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v3 6346; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 6347; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6348; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 6349; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6350; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v11, 0, 16 6351; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16 6352; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16 6353; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 6354; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 6355; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v6, 0, 16 6356; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16 6357; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v10, 0, 16 6358; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6359; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 6360; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6361; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6362; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6363; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6364; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6365; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6366; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 6367; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 6368; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6369; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6370; GCN-NOHSA-VI-NEXT: s_endpgm 6371; 6372; EG-LABEL: global_sextload_v8i16_to_v8i64: 6373; EG: ; %bb.0: 6374; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6375; EG-NEXT: TEX 0 @8 6376; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 6377; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0 6378; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0 6379; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0 6380; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1 6381; EG-NEXT: CF_END 6382; EG-NEXT: Fetch clause starting at 8: 6383; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6384; EG-NEXT: ALU clause starting at 10: 6385; EG-NEXT: MOV * T7.X, KC0[2].Z, 6386; EG-NEXT: ALU clause starting at 11: 6387; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 6388; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6389; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6390; EG-NEXT: LSHR T9.X, PV.W, literal.x, 6391; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 6392; EG-NEXT: ASHR * T10.W, T7.X, literal.z, 6393; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6394; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6395; EG-NEXT: LSHR T11.X, PV.W, literal.x, 6396; EG-NEXT: ASHR T10.Z, T7.X, literal.y, 6397; EG-NEXT: ASHR * T12.W, T7.Y, literal.z, 6398; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6399; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6400; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x, 6401; EG-NEXT: ASHR T12.Z, T7.Y, literal.x, 6402; EG-NEXT: ASHR * T13.W, T7.Z, literal.y, 6403; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6404; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x, 6405; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 6406; EG-NEXT: ASHR T13.Z, T7.Z, literal.x, 6407; EG-NEXT: ASHR * T14.W, T7.W, literal.y, 6408; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6409; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x, 6410; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 6411; EG-NEXT: ASHR * T14.Z, T7.W, literal.x, 6412; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6413; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x, 6414; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 6415; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6416; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6417; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6418; EG-NEXT: LSHR T7.X, PV.W, literal.x, 6419; EG-NEXT: ASHR * T14.Y, PV.X, literal.y, 6420; EG-NEXT: 
2(2.802597e-45), 31(4.344025e-44) 6421; 6422; CM-LABEL: global_sextload_v8i16_to_v8i64: 6423; CM: ; %bb.0: 6424; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6425; CM-NEXT: TEX 0 @8 6426; CM-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 6427; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T14.X 6428; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T11.X 6429; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T9.X 6430; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T8.X 6431; CM-NEXT: CF_END 6432; CM-NEXT: Fetch clause starting at 8: 6433; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6434; CM-NEXT: ALU clause starting at 10: 6435; CM-NEXT: MOV * T7.X, KC0[2].Z, 6436; CM-NEXT: ALU clause starting at 11: 6437; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6438; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6439; CM-NEXT: LSHR T8.X, PV.W, literal.x, 6440; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6441; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6442; CM-NEXT: LSHR T9.X, PV.W, literal.x, 6443; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 6444; CM-NEXT: ASHR * T10.W, T7.W, literal.z, 6445; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6446; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6447; CM-NEXT: LSHR T11.X, PV.Z, literal.x, 6448; CM-NEXT: ASHR T10.Z, T7.W, literal.y, 6449; CM-NEXT: ASHR * T12.W, T7.Z, literal.z, 6450; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6451; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6452; CM-NEXT: BFE_INT T10.X, T7.W, 0.0, literal.x, 6453; CM-NEXT: ASHR T12.Z, T7.Z, literal.x, 6454; CM-NEXT: ASHR * T13.W, T7.Y, literal.y, 6455; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6456; CM-NEXT: BFE_INT T12.X, T7.Z, 0.0, literal.x, 6457; CM-NEXT: ASHR T10.Y, PV.X, literal.y, 6458; CM-NEXT: ASHR T13.Z, T7.Y, literal.x, 6459; CM-NEXT: ASHR * T7.W, T7.X, literal.y, 6460; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6461; CM-NEXT: BFE_INT T13.X, T7.Y, 0.0, literal.x, 6462; CM-NEXT: ASHR T12.Y, PV.X, literal.y, 6463; CM-NEXT: ASHR * T7.Z, T7.X, literal.x, 6464; CM-NEXT: 16(2.242078e-44), 
31(4.344025e-44) 6465; CM-NEXT: BFE_INT T7.X, T7.X, 0.0, literal.x, 6466; CM-NEXT: ASHR * T13.Y, PV.X, literal.y, 6467; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6468; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x, 6469; CM-NEXT: ASHR * T7.Y, PV.X, literal.y, 6470; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6471 %load = load <8 x i16>, ptr addrspace(1) %in 6472 %ext = sext <8 x i16> %load to <8 x i64> 6473 store <8 x i64> %ext, ptr addrspace(1) %out 6474 ret void 6475} 6476 6477define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 6478; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64: 6479; GCN-NOHSA-SI: ; %bb.0: 6480; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 6481; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6482; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6483; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6484; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6485; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6486; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6487; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6488; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6489; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6490; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 6491; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 6492; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6493; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v0 6494; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v0 6495; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2 6496; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v1 6497; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v3 6498; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v3 6499; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, 0 6500; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6501; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v22, 16, v5 6502; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v26, 16, v6 6503; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v6 6504; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 6505; 
GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v4 6506; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v30, 16, v7 6507; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, 0xffff, v7 6508; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v5 6509; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v21 6510; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, v21 6511; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v21 6512; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v21 6513; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v21 6514; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v21 6515; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v21 6516; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v21 6517; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v21 6518; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, v21 6519; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v21 6520; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v21 6521; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v21 6522; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v21 6523; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v21 6524; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6525; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6526; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 6527; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112 6528; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6529; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6530; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 6531; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96 6532; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 6533; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 6534; GCN-NOHSA-SI-NEXT: s_endpgm 6535; 6536; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64: 6537; GCN-HSA: ; %bb.0: 6538; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6539; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 6540; GCN-HSA-NEXT: v_mov_b32_e32 v12, v8 6541; GCN-HSA-NEXT: v_mov_b32_e32 v14, v8 6542; GCN-HSA-NEXT: v_mov_b32_e32 v15, v8 6543; 
GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6544; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6545; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6546; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 6547; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6548; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 6549; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6550; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6551; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 6552; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6553; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6554; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 6555; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6556; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5 6557; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4 6558; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x50 6559; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6560; GCN-HSA-NEXT: v_mov_b32_e32 v17, v8 6561; GCN-HSA-NEXT: v_mov_b32_e32 v18, v8 6562; GCN-HSA-NEXT: v_mov_b32_e32 v20, v8 6563; GCN-HSA-NEXT: v_mov_b32_e32 v10, v8 6564; GCN-HSA-NEXT: v_mov_b32_e32 v23, v8 6565; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6566; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v1 6567; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v1 6568; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[11:14] 6569; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5 6570; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4 6571; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70 6572; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6573; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v5 6574; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v5 6575; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6576; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[14:17] 6577; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v7 6578; GCN-HSA-NEXT: v_mov_b32_e32 v15, s5 6579; GCN-HSA-NEXT: v_mov_b32_e32 v14, s4 6580; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v7 6581; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[17:20] 6582; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v3 6583; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6584; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6585; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6586; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6587; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 6588; 
GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v3 6589; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 6590; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 6591; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[7:10] 6592; GCN-HSA-NEXT: v_mov_b32_e32 v19, s1 6593; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6594; GCN-HSA-NEXT: v_mov_b32_e32 v18, s0 6595; GCN-HSA-NEXT: s_add_u32 s0, s0, 0x60 6596; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6597; GCN-HSA-NEXT: v_mov_b32_e32 v11, v8 6598; GCN-HSA-NEXT: v_mov_b32_e32 v13, v8 6599; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v4 6600; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v4 6601; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3 6602; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1 6603; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v2 6604; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v2 6605; GCN-HSA-NEXT: v_mov_b32_e32 v21, v8 6606; GCN-HSA-NEXT: v_mov_b32_e32 v15, v8 6607; GCN-HSA-NEXT: v_mov_b32_e32 v17, v8 6608; GCN-HSA-NEXT: v_mov_b32_e32 v7, v8 6609; GCN-HSA-NEXT: v_mov_b32_e32 v9, v8 6610; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v6 6611; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v6 6612; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 6613; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0 6614; GCN-HSA-NEXT: v_lshrrev_b32_e32 v22, 16, v0 6615; GCN-HSA-NEXT: v_and_b32_e32 v20, 0xffff, v0 6616; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[10:13] 6617; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[20:23] 6618; GCN-HSA-NEXT: flat_store_dwordx4 v[3:4], v[14:17] 6619; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[6:9] 6620; GCN-HSA-NEXT: s_endpgm 6621; 6622; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64: 6623; GCN-NOHSA-VI: ; %bb.0: 6624; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 6625; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6626; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6627; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6628; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6629; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6630; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6631; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6632; GCN-NOHSA-VI-NEXT: 
buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6633; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6634; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v29, 0 6635; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v31, v29 6636; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6637; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6638; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v25, v29 6639; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v27, v29 6640; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v21, v29 6641; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v23, v29 6642; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, v29 6643; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, v29 6644; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v29 6645; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, v29 6646; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v29 6647; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v29 6648; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 6649; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v0 6650; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6651; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v5 6652; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v5 6653; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v0 6654; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v1 6655; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v1 6656; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v2 6657; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v2 6658; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v3 6659; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v3 6660; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v22, 16, v6 6661; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v6 6662; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v7 6663; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v7 6664; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 6665; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v4 6666; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v29 6667; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v29 6668; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v29 6669; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v29 6670; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 
0 offset:80 6671; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 6672; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112 6673; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96 6674; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6675; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 6676; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 6677; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 6678; GCN-NOHSA-VI-NEXT: s_endpgm 6679; 6680; EG-LABEL: global_zextload_v16i16_to_v16i64: 6681; EG: ; %bb.0: 6682; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6683; EG-NEXT: TEX 1 @12 6684; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 6685; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 6686; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 6687; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 6688; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 6689; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 6690; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 6691; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 6692; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 6693; EG-NEXT: CF_END 6694; EG-NEXT: Fetch clause starting at 12: 6695; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 6696; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 6697; EG-NEXT: ALU clause starting at 16: 6698; EG-NEXT: MOV * T11.X, KC0[2].Z, 6699; EG-NEXT: ALU clause starting at 17: 6700; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 6701; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6702; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 6703; EG-NEXT: MOV T13.Y, 0.0, 6704; EG-NEXT: LSHR T14.Z, T12.Z, literal.y, 6705; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x, 6706; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6707; EG-NEXT: MOV T14.Y, 0.0, 6708; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 6709; EG-NEXT: 
16(2.242078e-44), 0(0.000000e+00) 6710; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 6711; EG-NEXT: MOV T15.Y, 0.0, 6712; EG-NEXT: LSHR T12.Z, T12.X, literal.y, 6713; EG-NEXT: AND_INT * T12.X, T12.X, literal.x, 6714; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6715; EG-NEXT: MOV T12.Y, 0.0, 6716; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 6717; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6718; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 6719; EG-NEXT: MOV T16.Y, 0.0, 6720; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 6721; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 6722; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6723; EG-NEXT: MOV T17.Y, 0.0, 6724; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 6725; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6726; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 6727; EG-NEXT: MOV T18.Y, 0.0, 6728; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 6729; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 6730; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6731; EG-NEXT: MOV T11.Y, 0.0, 6732; EG-NEXT: MOV T13.W, 0.0, 6733; EG-NEXT: MOV * T14.W, 0.0, 6734; EG-NEXT: MOV T15.W, 0.0, 6735; EG-NEXT: MOV * T12.W, 0.0, 6736; EG-NEXT: MOV T16.W, 0.0, 6737; EG-NEXT: MOV * T17.W, 0.0, 6738; EG-NEXT: MOV T18.W, 0.0, 6739; EG-NEXT: MOV * T11.W, 0.0, 6740; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 6741; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6742; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6743; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6744; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6745; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6746; EG-NEXT: LSHR T21.X, PV.W, literal.x, 6747; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6748; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6749; EG-NEXT: LSHR T22.X, PV.W, literal.x, 6750; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6751; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6752; EG-NEXT: LSHR T23.X, PV.W, literal.x, 6753; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6754; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6755; EG-NEXT: 
LSHR T24.X, PV.W, literal.x, 6756; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6757; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6758; EG-NEXT: LSHR T25.X, PV.W, literal.x, 6759; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6760; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6761; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 6762; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6763; 6764; CM-LABEL: global_zextload_v16i16_to_v16i64: 6765; CM: ; %bb.0: 6766; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6767; CM-NEXT: TEX 1 @12 6768; CM-NEXT: ALU 64, @17, KC0[CB0:0-32], KC1[] 6769; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T26.X 6770; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T14, T25.X 6771; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T24.X 6772; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T23.X 6773; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T22.X 6774; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T21.X 6775; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T18, T20.X 6776; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T19.X 6777; CM-NEXT: CF_END 6778; CM-NEXT: Fetch clause starting at 12: 6779; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 6780; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 6781; CM-NEXT: ALU clause starting at 16: 6782; CM-NEXT: MOV * T11.X, KC0[2].Z, 6783; CM-NEXT: ALU clause starting at 17: 6784; CM-NEXT: LSHR * T13.Z, T12.X, literal.x, 6785; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6786; CM-NEXT: AND_INT T13.X, T12.X, literal.x, 6787; CM-NEXT: MOV T13.Y, 0.0, 6788; CM-NEXT: LSHR * T14.Z, T12.Y, literal.y, 6789; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6790; CM-NEXT: AND_INT T14.X, T12.Y, literal.x, 6791; CM-NEXT: MOV T14.Y, 0.0, 6792; CM-NEXT: LSHR * T15.Z, T12.Z, literal.y, 6793; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6794; CM-NEXT: AND_INT T15.X, T12.Z, literal.x, 6795; CM-NEXT: MOV T15.Y, 0.0, 6796; CM-NEXT: LSHR * T12.Z, T12.W, literal.y, 6797; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6798; CM-NEXT: AND_INT T12.X, T12.W, literal.x, 6799; 
CM-NEXT: MOV T12.Y, 0.0, 6800; CM-NEXT: LSHR * T16.Z, T11.X, literal.y, 6801; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6802; CM-NEXT: AND_INT T16.X, T11.X, literal.x, 6803; CM-NEXT: MOV T16.Y, 0.0, 6804; CM-NEXT: LSHR * T17.Z, T11.Y, literal.y, 6805; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6806; CM-NEXT: AND_INT T17.X, T11.Y, literal.x, 6807; CM-NEXT: MOV T17.Y, 0.0, 6808; CM-NEXT: LSHR * T18.Z, T11.Z, literal.y, 6809; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6810; CM-NEXT: AND_INT T18.X, T11.Z, literal.x, 6811; CM-NEXT: MOV T18.Y, 0.0, 6812; CM-NEXT: LSHR * T11.Z, T11.W, literal.y, 6813; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6814; CM-NEXT: AND_INT T11.X, T11.W, literal.x, 6815; CM-NEXT: MOV T11.Y, 0.0, 6816; CM-NEXT: MOV * T13.W, 0.0, 6817; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 6818; CM-NEXT: MOV * T14.W, 0.0, 6819; CM-NEXT: MOV * T15.W, 0.0, 6820; CM-NEXT: MOV * T12.W, 0.0, 6821; CM-NEXT: MOV * T16.W, 0.0, 6822; CM-NEXT: MOV * T17.W, 0.0, 6823; CM-NEXT: MOV * T18.W, 0.0, 6824; CM-NEXT: MOV * T11.W, 0.0, 6825; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6826; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 6827; CM-NEXT: LSHR T19.X, PV.W, literal.x, 6828; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6829; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6830; CM-NEXT: LSHR T20.X, PV.W, literal.x, 6831; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6832; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6833; CM-NEXT: LSHR T21.X, PV.W, literal.x, 6834; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6835; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6836; CM-NEXT: LSHR T22.X, PV.W, literal.x, 6837; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6838; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6839; CM-NEXT: LSHR T23.X, PV.W, literal.x, 6840; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6841; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6842; CM-NEXT: LSHR T24.X, PV.W, literal.x, 6843; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6844; CM-NEXT: 
2(2.802597e-45), 16(2.242078e-44) 6845; CM-NEXT: LSHR * T25.X, PV.W, literal.x, 6846; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6847; CM-NEXT: LSHR * T26.X, KC0[2].Y, literal.x, 6848; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6849 %load = load <16 x i16>, ptr addrspace(1) %in 6850 %ext = zext <16 x i16> %load to <16 x i64> 6851 store <16 x i64> %ext, ptr addrspace(1) %out 6852 ret void 6853} 6854 6855define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 6856; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64: 6857; GCN-NOHSA-SI: ; %bb.0: 6858; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 6859; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6860; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6861; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6862; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6863; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6864; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6865; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6866; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6867; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6868; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6869; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6870; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6871; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v7 6872; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, v3 6873; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v4 6874; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 6875; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 6876; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v0, 0, 16 6877; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v2, 0, 16 6878; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[0:1], 48 6879; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v1, 0, 16 6880; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[2:3], v[2:3], 48 6881; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v10, 0, 16 6882; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v4, 0, 16 6883; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v14, 0, 16 6884; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v13, 0, 16 6885; GCN-NOHSA-SI-NEXT: v_bfe_i32 
v22, v11, 0, 16 6886; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v9, 0, 16 6887; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[25:26], v[6:7], 48 6888; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23 6889; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112 6890; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6891; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[26:27], v[4:5], 48 6892; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v5, 0, 16 6893; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 6894; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v6, 0, 16 6895; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v1, 0, 16 6896; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6897; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6898; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 31, v20 6899; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6900; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 6901; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6902; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v24 6903; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6904; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 6905; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 6906; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6907; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80 6908; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6909; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 6910; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96 6911; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 6912; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 6913; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 6914; GCN-NOHSA-SI-NEXT: s_endpgm 6915; 6916; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i64: 6917; GCN-HSA: ; %bb.0: 6918; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 6919; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6920; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6921; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6922; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] 6923; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 6924; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 6925; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6926; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6927; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6928; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6929; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6930; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 6931; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 6932; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6933; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6934; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 6935; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 6936; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 6937; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6938; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6939; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6940; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 6941; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6942; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 6943; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 6944; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6945; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6946; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 6947; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 6948; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 6949; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 6950; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6951; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 6952; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 6953; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6954; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 6955; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1 6956; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 6957; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0 6958; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6959; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[4:5], 48 6960; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16 6961; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6962; GCN-HSA-NEXT: v_mov_b32_e32 v5, v7 6963; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6964; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v6 6965; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16 6966; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[6:7], 48 6967; 
GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6968; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4 6969; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 6970; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16 6971; GCN-HSA-NEXT: v_bfe_i32 v9, v16, 0, 16 6972; GCN-HSA-NEXT: v_bfe_i32 v4, v4, 0, 16 6973; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7 6974; GCN-HSA-NEXT: v_bfe_i32 v6, v17, 0, 16 6975; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9 6976; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6977; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[7:10] 6978; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 6979; GCN-HSA-NEXT: v_mov_b32_e32 v11, v3 6980; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6981; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v2 6982; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6983; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 6984; GCN-HSA-NEXT: v_ashr_i64 v[14:15], v[0:1], 48 6985; GCN-HSA-NEXT: v_bfe_i32 v12, v1, 0, 16 6986; GCN-HSA-NEXT: v_bfe_i32 v8, v0, 0, 16 6987; GCN-HSA-NEXT: v_bfe_i32 v4, v2, 0, 16 6988; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[2:3], 48 6989; GCN-HSA-NEXT: v_bfe_i32 v10, v17, 0, 16 6990; GCN-HSA-NEXT: v_bfe_i32 v6, v16, 0, 16 6991; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6992; GCN-HSA-NEXT: v_bfe_i32 v0, v11, 0, 16 6993; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6994; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6995; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6996; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6997; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6998; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15] 6999; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 7000; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 7001; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 7002; GCN-HSA-NEXT: s_endpgm 7003; 7004; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i64: 7005; GCN-NOHSA-VI: ; %bb.0: 7006; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 7007; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7008; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7009; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 7010; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 7011; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7012; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 7013; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 7014; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 7015; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 7016; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 7017; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 7018; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 7019; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16 7020; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 7021; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 7022; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7023; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16 7024; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v2 7025; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v1, 0, 16 7026; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7027; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v5 7028; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v1, 0, 16 7029; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 7030; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v3 7031; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 7032; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v20, 16, v6 7033; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v7 7034; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 7035; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v5, 0, 16 7036; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v6, 0, 16 7037; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16 7038; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16 7039; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v9, 0, 16 7040; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v11, 0, 16 7041; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v13, 0, 16 7042; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v3, 0, 16 7043; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 7044; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 7045; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v22, 0, 16 7046; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16 7047; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v20, 0, 16 7048; GCN-NOHSA-VI-NEXT: 
v_ashrrev_i32_e32 v5, 31, v4 7049; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 7050; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80 7051; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 7052; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7053; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 7054; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27 7055; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 7056; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 7057; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 7058; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 7059; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21 7060; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29 7061; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23 7062; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25 7063; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 7064; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112 7065; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:96 7066; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48 7067; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 7068; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7069; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 7070; GCN-NOHSA-VI-NEXT: s_endpgm 7071; 7072; EG-LABEL: global_sextload_v16i16_to_v16i64: 7073; EG: ; %bb.0: 7074; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7075; EG-NEXT: TEX 1 @12 7076; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7077; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 7078; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 7079; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 7080; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 7081; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 7082; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 7083; 
EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 7084; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 7085; EG-NEXT: CF_END 7086; EG-NEXT: Fetch clause starting at 12: 7087; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 7088; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 7089; EG-NEXT: ALU clause starting at 16: 7090; EG-NEXT: MOV * T11.X, KC0[2].Z, 7091; EG-NEXT: ALU clause starting at 17: 7092; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 7093; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7094; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7095; EG-NEXT: LSHR T14.X, PV.W, literal.x, 7096; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7097; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7098; EG-NEXT: LSHR T15.X, PV.W, literal.x, 7099; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7100; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7101; EG-NEXT: LSHR T16.X, PV.W, literal.x, 7102; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7103; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7104; EG-NEXT: LSHR T17.X, PV.W, literal.x, 7105; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7106; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7107; EG-NEXT: LSHR T18.X, PV.W, literal.x, 7108; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 7109; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 7110; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7111; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7112; EG-NEXT: LSHR T20.X, PV.W, literal.x, 7113; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 7114; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 7115; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7116; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7117; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x, 7118; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 7119; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 7120; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7121; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 7122; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7123; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 7124; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 
7125; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7126; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x, 7127; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 7128; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 7129; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 7130; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7131; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 7132; EG-NEXT: ASHR T22.Y, PV.X, literal.y, 7133; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 7134; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 7135; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7136; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 7137; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 7138; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 7139; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 7140; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7141; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 7142; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 7143; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 7144; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 7145; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7146; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 7147; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 7148; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 7149; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7150; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x, 7151; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 7152; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7153; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7154; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7155; EG-NEXT: LSHR T12.X, PV.W, literal.x, 7156; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 7157; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7158; 7159; CM-LABEL: global_sextload_v16i16_to_v16i64: 7160; CM: ; %bb.0: 7161; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7162; CM-NEXT: TEX 1 @12 7163; CM-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7164; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T26.X 7165; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T20.X 7166; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T18.X 7167; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD 
T23, T17.X 7168; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T16.X 7169; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T15.X 7170; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T14.X 7171; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T13.X 7172; CM-NEXT: CF_END 7173; CM-NEXT: Fetch clause starting at 12: 7174; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 7175; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 7176; CM-NEXT: ALU clause starting at 16: 7177; CM-NEXT: MOV * T11.X, KC0[2].Z, 7178; CM-NEXT: ALU clause starting at 17: 7179; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7180; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7181; CM-NEXT: LSHR T13.X, PV.W, literal.x, 7182; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7183; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7184; CM-NEXT: LSHR T14.X, PV.W, literal.x, 7185; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7186; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7187; CM-NEXT: LSHR T15.X, PV.W, literal.x, 7188; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7189; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7190; CM-NEXT: LSHR T16.X, PV.W, literal.x, 7191; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7192; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7193; CM-NEXT: LSHR T17.X, PV.W, literal.x, 7194; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7195; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7196; CM-NEXT: LSHR T18.X, PV.W, literal.x, 7197; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 7198; CM-NEXT: ASHR * T19.W, T11.W, literal.z, 7199; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7200; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7201; CM-NEXT: LSHR T20.X, PV.Z, literal.x, 7202; CM-NEXT: ASHR T19.Z, T11.W, literal.y, 7203; CM-NEXT: ASHR * T21.W, T11.Z, literal.z, 7204; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7205; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7206; CM-NEXT: BFE_INT T19.X, T11.W, 0.0, literal.x, 7207; CM-NEXT: ASHR T21.Z, T11.Z, literal.x, 7208; CM-NEXT: ASHR * T22.W, T11.Y, literal.y, 7209; CM-NEXT: 
16(2.242078e-44), 31(4.344025e-44) 7210; CM-NEXT: BFE_INT T21.X, T11.Z, 0.0, literal.x, 7211; CM-NEXT: ASHR T19.Y, PV.X, literal.y, 7212; CM-NEXT: ASHR T22.Z, T11.Y, literal.x, 7213; CM-NEXT: ASHR * T11.W, T11.X, literal.y, 7214; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7215; CM-NEXT: BFE_INT T22.X, T11.Y, 0.0, literal.x, 7216; CM-NEXT: ASHR T21.Y, PV.X, literal.y, 7217; CM-NEXT: ASHR T11.Z, T11.X, literal.x, 7218; CM-NEXT: ASHR * T23.W, T12.W, literal.y, 7219; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7220; CM-NEXT: BFE_INT T11.X, T11.X, 0.0, literal.x, 7221; CM-NEXT: ASHR T22.Y, PV.X, literal.y, 7222; CM-NEXT: ASHR T23.Z, T12.W, literal.x, 7223; CM-NEXT: ASHR * T24.W, T12.Z, literal.y, 7224; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7225; CM-NEXT: BFE_INT T23.X, T12.W, 0.0, literal.x, 7226; CM-NEXT: ASHR T11.Y, PV.X, literal.y, 7227; CM-NEXT: ASHR T24.Z, T12.Z, literal.x, 7228; CM-NEXT: ASHR * T25.W, T12.Y, literal.y, 7229; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7230; CM-NEXT: BFE_INT T24.X, T12.Z, 0.0, literal.x, 7231; CM-NEXT: ASHR T23.Y, PV.X, literal.y, 7232; CM-NEXT: ASHR T25.Z, T12.Y, literal.x, 7233; CM-NEXT: ASHR * T12.W, T12.X, literal.y, 7234; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7235; CM-NEXT: BFE_INT T25.X, T12.Y, 0.0, literal.x, 7236; CM-NEXT: ASHR T24.Y, PV.X, literal.y, 7237; CM-NEXT: ASHR * T12.Z, T12.X, literal.x, 7238; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7239; CM-NEXT: BFE_INT T12.X, T12.X, 0.0, literal.x, 7240; CM-NEXT: ASHR * T25.Y, PV.X, literal.y, 7241; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7242; CM-NEXT: LSHR T26.X, KC0[2].Y, literal.x, 7243; CM-NEXT: ASHR * T12.Y, PV.X, literal.y, 7244; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7245 %load = load <16 x i16>, ptr addrspace(1) %in 7246 %ext = sext <16 x i16> %load to <16 x i64> 7247 store <16 x i64> %ext, ptr addrspace(1) %out 7248 ret void 7249} 7250 7251define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) 
#0 { 7252; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64: 7253; GCN-NOHSA-SI: ; %bb.0: 7254; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 7255; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 7256; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 7257; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 7258; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s11 7259; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 7260; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 7261; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 7262; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 7263; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v39, 0 7264; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 7265; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 7266; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 7267; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 7268; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 7269; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[14:17], off, s[8:11], 0 7270; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[18:21], off, s[8:11], 0 offset:16 7271; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[22:25], off, s[8:11], 0 offset:32 7272; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[26:29], off, s[8:11], 0 offset:48 7273; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 7274; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v15 7275; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v17 7276; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 7277; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v20 7278; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v16 7279; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v14 7280; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14 7281; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill 7282; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 7283; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 7284; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 7285; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v16 7286; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, v3 7287; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v15 7288; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v17 7289; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v18 7290; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v18 7291; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v20 7292; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v5 7293; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v19 7294; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v19 7295; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v21 7296; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v42, 0xffff, v21 7297; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(5) 7298; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v22 7299; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v46, 0xffff, v22 7300; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v22, 16, v24 7301; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v24 7302; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v23 7303; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v50, 0xffff, v23 7304; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v25 7305; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v54, 0xffff, v25 7306; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(4) 7307; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v40, 16, v29 7308; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v60, 16, v26 7309; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, 0xffff, v26 7310; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v26, 16, v28 7311; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v28 7312; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) 7313; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v27 7314; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v27 7315; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, 0xffff, v29 7316; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v39 7317; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v39 7318; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7319; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v39 7320; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v55, v39 7321; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v57, v39 7322; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v51, v39 7323; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v53, v39 
7324; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v43, v39 7325; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v45, v39 7326; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v39 7327; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v39 7328; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v35, v39 7329; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v37, v39 7330; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v39 7331; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v39 7332; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, v39 7333; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v39 7334; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v59, v39 7335; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v61, v39 7336; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, v39 7337; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v39 7338; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v47, v39 7339; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v49, v39 7340; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v39 7341; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v39 7342; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v39 7343; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v39 7344; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39 7345; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39 7346; GCN-NOHSA-SI-NEXT: buffer_store_dword v12, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 7347; GCN-NOHSA-SI-NEXT: buffer_store_dword v13, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill 7348; GCN-NOHSA-SI-NEXT: buffer_store_dword v14, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill 7349; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill 7350; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(3) 7351; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload 7352; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) 7353; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 7354; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) 7355; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 7356; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7357; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded 
Reload 7358; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 7359; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39 7360; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7361; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39 7362; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 7363; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 7364; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:240 7365; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 7366; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176 7367; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144 7368; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112 7369; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 7370; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:48 7371; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:16 7372; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:224 7373; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192 7374; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 7375; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128 7376; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96 7377; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64 7378; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 7379; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload 7380; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload 7381; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload 7382; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7383; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 7384; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 
7385; GCN-NOHSA-SI-NEXT: s_endpgm 7386; 7387; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64: 7388; GCN-HSA: ; %bb.0: 7389; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 7390; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7391; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 7392; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 7393; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 7394; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 7395; GCN-HSA-NEXT: flat_load_dwordx4 v[2:5], v[0:1] 7396; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 7397; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 7398; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 7399; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 7400; GCN-HSA-NEXT: flat_load_dwordx4 v[6:9], v[0:1] 7401; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 7402; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 7403; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 7404; GCN-HSA-NEXT: flat_load_dwordx4 v[10:13], v[0:1] 7405; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 7406; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 7407; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 7408; GCN-HSA-NEXT: flat_load_dwordx4 v[14:17], v[0:1] 7409; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 7410; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7411; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 7412; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 7413; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xf0 7414; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7415; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xd0 7416; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 7417; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xb0 7418; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7419; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x90 7420; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 7421; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x70 7422; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 7423; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15 7424; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 7425; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14 7426; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x50 7427; GCN-HSA-NEXT: v_mov_b32_e32 v19, v1 7428; GCN-HSA-NEXT: v_mov_b32_e32 v21, v1 7429; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 7430; GCN-HSA-NEXT: v_mov_b32_e32 v24, v1 7431; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 7432; GCN-HSA-NEXT: 
v_lshrrev_b32_e32 v20, 16, v5 7433; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5 7434; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] 7435; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15 7436; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14 7437; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v3 7438; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v3 7439; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] 7440; GCN-HSA-NEXT: v_mov_b32_e32 v23, s11 7441; GCN-HSA-NEXT: v_mov_b32_e32 v22, s10 7442; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 7443; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v9 7444; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v9 7445; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] 7446; GCN-HSA-NEXT: v_mov_b32_e32 v23, s13 7447; GCN-HSA-NEXT: v_mov_b32_e32 v22, s12 7448; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v7 7449; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v7 7450; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] 7451; GCN-HSA-NEXT: v_mov_b32_e32 v23, s5 7452; GCN-HSA-NEXT: v_mov_b32_e32 v22, s4 7453; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 7454; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v11 7455; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v11 7456; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] 7457; GCN-HSA-NEXT: v_mov_b32_e32 v22, s7 7458; GCN-HSA-NEXT: v_mov_b32_e32 v18, v1 7459; GCN-HSA-NEXT: v_mov_b32_e32 v20, v1 7460; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6 7461; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 7462; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v17 7463; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v17 7464; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] 7465; GCN-HSA-NEXT: v_mov_b32_e32 v22, s9 7466; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 7467; GCN-HSA-NEXT: v_mov_b32_e32 v21, s8 7468; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v15 7469; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v15 7470; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 7471; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] 7472; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5 7473; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4 7474; GCN-HSA-NEXT: 
v_lshrrev_b32_e32 v19, 16, v12 7475; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v12 7476; GCN-HSA-NEXT: v_mov_b32_e32 v12, s1 7477; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0 7478; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] 7479; GCN-HSA-NEXT: v_mov_b32_e32 v11, s0 7480; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v10 7481; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v10 7482; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 7483; GCN-HSA-NEXT: flat_store_dwordx4 v[11:12], v[17:20] 7484; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v16 7485; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v16 7486; GCN-HSA-NEXT: v_mov_b32_e32 v16, s5 7487; GCN-HSA-NEXT: v_mov_b32_e32 v10, v1 7488; GCN-HSA-NEXT: v_mov_b32_e32 v12, v1 7489; GCN-HSA-NEXT: v_mov_b32_e32 v15, s4 7490; GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[9:12] 7491; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v13 7492; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v2 7493; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v2 7494; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v13 7495; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 7496; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 7497; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 7498; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 7499; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 7500; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7501; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 7502; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v14 7503; GCN-HSA-NEXT: v_and_b32_e32 v21, 0xffff, v14 7504; GCN-HSA-NEXT: v_mov_b32_e32 v22, v1 7505; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 7506; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 7507; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[21:24] 7508; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7509; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 7510; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v8 7511; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v8 7512; GCN-HSA-NEXT: v_mov_b32_e32 v19, v1 7513; GCN-HSA-NEXT: v_mov_b32_e32 v21, v1 7514; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 7515; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 7516; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[18:21] 7517; 
GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7518; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 7519; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 7520; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 7521; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7522; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6 7523; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v6 7524; GCN-HSA-NEXT: v_mov_b32_e32 v16, v1 7525; GCN-HSA-NEXT: v_mov_b32_e32 v18, v1 7526; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 7527; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[15:18] 7528; GCN-HSA-NEXT: v_mov_b32_e32 v6, v1 7529; GCN-HSA-NEXT: v_mov_b32_e32 v8, v1 7530; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 7531; GCN-HSA-NEXT: v_mov_b32_e32 v12, v1 7532; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 7533; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 7534; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v4 7535; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v4 7536; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 7537; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 7538; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[5:8] 7539; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[9:12] 7540; GCN-HSA-NEXT: s_endpgm 7541; 7542; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64: 7543; GCN-NOHSA-VI: ; %bb.0: 7544; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 7545; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7546; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7547; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 7548; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 7549; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7550; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 7551; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 7552; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[7:10], off, s[8:11], 0 7553; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:16 7554; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 7555; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 7556; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 7557; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v10 7558; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7559; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v14 7560; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v17, 0xffff, v14 
7561; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v13 7562; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v21, 0xffff, v13 7563; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v16 7564; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v25, 0xffff, v16 7565; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v15 7566; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v15 7567; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:32 7568; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:48 7569; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v8 7570; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v8 7571; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v7 7572; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, 0xffff, v7 7573; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v8, 16, v10 7574; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v9 7575; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v9 7576; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 7577; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v40, 16, v13 7578; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v13 7579; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v46, 16, v15 7580; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v44, 0xffff, v15 7581; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7582; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v32 7583; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v13, 0xffff, v32 7584; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v52, 16, v34 7585; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v50, 0xffff, v34 7586; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v34, 16, v33 7587; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v33 7588; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v33, 0 7589; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v51, v33 7590; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v53, v33 7591; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v49, 16, v31 7592; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v47, 0xffff, v31 7593; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:240 7594; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v48, v33 7595; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v50, v33 7596; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:192 7597; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v45, v33 7598; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v47, v33 7599; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v43, 16, v16 7600; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v41, 0xffff, v16 7601; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:160 7602; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v42, v33 7603; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v44, v33 7604; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[41:44], off, s[0:3], 0 offset:176 7605; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v39, v33 7606; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v41, v33 7607; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v29, v33 7608; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v31, v33 7609; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v14 7610; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v35, 0xffff, v14 7611; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, v33 7612; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, v33 7613; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:128 7614; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v36, v33 7615; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v38, v33 7616; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:96 7617; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v26, v33 7618; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v28, v33 7619; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:208 7620; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:144 7621; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v33 7622; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v35, v33 7623; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v24, v33 7624; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v33 7625; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v20, v33 7626; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v33 7627; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v33 7628; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v33 7629; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v33 7630; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v33 7631; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 
offset:112 7632; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:224 7633; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:64 7634; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:80 7635; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:32 7636; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:48 7637; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v33 7638; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v33 7639; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 7640; GCN-NOHSA-VI-NEXT: s_nop 0 7641; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v33 7642; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7643; GCN-NOHSA-VI-NEXT: s_endpgm 7644; 7645; EG-LABEL: global_zextload_v32i16_to_v32i64: 7646; EG: ; %bb.0: 7647; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7648; EG-NEXT: TEX 2 @22 7649; EG-NEXT: ALU 33, @31, KC0[], KC1[] 7650; EG-NEXT: TEX 0 @28 7651; EG-NEXT: ALU 93, @65, KC0[CB0:0-32], KC1[] 7652; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 7653; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 7654; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 7655; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0 7656; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0 7657; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 7658; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 7659; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 7660; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 7661; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 7662; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 7663; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 7664; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 7665; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 7666; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 7667; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T29.XYZW, T35.X, 1 7668; EG-NEXT: CF_END 7669; EG-NEXT: Fetch clause starting at 22: 7670; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 7671; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 7672; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 7673; EG-NEXT: Fetch clause starting at 28: 7674; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 7675; EG-NEXT: ALU clause starting at 30: 7676; EG-NEXT: MOV * T19.X, KC0[2].Z, 7677; EG-NEXT: ALU clause starting at 31: 7678; EG-NEXT: LSHR * T23.Z, T20.Z, literal.x, 7679; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7680; EG-NEXT: AND_INT T23.X, T20.Z, literal.x, 7681; EG-NEXT: MOV T23.Y, 0.0, 7682; EG-NEXT: LSHR T24.Z, T20.W, literal.y, 7683; EG-NEXT: AND_INT * T24.X, T20.W, literal.x, 7684; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7685; EG-NEXT: MOV T24.Y, 0.0, 7686; EG-NEXT: LSHR * T25.Z, T20.X, literal.x, 7687; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7688; EG-NEXT: AND_INT T25.X, T20.X, literal.x, 7689; EG-NEXT: MOV T25.Y, 0.0, 7690; EG-NEXT: LSHR T20.Z, T20.Y, literal.y, 7691; EG-NEXT: AND_INT * T20.X, T20.Y, literal.x, 7692; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7693; EG-NEXT: MOV T20.Y, 0.0, 7694; EG-NEXT: LSHR * T26.Z, T22.Z, literal.x, 7695; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7696; EG-NEXT: AND_INT T26.X, T22.Z, literal.x, 7697; EG-NEXT: MOV T26.Y, 0.0, 7698; EG-NEXT: LSHR T27.Z, T22.W, literal.y, 7699; EG-NEXT: AND_INT * T27.X, T22.W, literal.x, 7700; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7701; EG-NEXT: MOV T27.Y, 0.0, 7702; EG-NEXT: LSHR * T28.Z, T22.X, literal.x, 7703; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7704; EG-NEXT: AND_INT T28.X, T22.X, literal.x, 7705; EG-NEXT: MOV T28.Y, 0.0, 7706; EG-NEXT: LSHR T22.Z, T22.Y, literal.y, 7707; EG-NEXT: AND_INT * T22.X, T22.Y, literal.x, 7708; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7709; EG-NEXT: MOV T22.Y, 0.0, 7710; EG-NEXT: LSHR * T19.Z, T21.Z, literal.x, 7711; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7712; EG-NEXT: ALU 
clause starting at 65: 7713; EG-NEXT: AND_INT T19.X, T21.Z, literal.x, 7714; EG-NEXT: MOV T19.Y, 0.0, 7715; EG-NEXT: LSHR T30.Z, T21.W, literal.y, 7716; EG-NEXT: AND_INT * T30.X, T21.W, literal.x, 7717; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7718; EG-NEXT: MOV T30.Y, 0.0, 7719; EG-NEXT: LSHR * T31.Z, T21.X, literal.x, 7720; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7721; EG-NEXT: AND_INT T31.X, T21.X, literal.x, 7722; EG-NEXT: MOV T31.Y, 0.0, 7723; EG-NEXT: LSHR T21.Z, T21.Y, literal.y, 7724; EG-NEXT: AND_INT * T21.X, T21.Y, literal.x, 7725; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7726; EG-NEXT: MOV T21.Y, 0.0, 7727; EG-NEXT: LSHR * T32.Z, T29.Z, literal.x, 7728; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7729; EG-NEXT: AND_INT T32.X, T29.Z, literal.x, 7730; EG-NEXT: MOV T32.Y, 0.0, 7731; EG-NEXT: LSHR T33.Z, T29.W, literal.y, 7732; EG-NEXT: AND_INT * T33.X, T29.W, literal.x, 7733; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7734; EG-NEXT: MOV T33.Y, 0.0, 7735; EG-NEXT: LSHR * T34.Z, T29.X, literal.x, 7736; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7737; EG-NEXT: AND_INT T34.X, T29.X, literal.x, 7738; EG-NEXT: MOV T34.Y, 0.0, 7739; EG-NEXT: LSHR T29.Z, T29.Y, literal.y, 7740; EG-NEXT: AND_INT * T29.X, T29.Y, literal.x, 7741; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7742; EG-NEXT: MOV T29.Y, 0.0, 7743; EG-NEXT: MOV T23.W, 0.0, 7744; EG-NEXT: MOV * T24.W, 0.0, 7745; EG-NEXT: MOV T25.W, 0.0, 7746; EG-NEXT: MOV * T20.W, 0.0, 7747; EG-NEXT: MOV T26.W, 0.0, 7748; EG-NEXT: MOV * T27.W, 0.0, 7749; EG-NEXT: MOV T28.W, 0.0, 7750; EG-NEXT: MOV * T22.W, 0.0, 7751; EG-NEXT: MOV T19.W, 0.0, 7752; EG-NEXT: MOV * T30.W, 0.0, 7753; EG-NEXT: MOV T31.W, 0.0, 7754; EG-NEXT: MOV * T21.W, 0.0, 7755; EG-NEXT: MOV T32.W, 0.0, 7756; EG-NEXT: MOV * T33.W, 0.0, 7757; EG-NEXT: MOV T34.W, 0.0, 7758; EG-NEXT: MOV * T29.W, 0.0, 7759; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7760; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7761; EG-NEXT: LSHR T35.X, PV.W, literal.x, 
7762; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x, 7763; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7764; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7765; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 7766; EG-NEXT: LSHR T37.X, PV.W, literal.x, 7767; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7768; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7769; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7770; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7771; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7772; EG-NEXT: LSHR T39.X, PV.W, literal.x, 7773; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7774; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7775; EG-NEXT: LSHR T40.X, PV.W, literal.x, 7776; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7777; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7778; EG-NEXT: LSHR T41.X, PV.W, literal.x, 7779; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7780; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7781; EG-NEXT: LSHR T42.X, PV.W, literal.x, 7782; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7783; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7784; EG-NEXT: LSHR T43.X, PV.W, literal.x, 7785; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7786; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7787; EG-NEXT: LSHR T44.X, PV.W, literal.x, 7788; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7789; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7790; EG-NEXT: LSHR T45.X, PV.W, literal.x, 7791; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7792; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7793; EG-NEXT: LSHR T46.X, PV.W, literal.x, 7794; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7795; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7796; EG-NEXT: LSHR T47.X, PV.W, literal.x, 7797; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7798; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7799; EG-NEXT: LSHR T48.X, PV.W, literal.x, 7800; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7801; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7802; EG-NEXT: LSHR T49.X, PV.W, literal.x, 7803; 
EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7804; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7805; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 7806; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7807; 7808; CM-LABEL: global_zextload_v32i16_to_v32i64: 7809; CM: ; %bb.0: 7810; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7811; CM-NEXT: TEX 2 @22 7812; CM-NEXT: ALU 33, @31, KC0[], KC1[] 7813; CM-NEXT: TEX 0 @28 7814; CM-NEXT: ALU 94, @65, KC0[CB0:0-32], KC1[] 7815; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T50.X 7816; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T49.X 7817; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T48.X 7818; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T26, T47.X 7819; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T46.X 7820; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T45.X 7821; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T44.X 7822; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T43.X 7823; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T42.X 7824; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T41.X 7825; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T40.X 7826; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T39.X 7827; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T38.X 7828; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T37.X 7829; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T34, T36.X 7830; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T23.X 7831; CM-NEXT: CF_END 7832; CM-NEXT: Fetch clause starting at 22: 7833; CM-NEXT: VTX_READ_128 T21.XYZW, T20.X, 0, #1 7834; CM-NEXT: VTX_READ_128 T22.XYZW, T20.X, 32, #1 7835; CM-NEXT: VTX_READ_128 T23.XYZW, T20.X, 16, #1 7836; CM-NEXT: Fetch clause starting at 28: 7837; CM-NEXT: VTX_READ_128 T23.XYZW, T20.X, 48, #1 7838; CM-NEXT: ALU clause starting at 30: 7839; CM-NEXT: MOV * T20.X, KC0[2].Z, 7840; CM-NEXT: ALU clause starting at 31: 7841; CM-NEXT: LSHR * T19.Z, T21.Y, literal.x, 7842; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7843; CM-NEXT: AND_INT T19.X, T21.Y, literal.x, 7844; CM-NEXT: MOV T19.Y, 0.0, 7845; CM-NEXT: LSHR * T24.Z, T21.X, 
literal.y, 7846; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7847; CM-NEXT: AND_INT T24.X, T21.X, literal.x, 7848; CM-NEXT: MOV T24.Y, 0.0, 7849; CM-NEXT: LSHR * T25.Z, T21.W, literal.y, 7850; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7851; CM-NEXT: AND_INT T25.X, T21.W, literal.x, 7852; CM-NEXT: MOV T25.Y, 0.0, 7853; CM-NEXT: LSHR * T26.Z, T21.Z, literal.y, 7854; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7855; CM-NEXT: AND_INT T26.X, T21.Z, literal.x, 7856; CM-NEXT: MOV T26.Y, 0.0, 7857; CM-NEXT: LSHR * T21.Z, T23.Y, literal.y, 7858; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7859; CM-NEXT: AND_INT T21.X, T23.Y, literal.x, 7860; CM-NEXT: MOV T21.Y, 0.0, 7861; CM-NEXT: LSHR * T27.Z, T23.X, literal.y, 7862; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7863; CM-NEXT: AND_INT T27.X, T23.X, literal.x, 7864; CM-NEXT: MOV T27.Y, 0.0, 7865; CM-NEXT: LSHR * T28.Z, T23.W, literal.y, 7866; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7867; CM-NEXT: AND_INT T28.X, T23.W, literal.x, 7868; CM-NEXT: MOV T28.Y, 0.0, 7869; CM-NEXT: LSHR * T29.Z, T23.Z, literal.y, 7870; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7871; CM-NEXT: AND_INT T29.X, T23.Z, literal.x, 7872; CM-NEXT: MOV T29.Y, 0.0, 7873; CM-NEXT: LSHR * T20.Z, T22.Y, literal.y, 7874; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7875; CM-NEXT: ALU clause starting at 65: 7876; CM-NEXT: AND_INT T20.X, T22.Y, literal.x, 7877; CM-NEXT: MOV T20.Y, 0.0, 7878; CM-NEXT: LSHR * T30.Z, T22.X, literal.y, 7879; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7880; CM-NEXT: AND_INT T30.X, T22.X, literal.x, 7881; CM-NEXT: MOV T30.Y, 0.0, 7882; CM-NEXT: LSHR * T31.Z, T22.W, literal.y, 7883; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7884; CM-NEXT: AND_INT T31.X, T22.W, literal.x, 7885; CM-NEXT: MOV T31.Y, 0.0, 7886; CM-NEXT: LSHR * T32.Z, T22.Z, literal.y, 7887; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7888; CM-NEXT: AND_INT T32.X, T22.Z, literal.x, 7889; CM-NEXT: MOV T32.Y, 0.0, 7890; CM-NEXT: LSHR * 
T22.Z, T23.Y, literal.y, 7891; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7892; CM-NEXT: AND_INT T22.X, T23.Y, literal.x, 7893; CM-NEXT: MOV T22.Y, 0.0, 7894; CM-NEXT: LSHR * T33.Z, T23.X, literal.y, 7895; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7896; CM-NEXT: AND_INT T33.X, T23.X, literal.x, 7897; CM-NEXT: MOV T33.Y, 0.0, 7898; CM-NEXT: LSHR * T34.Z, T23.W, literal.y, 7899; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7900; CM-NEXT: AND_INT T34.X, T23.W, literal.x, 7901; CM-NEXT: MOV T34.Y, 0.0, 7902; CM-NEXT: LSHR * T35.Z, T23.Z, literal.y, 7903; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7904; CM-NEXT: AND_INT T35.X, T23.Z, literal.x, 7905; CM-NEXT: MOV T35.Y, 0.0, 7906; CM-NEXT: MOV * T19.W, 0.0, 7907; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 7908; CM-NEXT: MOV * T24.W, 0.0, 7909; CM-NEXT: MOV * T25.W, 0.0, 7910; CM-NEXT: MOV * T26.W, 0.0, 7911; CM-NEXT: MOV * T21.W, 0.0, 7912; CM-NEXT: MOV * T27.W, 0.0, 7913; CM-NEXT: MOV * T28.W, 0.0, 7914; CM-NEXT: MOV * T29.W, 0.0, 7915; CM-NEXT: MOV * T20.W, 0.0, 7916; CM-NEXT: MOV * T30.W, 0.0, 7917; CM-NEXT: MOV * T31.W, 0.0, 7918; CM-NEXT: MOV * T32.W, 0.0, 7919; CM-NEXT: MOV * T22.W, 0.0, 7920; CM-NEXT: MOV * T33.W, 0.0, 7921; CM-NEXT: MOV * T34.W, 0.0, 7922; CM-NEXT: MOV * T35.W, 0.0, 7923; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7924; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 7925; CM-NEXT: LSHR T23.X, PV.W, literal.x, 7926; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7927; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7928; CM-NEXT: LSHR T36.X, PV.W, literal.x, 7929; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7930; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7931; CM-NEXT: LSHR T37.X, PV.W, literal.x, 7932; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7933; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7934; CM-NEXT: LSHR T38.X, PV.W, literal.x, 7935; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7936; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7937; CM-NEXT: LSHR T39.X, PV.W, 
literal.x, 7938; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7939; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7940; CM-NEXT: LSHR T40.X, PV.W, literal.x, 7941; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7942; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7943; CM-NEXT: LSHR T41.X, PV.W, literal.x, 7944; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7945; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7946; CM-NEXT: LSHR T42.X, PV.W, literal.x, 7947; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7948; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7949; CM-NEXT: LSHR T43.X, PV.W, literal.x, 7950; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7951; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7952; CM-NEXT: LSHR T44.X, PV.W, literal.x, 7953; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7954; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7955; CM-NEXT: LSHR T45.X, PV.W, literal.x, 7956; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7957; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7958; CM-NEXT: LSHR T46.X, PV.W, literal.x, 7959; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7960; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7961; CM-NEXT: LSHR T47.X, PV.W, literal.x, 7962; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7963; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7964; CM-NEXT: LSHR * T48.X, PV.W, literal.x, 7965; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7966; CM-NEXT: LSHR T49.X, KC0[2].Y, literal.x, 7967; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7968; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7969; CM-NEXT: LSHR * T50.X, PV.W, literal.x, 7970; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7971 %load = load <32 x i16>, ptr addrspace(1) %in 7972 %ext = zext <32 x i16> %load to <32 x i64> 7973 store <32 x i64> %ext, ptr addrspace(1) %out 7974 ret void 7975} 7976 7977define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 7978; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64: 7979; GCN-NOHSA-SI: ; %bb.0: 7980; 
GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 7981; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 7982; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 7983; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 7984; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 7985; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 7986; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 7987; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 7988; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 7989; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 7990; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32 7991; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48 7992; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 7993; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 7994; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7995; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, v3 7996; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, v7 7997; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v11 7998; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, v15 7999; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v2 8000; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v16, 16, v4 8001; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v10 8002; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v8 8003; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v14 8004; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v22, 0, 16 8005; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[2:3], 48 8006; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8007; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:240 8008; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8009; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[0:1], 48 8010; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v1, 0, 16 8011; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8012; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:208 8013; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v12 8014; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8015; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v26, 0, 16 8016; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], 
v[6:7], 48 8017; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8018; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:176 8019; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8020; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[4:5], 48 8021; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v5, 0, 16 8022; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8023; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:144 8024; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8025; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[10:11], 48 8026; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v27, 0, 16 8027; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8028; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:112 8029; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8030; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[8:9], 48 8031; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v9, 0, 16 8032; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8033; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:80 8034; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8035; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[23:24], v[14:15], 48 8036; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v21, 0, 16 8037; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21 8038; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48 8039; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8040; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[23:24], v[12:13], 48 8041; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v13, 0, 16 8042; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21 8043; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:16 8044; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v12, 0, 16 8045; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v14, 0, 16 8046; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v3, 0, 16 8047; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v20, 0, 16 8048; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v2, 0, 16 8049; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8050; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8051; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:224 8052; GCN-NOHSA-SI-NEXT: v_bfe_i32 v9, v8, 0, 16 8053; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8054; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v10, 0, 16 8055; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v19, 0, 16 8056; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v18, 0, 16 8057; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v17, 0, 16 8058; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v16, 0, 16 8059; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v4, 0, 16 8060; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v6 8061; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v6, 0, 16 8062; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v2, 0, 16 8063; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 8064; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v0, 0, 16 8065; GCN-NOHSA-SI-NEXT: v_bfe_i32 v27, v2, 0, 16 8066; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 8067; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 8068; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8069; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8070; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8071; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21 8072; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v25 8073; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8074; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8075; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8076; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8077; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 8078; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23 8079; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v28, 31, v27 8080; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:192 8081; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:160 8082; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128 8083; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96 8084; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64 8085; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[5:8], 
off, s[0:3], 0 offset:32 8086; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 8087; GCN-NOHSA-SI-NEXT: s_endpgm 8088; 8089; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64: 8090; GCN-HSA: ; %bb.0: 8091; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 8092; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 8093; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8094; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8095; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] 8096; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 8097; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 8098; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 8099; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 8100; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 8101; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 8102; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 8103; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 8104; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 8105; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 8106; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 8107; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 8108; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 8109; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 8110; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 8111; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 8112; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8113; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 8114; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 8115; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 8116; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8117; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 8118; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 8119; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 8120; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8121; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 8122; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 8123; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 8124; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8125; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 8126; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 8127; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 8128; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8129; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 8130; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[8:9], 48 8131; GCN-HSA-NEXT: v_bfe_i32 v16, v9, 0, 16 8132; 
GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8133; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19] 8134; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 8135; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 8136; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 8137; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8138; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70 8139; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 8140; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x50 8141; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 8142; GCN-HSA-NEXT: v_mov_b32_e32 v9, v11 8143; GCN-HSA-NEXT: s_add_u32 s8, s0, 32 8144; GCN-HSA-NEXT: v_bfe_i32 v16, v9, 0, 16 8145; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[10:11], 48 8146; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 8147; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8148; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v10 8149; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8150; GCN-HSA-NEXT: v_mov_b32_e32 v21, s9 8151; GCN-HSA-NEXT: v_bfe_i32 v18, v9, 0, 16 8152; GCN-HSA-NEXT: v_bfe_i32 v16, v10, 0, 16 8153; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8 8154; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8155; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8156; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8157; GCN-HSA-NEXT: v_mov_b32_e32 v10, s3 8158; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 8159; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[0:1], 48 8160; GCN-HSA-NEXT: v_bfe_i32 v16, v1, 0, 16 8161; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8162; GCN-HSA-NEXT: v_mov_b32_e32 v1, v3 8163; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[16:19] 8164; GCN-HSA-NEXT: v_mov_b32_e32 v9, s2 8165; GCN-HSA-NEXT: v_bfe_i32 v16, v1, 0, 16 8166; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[2:3], 48 8167; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8168; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[16:19] 8169; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 8170; GCN-HSA-NEXT: v_mov_b32_e32 v3, v7 8171; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[4:5], 48 8172; GCN-HSA-NEXT: v_bfe_i32 v16, v5, 0, 16 8173; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8174; GCN-HSA-NEXT: flat_store_dwordx4 
v[9:10], v[16:19] 8175; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7 8176; GCN-HSA-NEXT: v_bfe_i32 v16, v3, 0, 16 8177; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[6:7], 48 8178; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8179; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19] 8180; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6 8181; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 8182; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[12:13], 48 8183; GCN-HSA-NEXT: v_bfe_i32 v16, v13, 0, 16 8184; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8185; GCN-HSA-NEXT: v_mov_b32_e32 v3, v15 8186; GCN-HSA-NEXT: v_mov_b32_e32 v21, s5 8187; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[16:19] 8188; GCN-HSA-NEXT: v_mov_b32_e32 v20, s4 8189; GCN-HSA-NEXT: v_bfe_i32 v16, v3, 0, 16 8190; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[14:15], 48 8191; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v8 8192; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8193; GCN-HSA-NEXT: v_bfe_i32 v8, v8, 0, 16 8194; GCN-HSA-NEXT: v_lshrrev_b32_e32 v24, 16, v14 8195; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8196; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16 8197; GCN-HSA-NEXT: v_bfe_i32 v16, v14, 0, 16 8198; GCN-HSA-NEXT: v_mov_b32_e32 v15, s1 8199; GCN-HSA-NEXT: v_lshrrev_b32_e32 v26, 16, v2 8200; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v12 8201; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8202; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 8203; GCN-HSA-NEXT: v_mov_b32_e32 v14, s0 8204; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 8205; GCN-HSA-NEXT: v_lshrrev_b32_e32 v22, 16, v0 8206; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 8207; GCN-HSA-NEXT: v_bfe_i32 v14, v18, 0, 16 8208; GCN-HSA-NEXT: v_bfe_i32 v18, v24, 0, 16 8209; GCN-HSA-NEXT: v_bfe_i32 v20, v0, 0, 16 8210; GCN-HSA-NEXT: v_bfe_i32 v24, v2, 0, 16 8211; GCN-HSA-NEXT: v_bfe_i32 v26, v26, 0, 16 8212; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8213; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8214; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 31, v24 8215; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 31, v26 8216; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s3 8217; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 8218; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v6 8219; GCN-HSA-NEXT: v_bfe_i32 v22, v22, 0, 16 8220; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 8221; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8222; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8223; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 31, v20 8224; GCN-HSA-NEXT: v_bfe_i32 v9, v23, 0, 16 8225; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 31, v22 8226; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8227; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 8228; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16 8229; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[20:23] 8230; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8231; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8232; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v4 8233; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8234; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8235; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8236; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 8237; GCN-HSA-NEXT: v_bfe_i32 v3, v4, 0, 16 8238; GCN-HSA-NEXT: v_bfe_i32 v5, v5, 0, 16 8239; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 8240; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8241; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8242; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8243; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 31, v5 8244; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8245; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 8246; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 8247; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8248; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8249; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8250; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8251; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8252; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 8253; GCN-HSA-NEXT: v_bfe_i32 v12, v12, 0, 16 8254; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 8255; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 8256; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 8257; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12 8258; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8259; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 
8260; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15] 8261; GCN-HSA-NEXT: s_endpgm 8262; 8263; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64: 8264; GCN-NOHSA-VI: ; %bb.0: 8265; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 8266; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 8267; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 8268; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 8269; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 8270; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 8271; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 8272; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 8273; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[5:8], off, s[8:11], 0 8274; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:48 8275; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[9:12], off, s[8:11], 0 offset:32 8276; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[1:4], off, s[8:11], 0 offset:16 8277; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 8278; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 8279; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 8280; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16 8281; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 8282; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v15, 0, 16 8283; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v15 8284; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v15, 0, 16 8285; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6 8286; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v16 8287; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v16 8288; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8289; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20 8290; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224 8291; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v14 8292; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v6, 0, 16 8293; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v16, 0, 16 8294; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8295; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20 8296; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:240 8297; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v13 8298; 
GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v13, 0, 16 8299; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v14, 0, 16 8300; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v15, 0, 16 8301; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8302; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8303; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v6, 0, 16 8304; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:208 8305; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8306; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 8307; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v11 8308; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20 8309; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v13, 0, 16 8310; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v11, 0, 16 8311; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:192 8312; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8313; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, v12 8314; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8315; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v12 8316; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:160 8317; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v19, 0, 16 8318; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16 8319; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v8 8320; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v8 8321; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8322; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8323; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v8, 16, v9 8324; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:176 8325; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16 8326; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v8, 0, 16 8327; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v9, 0, 16 8328; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8329; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8330; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v10 8331; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:128 8332; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v9, 0, 16 8333; 
GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v7, 0, 16 8334; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v7 8335; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v10, 0, 16 8336; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8337; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8338; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:144 8339; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 8340; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 8341; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v3, 0, 16 8342; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 8343; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v3, 0, 16 8344; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8345; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8346; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96 8347; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 8348; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v5, 0, 16 8349; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v4 8350; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v4 8351; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 8352; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v5, 0, 16 8353; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v4, 0, 16 8354; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v2, 0, 16 8355; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v17, 0, 16 8356; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v18, 0, 16 8357; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v16, 0, 16 8358; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16 8359; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v1, 0, 16 8360; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v3, 0, 16 8361; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16 8362; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27 8363; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29 8364; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8365; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8366; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 8367; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8368; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 8369; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21 8370; 
GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8371; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8372; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8373; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8374; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23 8375; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25 8376; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112 8377; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:64 8378; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80 8379; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:32 8380; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48 8381; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 8382; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 8383; GCN-NOHSA-VI-NEXT: s_endpgm 8384; 8385; EG-LABEL: global_sextload_v32i16_to_v32i64: 8386; EG: ; %bb.0: 8387; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8388; EG-NEXT: TEX 0 @22 8389; EG-NEXT: ALU 56, @31, KC0[CB0:0-32], KC1[] 8390; EG-NEXT: TEX 2 @24 8391; EG-NEXT: ALU 74, @88, KC0[CB0:0-32], KC1[] 8392; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 8393; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0 8394; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T34.X, 0 8395; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T33.X, 0 8396; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 8397; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 8398; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T30.X, 0 8399; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T29.X, 0 8400; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 8401; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 8402; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T26.X, 0 8403; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0 8404; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 
8405; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 8406; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 8407; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 8408; EG-NEXT: CF_END 8409; EG-NEXT: Fetch clause starting at 22: 8410; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 8411; EG-NEXT: Fetch clause starting at 24: 8412; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 8413; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 8414; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 8415; EG-NEXT: ALU clause starting at 30: 8416; EG-NEXT: MOV * T19.X, KC0[2].Z, 8417; EG-NEXT: ALU clause starting at 31: 8418; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8419; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8420; EG-NEXT: LSHR T21.X, PV.W, literal.x, 8421; EG-NEXT: LSHR * T22.X, KC0[2].Y, literal.x, 8422; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8423; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8424; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 8425; EG-NEXT: LSHR T23.X, PV.W, literal.x, 8426; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8427; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8428; EG-NEXT: LSHR T24.X, PV.W, literal.x, 8429; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8430; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8431; EG-NEXT: LSHR T25.X, PV.W, literal.x, 8432; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8433; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8434; EG-NEXT: LSHR T26.X, PV.W, literal.x, 8435; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8436; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8437; EG-NEXT: LSHR T27.X, PV.W, literal.x, 8438; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8439; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8440; EG-NEXT: LSHR T28.X, PV.W, literal.x, 8441; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8442; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8443; EG-NEXT: LSHR T29.X, PV.W, literal.x, 8444; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8445; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8446; 
EG-NEXT: LSHR T30.X, PV.W, literal.x, 8447; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8448; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8449; EG-NEXT: LSHR T31.X, PV.W, literal.x, 8450; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8451; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8452; EG-NEXT: LSHR T32.X, PV.W, literal.x, 8453; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8454; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8455; EG-NEXT: LSHR T33.X, PV.W, literal.x, 8456; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8457; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8458; EG-NEXT: LSHR T34.X, PV.W, literal.x, 8459; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 8460; EG-NEXT: ASHR * T35.W, T20.Y, literal.z, 8461; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 8462; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8463; EG-NEXT: LSHR T36.X, PV.W, literal.x, 8464; EG-NEXT: ASHR T35.Z, T20.Y, literal.y, 8465; EG-NEXT: ASHR * T37.W, T20.X, literal.z, 8466; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8467; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8468; EG-NEXT: BFE_INT T35.X, T20.Y, 0.0, literal.x, 8469; EG-NEXT: ASHR * T37.Z, T20.X, literal.x, 8470; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8471; EG-NEXT: BFE_INT T37.X, T20.X, 0.0, literal.x, 8472; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 8473; EG-NEXT: ASHR * T19.W, T20.W, literal.y, 8474; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8475; EG-NEXT: ALU clause starting at 88: 8476; EG-NEXT: ASHR T19.Z, T20.W, literal.x, 8477; EG-NEXT: ASHR * T41.W, T20.Z, literal.y, 8478; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8479; EG-NEXT: BFE_INT T19.X, T20.W, 0.0, literal.x, 8480; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 8481; EG-NEXT: ASHR T41.Z, T20.Z, literal.x, 8482; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 8483; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8484; EG-NEXT: BFE_INT T41.X, T20.Z, 0.0, literal.x, 8485; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 8486; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 8487; EG-NEXT: ASHR * T42.W, T40.X, 
literal.y, 8488; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8489; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x, 8490; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 8491; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 8492; EG-NEXT: ASHR * T43.W, T40.W, literal.y, 8493; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8494; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 8495; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 8496; EG-NEXT: ASHR T43.Z, T40.W, literal.x, 8497; EG-NEXT: ASHR * T44.W, T40.Z, literal.y, 8498; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8499; EG-NEXT: BFE_INT T43.X, T40.W, 0.0, literal.x, 8500; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 8501; EG-NEXT: ASHR T44.Z, T40.Z, literal.x, 8502; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 8503; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8504; EG-NEXT: BFE_INT T44.X, T40.Z, 0.0, literal.x, 8505; EG-NEXT: ASHR T43.Y, PV.X, literal.y, 8506; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 8507; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 8508; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8509; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 8510; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 8511; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 8512; EG-NEXT: ASHR * T46.W, T39.W, literal.y, 8513; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8514; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 8515; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 8516; EG-NEXT: ASHR T46.Z, T39.W, literal.x, 8517; EG-NEXT: ASHR * T47.W, T39.Z, literal.y, 8518; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8519; EG-NEXT: BFE_INT T46.X, T39.W, 0.0, literal.x, 8520; EG-NEXT: ASHR T45.Y, PV.X, literal.y, 8521; EG-NEXT: ASHR T47.Z, T39.Z, literal.x, 8522; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 8523; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8524; EG-NEXT: BFE_INT T47.X, T39.Z, 0.0, literal.x, 8525; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 8526; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 8527; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 8528; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8529; EG-NEXT: BFE_INT 
T39.X, T38.Y, 0.0, literal.x, 8530; EG-NEXT: ASHR T47.Y, PV.X, literal.y, 8531; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 8532; EG-NEXT: ASHR * T49.W, T38.W, literal.y, 8533; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8534; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 8535; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 8536; EG-NEXT: ASHR T49.Z, T38.W, literal.x, 8537; EG-NEXT: ASHR * T50.W, T38.Z, literal.y, 8538; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8539; EG-NEXT: BFE_INT T49.X, T38.W, 0.0, literal.x, 8540; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 8541; EG-NEXT: ASHR * T50.Z, T38.Z, literal.x, 8542; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8543; EG-NEXT: BFE_INT T50.X, T38.Z, 0.0, literal.x, 8544; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 8545; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 8546; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8547; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 8548; EG-NEXT: LSHR T38.X, PV.W, literal.x, 8549; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 8550; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8551; 8552; CM-LABEL: global_sextload_v32i16_to_v32i64: 8553; CM: ; %bb.0: 8554; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8555; CM-NEXT: TEX 0 @22 8556; CM-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 8557; CM-NEXT: TEX 2 @24 8558; CM-NEXT: ALU 73, @87, KC0[CB0:0-32], KC1[] 8559; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T50.X 8560; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T49, T36.X 8561; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T48, T34.X 8562; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T33.X 8563; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T39, T32.X 8564; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T46, T31.X 8565; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T30.X 8566; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T29.X 8567; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T28.X 8568; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T27.X 8569; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T26.X 8570; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T25.X 8571; CM-NEXT: 
MEM_RAT_CACHELESS STORE_DWORD T20, T24.X 8572; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T23.X 8573; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T22.X 8574; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T21.X 8575; CM-NEXT: CF_END 8576; CM-NEXT: Fetch clause starting at 22: 8577; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 8578; CM-NEXT: Fetch clause starting at 24: 8579; CM-NEXT: VTX_READ_128 T38.XYZW, T19.X, 0, #1 8580; CM-NEXT: VTX_READ_128 T39.XYZW, T19.X, 16, #1 8581; CM-NEXT: VTX_READ_128 T40.XYZW, T19.X, 32, #1 8582; CM-NEXT: ALU clause starting at 30: 8583; CM-NEXT: MOV * T19.X, KC0[2].Z, 8584; CM-NEXT: ALU clause starting at 31: 8585; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8586; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 8587; CM-NEXT: LSHR T21.X, PV.W, literal.x, 8588; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8589; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 8590; CM-NEXT: LSHR T22.X, PV.W, literal.x, 8591; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8592; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8593; CM-NEXT: LSHR T23.X, PV.W, literal.x, 8594; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8595; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8596; CM-NEXT: LSHR T24.X, PV.W, literal.x, 8597; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8598; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8599; CM-NEXT: LSHR T25.X, PV.W, literal.x, 8600; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8601; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8602; CM-NEXT: LSHR T26.X, PV.W, literal.x, 8603; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8604; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8605; CM-NEXT: LSHR T27.X, PV.W, literal.x, 8606; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8607; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8608; CM-NEXT: LSHR T28.X, PV.W, literal.x, 8609; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8610; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8611; CM-NEXT: LSHR T29.X, PV.W, literal.x, 8612; CM-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 8613; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8614; CM-NEXT: LSHR T30.X, PV.W, literal.x, 8615; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8616; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8617; CM-NEXT: LSHR T31.X, PV.W, literal.x, 8618; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8619; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8620; CM-NEXT: LSHR T32.X, PV.W, literal.x, 8621; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8622; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8623; CM-NEXT: LSHR T33.X, PV.W, literal.x, 8624; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8625; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8626; CM-NEXT: LSHR T34.X, PV.W, literal.x, 8627; CM-NEXT: ASHR * T35.W, T20.Z, literal.y, 8628; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8629; CM-NEXT: LSHR T36.X, KC0[2].Y, literal.x, 8630; CM-NEXT: ASHR T35.Z, T20.Z, literal.y, 8631; CM-NEXT: ASHR * T37.W, T20.W, literal.z, 8632; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8633; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8634; CM-NEXT: BFE_INT T35.X, T20.Z, 0.0, literal.x, 8635; CM-NEXT: ASHR * T37.Z, T20.W, literal.x, 8636; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8637; CM-NEXT: BFE_INT T37.X, T20.W, 0.0, literal.x, 8638; CM-NEXT: ASHR T35.Y, PV.X, literal.y, 8639; CM-NEXT: ASHR * T19.W, T20.X, literal.y, 8640; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8641; CM-NEXT: ALU clause starting at 87: 8642; CM-NEXT: ASHR T19.Z, T20.X, literal.x, 8643; CM-NEXT: ASHR * T20.W, T20.Y, literal.y, 8644; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8645; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x, 8646; CM-NEXT: ASHR T37.Y, T37.X, literal.y, BS:VEC_120/SCL_212 8647; CM-NEXT: ASHR T20.Z, T20.Y, literal.x, 8648; CM-NEXT: ASHR * T41.W, T40.Z, literal.y, 8649; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8650; CM-NEXT: BFE_INT T20.X, T20.Y, 0.0, literal.x, 8651; CM-NEXT: ASHR T19.Y, PV.X, literal.y, 8652; CM-NEXT: ASHR T41.Z, T40.Z, literal.x, 8653; CM-NEXT: ASHR * T42.W, T40.W, 
literal.y, 8654; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8655; CM-NEXT: BFE_INT T41.X, T40.Z, 0.0, literal.x, 8656; CM-NEXT: ASHR T20.Y, PV.X, literal.y, 8657; CM-NEXT: ASHR T42.Z, T40.W, literal.x, 8658; CM-NEXT: ASHR * T43.W, T40.X, literal.y, 8659; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8660; CM-NEXT: BFE_INT T42.X, T40.W, 0.0, literal.x, 8661; CM-NEXT: ASHR T41.Y, PV.X, literal.y, 8662; CM-NEXT: ASHR T43.Z, T40.X, literal.x, 8663; CM-NEXT: ASHR * T40.W, T40.Y, literal.y, 8664; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8665; CM-NEXT: BFE_INT T43.X, T40.X, 0.0, literal.x, 8666; CM-NEXT: ASHR T42.Y, PV.X, literal.y, 8667; CM-NEXT: ASHR T40.Z, T40.Y, literal.x, 8668; CM-NEXT: ASHR * T44.W, T39.Z, literal.y, 8669; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8670; CM-NEXT: BFE_INT T40.X, T40.Y, 0.0, literal.x, 8671; CM-NEXT: ASHR T43.Y, PV.X, literal.y, 8672; CM-NEXT: ASHR T44.Z, T39.Z, literal.x, 8673; CM-NEXT: ASHR * T45.W, T39.W, literal.y, 8674; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8675; CM-NEXT: BFE_INT T44.X, T39.Z, 0.0, literal.x, 8676; CM-NEXT: ASHR T40.Y, PV.X, literal.y, 8677; CM-NEXT: ASHR T45.Z, T39.W, literal.x, 8678; CM-NEXT: ASHR * T46.W, T39.X, literal.y, 8679; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8680; CM-NEXT: BFE_INT T45.X, T39.W, 0.0, literal.x, 8681; CM-NEXT: ASHR T44.Y, PV.X, literal.y, 8682; CM-NEXT: ASHR T46.Z, T39.X, literal.x, 8683; CM-NEXT: ASHR * T39.W, T39.Y, literal.y, 8684; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8685; CM-NEXT: BFE_INT T46.X, T39.X, 0.0, literal.x, 8686; CM-NEXT: ASHR T45.Y, PV.X, literal.y, 8687; CM-NEXT: ASHR T39.Z, T39.Y, literal.x, 8688; CM-NEXT: ASHR * T47.W, T38.Z, literal.y, 8689; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8690; CM-NEXT: BFE_INT T39.X, T39.Y, 0.0, literal.x, 8691; CM-NEXT: ASHR T46.Y, PV.X, literal.y, 8692; CM-NEXT: ASHR T47.Z, T38.Z, literal.x, 8693; CM-NEXT: ASHR * T48.W, T38.W, literal.y, 8694; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8695; CM-NEXT: BFE_INT 
T47.X, T38.Z, 0.0, literal.x, 8696; CM-NEXT: ASHR T39.Y, PV.X, literal.y, 8697; CM-NEXT: ASHR T48.Z, T38.W, literal.x, 8698; CM-NEXT: ASHR * T49.W, T38.X, literal.y, 8699; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8700; CM-NEXT: BFE_INT T48.X, T38.W, 0.0, literal.x, 8701; CM-NEXT: ASHR T47.Y, PV.X, literal.y, 8702; CM-NEXT: ASHR T49.Z, T38.X, literal.x, 8703; CM-NEXT: ASHR * T38.W, T38.Y, literal.y, 8704; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8705; CM-NEXT: BFE_INT T49.X, T38.X, 0.0, literal.x, 8706; CM-NEXT: ASHR T48.Y, PV.X, literal.y, 8707; CM-NEXT: ASHR * T38.Z, T38.Y, literal.x, 8708; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8709; CM-NEXT: BFE_INT T38.X, T38.Y, 0.0, literal.x, 8710; CM-NEXT: ASHR T49.Y, PV.X, literal.y, 8711; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8712; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8713; CM-NEXT: LSHR T50.X, PV.W, literal.x, 8714; CM-NEXT: ASHR * T38.Y, PV.X, literal.y, 8715; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8716 %load = load <32 x i16>, ptr addrspace(1) %in 8717 %ext = sext <32 x i16> %load to <32 x i64> 8718 store <32 x i64> %ext, ptr addrspace(1) %out 8719 ret void 8720} 8721 8722; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 8723; %load = load <64 x i16>, ptr addrspace(1) %in 8724; %ext = zext <64 x i16> %load to <64 x i64> 8725; store <64 x i64> %ext, ptr addrspace(1) %out 8726; ret void 8727; } 8728 8729; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 8730; %load = load <64 x i16>, ptr addrspace(1) %in 8731; %ext = sext <64 x i16> %load to <64 x i64> 8732; store <64 x i64> %ext, ptr addrspace(1) %out 8733; ret void 8734; } 8735 8736attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" } 8737