; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

; The kernels below load an i1 or a vector of i1 through an addrspace(4)
; pointer (%in) and store the value through an addrspace(1) pointer (%out),
; either unchanged or after a zext/sext to i32 elements. The assertions inside
; each kernel are generated; regenerate them with the script named in the NOTE
; line rather than editing them by hand.

; Load a scalar i1 from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load i1, ptr addrspace(4) %in
  store i1 %load, ptr addrspace(1) %out
  ret void
}

; Load a <2 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v2i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v2i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v2i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v2i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <2 x i1>, ptr addrspace(4) %in
  store <2 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load a <3 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v3i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v3i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v3i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T0.X, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <3 x i1>, ptr addrspace(4) %in
  store <3 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load a <4 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v4i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v4i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v4i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 15(2.101948e-44)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v4i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <4 x i1>, ptr addrspace(4) %in
  store <4 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load an <8 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v8i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v8i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v8i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v8i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <8 x i1>, ptr addrspace(4) %in
  store <8 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load a <16 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v16i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v16i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_short v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v16i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v16i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u16 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <16 x i1>, ptr addrspace(4) %in
  store <16 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load a <32 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v32i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v32i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v32i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v32i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <32 x i1>, ptr addrspace(4) %in
  store <32 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load a <64 x i1> from %in and store it unchanged to %out.
define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v64i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v64i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v64i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v64i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <64 x i1>, ptr addrspace(4) %in
  store <64 x i1> %load, ptr addrspace(1) %out
  ret void
}

; Load a scalar i1 from %in, zero-extend it to i32 and store the i32 to %out.
define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_i1_to_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_i1_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_i1_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_i1_to_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %a = load i1, ptr addrspace(4) %in
  %ext = zext i1 %a to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; Load a scalar i1 from %in, sign-extend it to i32 and store the i32 to %out.
define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_i1_to_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_i1_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_i1_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_i1_to_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10000
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %a = load i1, ptr addrspace(4) %in
  %ext = sext i1 %a to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; Load a <1 x i1> from %in, zero-extend it to <1 x i32> and store that to %out.
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v1i1_to_v1i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <1 x i1>, ptr addrspace(4) %in
  %ext = zext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, ptr addrspace(1) %out
  ret void
}

; Load a <1 x i1> from %in, sign-extend it to <1 x i32> and store that to %out.
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v1i1_to_v1i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10000
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <1 x i1>, ptr addrspace(4) %in
  %ext = sext <1 x i1> %load to <1 x i32>
  store <1 x i32> %ext, ptr addrspace(1) %out
  ret void
}

; Load a <2 x i1> from %in, zero-extend it to <2 x i32> and store that to %out.
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v0
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v3, 1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v2i1_to_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T0.Y, T0.X, 1, 1,
; EG-NEXT: AND_INT T0.X, T0.X, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v2, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <2 x i1>, ptr addrspace(4) %in
  %ext = zext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, ptr addrspace(1) %out
  ret void
}

; Load a <2 x i1> from %in, sign-extend it to <2 x i32> and store that to %out.
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v1, v0, 1, 1
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v3, v2, 1, 1
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v2i1_to_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR T0.W, T0.X, 1,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT * T1.Y, PV.W, 0.0, 1,
;
; GFX12-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <2 x i1>, ptr addrspace(4) %in
  %ext = sext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v2, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
; GFX6-NEXT: v_bfe_u32 v1, v2, 1, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 2, v2
; GFX6-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v1, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, 2
; GFX8-NEXT: v_mov_b32_e32 v3, s0
; GFX8-NEXT: v_mov_b32_e32 v4, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v1
; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 1
; GFX8-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v3i1_to_v3i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T3.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T1.Y, T0.X, 1, 1,
; EG-NEXT: AND_INT T1.X, T0.X, 1,
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: LSHR T0.X, T0.X, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v3, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT:
global_load_u8 v1, v3, s[2:3] 1021; GFX12-NEXT: s_wait_loadcnt 0x0 1022; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v1 1023; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) 1024; GFX12-NEXT: v_lshrrev_b32_e32 v2, 2, v0 1025; GFX12-NEXT: v_and_b32_e32 v0, 1, v1 1026; GFX12-NEXT: v_bfe_u32 v1, v1, 1, 1 1027; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2 1028; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1] 1029; GFX12-NEXT: s_endpgm 1030 %load = load <3 x i1>, ptr addrspace(4) %in 1031 %ext = zext <3 x i1> %load to <3 x i32> 1032 store <3 x i32> %ext, ptr addrspace(1) %out 1033 ret void 1034} 1035 1036define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1037; GFX6-LABEL: constant_sextload_v3i1_to_v3i32: 1038; GFX6: ; %bb.0: 1039; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1040; GFX6-NEXT: s_mov_b32 s7, 0xf000 1041; GFX6-NEXT: s_mov_b32 s6, -1 1042; GFX6-NEXT: s_mov_b32 s10, s6 1043; GFX6-NEXT: s_mov_b32 s11, s7 1044; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1045; GFX6-NEXT: s_mov_b32 s8, s2 1046; GFX6-NEXT: s_mov_b32 s9, s3 1047; GFX6-NEXT: buffer_load_ubyte v2, off, s[8:11], 0 1048; GFX6-NEXT: s_mov_b32 s4, s0 1049; GFX6-NEXT: s_mov_b32 s5, s1 1050; GFX6-NEXT: s_waitcnt vmcnt(0) 1051; GFX6-NEXT: v_bfe_i32 v1, v2, 1, 1 1052; GFX6-NEXT: v_bfe_i32 v0, v2, 0, 1 1053; GFX6-NEXT: v_bfe_i32 v2, v2, 2, 1 1054; GFX6-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 1055; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1056; GFX6-NEXT: s_endpgm 1057; 1058; GFX8-LABEL: constant_sextload_v3i1_to_v3i32: 1059; GFX8: ; %bb.0: 1060; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1061; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1062; GFX8-NEXT: v_mov_b32_e32 v0, s2 1063; GFX8-NEXT: v_mov_b32_e32 v1, s3 1064; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1065; GFX8-NEXT: v_mov_b32_e32 v3, s0 1066; GFX8-NEXT: v_mov_b32_e32 v4, s1 1067; GFX8-NEXT: s_waitcnt vmcnt(0) 1068; GFX8-NEXT: v_bfe_i32 v2, v0, 2, 1 
1069; GFX8-NEXT: v_bfe_i32 v1, v0, 1, 1 1070; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 1071; GFX8-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1072; GFX8-NEXT: s_endpgm 1073; 1074; EG-LABEL: constant_sextload_v3i1_to_v3i32: 1075; EG: ; %bb.0: 1076; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1077; EG-NEXT: TEX 0 @6 1078; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1079; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T0.X, 0 1080; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T1.X, 1 1081; EG-NEXT: CF_END 1082; EG-NEXT: Fetch clause starting at 6: 1083; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 1084; EG-NEXT: ALU clause starting at 8: 1085; EG-NEXT: MOV * T0.X, KC0[2].Z, 1086; EG-NEXT: ALU clause starting at 9: 1087; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 1088; EG-NEXT: LSHR * T0.W, T0.X, literal.x, 1089; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1090; EG-NEXT: BFE_INT * T2.X, PV.W, 0.0, 1, 1091; EG-NEXT: BFE_INT T3.X, T0.X, 0.0, 1, 1092; EG-NEXT: LSHR T0.W, T0.X, 1, 1093; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 1094; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1095; EG-NEXT: LSHR T0.X, PS, literal.x, 1096; EG-NEXT: BFE_INT * T3.Y, PV.W, 0.0, 1, 1097; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1098; 1099; GFX12-LABEL: constant_sextload_v3i1_to_v3i32: 1100; GFX12: ; %bb.0: 1101; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1102; GFX12-NEXT: s_wait_kmcnt 0x0 1103; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 1104; GFX12-NEXT: s_wait_kmcnt 0x0 1105; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10002 1106; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10000 1107; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001 1108; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4 1109; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3 1110; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1] 1111; GFX12-NEXT: s_endpgm 1112 %load = load <3 x i1>, ptr addrspace(4) %in 1113 %ext = sext <3 x i1> %load to <3 x i32> 1114 store <3 x i32> %ext, ptr addrspace(1) %out 1115 ret void 1116} 1117 1118define amdgpu_kernel void 
@constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1119; GFX6-LABEL: constant_zextload_v4i1_to_v4i32: 1120; GFX6: ; %bb.0: 1121; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1122; GFX6-NEXT: s_mov_b32 s7, 0xf000 1123; GFX6-NEXT: s_mov_b32 s6, -1 1124; GFX6-NEXT: s_mov_b32 s10, s6 1125; GFX6-NEXT: s_mov_b32 s11, s7 1126; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1127; GFX6-NEXT: s_mov_b32 s8, s2 1128; GFX6-NEXT: s_mov_b32 s9, s3 1129; GFX6-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 1130; GFX6-NEXT: s_mov_b32 s4, s0 1131; GFX6-NEXT: s_mov_b32 s5, s1 1132; GFX6-NEXT: s_waitcnt vmcnt(0) 1133; GFX6-NEXT: v_lshrrev_b32_e32 v3, 3, v1 1134; GFX6-NEXT: v_and_b32_e32 v0, 1, v1 1135; GFX6-NEXT: v_bfe_u32 v2, v1, 2, 1 1136; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 1 1137; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1138; GFX6-NEXT: s_endpgm 1139; 1140; GFX8-LABEL: constant_zextload_v4i1_to_v4i32: 1141; GFX8: ; %bb.0: 1142; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1143; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1144; GFX8-NEXT: v_mov_b32_e32 v0, s2 1145; GFX8-NEXT: v_mov_b32_e32 v1, s3 1146; GFX8-NEXT: flat_load_ubyte v1, v[0:1] 1147; GFX8-NEXT: v_mov_b32_e32 v0, 3 1148; GFX8-NEXT: v_mov_b32_e32 v4, s0 1149; GFX8-NEXT: v_mov_b32_e32 v5, s1 1150; GFX8-NEXT: s_waitcnt vmcnt(0) 1151; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1152; GFX8-NEXT: v_and_b32_e32 v0, 1, v1 1153; GFX8-NEXT: v_bfe_u32 v2, v1, 2, 1 1154; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 1 1155; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1156; GFX8-NEXT: s_endpgm 1157; 1158; EG-LABEL: constant_zextload_v4i1_to_v4i32: 1159; EG: ; %bb.0: 1160; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1161; EG-NEXT: TEX 0 @6 1162; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[] 1163; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 1164; EG-NEXT: CF_END 1165; EG-NEXT: PAD 1166; EG-NEXT: Fetch clause starting at 6: 1167; EG-NEXT: VTX_READ_8 T0.X, 
T0.X, 0, #1 1168; EG-NEXT: ALU clause starting at 8: 1169; EG-NEXT: MOV * T0.X, KC0[2].Z, 1170; EG-NEXT: ALU clause starting at 9: 1171; EG-NEXT: BFE_UINT * T0.W, T0.X, literal.x, 1, 1172; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 1173; EG-NEXT: BFE_UINT * T0.Z, T0.X, literal.x, 1, 1174; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1175; EG-NEXT: BFE_UINT * T0.Y, T0.X, 1, 1, 1176; EG-NEXT: AND_INT T0.X, T0.X, 1, 1177; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1178; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1179; 1180; GFX12-LABEL: constant_zextload_v4i1_to_v4i32: 1181; GFX12: ; %bb.0: 1182; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1183; GFX12-NEXT: v_mov_b32_e32 v4, 0 1184; GFX12-NEXT: s_wait_kmcnt 0x0 1185; GFX12-NEXT: global_load_u8 v1, v4, s[2:3] 1186; GFX12-NEXT: s_wait_loadcnt 0x0 1187; GFX12-NEXT: v_and_b32_e32 v3, 0xffff, v1 1188; GFX12-NEXT: v_and_b32_e32 v0, 1, v1 1189; GFX12-NEXT: v_bfe_u32 v2, v1, 2, 1 1190; GFX12-NEXT: v_bfe_u32 v1, v1, 1, 1 1191; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) 1192; GFX12-NEXT: v_lshrrev_b32_e32 v3, 3, v3 1193; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 1194; GFX12-NEXT: s_endpgm 1195 %load = load <4 x i1>, ptr addrspace(4) %in 1196 %ext = zext <4 x i1> %load to <4 x i32> 1197 store <4 x i32> %ext, ptr addrspace(1) %out 1198 ret void 1199} 1200 1201define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1202; GFX6-LABEL: constant_sextload_v4i1_to_v4i32: 1203; GFX6: ; %bb.0: 1204; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1205; GFX6-NEXT: s_mov_b32 s7, 0xf000 1206; GFX6-NEXT: s_mov_b32 s6, -1 1207; GFX6-NEXT: s_mov_b32 s10, s6 1208; GFX6-NEXT: s_mov_b32 s11, s7 1209; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1210; GFX6-NEXT: s_mov_b32 s8, s2 1211; GFX6-NEXT: s_mov_b32 s9, s3 1212; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 1213; GFX6-NEXT: s_mov_b32 s4, s0 1214; GFX6-NEXT: s_mov_b32 s5, s1 1215; GFX6-NEXT: s_waitcnt vmcnt(0) 1216; GFX6-NEXT: v_bfe_i32 
v3, v0, 3, 1 1217; GFX6-NEXT: v_bfe_i32 v2, v0, 2, 1 1218; GFX6-NEXT: v_bfe_i32 v1, v0, 1, 1 1219; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 1220; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1221; GFX6-NEXT: s_endpgm 1222; 1223; GFX8-LABEL: constant_sextload_v4i1_to_v4i32: 1224; GFX8: ; %bb.0: 1225; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1226; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1227; GFX8-NEXT: v_mov_b32_e32 v0, s2 1228; GFX8-NEXT: v_mov_b32_e32 v1, s3 1229; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1230; GFX8-NEXT: v_mov_b32_e32 v4, s0 1231; GFX8-NEXT: v_mov_b32_e32 v5, s1 1232; GFX8-NEXT: s_waitcnt vmcnt(0) 1233; GFX8-NEXT: v_bfe_i32 v3, v0, 3, 1 1234; GFX8-NEXT: v_bfe_i32 v2, v0, 2, 1 1235; GFX8-NEXT: v_bfe_i32 v1, v0, 1, 1 1236; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 1237; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1238; GFX8-NEXT: s_endpgm 1239; 1240; EG-LABEL: constant_sextload_v4i1_to_v4i32: 1241; EG: ; %bb.0: 1242; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1243; EG-NEXT: TEX 0 @6 1244; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1245; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 1246; EG-NEXT: CF_END 1247; EG-NEXT: PAD 1248; EG-NEXT: Fetch clause starting at 6: 1249; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 1250; EG-NEXT: ALU clause starting at 8: 1251; EG-NEXT: MOV * T0.X, KC0[2].Z, 1252; EG-NEXT: ALU clause starting at 9: 1253; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1, 1254; EG-NEXT: LSHR * T0.W, T0.X, literal.x, 1255; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 1256; EG-NEXT: BFE_INT T1.W, PV.W, 0.0, 1, 1257; EG-NEXT: LSHR * T0.W, T0.X, literal.x, 1258; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1259; EG-NEXT: BFE_INT T1.Z, PS, 0.0, 1, 1260; EG-NEXT: LSHR * T0.W, T0.X, 1, 1261; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1262; EG-NEXT: BFE_INT * T1.Y, PV.W, 0.0, 1, 1263; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1264; 1265; GFX12-LABEL: constant_sextload_v4i1_to_v4i32: 1266; GFX12: ; %bb.0: 1267; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1268; GFX12-NEXT: 
s_wait_kmcnt 0x0 1269; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 1270; GFX12-NEXT: s_wait_kmcnt 0x0 1271; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003 1272; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002 1273; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10000 1274; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001 1275; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1276; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2 1277; GFX12-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3 1278; GFX12-NEXT: v_mov_b32_e32 v2, s4 1279; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 1280; GFX12-NEXT: s_endpgm 1281 %load = load <4 x i1>, ptr addrspace(4) %in 1282 %ext = sext <4 x i1> %load to <4 x i32> 1283 store <4 x i32> %ext, ptr addrspace(1) %out 1284 ret void 1285} 1286 1287define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1288; GFX6-LABEL: constant_zextload_v8i1_to_v8i32: 1289; GFX6: ; %bb.0: 1290; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1291; GFX6-NEXT: s_mov_b32 s7, 0xf000 1292; GFX6-NEXT: s_mov_b32 s6, -1 1293; GFX6-NEXT: s_mov_b32 s10, s6 1294; GFX6-NEXT: s_mov_b32 s11, s7 1295; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1296; GFX6-NEXT: s_mov_b32 s8, s2 1297; GFX6-NEXT: s_mov_b32 s9, s3 1298; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0 1299; GFX6-NEXT: s_mov_b32 s4, s0 1300; GFX6-NEXT: s_mov_b32 s5, s1 1301; GFX6-NEXT: s_waitcnt vmcnt(0) 1302; GFX6-NEXT: v_bfe_u32 v3, v4, 3, 1 1303; GFX6-NEXT: v_bfe_u32 v1, v4, 1, 1 1304; GFX6-NEXT: v_lshrrev_b32_e32 v7, 7, v4 1305; GFX6-NEXT: v_bfe_u32 v5, v4, 5, 1 1306; GFX6-NEXT: v_and_b32_e32 v0, 1, v4 1307; GFX6-NEXT: v_bfe_u32 v2, v4, 2, 1 1308; GFX6-NEXT: v_bfe_u32 v6, v4, 6, 1 1309; GFX6-NEXT: v_bfe_u32 v4, v4, 4, 1 1310; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 1311; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1312; GFX6-NEXT: s_endpgm 1313; 1314; GFX8-LABEL: constant_zextload_v8i1_to_v8i32: 1315; GFX8: ; %bb.0: 1316; GFX8-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x24 1317; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1318; GFX8-NEXT: v_mov_b32_e32 v0, s2 1319; GFX8-NEXT: v_mov_b32_e32 v1, s3 1320; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 1321; GFX8-NEXT: v_mov_b32_e32 v9, s1 1322; GFX8-NEXT: v_mov_b32_e32 v8, s0 1323; GFX8-NEXT: s_waitcnt vmcnt(0) 1324; GFX8-NEXT: v_readfirstlane_b32 s2, v0 1325; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10003 1326; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10001 1327; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10005 1328; GFX8-NEXT: s_and_b32 s6, s2, 1 1329; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10002 1330; GFX8-NEXT: s_bfe_u32 s2, s2, 0x10004 1331; GFX8-NEXT: s_add_u32 s0, s0, 16 1332; GFX8-NEXT: s_addc_u32 s1, s1, 0 1333; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 1334; GFX8-NEXT: v_mov_b32_e32 v11, s1 1335; GFX8-NEXT: v_lshrrev_b32_e32 v3, 7, v0 1336; GFX8-NEXT: v_bfe_u32 v2, v0, 6, 1 1337; GFX8-NEXT: v_mov_b32_e32 v0, s2 1338; GFX8-NEXT: v_mov_b32_e32 v1, s5 1339; GFX8-NEXT: v_mov_b32_e32 v10, s0 1340; GFX8-NEXT: v_mov_b32_e32 v4, s6 1341; GFX8-NEXT: v_mov_b32_e32 v5, s4 1342; GFX8-NEXT: v_mov_b32_e32 v6, s7 1343; GFX8-NEXT: v_mov_b32_e32 v7, s3 1344; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 1345; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 1346; GFX8-NEXT: s_endpgm 1347; 1348; EG-LABEL: constant_zextload_v8i1_to_v8i32: 1349; EG: ; %bb.0: 1350; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1351; EG-NEXT: TEX 0 @6 1352; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1353; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0 1354; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1 1355; EG-NEXT: CF_END 1356; EG-NEXT: Fetch clause starting at 6: 1357; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1 1358; EG-NEXT: ALU clause starting at 8: 1359; EG-NEXT: MOV * T5.X, KC0[2].Z, 1360; EG-NEXT: ALU clause starting at 9: 1361; EG-NEXT: BFE_UINT * T6.W, T5.X, literal.x, 1, 1362; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 1363; EG-NEXT: BFE_UINT * T6.Z, T5.X, literal.x, 1, 1364; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1365; EG-NEXT: BFE_UINT T6.Y, T5.X, 
1, 1, 1366; EG-NEXT: BFE_UINT * T5.W, T5.X, literal.x, 1, 1367; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 1368; EG-NEXT: AND_INT T6.X, T5.X, 1, 1369; EG-NEXT: BFE_UINT T5.Z, T5.X, literal.x, 1, 1370; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1371; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45) 1372; EG-NEXT: BFE_UINT * T5.Y, T5.X, literal.x, 1, 1373; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00) 1374; EG-NEXT: BFE_UINT T5.X, T5.X, literal.x, 1, 1375; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1376; EG-NEXT: 4(5.605194e-45), 16(2.242078e-44) 1377; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 1378; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1379; 1380; GFX12-LABEL: constant_zextload_v8i1_to_v8i32: 1381; GFX12: ; %bb.0: 1382; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1383; GFX12-NEXT: v_mov_b32_e32 v8, 0 1384; GFX12-NEXT: s_wait_kmcnt 0x0 1385; GFX12-NEXT: global_load_u8 v0, v8, s[2:3] 1386; GFX12-NEXT: s_wait_loadcnt 0x0 1387; GFX12-NEXT: v_readfirstlane_b32 s2, v0 1388; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1389; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001 1390; GFX12-NEXT: v_dual_mov_b32 v5, s4 :: v_dual_and_b32 v0, 0xffff, v0 1391; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003 1392; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10005 1393; GFX12-NEXT: s_and_b32 s6, s2, 1 1394; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10002 1395; GFX12-NEXT: s_bfe_u32 s2, s2, 0x10004 1396; GFX12-NEXT: v_lshrrev_b32_e32 v3, 7, v0 1397; GFX12-NEXT: v_bfe_u32 v2, v0, 6, 1 1398; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v7, s3 1399; GFX12-NEXT: s_wait_alu 0xfffe 1400; GFX12-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v4, s6 1401; GFX12-NEXT: v_mov_b32_e32 v6, s7 1402; GFX12-NEXT: s_clause 0x1 1403; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 1404; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 1405; GFX12-NEXT: s_endpgm 1406 %load = load <8 x i1>, ptr addrspace(4) %in 1407 %ext = zext <8 x i1> %load to <8 x i32> 1408 store <8 x i32> %ext, ptr addrspace(1) 
%out 1409 ret void 1410} 1411 1412define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1413; GFX6-LABEL: constant_sextload_v8i1_to_v8i32: 1414; GFX6: ; %bb.0: 1415; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1416; GFX6-NEXT: s_mov_b32 s7, 0xf000 1417; GFX6-NEXT: s_mov_b32 s6, -1 1418; GFX6-NEXT: s_mov_b32 s10, s6 1419; GFX6-NEXT: s_mov_b32 s11, s7 1420; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1421; GFX6-NEXT: s_mov_b32 s8, s2 1422; GFX6-NEXT: s_mov_b32 s9, s3 1423; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0 1424; GFX6-NEXT: s_mov_b32 s4, s0 1425; GFX6-NEXT: s_mov_b32 s5, s1 1426; GFX6-NEXT: s_waitcnt vmcnt(0) 1427; GFX6-NEXT: v_bfe_i32 v3, v4, 3, 1 1428; GFX6-NEXT: v_bfe_i32 v2, v4, 2, 1 1429; GFX6-NEXT: v_bfe_i32 v1, v4, 1, 1 1430; GFX6-NEXT: v_bfe_i32 v0, v4, 0, 1 1431; GFX6-NEXT: v_bfe_i32 v7, v4, 7, 1 1432; GFX6-NEXT: v_bfe_i32 v6, v4, 6, 1 1433; GFX6-NEXT: v_bfe_i32 v5, v4, 5, 1 1434; GFX6-NEXT: v_bfe_i32 v4, v4, 4, 1 1435; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 1436; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1437; GFX6-NEXT: s_endpgm 1438; 1439; GFX8-LABEL: constant_sextload_v8i1_to_v8i32: 1440; GFX8: ; %bb.0: 1441; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1442; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1443; GFX8-NEXT: v_mov_b32_e32 v0, s2 1444; GFX8-NEXT: v_mov_b32_e32 v1, s3 1445; GFX8-NEXT: flat_load_ubyte v4, v[0:1] 1446; GFX8-NEXT: s_add_u32 s2, s0, 16 1447; GFX8-NEXT: s_addc_u32 s3, s1, 0 1448; GFX8-NEXT: v_mov_b32_e32 v11, s3 1449; GFX8-NEXT: v_mov_b32_e32 v9, s1 1450; GFX8-NEXT: v_mov_b32_e32 v10, s2 1451; GFX8-NEXT: v_mov_b32_e32 v8, s0 1452; GFX8-NEXT: s_waitcnt vmcnt(0) 1453; GFX8-NEXT: v_bfe_i32 v3, v4, 3, 1 1454; GFX8-NEXT: v_bfe_i32 v2, v4, 2, 1 1455; GFX8-NEXT: v_bfe_i32 v1, v4, 1, 1 1456; GFX8-NEXT: v_bfe_i32 v0, v4, 0, 1 1457; GFX8-NEXT: v_bfe_i32 v7, v4, 7, 1 1458; GFX8-NEXT: v_bfe_i32 v6, v4, 6, 1 1459; GFX8-NEXT: v_bfe_i32 v5, v4, 5, 1 
1460; GFX8-NEXT: v_bfe_i32 v4, v4, 4, 1 1461; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 1462; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 1463; GFX8-NEXT: s_endpgm 1464; 1465; EG-LABEL: constant_sextload_v8i1_to_v8i32: 1466; EG: ; %bb.0: 1467; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1468; EG-NEXT: TEX 0 @6 1469; EG-NEXT: ALU 23, @9, KC0[CB0:0-32], KC1[] 1470; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 0 1471; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1 1472; EG-NEXT: CF_END 1473; EG-NEXT: Fetch clause starting at 6: 1474; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1 1475; EG-NEXT: ALU clause starting at 8: 1476; EG-NEXT: MOV * T5.X, KC0[2].Z, 1477; EG-NEXT: ALU clause starting at 9: 1478; EG-NEXT: LSHR * T0.W, T5.X, literal.x, 1479; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 1480; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, 1, 1481; EG-NEXT: LSHR * T0.W, T5.X, literal.x, 1482; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00) 1483; EG-NEXT: BFE_INT T7.X, T5.X, 0.0, 1, 1484; EG-NEXT: BFE_INT T6.Z, PS, 0.0, 1, 1485; EG-NEXT: LSHR T0.W, T5.X, literal.x, 1486; EG-NEXT: LSHR * T1.W, T5.X, literal.y, 1487; EG-NEXT: 3(4.203895e-45), 5(7.006492e-45) 1488; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 1489; EG-NEXT: BFE_INT T6.Y, PS, 0.0, 1, 1490; EG-NEXT: LSHR T0.Z, T5.X, literal.x, 1491; EG-NEXT: BFE_INT T7.W, PV.W, 0.0, 1, 1492; EG-NEXT: LSHR * T0.W, T5.X, literal.y, 1493; EG-NEXT: 2(2.802597e-45), 4(5.605194e-45) 1494; EG-NEXT: BFE_INT T6.X, PS, 0.0, 1, 1495; EG-NEXT: BFE_INT T7.Z, PV.Z, 0.0, 1, 1496; EG-NEXT: LSHR T0.W, T5.X, 1, 1497; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 1498; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1499; EG-NEXT: LSHR T5.X, PS, literal.x, 1500; EG-NEXT: BFE_INT * T7.Y, PV.W, 0.0, 1, 1501; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1502; 1503; GFX12-LABEL: constant_sextload_v8i1_to_v8i32: 1504; GFX12: ; %bb.0: 1505; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1506; GFX12-NEXT: s_wait_kmcnt 0x0 1507; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 1508; 
GFX12-NEXT: s_wait_kmcnt 0x0 1509; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003 1510; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002 1511; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001 1512; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000 1513; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007 1514; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006 1515; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10004 1516; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10005 1517; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1518; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s2 1519; GFX12-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s7 1520; GFX12-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v5, s5 1521; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s3 1522; GFX12-NEXT: v_mov_b32_e32 v6, s4 1523; GFX12-NEXT: s_clause 0x1 1524; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 1525; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] 1526; GFX12-NEXT: s_endpgm 1527 %load = load <8 x i1>, ptr addrspace(4) %in 1528 %ext = sext <8 x i1> %load to <8 x i32> 1529 store <8 x i32> %ext, ptr addrspace(1) %out 1530 ret void 1531} 1532 1533define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1534; GFX6-LABEL: constant_zextload_v16i1_to_v16i32: 1535; GFX6: ; %bb.0: 1536; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 1537; GFX6-NEXT: s_mov_b32 s3, 0xf000 1538; GFX6-NEXT: s_mov_b32 s2, -1 1539; GFX6-NEXT: s_mov_b32 s10, s2 1540; GFX6-NEXT: s_mov_b32 s11, s3 1541; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1542; GFX6-NEXT: s_mov_b32 s8, s6 1543; GFX6-NEXT: s_mov_b32 s9, s7 1544; GFX6-NEXT: buffer_load_ushort v12, off, s[8:11], 0 1545; GFX6-NEXT: s_mov_b32 s0, s4 1546; GFX6-NEXT: s_mov_b32 s1, s5 1547; GFX6-NEXT: s_waitcnt vmcnt(0) 1548; GFX6-NEXT: v_bfe_u32 v3, v12, 3, 1 1549; GFX6-NEXT: v_bfe_u32 v1, v12, 1, 1 1550; GFX6-NEXT: v_bfe_u32 v7, v12, 7, 1 1551; GFX6-NEXT: v_bfe_u32 v5, v12, 5, 1 1552; GFX6-NEXT: v_bfe_u32 v11, v12, 11, 1 1553; GFX6-NEXT: v_bfe_u32 v9, v12, 9, 1 1554; 
GFX6-NEXT: v_lshrrev_b32_e32 v15, 15, v12 1555; GFX6-NEXT: v_bfe_u32 v13, v12, 13, 1 1556; GFX6-NEXT: v_and_b32_e32 v0, 1, v12 1557; GFX6-NEXT: v_bfe_u32 v2, v12, 2, 1 1558; GFX6-NEXT: v_bfe_u32 v6, v12, 6, 1 1559; GFX6-NEXT: v_bfe_u32 v4, v12, 4, 1 1560; GFX6-NEXT: v_bfe_u32 v10, v12, 10, 1 1561; GFX6-NEXT: v_bfe_u32 v8, v12, 8, 1 1562; GFX6-NEXT: v_bfe_u32 v14, v12, 14, 1 1563; GFX6-NEXT: v_bfe_u32 v12, v12, 12, 1 1564; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 1565; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 1566; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 1567; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1568; GFX6-NEXT: s_endpgm 1569; 1570; GFX8-LABEL: constant_zextload_v16i1_to_v16i32: 1571; GFX8: ; %bb.0: 1572; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1573; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1574; GFX8-NEXT: v_mov_b32_e32 v0, s2 1575; GFX8-NEXT: v_mov_b32_e32 v1, s3 1576; GFX8-NEXT: flat_load_ushort v0, v[0:1] 1577; GFX8-NEXT: v_mov_b32_e32 v17, s1 1578; GFX8-NEXT: v_mov_b32_e32 v16, s0 1579; GFX8-NEXT: s_waitcnt vmcnt(0) 1580; GFX8-NEXT: v_readfirstlane_b32 s2, v0 1581; GFX8-NEXT: s_and_b32 s6, 0xffff, s2 1582; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10003 1583; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10001 1584; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007 1585; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10009 1586; GFX8-NEXT: s_bfe_u32 s8, s2, 0x1000d 1587; GFX8-NEXT: s_and_b32 s9, s2, 1 1588; GFX8-NEXT: s_bfe_u32 s10, s2, 0x1000a 1589; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000c 1590; GFX8-NEXT: s_bfe_u32 s11, s6, 0x10005 1591; GFX8-NEXT: s_bfe_u32 s12, s6, 0x1000b 1592; GFX8-NEXT: s_lshr_b32 s13, s6, 15 1593; GFX8-NEXT: s_bfe_u32 s14, s6, 0x10002 1594; GFX8-NEXT: s_bfe_u32 s15, s6, 0x10006 1595; GFX8-NEXT: s_bfe_u32 s16, s6, 0x10004 1596; GFX8-NEXT: s_bfe_u32 s17, s6, 0x10008 1597; GFX8-NEXT: s_bfe_u32 s6, s6, 0x1000e 1598; GFX8-NEXT: v_mov_b32_e32 v0, s2 1599; GFX8-NEXT: s_add_u32 s2, s0, 48 1600; GFX8-NEXT: 
v_mov_b32_e32 v15, s3 1601; GFX8-NEXT: s_addc_u32 s3, s1, 0 1602; GFX8-NEXT: v_mov_b32_e32 v19, s3 1603; GFX8-NEXT: v_mov_b32_e32 v1, s8 1604; GFX8-NEXT: v_mov_b32_e32 v2, s6 1605; GFX8-NEXT: v_mov_b32_e32 v3, s13 1606; GFX8-NEXT: v_mov_b32_e32 v18, s2 1607; GFX8-NEXT: s_add_u32 s2, s0, 32 1608; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 1609; GFX8-NEXT: s_addc_u32 s3, s1, 0 1610; GFX8-NEXT: v_mov_b32_e32 v0, s2 1611; GFX8-NEXT: v_mov_b32_e32 v5, s7 1612; GFX8-NEXT: v_mov_b32_e32 v6, s10 1613; GFX8-NEXT: v_mov_b32_e32 v4, s17 1614; GFX8-NEXT: v_mov_b32_e32 v7, s12 1615; GFX8-NEXT: v_mov_b32_e32 v1, s3 1616; GFX8-NEXT: s_add_u32 s0, s0, 16 1617; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 1618; GFX8-NEXT: s_addc_u32 s1, s1, 0 1619; GFX8-NEXT: v_mov_b32_e32 v0, s0 1620; GFX8-NEXT: v_mov_b32_e32 v11, s5 1621; GFX8-NEXT: v_mov_b32_e32 v8, s16 1622; GFX8-NEXT: v_mov_b32_e32 v9, s11 1623; GFX8-NEXT: v_mov_b32_e32 v10, s15 1624; GFX8-NEXT: v_mov_b32_e32 v1, s1 1625; GFX8-NEXT: v_mov_b32_e32 v12, s9 1626; GFX8-NEXT: v_mov_b32_e32 v13, s4 1627; GFX8-NEXT: v_mov_b32_e32 v14, s14 1628; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 1629; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 1630; GFX8-NEXT: s_endpgm 1631; 1632; EG-LABEL: constant_zextload_v16i1_to_v16i32: 1633; EG: ; %bb.0: 1634; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 1635; EG-NEXT: TEX 0 @8 1636; EG-NEXT: ALU 36, @11, KC0[CB0:0-32], KC1[] 1637; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T14.X, 0 1638; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 0 1639; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T12.X, 0 1640; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 1 1641; EG-NEXT: CF_END 1642; EG-NEXT: Fetch clause starting at 8: 1643; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1 1644; EG-NEXT: ALU clause starting at 10: 1645; EG-NEXT: MOV * T7.X, KC0[2].Z, 1646; EG-NEXT: ALU clause starting at 11: 1647; EG-NEXT: BFE_UINT * T8.W, T7.X, literal.x, 1, 1648; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 
1649; EG-NEXT: BFE_UINT * T8.Z, T7.X, literal.x, 1, 1650; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1651; EG-NEXT: BFE_UINT T8.Y, T7.X, 1, 1, 1652; EG-NEXT: BFE_UINT * T9.W, T7.X, literal.x, 1, 1653; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 1654; EG-NEXT: AND_INT T8.X, T7.X, 1, 1655; EG-NEXT: BFE_UINT T9.Z, T7.X, literal.x, 1, 1656; EG-NEXT: LSHR * T10.X, KC0[2].Y, literal.y, 1657; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45) 1658; EG-NEXT: BFE_UINT T9.Y, T7.X, literal.x, 1, 1659; EG-NEXT: BFE_UINT * T11.W, T7.X, literal.y, 1, 1660; EG-NEXT: 5(7.006492e-45), 11(1.541428e-44) 1661; EG-NEXT: BFE_UINT T9.X, T7.X, literal.x, 1, 1662; EG-NEXT: BFE_UINT T11.Z, T7.X, literal.y, 1, 1663; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 1664; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44) 1665; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1666; EG-NEXT: LSHR T12.X, PV.W, literal.x, 1667; EG-NEXT: BFE_UINT T11.Y, T7.X, literal.y, 1, 1668; EG-NEXT: LSHR * T7.W, T7.X, literal.z, 1669; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 1670; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00) 1671; EG-NEXT: BFE_UINT T11.X, T7.X, literal.x, 1, 1672; EG-NEXT: BFE_UINT T7.Z, T7.X, literal.y, 1, 1673; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 1674; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44) 1675; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1676; EG-NEXT: LSHR T13.X, PV.W, literal.x, 1677; EG-NEXT: BFE_UINT * T7.Y, T7.X, literal.y, 1, 1678; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44) 1679; EG-NEXT: BFE_UINT T7.X, T7.X, literal.x, 1, 1680; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1681; EG-NEXT: 12(1.681558e-44), 48(6.726233e-44) 1682; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 1683; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1684; 1685; GFX12-LABEL: constant_zextload_v16i1_to_v16i32: 1686; GFX12: ; %bb.0: 1687; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1688; GFX12-NEXT: v_mov_b32_e32 v16, 0 1689; GFX12-NEXT: s_wait_kmcnt 0x0 1690; GFX12-NEXT: global_load_u16 v0, v16, s[2:3] 1691; GFX12-NEXT: s_wait_loadcnt 
0x0 1692; GFX12-NEXT: v_readfirstlane_b32 s2, v0 1693; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1694; GFX12-NEXT: s_and_b32 s6, 0xffff, s2 1695; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003 1696; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001 1697; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10007 1698; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10009 1699; GFX12-NEXT: s_bfe_u32 s8, s2, 0x1000d 1700; GFX12-NEXT: s_and_b32 s9, s2, 1 1701; GFX12-NEXT: v_mov_b32_e32 v1, s8 1702; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000a 1703; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1000c 1704; GFX12-NEXT: s_bfe_u32 s11, s6, 0x10005 1705; GFX12-NEXT: s_bfe_u32 s12, s6, 0x1000b 1706; GFX12-NEXT: s_lshr_b32 s13, s6, 15 1707; GFX12-NEXT: s_bfe_u32 s14, s6, 0x10002 1708; GFX12-NEXT: s_bfe_u32 s15, s6, 0x10006 1709; GFX12-NEXT: s_bfe_u32 s16, s6, 0x10004 1710; GFX12-NEXT: s_bfe_u32 s17, s6, 0x10008 1711; GFX12-NEXT: s_bfe_u32 s6, s6, 0x1000e 1712; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s7 1713; GFX12-NEXT: v_dual_mov_b32 v15, s3 :: v_dual_mov_b32 v2, s6 1714; GFX12-NEXT: v_dual_mov_b32 v3, s13 :: v_dual_mov_b32 v4, s17 1715; GFX12-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v11, s5 1716; GFX12-NEXT: v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v8, s16 1717; GFX12-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v10, s15 1718; GFX12-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v13, s4 1719; GFX12-NEXT: v_mov_b32_e32 v14, s14 1720; GFX12-NEXT: s_clause 0x3 1721; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 1722; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 1723; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 1724; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 1725; GFX12-NEXT: s_endpgm 1726 %load = load <16 x i1>, ptr addrspace(4) %in 1727 %ext = zext <16 x i1> %load to <16 x i32> 1728 store <16 x i32> %ext, ptr addrspace(1) %out 1729 ret void 1730} 1731 1732define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) 
#0 { 1733; GFX6-LABEL: constant_sextload_v16i1_to_v16i32: 1734; GFX6: ; %bb.0: 1735; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 1736; GFX6-NEXT: s_mov_b32 s3, 0xf000 1737; GFX6-NEXT: s_mov_b32 s2, -1 1738; GFX6-NEXT: s_mov_b32 s10, s2 1739; GFX6-NEXT: s_mov_b32 s11, s3 1740; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1741; GFX6-NEXT: s_mov_b32 s8, s6 1742; GFX6-NEXT: s_mov_b32 s9, s7 1743; GFX6-NEXT: buffer_load_ushort v12, off, s[8:11], 0 1744; GFX6-NEXT: s_mov_b32 s0, s4 1745; GFX6-NEXT: s_mov_b32 s1, s5 1746; GFX6-NEXT: s_waitcnt vmcnt(0) 1747; GFX6-NEXT: v_bfe_i32 v3, v12, 3, 1 1748; GFX6-NEXT: v_bfe_i32 v2, v12, 2, 1 1749; GFX6-NEXT: v_bfe_i32 v1, v12, 1, 1 1750; GFX6-NEXT: v_bfe_i32 v0, v12, 0, 1 1751; GFX6-NEXT: v_bfe_i32 v7, v12, 7, 1 1752; GFX6-NEXT: v_bfe_i32 v6, v12, 6, 1 1753; GFX6-NEXT: v_bfe_i32 v5, v12, 5, 1 1754; GFX6-NEXT: v_bfe_i32 v4, v12, 4, 1 1755; GFX6-NEXT: v_bfe_i32 v11, v12, 11, 1 1756; GFX6-NEXT: v_bfe_i32 v10, v12, 10, 1 1757; GFX6-NEXT: v_bfe_i32 v9, v12, 9, 1 1758; GFX6-NEXT: v_bfe_i32 v8, v12, 8, 1 1759; GFX6-NEXT: v_bfe_i32 v15, v12, 15, 1 1760; GFX6-NEXT: v_bfe_i32 v14, v12, 14, 1 1761; GFX6-NEXT: v_bfe_i32 v13, v12, 13, 1 1762; GFX6-NEXT: v_bfe_i32 v12, v12, 12, 1 1763; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 1764; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 1765; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 1766; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1767; GFX6-NEXT: s_endpgm 1768; 1769; GFX8-LABEL: constant_sextload_v16i1_to_v16i32: 1770; GFX8: ; %bb.0: 1771; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1772; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1773; GFX8-NEXT: v_mov_b32_e32 v0, s2 1774; GFX8-NEXT: v_mov_b32_e32 v1, s3 1775; GFX8-NEXT: flat_load_ushort v18, v[0:1] 1776; GFX8-NEXT: s_add_u32 s2, s0, 48 1777; GFX8-NEXT: s_addc_u32 s3, s1, 0 1778; GFX8-NEXT: v_mov_b32_e32 v9, s3 1779; GFX8-NEXT: v_mov_b32_e32 v8, s2 1780; GFX8-NEXT: s_add_u32 s2, s0, 32 1781; 
GFX8-NEXT: v_mov_b32_e32 v13, s1 1782; GFX8-NEXT: s_addc_u32 s3, s1, 0 1783; GFX8-NEXT: v_mov_b32_e32 v12, s0 1784; GFX8-NEXT: s_add_u32 s0, s0, 16 1785; GFX8-NEXT: v_mov_b32_e32 v15, s3 1786; GFX8-NEXT: s_addc_u32 s1, s1, 0 1787; GFX8-NEXT: v_mov_b32_e32 v14, s2 1788; GFX8-NEXT: v_mov_b32_e32 v17, s1 1789; GFX8-NEXT: v_mov_b32_e32 v16, s0 1790; GFX8-NEXT: s_waitcnt vmcnt(0) 1791; GFX8-NEXT: v_bfe_i32 v7, v18, 15, 1 1792; GFX8-NEXT: v_bfe_i32 v6, v18, 14, 1 1793; GFX8-NEXT: v_bfe_i32 v5, v18, 13, 1 1794; GFX8-NEXT: v_bfe_i32 v4, v18, 12, 1 1795; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 1796; GFX8-NEXT: v_bfe_i32 v11, v18, 11, 1 1797; GFX8-NEXT: v_bfe_i32 v10, v18, 10, 1 1798; GFX8-NEXT: v_bfe_i32 v9, v18, 9, 1 1799; GFX8-NEXT: v_bfe_i32 v8, v18, 8, 1 1800; GFX8-NEXT: v_bfe_i32 v3, v18, 3, 1 1801; GFX8-NEXT: v_bfe_i32 v2, v18, 2, 1 1802; GFX8-NEXT: v_bfe_i32 v1, v18, 1, 1 1803; GFX8-NEXT: v_bfe_i32 v0, v18, 0, 1 1804; GFX8-NEXT: v_bfe_i32 v7, v18, 7, 1 1805; GFX8-NEXT: v_bfe_i32 v6, v18, 6, 1 1806; GFX8-NEXT: v_bfe_i32 v5, v18, 5, 1 1807; GFX8-NEXT: v_bfe_i32 v4, v18, 4, 1 1808; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 1809; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[4:7] 1810; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 1811; GFX8-NEXT: s_endpgm 1812; 1813; EG-LABEL: constant_sextload_v16i1_to_v16i32: 1814; EG: ; %bb.0: 1815; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 1816; EG-NEXT: TEX 0 @8 1817; EG-NEXT: ALU 51, @11, KC0[CB0:0-32], KC1[] 1818; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T7.X, 0 1819; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T14.X, 0 1820; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T11.X, 0 1821; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T9.X, 1 1822; EG-NEXT: CF_END 1823; EG-NEXT: Fetch clause starting at 8: 1824; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1 1825; EG-NEXT: ALU clause starting at 10: 1826; EG-NEXT: MOV * T7.X, KC0[2].Z, 1827; EG-NEXT: ALU clause starting at 11: 1828; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 
1829; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 1830; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, 1, 1831; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 1832; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00) 1833; EG-NEXT: BFE_INT T8.Z, PS, 0.0, 1, 1834; EG-NEXT: LSHR T0.W, T7.X, literal.x, 1835; EG-NEXT: LSHR * T1.W, T7.X, literal.y, 1836; EG-NEXT: 11(1.541428e-44), 5(7.006492e-45) 1837; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x, 1838; EG-NEXT: BFE_INT T8.Y, PS, 0.0, 1, 1839; EG-NEXT: LSHR T0.Z, T7.X, literal.y, 1840; EG-NEXT: BFE_INT T10.W, PV.W, 0.0, 1, 1841; EG-NEXT: LSHR * T0.W, T7.X, literal.z, 1842; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44) 1843; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 1844; EG-NEXT: BFE_INT T8.X, PS, 0.0, 1, 1845; EG-NEXT: BFE_INT T10.Z, PV.Z, 0.0, 1, 1846; EG-NEXT: LSHR T0.W, T7.X, literal.x, 1847; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 1848; EG-NEXT: 9(1.261169e-44), 16(2.242078e-44) 1849; EG-NEXT: LSHR T11.X, PS, literal.x, 1850; EG-NEXT: BFE_INT T10.Y, PV.W, 0.0, 1, 1851; EG-NEXT: LSHR T0.W, T7.X, literal.y, 1852; EG-NEXT: LSHR * T1.W, T7.X, literal.z, 1853; EG-NEXT: 2(2.802597e-45), 15(2.101948e-44) 1854; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1855; EG-NEXT: BFE_INT T10.X, PS, 0.0, 1, 1856; EG-NEXT: BFE_INT T12.W, PV.W, 0.0, 1, 1857; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 1858; EG-NEXT: 14(1.961818e-44), 0(0.000000e+00) 1859; EG-NEXT: BFE_INT T13.X, T7.X, 0.0, 1, 1860; EG-NEXT: LSHR T0.Y, T7.X, literal.x, 1861; EG-NEXT: BFE_INT T12.Z, PS, 0.0, 1, 1862; EG-NEXT: LSHR T0.W, T7.X, literal.y, 1863; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 1864; EG-NEXT: 3(4.203895e-45), 13(1.821688e-44) 1865; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1866; EG-NEXT: LSHR T14.X, PS, literal.x, 1867; EG-NEXT: BFE_INT T12.Y, PV.W, 0.0, 1, 1868; EG-NEXT: LSHR T0.Z, T7.X, literal.x, 1869; EG-NEXT: BFE_INT T13.W, PV.Y, 0.0, 1, 1870; EG-NEXT: LSHR * T0.W, T7.X, literal.y, 1871; EG-NEXT: 2(2.802597e-45), 12(1.681558e-44) 1872; EG-NEXT: BFE_INT T12.X, PS, 0.0, 1, 1873; 
EG-NEXT: BFE_INT T13.Z, PV.Z, 0.0, 1, 1874; EG-NEXT: LSHR T0.W, T7.X, 1, 1875; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 1876; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 1877; EG-NEXT: LSHR T7.X, PS, literal.x, 1878; EG-NEXT: BFE_INT * T13.Y, PV.W, 0.0, 1, 1879; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1880; 1881; GFX12-LABEL: constant_sextload_v16i1_to_v16i32: 1882; GFX12: ; %bb.0: 1883; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1884; GFX12-NEXT: s_wait_kmcnt 0x0 1885; GFX12-NEXT: s_load_u16 s2, s[2:3], 0x0 1886; GFX12-NEXT: s_wait_kmcnt 0x0 1887; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003 1888; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002 1889; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001 1890; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000 1891; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007 1892; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006 1893; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10005 1894; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10004 1895; GFX12-NEXT: s_bfe_i32 s11, s2, 0x1000b 1896; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000a 1897; GFX12-NEXT: s_bfe_i32 s13, s2, 0x10009 1898; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10008 1899; GFX12-NEXT: s_bfe_i32 s15, s2, 0x1000f 1900; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000e 1901; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000c 1902; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1000d 1903; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1904; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s2 1905; GFX12-NEXT: v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s15 1906; GFX12-NEXT: v_dual_mov_b32 v2, s16 :: v_dual_mov_b32 v5, s13 1907; GFX12-NEXT: v_dual_mov_b32 v4, s14 :: v_dual_mov_b32 v7, s11 1908; GFX12-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v9, s9 1909; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s7 1910; GFX12-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v13, s5 1911; GFX12-NEXT: v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s3 1912; GFX12-NEXT: v_mov_b32_e32 v14, s4 1913; GFX12-NEXT: s_clause 0x3 1914; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 1915; 
GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 1916; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 1917; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 1918; GFX12-NEXT: s_endpgm 1919 %load = load <16 x i1>, ptr addrspace(4) %in 1920 %ext = sext <16 x i1> %load to <16 x i32> 1921 store <16 x i32> %ext, ptr addrspace(1) %out 1922 ret void 1923} 1924 1925define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 1926; GFX6-LABEL: constant_zextload_v32i1_to_v32i32: 1927; GFX6: ; %bb.0: 1928; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1929; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1930; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0 1931; GFX6-NEXT: s_mov_b32 s3, 0xf000 1932; GFX6-NEXT: s_mov_b32 s2, -1 1933; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1934; GFX6-NEXT: s_bfe_u32 s5, s4, 0x10003 1935; GFX6-NEXT: s_bfe_u32 s6, s4, 0x10001 1936; GFX6-NEXT: s_bfe_u32 s7, s4, 0x10007 1937; GFX6-NEXT: s_bfe_u32 s8, s4, 0x10005 1938; GFX6-NEXT: s_bfe_u32 s9, s4, 0x1000b 1939; GFX6-NEXT: s_bfe_u32 s10, s4, 0x10009 1940; GFX6-NEXT: s_bfe_u32 s11, s4, 0x1000f 1941; GFX6-NEXT: s_bfe_u32 s12, s4, 0x1000d 1942; GFX6-NEXT: s_bfe_u32 s13, s4, 0x10013 1943; GFX6-NEXT: s_bfe_u32 s14, s4, 0x10011 1944; GFX6-NEXT: s_bfe_u32 s15, s4, 0x10017 1945; GFX6-NEXT: s_bfe_u32 s16, s4, 0x10015 1946; GFX6-NEXT: s_bfe_u32 s17, s4, 0x1001b 1947; GFX6-NEXT: s_bfe_u32 s18, s4, 0x10019 1948; GFX6-NEXT: s_lshr_b32 s19, s4, 31 1949; GFX6-NEXT: s_bfe_u32 s20, s4, 0x1001d 1950; GFX6-NEXT: s_and_b32 s21, s4, 1 1951; GFX6-NEXT: s_bfe_u32 s22, s4, 0x10002 1952; GFX6-NEXT: s_bfe_u32 s23, s4, 0x10006 1953; GFX6-NEXT: s_bfe_u32 s24, s4, 0x10004 1954; GFX6-NEXT: s_bfe_u32 s25, s4, 0x1000a 1955; GFX6-NEXT: s_bfe_u32 s26, s4, 0x10008 1956; GFX6-NEXT: s_bfe_u32 s27, s4, 0x1000e 1957; GFX6-NEXT: s_bfe_u32 s28, s4, 0x1000c 1958; GFX6-NEXT: s_bfe_u32 s29, s4, 0x10012 1959; GFX6-NEXT: s_bfe_u32 s30, s4, 0x10010 1960; GFX6-NEXT: s_bfe_u32 s31, s4, 
0x10016 1961; GFX6-NEXT: s_bfe_u32 s33, s4, 0x10014 1962; GFX6-NEXT: s_bfe_u32 s34, s4, 0x1001a 1963; GFX6-NEXT: s_bfe_u32 s35, s4, 0x1001e 1964; GFX6-NEXT: s_bfe_u32 s36, s4, 0x1001c 1965; GFX6-NEXT: s_bfe_u32 s4, s4, 0x10018 1966; GFX6-NEXT: v_mov_b32_e32 v0, s36 1967; GFX6-NEXT: v_mov_b32_e32 v1, s20 1968; GFX6-NEXT: v_mov_b32_e32 v2, s35 1969; GFX6-NEXT: v_mov_b32_e32 v3, s19 1970; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 1971; GFX6-NEXT: s_waitcnt expcnt(0) 1972; GFX6-NEXT: v_mov_b32_e32 v0, s4 1973; GFX6-NEXT: v_mov_b32_e32 v1, s18 1974; GFX6-NEXT: v_mov_b32_e32 v2, s34 1975; GFX6-NEXT: v_mov_b32_e32 v3, s17 1976; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 1977; GFX6-NEXT: s_waitcnt expcnt(0) 1978; GFX6-NEXT: v_mov_b32_e32 v0, s33 1979; GFX6-NEXT: v_mov_b32_e32 v1, s16 1980; GFX6-NEXT: v_mov_b32_e32 v2, s31 1981; GFX6-NEXT: v_mov_b32_e32 v3, s15 1982; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 1983; GFX6-NEXT: s_waitcnt expcnt(0) 1984; GFX6-NEXT: v_mov_b32_e32 v0, s30 1985; GFX6-NEXT: v_mov_b32_e32 v1, s14 1986; GFX6-NEXT: v_mov_b32_e32 v2, s29 1987; GFX6-NEXT: v_mov_b32_e32 v3, s13 1988; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 1989; GFX6-NEXT: s_waitcnt expcnt(0) 1990; GFX6-NEXT: v_mov_b32_e32 v0, s28 1991; GFX6-NEXT: v_mov_b32_e32 v1, s12 1992; GFX6-NEXT: v_mov_b32_e32 v2, s27 1993; GFX6-NEXT: v_mov_b32_e32 v3, s11 1994; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1995; GFX6-NEXT: s_waitcnt expcnt(0) 1996; GFX6-NEXT: v_mov_b32_e32 v0, s26 1997; GFX6-NEXT: v_mov_b32_e32 v1, s10 1998; GFX6-NEXT: v_mov_b32_e32 v2, s25 1999; GFX6-NEXT: v_mov_b32_e32 v3, s9 2000; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2001; GFX6-NEXT: s_waitcnt expcnt(0) 2002; GFX6-NEXT: v_mov_b32_e32 v0, s24 2003; GFX6-NEXT: v_mov_b32_e32 v1, s8 2004; GFX6-NEXT: v_mov_b32_e32 v2, s23 2005; GFX6-NEXT: v_mov_b32_e32 v3, s7 2006; GFX6-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2007; GFX6-NEXT: s_waitcnt expcnt(0) 2008; GFX6-NEXT: v_mov_b32_e32 v0, s21 2009; GFX6-NEXT: v_mov_b32_e32 v1, s6 2010; GFX6-NEXT: v_mov_b32_e32 v2, s22 2011; GFX6-NEXT: v_mov_b32_e32 v3, s5 2012; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2013; GFX6-NEXT: s_endpgm 2014; 2015; GFX8-LABEL: constant_zextload_v32i1_to_v32i32: 2016; GFX8: ; %bb.0: 2017; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2018; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2019; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0 2020; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2021; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10003 2022; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10001 2023; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10007 2024; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10005 2025; GFX8-NEXT: s_bfe_u32 s8, s2, 0x1000b 2026; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10009 2027; GFX8-NEXT: s_bfe_u32 s10, s2, 0x1000f 2028; GFX8-NEXT: s_bfe_u32 s11, s2, 0x1000d 2029; GFX8-NEXT: s_bfe_u32 s12, s2, 0x10013 2030; GFX8-NEXT: s_bfe_u32 s13, s2, 0x10011 2031; GFX8-NEXT: s_bfe_u32 s14, s2, 0x10017 2032; GFX8-NEXT: s_bfe_u32 s15, s2, 0x1001b 2033; GFX8-NEXT: s_bfe_u32 s16, s2, 0x10019 2034; GFX8-NEXT: s_lshr_b32 s3, s2, 31 2035; GFX8-NEXT: s_bfe_u32 s17, s2, 0x1001d 2036; GFX8-NEXT: s_and_b32 s18, s2, 1 2037; GFX8-NEXT: s_bfe_u32 s19, s2, 0x10002 2038; GFX8-NEXT: s_bfe_u32 s20, s2, 0x10006 2039; GFX8-NEXT: s_bfe_u32 s21, s2, 0x10004 2040; GFX8-NEXT: s_bfe_u32 s22, s2, 0x1000a 2041; GFX8-NEXT: s_bfe_u32 s23, s2, 0x10008 2042; GFX8-NEXT: s_bfe_u32 s24, s2, 0x1000e 2043; GFX8-NEXT: s_bfe_u32 s25, s2, 0x1000c 2044; GFX8-NEXT: s_bfe_u32 s26, s2, 0x10012 2045; GFX8-NEXT: s_bfe_u32 s27, s2, 0x10010 2046; GFX8-NEXT: s_bfe_u32 s28, s2, 0x10016 2047; GFX8-NEXT: s_bfe_u32 s29, s2, 0x10015 2048; GFX8-NEXT: s_bfe_u32 s30, s2, 0x10014 2049; GFX8-NEXT: s_bfe_u32 s31, s2, 0x1001a 2050; GFX8-NEXT: s_bfe_u32 s33, s2, 0x10018 2051; GFX8-NEXT: s_bfe_u32 s34, s2, 0x1001e 2052; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1001c 2053; GFX8-NEXT: 
v_mov_b32_e32 v0, s2 2054; GFX8-NEXT: s_add_u32 s2, s0, 0x70 2055; GFX8-NEXT: v_mov_b32_e32 v3, s3 2056; GFX8-NEXT: s_addc_u32 s3, s1, 0 2057; GFX8-NEXT: v_mov_b32_e32 v5, s3 2058; GFX8-NEXT: v_mov_b32_e32 v4, s2 2059; GFX8-NEXT: s_add_u32 s2, s0, 0x60 2060; GFX8-NEXT: v_mov_b32_e32 v1, s17 2061; GFX8-NEXT: v_mov_b32_e32 v2, s34 2062; GFX8-NEXT: s_addc_u32 s3, s1, 0 2063; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2064; GFX8-NEXT: v_mov_b32_e32 v5, s3 2065; GFX8-NEXT: v_mov_b32_e32 v4, s2 2066; GFX8-NEXT: s_add_u32 s2, s0, 0x50 2067; GFX8-NEXT: v_mov_b32_e32 v0, s33 2068; GFX8-NEXT: v_mov_b32_e32 v1, s16 2069; GFX8-NEXT: v_mov_b32_e32 v2, s31 2070; GFX8-NEXT: v_mov_b32_e32 v3, s15 2071; GFX8-NEXT: s_addc_u32 s3, s1, 0 2072; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2073; GFX8-NEXT: v_mov_b32_e32 v5, s3 2074; GFX8-NEXT: v_mov_b32_e32 v4, s2 2075; GFX8-NEXT: s_add_u32 s2, s0, 64 2076; GFX8-NEXT: v_mov_b32_e32 v0, s30 2077; GFX8-NEXT: v_mov_b32_e32 v1, s29 2078; GFX8-NEXT: v_mov_b32_e32 v2, s28 2079; GFX8-NEXT: v_mov_b32_e32 v3, s14 2080; GFX8-NEXT: s_addc_u32 s3, s1, 0 2081; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2082; GFX8-NEXT: v_mov_b32_e32 v5, s3 2083; GFX8-NEXT: v_mov_b32_e32 v4, s2 2084; GFX8-NEXT: s_add_u32 s2, s0, 48 2085; GFX8-NEXT: v_mov_b32_e32 v0, s27 2086; GFX8-NEXT: v_mov_b32_e32 v1, s13 2087; GFX8-NEXT: v_mov_b32_e32 v2, s26 2088; GFX8-NEXT: v_mov_b32_e32 v3, s12 2089; GFX8-NEXT: s_addc_u32 s3, s1, 0 2090; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2091; GFX8-NEXT: v_mov_b32_e32 v5, s3 2092; GFX8-NEXT: v_mov_b32_e32 v4, s2 2093; GFX8-NEXT: s_add_u32 s2, s0, 32 2094; GFX8-NEXT: v_mov_b32_e32 v0, s25 2095; GFX8-NEXT: v_mov_b32_e32 v1, s11 2096; GFX8-NEXT: v_mov_b32_e32 v2, s24 2097; GFX8-NEXT: v_mov_b32_e32 v3, s10 2098; GFX8-NEXT: s_addc_u32 s3, s1, 0 2099; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2100; GFX8-NEXT: v_mov_b32_e32 v5, s3 2101; GFX8-NEXT: v_mov_b32_e32 v4, s2 2102; GFX8-NEXT: s_add_u32 s2, s0, 16 2103; GFX8-NEXT: 
v_mov_b32_e32 v0, s23 2104; GFX8-NEXT: v_mov_b32_e32 v1, s9 2105; GFX8-NEXT: v_mov_b32_e32 v2, s22 2106; GFX8-NEXT: v_mov_b32_e32 v3, s8 2107; GFX8-NEXT: s_addc_u32 s3, s1, 0 2108; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2109; GFX8-NEXT: v_mov_b32_e32 v5, s3 2110; GFX8-NEXT: v_mov_b32_e32 v0, s21 2111; GFX8-NEXT: v_mov_b32_e32 v1, s7 2112; GFX8-NEXT: v_mov_b32_e32 v2, s20 2113; GFX8-NEXT: v_mov_b32_e32 v3, s6 2114; GFX8-NEXT: v_mov_b32_e32 v4, s2 2115; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2116; GFX8-NEXT: v_mov_b32_e32 v5, s1 2117; GFX8-NEXT: v_mov_b32_e32 v0, s18 2118; GFX8-NEXT: v_mov_b32_e32 v1, s5 2119; GFX8-NEXT: v_mov_b32_e32 v2, s19 2120; GFX8-NEXT: v_mov_b32_e32 v3, s4 2121; GFX8-NEXT: v_mov_b32_e32 v4, s0 2122; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2123; GFX8-NEXT: s_endpgm 2124; 2125; EG-LABEL: constant_zextload_v32i1_to_v32i32: 2126; EG: ; %bb.0: 2127; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] 2128; EG-NEXT: TEX 0 @12 2129; EG-NEXT: ALU 76, @15, KC0[CB0:0-32], KC1[] 2130; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T26.X, 0 2131; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 0 2132; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T24.X, 0 2133; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 0 2134; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T20.X, 0 2135; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0 2136; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T16.X, 0 2137; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 1 2138; EG-NEXT: CF_END 2139; EG-NEXT: Fetch clause starting at 12: 2140; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1 2141; EG-NEXT: ALU clause starting at 14: 2142; EG-NEXT: MOV * T11.X, KC0[2].Z, 2143; EG-NEXT: ALU clause starting at 15: 2144; EG-NEXT: BFE_UINT * T12.W, T11.X, literal.x, 1, 2145; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 2146; EG-NEXT: BFE_UINT * T12.Z, T11.X, literal.x, 1, 2147; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2148; EG-NEXT: BFE_UINT T12.Y, T11.X, 1, 1, 2149; 
EG-NEXT: BFE_UINT * T13.W, T11.X, literal.x, 1, 2150; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 2151; EG-NEXT: AND_INT T12.X, T11.X, 1, 2152; EG-NEXT: BFE_UINT T13.Z, T11.X, literal.x, 1, 2153; EG-NEXT: LSHR * T14.X, KC0[2].Y, literal.y, 2154; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45) 2155; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, 1, 2156; EG-NEXT: BFE_UINT * T15.W, T11.X, literal.y, 1, 2157; EG-NEXT: 5(7.006492e-45), 11(1.541428e-44) 2158; EG-NEXT: BFE_UINT T13.X, T11.X, literal.x, 1, 2159; EG-NEXT: BFE_UINT T15.Z, T11.X, literal.y, 1, 2160; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2161; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44) 2162; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2163; EG-NEXT: LSHR T16.X, PV.W, literal.x, 2164; EG-NEXT: BFE_UINT T15.Y, T11.X, literal.y, 1, 2165; EG-NEXT: BFE_UINT * T17.W, T11.X, literal.z, 1, 2166; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 2167; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00) 2168; EG-NEXT: BFE_UINT T15.X, T11.X, literal.x, 1, 2169; EG-NEXT: BFE_UINT T17.Z, T11.X, literal.y, 1, 2170; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2171; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44) 2172; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 2173; EG-NEXT: LSHR T18.X, PV.W, literal.x, 2174; EG-NEXT: BFE_UINT T17.Y, T11.X, literal.y, 1, 2175; EG-NEXT: BFE_UINT * T19.W, T11.X, literal.z, 1, 2176; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44) 2177; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00) 2178; EG-NEXT: BFE_UINT T17.X, T11.X, literal.x, 1, 2179; EG-NEXT: BFE_UINT T19.Z, T11.X, literal.y, 1, 2180; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2181; EG-NEXT: 12(1.681558e-44), 18(2.522337e-44) 2182; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2183; EG-NEXT: LSHR T20.X, PV.W, literal.x, 2184; EG-NEXT: BFE_UINT T19.Y, T11.X, literal.y, 1, 2185; EG-NEXT: BFE_UINT * T21.W, T11.X, literal.z, 1, 2186; EG-NEXT: 2(2.802597e-45), 17(2.382207e-44) 2187; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) 2188; EG-NEXT: BFE_UINT T19.X, T11.X, literal.x, 1, 
2189; EG-NEXT: BFE_UINT T21.Z, T11.X, literal.y, 1, 2190; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2191; EG-NEXT: 16(2.242078e-44), 22(3.082857e-44) 2192; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 2193; EG-NEXT: LSHR T22.X, PV.W, literal.x, 2194; EG-NEXT: BFE_UINT T21.Y, T11.X, literal.y, 1, 2195; EG-NEXT: BFE_UINT * T23.W, T11.X, literal.z, 1, 2196; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44) 2197; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00) 2198; EG-NEXT: BFE_UINT T21.X, T11.X, literal.x, 1, 2199; EG-NEXT: BFE_UINT T23.Z, T11.X, literal.y, 1, 2200; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2201; EG-NEXT: 20(2.802597e-44), 26(3.643376e-44) 2202; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 2203; EG-NEXT: LSHR T24.X, PV.W, literal.x, 2204; EG-NEXT: BFE_UINT T23.Y, T11.X, literal.y, 1, 2205; EG-NEXT: LSHR * T11.W, T11.X, literal.z, 2206; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44) 2207; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 2208; EG-NEXT: BFE_UINT T23.X, T11.X, literal.x, 1, 2209; EG-NEXT: BFE_UINT T11.Z, T11.X, literal.y, 1, 2210; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2211; EG-NEXT: 24(3.363116e-44), 30(4.203895e-44) 2212; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 2213; EG-NEXT: LSHR T25.X, PV.W, literal.x, 2214; EG-NEXT: BFE_UINT * T11.Y, T11.X, literal.y, 1, 2215; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44) 2216; EG-NEXT: BFE_UINT T11.X, T11.X, literal.x, 1, 2217; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2218; EG-NEXT: 28(3.923636e-44), 112(1.569454e-43) 2219; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 2220; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2221; 2222; GFX12-LABEL: constant_zextload_v32i1_to_v32i32: 2223; GFX12: ; %bb.0: 2224; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2225; GFX12-NEXT: s_wait_kmcnt 0x0 2226; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 2227; GFX12-NEXT: s_wait_kmcnt 0x0 2228; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003 2229; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001 2230; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10007 2231; GFX12-NEXT: 
s_bfe_u32 s6, s2, 0x10005 2232; GFX12-NEXT: s_bfe_u32 s7, s2, 0x1000b 2233; GFX12-NEXT: s_bfe_u32 s8, s2, 0x10009 2234; GFX12-NEXT: s_bfe_u32 s9, s2, 0x1000f 2235; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000d 2236; GFX12-NEXT: s_bfe_u32 s11, s2, 0x10013 2237; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10011 2238; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10017 2239; GFX12-NEXT: s_bfe_u32 s14, s2, 0x1001b 2240; GFX12-NEXT: s_bfe_u32 s15, s2, 0x10019 2241; GFX12-NEXT: s_lshr_b32 s16, s2, 31 2242; GFX12-NEXT: s_bfe_u32 s17, s2, 0x1001d 2243; GFX12-NEXT: s_and_b32 s18, s2, 1 2244; GFX12-NEXT: s_bfe_u32 s19, s2, 0x10002 2245; GFX12-NEXT: s_bfe_u32 s20, s2, 0x10006 2246; GFX12-NEXT: s_bfe_u32 s21, s2, 0x10004 2247; GFX12-NEXT: s_bfe_u32 s22, s2, 0x1000a 2248; GFX12-NEXT: s_bfe_u32 s23, s2, 0x10008 2249; GFX12-NEXT: s_bfe_u32 s24, s2, 0x1000e 2250; GFX12-NEXT: s_bfe_u32 s25, s2, 0x1000c 2251; GFX12-NEXT: s_bfe_u32 s26, s2, 0x10012 2252; GFX12-NEXT: s_bfe_u32 s27, s2, 0x10010 2253; GFX12-NEXT: s_bfe_u32 s28, s2, 0x10016 2254; GFX12-NEXT: s_bfe_u32 s29, s2, 0x10015 2255; GFX12-NEXT: s_bfe_u32 s30, s2, 0x10014 2256; GFX12-NEXT: s_bfe_u32 s31, s2, 0x1001a 2257; GFX12-NEXT: s_bfe_u32 s33, s2, 0x10018 2258; GFX12-NEXT: s_bfe_u32 s34, s2, 0x1001c 2259; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1001e 2260; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s17 2261; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s16 2262; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s15 2263; GFX12-NEXT: v_dual_mov_b32 v4, s33 :: v_dual_mov_b32 v7, s14 2264; GFX12-NEXT: v_dual_mov_b32 v6, s31 :: v_dual_mov_b32 v9, s29 2265; GFX12-NEXT: v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s13 2266; GFX12-NEXT: v_mov_b32_e32 v10, s28 2267; GFX12-NEXT: s_clause 0x1 2268; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112 2269; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96 2270; GFX12-NEXT: v_dual_mov_b32 v1, s12 :: v_dual_mov_b32 v0, s27 2271; GFX12-NEXT: v_dual_mov_b32 v3, s11 :: 
v_dual_mov_b32 v2, s26 2272; GFX12-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s25 2273; GFX12-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v6, s24 2274; GFX12-NEXT: v_dual_mov_b32 v13, s8 :: v_dual_mov_b32 v12, s23 2275; GFX12-NEXT: v_dual_mov_b32 v15, s7 :: v_dual_mov_b32 v14, s22 2276; GFX12-NEXT: v_dual_mov_b32 v17, s6 :: v_dual_mov_b32 v16, s21 2277; GFX12-NEXT: v_dual_mov_b32 v19, s5 :: v_dual_mov_b32 v18, s20 2278; GFX12-NEXT: v_dual_mov_b32 v21, s4 :: v_dual_mov_b32 v20, s18 2279; GFX12-NEXT: v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s19 2280; GFX12-NEXT: s_clause 0x5 2281; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80 2282; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64 2283; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48 2284; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 2285; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 2286; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 2287; GFX12-NEXT: s_endpgm 2288 %load = load <32 x i1>, ptr addrspace(4) %in 2289 %ext = zext <32 x i1> %load to <32 x i32> 2290 store <32 x i32> %ext, ptr addrspace(1) %out 2291 ret void 2292} 2293 2294define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 2295; GFX6-LABEL: constant_sextload_v32i1_to_v32i32: 2296; GFX6: ; %bb.0: 2297; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2298; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2299; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0 2300; GFX6-NEXT: s_mov_b32 s3, 0xf000 2301; GFX6-NEXT: s_mov_b32 s2, -1 2302; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2303; GFX6-NEXT: s_bfe_i32 s5, s4, 0x10003 2304; GFX6-NEXT: s_bfe_i32 s6, s4, 0x10002 2305; GFX6-NEXT: s_bfe_i32 s7, s4, 0x10001 2306; GFX6-NEXT: s_bfe_i32 s8, s4, 0x10000 2307; GFX6-NEXT: s_bfe_i32 s9, s4, 0x10007 2308; GFX6-NEXT: s_bfe_i32 s10, s4, 0x10006 2309; GFX6-NEXT: s_bfe_i32 s11, s4, 0x10005 2310; GFX6-NEXT: s_bfe_i32 s12, s4, 0x10004 2311; GFX6-NEXT: 
s_bfe_i32 s13, s4, 0x1000b 2312; GFX6-NEXT: s_bfe_i32 s14, s4, 0x1000a 2313; GFX6-NEXT: s_bfe_i32 s15, s4, 0x10009 2314; GFX6-NEXT: s_bfe_i32 s16, s4, 0x10008 2315; GFX6-NEXT: s_bfe_i32 s17, s4, 0x1000f 2316; GFX6-NEXT: s_bfe_i32 s18, s4, 0x1000e 2317; GFX6-NEXT: s_bfe_i32 s19, s4, 0x1000d 2318; GFX6-NEXT: s_bfe_i32 s20, s4, 0x1000c 2319; GFX6-NEXT: s_bfe_i32 s21, s4, 0x10013 2320; GFX6-NEXT: s_bfe_i32 s22, s4, 0x10012 2321; GFX6-NEXT: s_bfe_i32 s23, s4, 0x10011 2322; GFX6-NEXT: s_bfe_i32 s24, s4, 0x10010 2323; GFX6-NEXT: s_bfe_i32 s25, s4, 0x10017 2324; GFX6-NEXT: s_bfe_i32 s26, s4, 0x10016 2325; GFX6-NEXT: s_bfe_i32 s27, s4, 0x10015 2326; GFX6-NEXT: s_bfe_i32 s28, s4, 0x10014 2327; GFX6-NEXT: s_bfe_i32 s29, s4, 0x1001b 2328; GFX6-NEXT: s_bfe_i32 s30, s4, 0x1001a 2329; GFX6-NEXT: s_bfe_i32 s31, s4, 0x10019 2330; GFX6-NEXT: s_ashr_i32 s33, s4, 31 2331; GFX6-NEXT: s_bfe_i32 s34, s4, 0x1001e 2332; GFX6-NEXT: s_bfe_i32 s35, s4, 0x1001d 2333; GFX6-NEXT: s_bfe_i32 s36, s4, 0x1001c 2334; GFX6-NEXT: s_bfe_i32 s4, s4, 0x10018 2335; GFX6-NEXT: v_mov_b32_e32 v0, s36 2336; GFX6-NEXT: v_mov_b32_e32 v1, s35 2337; GFX6-NEXT: v_mov_b32_e32 v2, s34 2338; GFX6-NEXT: v_mov_b32_e32 v3, s33 2339; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2340; GFX6-NEXT: s_waitcnt expcnt(0) 2341; GFX6-NEXT: v_mov_b32_e32 v0, s4 2342; GFX6-NEXT: v_mov_b32_e32 v1, s31 2343; GFX6-NEXT: v_mov_b32_e32 v2, s30 2344; GFX6-NEXT: v_mov_b32_e32 v3, s29 2345; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2346; GFX6-NEXT: s_waitcnt expcnt(0) 2347; GFX6-NEXT: v_mov_b32_e32 v0, s28 2348; GFX6-NEXT: v_mov_b32_e32 v1, s27 2349; GFX6-NEXT: v_mov_b32_e32 v2, s26 2350; GFX6-NEXT: v_mov_b32_e32 v3, s25 2351; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2352; GFX6-NEXT: s_waitcnt expcnt(0) 2353; GFX6-NEXT: v_mov_b32_e32 v0, s24 2354; GFX6-NEXT: v_mov_b32_e32 v1, s23 2355; GFX6-NEXT: v_mov_b32_e32 v2, s22 2356; GFX6-NEXT: v_mov_b32_e32 v3, s21 2357; GFX6-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2358; GFX6-NEXT: s_waitcnt expcnt(0) 2359; GFX6-NEXT: v_mov_b32_e32 v0, s20 2360; GFX6-NEXT: v_mov_b32_e32 v1, s19 2361; GFX6-NEXT: v_mov_b32_e32 v2, s18 2362; GFX6-NEXT: v_mov_b32_e32 v3, s17 2363; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2364; GFX6-NEXT: s_waitcnt expcnt(0) 2365; GFX6-NEXT: v_mov_b32_e32 v0, s16 2366; GFX6-NEXT: v_mov_b32_e32 v1, s15 2367; GFX6-NEXT: v_mov_b32_e32 v2, s14 2368; GFX6-NEXT: v_mov_b32_e32 v3, s13 2369; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2370; GFX6-NEXT: s_waitcnt expcnt(0) 2371; GFX6-NEXT: v_mov_b32_e32 v0, s12 2372; GFX6-NEXT: v_mov_b32_e32 v1, s11 2373; GFX6-NEXT: v_mov_b32_e32 v2, s10 2374; GFX6-NEXT: v_mov_b32_e32 v3, s9 2375; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2376; GFX6-NEXT: s_waitcnt expcnt(0) 2377; GFX6-NEXT: v_mov_b32_e32 v0, s8 2378; GFX6-NEXT: v_mov_b32_e32 v1, s7 2379; GFX6-NEXT: v_mov_b32_e32 v2, s6 2380; GFX6-NEXT: v_mov_b32_e32 v3, s5 2381; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2382; GFX6-NEXT: s_endpgm 2383; 2384; GFX8-LABEL: constant_sextload_v32i1_to_v32i32: 2385; GFX8: ; %bb.0: 2386; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2387; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2388; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0 2389; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2390; GFX8-NEXT: s_bfe_i32 s4, s2, 0x10003 2391; GFX8-NEXT: s_bfe_i32 s5, s2, 0x10002 2392; GFX8-NEXT: s_bfe_i32 s6, s2, 0x10001 2393; GFX8-NEXT: s_bfe_i32 s7, s2, 0x10000 2394; GFX8-NEXT: s_bfe_i32 s8, s2, 0x10007 2395; GFX8-NEXT: s_bfe_i32 s9, s2, 0x10006 2396; GFX8-NEXT: s_bfe_i32 s10, s2, 0x10005 2397; GFX8-NEXT: s_bfe_i32 s11, s2, 0x10004 2398; GFX8-NEXT: s_bfe_i32 s12, s2, 0x1000b 2399; GFX8-NEXT: s_bfe_i32 s13, s2, 0x1000a 2400; GFX8-NEXT: s_bfe_i32 s14, s2, 0x10009 2401; GFX8-NEXT: s_bfe_i32 s15, s2, 0x10008 2402; GFX8-NEXT: s_bfe_i32 s16, s2, 0x1000f 2403; GFX8-NEXT: s_bfe_i32 s17, s2, 0x1000e 2404; 
GFX8-NEXT: s_bfe_i32 s18, s2, 0x1000d 2405; GFX8-NEXT: s_bfe_i32 s19, s2, 0x1000c 2406; GFX8-NEXT: s_bfe_i32 s20, s2, 0x10013 2407; GFX8-NEXT: s_bfe_i32 s21, s2, 0x10012 2408; GFX8-NEXT: s_bfe_i32 s22, s2, 0x10011 2409; GFX8-NEXT: s_bfe_i32 s23, s2, 0x10010 2410; GFX8-NEXT: s_bfe_i32 s24, s2, 0x10017 2411; GFX8-NEXT: s_bfe_i32 s25, s2, 0x10016 2412; GFX8-NEXT: s_bfe_i32 s26, s2, 0x10015 2413; GFX8-NEXT: s_bfe_i32 s27, s2, 0x10014 2414; GFX8-NEXT: s_bfe_i32 s28, s2, 0x1001b 2415; GFX8-NEXT: s_bfe_i32 s29, s2, 0x1001a 2416; GFX8-NEXT: s_bfe_i32 s30, s2, 0x10019 2417; GFX8-NEXT: s_bfe_i32 s31, s2, 0x10018 2418; GFX8-NEXT: s_ashr_i32 s3, s2, 31 2419; GFX8-NEXT: s_bfe_i32 s33, s2, 0x1001e 2420; GFX8-NEXT: s_bfe_i32 s34, s2, 0x1001d 2421; GFX8-NEXT: s_bfe_i32 s2, s2, 0x1001c 2422; GFX8-NEXT: v_mov_b32_e32 v0, s2 2423; GFX8-NEXT: s_add_u32 s2, s0, 0x70 2424; GFX8-NEXT: v_mov_b32_e32 v3, s3 2425; GFX8-NEXT: s_addc_u32 s3, s1, 0 2426; GFX8-NEXT: v_mov_b32_e32 v5, s3 2427; GFX8-NEXT: v_mov_b32_e32 v4, s2 2428; GFX8-NEXT: s_add_u32 s2, s0, 0x60 2429; GFX8-NEXT: v_mov_b32_e32 v1, s34 2430; GFX8-NEXT: v_mov_b32_e32 v2, s33 2431; GFX8-NEXT: s_addc_u32 s3, s1, 0 2432; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2433; GFX8-NEXT: v_mov_b32_e32 v5, s3 2434; GFX8-NEXT: v_mov_b32_e32 v4, s2 2435; GFX8-NEXT: s_add_u32 s2, s0, 0x50 2436; GFX8-NEXT: v_mov_b32_e32 v0, s31 2437; GFX8-NEXT: v_mov_b32_e32 v1, s30 2438; GFX8-NEXT: v_mov_b32_e32 v2, s29 2439; GFX8-NEXT: v_mov_b32_e32 v3, s28 2440; GFX8-NEXT: s_addc_u32 s3, s1, 0 2441; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2442; GFX8-NEXT: v_mov_b32_e32 v5, s3 2443; GFX8-NEXT: v_mov_b32_e32 v4, s2 2444; GFX8-NEXT: s_add_u32 s2, s0, 64 2445; GFX8-NEXT: v_mov_b32_e32 v0, s27 2446; GFX8-NEXT: v_mov_b32_e32 v1, s26 2447; GFX8-NEXT: v_mov_b32_e32 v2, s25 2448; GFX8-NEXT: v_mov_b32_e32 v3, s24 2449; GFX8-NEXT: s_addc_u32 s3, s1, 0 2450; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2451; GFX8-NEXT: v_mov_b32_e32 v5, s3 2452; GFX8-NEXT: 
v_mov_b32_e32 v4, s2 2453; GFX8-NEXT: s_add_u32 s2, s0, 48 2454; GFX8-NEXT: v_mov_b32_e32 v0, s23 2455; GFX8-NEXT: v_mov_b32_e32 v1, s22 2456; GFX8-NEXT: v_mov_b32_e32 v2, s21 2457; GFX8-NEXT: v_mov_b32_e32 v3, s20 2458; GFX8-NEXT: s_addc_u32 s3, s1, 0 2459; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2460; GFX8-NEXT: v_mov_b32_e32 v5, s3 2461; GFX8-NEXT: v_mov_b32_e32 v4, s2 2462; GFX8-NEXT: s_add_u32 s2, s0, 32 2463; GFX8-NEXT: v_mov_b32_e32 v0, s19 2464; GFX8-NEXT: v_mov_b32_e32 v1, s18 2465; GFX8-NEXT: v_mov_b32_e32 v2, s17 2466; GFX8-NEXT: v_mov_b32_e32 v3, s16 2467; GFX8-NEXT: s_addc_u32 s3, s1, 0 2468; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2469; GFX8-NEXT: v_mov_b32_e32 v5, s3 2470; GFX8-NEXT: v_mov_b32_e32 v4, s2 2471; GFX8-NEXT: s_add_u32 s2, s0, 16 2472; GFX8-NEXT: v_mov_b32_e32 v0, s15 2473; GFX8-NEXT: v_mov_b32_e32 v1, s14 2474; GFX8-NEXT: v_mov_b32_e32 v2, s13 2475; GFX8-NEXT: v_mov_b32_e32 v3, s12 2476; GFX8-NEXT: s_addc_u32 s3, s1, 0 2477; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2478; GFX8-NEXT: v_mov_b32_e32 v5, s3 2479; GFX8-NEXT: v_mov_b32_e32 v0, s11 2480; GFX8-NEXT: v_mov_b32_e32 v1, s10 2481; GFX8-NEXT: v_mov_b32_e32 v2, s9 2482; GFX8-NEXT: v_mov_b32_e32 v3, s8 2483; GFX8-NEXT: v_mov_b32_e32 v4, s2 2484; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2485; GFX8-NEXT: v_mov_b32_e32 v5, s1 2486; GFX8-NEXT: v_mov_b32_e32 v0, s7 2487; GFX8-NEXT: v_mov_b32_e32 v1, s6 2488; GFX8-NEXT: v_mov_b32_e32 v2, s5 2489; GFX8-NEXT: v_mov_b32_e32 v3, s4 2490; GFX8-NEXT: v_mov_b32_e32 v4, s0 2491; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2492; GFX8-NEXT: s_endpgm 2493; 2494; EG-LABEL: constant_sextload_v32i1_to_v32i32: 2495; EG: ; %bb.0: 2496; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 2497; EG-NEXT: TEX 0 @14 2498; EG-NEXT: ALU 99, @17, KC0[CB0:0-32], KC1[] 2499; EG-NEXT: ALU 5, @117, KC0[CB0:0-32], KC1[] 2500; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T11.X, 0 2501; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T26.X, 0 2502; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0 2503; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T21.X, 0 2504; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T19.X, 0 2505; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T17.X, 0 2506; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T15.X, 0 2507; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T13.X, 1 2508; EG-NEXT: CF_END 2509; EG-NEXT: PAD 2510; EG-NEXT: Fetch clause starting at 14: 2511; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1 2512; EG-NEXT: ALU clause starting at 16: 2513; EG-NEXT: MOV * T11.X, KC0[2].Z, 2514; EG-NEXT: ALU clause starting at 17: 2515; EG-NEXT: LSHR * T0.W, T11.X, literal.x, 2516; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 2517; EG-NEXT: BFE_INT T12.W, PV.W, 0.0, 1, 2518; EG-NEXT: LSHR * T0.W, T11.X, literal.x, 2519; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00) 2520; EG-NEXT: BFE_INT T12.Z, PS, 0.0, 1, 2521; EG-NEXT: LSHR T0.W, T11.X, literal.x, 2522; EG-NEXT: LSHR * T1.W, T11.X, literal.y, 2523; EG-NEXT: 11(1.541428e-44), 5(7.006492e-45) 2524; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2525; EG-NEXT: BFE_INT T12.Y, PS, 0.0, 1, 2526; EG-NEXT: LSHR T0.Z, T11.X, literal.y, 2527; EG-NEXT: BFE_INT T14.W, PV.W, 0.0, 1, 2528; EG-NEXT: LSHR * T0.W, T11.X, literal.z, 2529; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44) 2530; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 2531; EG-NEXT: BFE_INT T12.X, PS, 0.0, 1, 2532; EG-NEXT: LSHR T0.Y, T11.X, literal.x, 2533; EG-NEXT: BFE_INT T14.Z, PV.Z, 0.0, 1, 2534; EG-NEXT: LSHR T0.W, T11.X, literal.y, 2535; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2536; EG-NEXT: 15(2.101948e-44), 9(1.261169e-44) 2537; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2538; EG-NEXT: LSHR T15.X, PS, literal.x, 2539; EG-NEXT: BFE_INT T14.Y, PV.W, 0.0, 1, 2540; EG-NEXT: LSHR T0.Z, T11.X, literal.y, 2541; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, 1, 2542; EG-NEXT: LSHR * T0.W, T11.X, literal.z, 2543; EG-NEXT: 2(2.802597e-45), 14(1.961818e-44) 2544; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 2545; 
EG-NEXT: BFE_INT T14.X, PS, 0.0, 1, 2546; EG-NEXT: LSHR T0.Y, T11.X, literal.x, 2547; EG-NEXT: BFE_INT T16.Z, PV.Z, 0.0, 1, 2548; EG-NEXT: LSHR T0.W, T11.X, literal.y, 2549; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2550; EG-NEXT: 19(2.662467e-44), 13(1.821688e-44) 2551; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 2552; EG-NEXT: LSHR T17.X, PS, literal.x, 2553; EG-NEXT: BFE_INT T16.Y, PV.W, 0.0, 1, 2554; EG-NEXT: LSHR T0.Z, T11.X, literal.y, 2555; EG-NEXT: BFE_INT T18.W, PV.Y, 0.0, 1, 2556; EG-NEXT: LSHR * T0.W, T11.X, literal.z, 2557; EG-NEXT: 2(2.802597e-45), 18(2.522337e-44) 2558; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) 2559; EG-NEXT: BFE_INT T16.X, PS, 0.0, 1, 2560; EG-NEXT: LSHR T0.Y, T11.X, literal.x, 2561; EG-NEXT: BFE_INT T18.Z, PV.Z, 0.0, 1, 2562; EG-NEXT: LSHR T0.W, T11.X, literal.y, 2563; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2564; EG-NEXT: 23(3.222986e-44), 17(2.382207e-44) 2565; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2566; EG-NEXT: LSHR T19.X, PS, literal.x, 2567; EG-NEXT: BFE_INT T18.Y, PV.W, 0.0, 1, 2568; EG-NEXT: LSHR T0.Z, T11.X, literal.y, 2569; EG-NEXT: BFE_INT T20.W, PV.Y, 0.0, 1, 2570; EG-NEXT: LSHR * T0.W, T11.X, literal.z, 2571; EG-NEXT: 2(2.802597e-45), 22(3.082857e-44) 2572; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2573; EG-NEXT: BFE_INT T18.X, PS, 0.0, 1, 2574; EG-NEXT: LSHR T0.Y, T11.X, literal.x, 2575; EG-NEXT: BFE_INT T20.Z, PV.Z, 0.0, 1, 2576; EG-NEXT: LSHR T0.W, T11.X, literal.y, 2577; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2578; EG-NEXT: 27(3.783506e-44), 21(2.942727e-44) 2579; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 2580; EG-NEXT: LSHR T21.X, PS, literal.x, 2581; EG-NEXT: BFE_INT T20.Y, PV.W, 0.0, 1, 2582; EG-NEXT: LSHR T0.Z, T11.X, literal.y, 2583; EG-NEXT: BFE_INT T22.W, PV.Y, 0.0, 1, 2584; EG-NEXT: LSHR * T0.W, T11.X, literal.z, 2585; EG-NEXT: 2(2.802597e-45), 26(3.643376e-44) 2586; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) 2587; EG-NEXT: BFE_INT T20.X, PS, 0.0, 1, 2588; EG-NEXT: BFE_INT T22.Z, 
PV.Z, 0.0, 1, 2589; EG-NEXT: LSHR T0.W, T11.X, literal.x, 2590; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2591; EG-NEXT: 25(3.503246e-44), 80(1.121039e-43) 2592; EG-NEXT: LSHR T23.X, PS, literal.x, 2593; EG-NEXT: BFE_INT T22.Y, PV.W, 0.0, 1, 2594; EG-NEXT: LSHR * T0.W, T11.X, literal.y, 2595; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 2596; EG-NEXT: BFE_INT T22.X, PV.W, 0.0, 1, 2597; EG-NEXT: LSHR T0.W, T11.X, literal.x, 2598; EG-NEXT: ASHR * T24.W, T11.X, literal.y, 2599; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44) 2600; EG-NEXT: BFE_INT T25.X, T11.X, 0.0, 1, 2601; EG-NEXT: LSHR T0.Y, T11.X, literal.x, 2602; EG-NEXT: BFE_INT T24.Z, PV.W, 0.0, 1, 2603; EG-NEXT: LSHR T0.W, T11.X, literal.y, 2604; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2605; EG-NEXT: 3(4.203895e-45), 29(4.063766e-44) 2606; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 2607; EG-NEXT: LSHR T26.X, PS, literal.x, 2608; EG-NEXT: BFE_INT T24.Y, PV.W, 0.0, 1, 2609; EG-NEXT: LSHR T0.Z, T11.X, literal.x, 2610; EG-NEXT: BFE_INT T25.W, PV.Y, 0.0, 1, 2611; EG-NEXT: LSHR * T0.W, T11.X, literal.y, 2612; EG-NEXT: 2(2.802597e-45), 28(3.923636e-44) 2613; EG-NEXT: BFE_INT T24.X, PS, 0.0, 1, 2614; EG-NEXT: BFE_INT * T25.Z, PV.Z, 0.0, 1, 2615; EG-NEXT: ALU clause starting at 117: 2616; EG-NEXT: LSHR T0.W, T11.X, 1, 2617; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 2618; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2619; EG-NEXT: LSHR T11.X, PS, literal.x, 2620; EG-NEXT: BFE_INT * T25.Y, PV.W, 0.0, 1, 2621; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2622; 2623; GFX12-LABEL: constant_sextload_v32i1_to_v32i32: 2624; GFX12: ; %bb.0: 2625; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2626; GFX12-NEXT: s_wait_kmcnt 0x0 2627; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 2628; GFX12-NEXT: s_wait_kmcnt 0x0 2629; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003 2630; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002 2631; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001 2632; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000 2633; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007 
2634; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006 2635; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10005 2636; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10004 2637; GFX12-NEXT: s_bfe_i32 s11, s2, 0x1000b 2638; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000a 2639; GFX12-NEXT: s_bfe_i32 s13, s2, 0x10009 2640; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10008 2641; GFX12-NEXT: s_bfe_i32 s15, s2, 0x1000f 2642; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000e 2643; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000d 2644; GFX12-NEXT: s_bfe_i32 s18, s2, 0x1000c 2645; GFX12-NEXT: s_bfe_i32 s19, s2, 0x10013 2646; GFX12-NEXT: s_bfe_i32 s20, s2, 0x10012 2647; GFX12-NEXT: s_bfe_i32 s21, s2, 0x10011 2648; GFX12-NEXT: s_bfe_i32 s22, s2, 0x10010 2649; GFX12-NEXT: s_bfe_i32 s23, s2, 0x10017 2650; GFX12-NEXT: s_bfe_i32 s24, s2, 0x10016 2651; GFX12-NEXT: s_bfe_i32 s25, s2, 0x10015 2652; GFX12-NEXT: s_bfe_i32 s26, s2, 0x10014 2653; GFX12-NEXT: s_bfe_i32 s27, s2, 0x1001b 2654; GFX12-NEXT: s_bfe_i32 s28, s2, 0x1001a 2655; GFX12-NEXT: s_bfe_i32 s29, s2, 0x10019 2656; GFX12-NEXT: s_bfe_i32 s30, s2, 0x10018 2657; GFX12-NEXT: s_ashr_i32 s31, s2, 31 2658; GFX12-NEXT: s_bfe_i32 s33, s2, 0x1001e 2659; GFX12-NEXT: s_bfe_i32 s34, s2, 0x1001c 2660; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1001d 2661; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2662; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s2 2663; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s31 2664; GFX12-NEXT: v_dual_mov_b32 v2, s33 :: v_dual_mov_b32 v5, s29 2665; GFX12-NEXT: v_dual_mov_b32 v4, s30 :: v_dual_mov_b32 v7, s27 2666; GFX12-NEXT: v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v9, s25 2667; GFX12-NEXT: v_dual_mov_b32 v8, s26 :: v_dual_mov_b32 v11, s23 2668; GFX12-NEXT: v_mov_b32_e32 v10, s24 2669; GFX12-NEXT: s_clause 0x1 2670; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112 2671; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96 2672; GFX12-NEXT: v_dual_mov_b32 v1, s21 :: v_dual_mov_b32 v0, s22 2673; GFX12-NEXT: v_dual_mov_b32 v3, s19 :: v_dual_mov_b32 v2, s20 
2674; GFX12-NEXT: v_dual_mov_b32 v5, s17 :: v_dual_mov_b32 v4, s18 2675; GFX12-NEXT: v_dual_mov_b32 v7, s15 :: v_dual_mov_b32 v6, s16 2676; GFX12-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s14 2677; GFX12-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v14, s12 2678; GFX12-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v16, s10 2679; GFX12-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s8 2680; GFX12-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v20, s6 2681; GFX12-NEXT: v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s4 2682; GFX12-NEXT: s_clause 0x5 2683; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80 2684; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64 2685; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48 2686; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 2687; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 2688; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 2689; GFX12-NEXT: s_endpgm 2690 %load = load <32 x i1>, ptr addrspace(4) %in 2691 %ext = sext <32 x i1> %load to <32 x i32> 2692 store <32 x i32> %ext, ptr addrspace(1) %out 2693 ret void 2694} 2695 2696define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 2697; GFX6-LABEL: constant_zextload_v64i1_to_v64i32: 2698; GFX6: ; %bb.0: 2699; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2700; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2701; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 2702; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2703; GFX6-NEXT: s_bfe_u32 s4, s2, 0x10003 2704; GFX6-NEXT: s_bfe_u32 s5, s2, 0x10001 2705; GFX6-NEXT: s_bfe_u32 s6, s2, 0x10007 2706; GFX6-NEXT: s_bfe_u32 s7, s2, 0x10005 2707; GFX6-NEXT: s_bfe_u32 s8, s2, 0x1000b 2708; GFX6-NEXT: s_bfe_u32 s9, s2, 0x10009 2709; GFX6-NEXT: s_bfe_u32 s10, s2, 0x1000f 2710; GFX6-NEXT: s_bfe_u32 s13, s2, 0x1000d 2711; GFX6-NEXT: s_bfe_u32 s14, s2, 0x10013 2712; GFX6-NEXT: s_bfe_u32 s15, s2, 0x10011 2713; GFX6-NEXT: 
s_bfe_u32 s16, s2, 0x10017 2714; GFX6-NEXT: s_bfe_u32 s17, s2, 0x10015 2715; GFX6-NEXT: s_bfe_u32 s18, s2, 0x1001b 2716; GFX6-NEXT: s_bfe_u32 s19, s2, 0x10019 2717; GFX6-NEXT: s_lshr_b32 s20, s2, 31 2718; GFX6-NEXT: s_bfe_u32 s21, s2, 0x1001d 2719; GFX6-NEXT: s_bfe_u32 s22, s3, 0x10003 2720; GFX6-NEXT: s_bfe_u32 s23, s3, 0x10001 2721; GFX6-NEXT: s_bfe_u32 s24, s3, 0x10007 2722; GFX6-NEXT: s_bfe_u32 s25, s3, 0x10005 2723; GFX6-NEXT: s_bfe_u32 s26, s3, 0x1000b 2724; GFX6-NEXT: s_bfe_u32 s27, s3, 0x10009 2725; GFX6-NEXT: s_bfe_u32 s28, s3, 0x1000f 2726; GFX6-NEXT: s_bfe_u32 s29, s3, 0x1000d 2727; GFX6-NEXT: s_bfe_u32 s30, s3, 0x10013 2728; GFX6-NEXT: s_bfe_u32 s31, s3, 0x10011 2729; GFX6-NEXT: s_bfe_u32 s33, s3, 0x10017 2730; GFX6-NEXT: s_bfe_u32 s34, s3, 0x10015 2731; GFX6-NEXT: s_bfe_u32 s35, s3, 0x1001b 2732; GFX6-NEXT: s_bfe_u32 s36, s3, 0x10019 2733; GFX6-NEXT: s_lshr_b32 s37, s3, 31 2734; GFX6-NEXT: s_bfe_u32 s38, s3, 0x1001d 2735; GFX6-NEXT: s_and_b32 s12, s2, 1 2736; GFX6-NEXT: s_bfe_u32 s11, s2, 0x10002 2737; GFX6-NEXT: s_bfe_u32 s39, s2, 0x10006 2738; GFX6-NEXT: s_bfe_u32 s40, s2, 0x10004 2739; GFX6-NEXT: s_bfe_u32 s41, s2, 0x1000a 2740; GFX6-NEXT: s_bfe_u32 s42, s2, 0x10008 2741; GFX6-NEXT: s_bfe_u32 s43, s2, 0x1000e 2742; GFX6-NEXT: s_bfe_u32 s44, s2, 0x1000c 2743; GFX6-NEXT: s_bfe_u32 s45, s2, 0x10012 2744; GFX6-NEXT: s_bfe_u32 s46, s2, 0x10010 2745; GFX6-NEXT: s_bfe_u32 s47, s2, 0x10016 2746; GFX6-NEXT: s_bfe_u32 s48, s2, 0x10014 2747; GFX6-NEXT: s_bfe_u32 s49, s2, 0x1001a 2748; GFX6-NEXT: s_bfe_u32 s50, s2, 0x10018 2749; GFX6-NEXT: s_bfe_u32 s51, s2, 0x1001e 2750; GFX6-NEXT: s_bfe_u32 s52, s2, 0x1001c 2751; GFX6-NEXT: s_and_b32 s53, s3, 1 2752; GFX6-NEXT: s_bfe_u32 s54, s3, 0x10002 2753; GFX6-NEXT: s_bfe_u32 s55, s3, 0x10006 2754; GFX6-NEXT: s_bfe_u32 s56, s3, 0x10004 2755; GFX6-NEXT: s_bfe_u32 s57, s3, 0x10008 2756; GFX6-NEXT: s_bfe_u32 s58, s3, 0x1000e 2757; GFX6-NEXT: s_bfe_u32 s59, s3, 0x1000c 2758; GFX6-NEXT: s_bfe_u32 s60, s3, 0x10012 2759; 
GFX6-NEXT: s_bfe_u32 s61, s3, 0x10010 2760; GFX6-NEXT: s_bfe_u32 s62, s3, 0x10016 2761; GFX6-NEXT: s_bfe_u32 s63, s3, 0x10014 2762; GFX6-NEXT: s_bfe_u32 s64, s3, 0x1001a 2763; GFX6-NEXT: s_bfe_u32 s65, s3, 0x10018 2764; GFX6-NEXT: s_bfe_u32 s66, s3, 0x1001e 2765; GFX6-NEXT: s_bfe_u32 s67, s3, 0x1001c 2766; GFX6-NEXT: s_bfe_u32 s68, s3, 0x1000a 2767; GFX6-NEXT: s_mov_b32 s3, 0xf000 2768; GFX6-NEXT: s_mov_b32 s2, -1 2769; GFX6-NEXT: v_mov_b32_e32 v0, s67 2770; GFX6-NEXT: v_mov_b32_e32 v1, s38 2771; GFX6-NEXT: v_mov_b32_e32 v2, s66 2772; GFX6-NEXT: v_mov_b32_e32 v3, s37 2773; GFX6-NEXT: v_mov_b32_e32 v4, s65 2774; GFX6-NEXT: v_mov_b32_e32 v5, s36 2775; GFX6-NEXT: v_mov_b32_e32 v6, s64 2776; GFX6-NEXT: v_mov_b32_e32 v7, s35 2777; GFX6-NEXT: v_mov_b32_e32 v8, s63 2778; GFX6-NEXT: v_mov_b32_e32 v9, s34 2779; GFX6-NEXT: v_mov_b32_e32 v10, s62 2780; GFX6-NEXT: v_mov_b32_e32 v11, s33 2781; GFX6-NEXT: v_mov_b32_e32 v12, s61 2782; GFX6-NEXT: v_mov_b32_e32 v13, s31 2783; GFX6-NEXT: v_mov_b32_e32 v14, s60 2784; GFX6-NEXT: v_mov_b32_e32 v15, s30 2785; GFX6-NEXT: v_mov_b32_e32 v16, s59 2786; GFX6-NEXT: v_mov_b32_e32 v17, s29 2787; GFX6-NEXT: v_mov_b32_e32 v18, s58 2788; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 2789; GFX6-NEXT: s_waitcnt expcnt(0) 2790; GFX6-NEXT: v_mov_b32_e32 v0, s57 2791; GFX6-NEXT: v_mov_b32_e32 v19, s28 2792; GFX6-NEXT: v_mov_b32_e32 v1, s27 2793; GFX6-NEXT: v_mov_b32_e32 v2, s68 2794; GFX6-NEXT: v_mov_b32_e32 v3, s26 2795; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 2796; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 2797; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 2798; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 2799; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 2800; GFX6-NEXT: s_waitcnt expcnt(0) 2801; GFX6-NEXT: v_mov_b32_e32 v0, s56 2802; GFX6-NEXT: v_mov_b32_e32 v1, s25 2803; GFX6-NEXT: v_mov_b32_e32 v2, s55 2804; 
GFX6-NEXT: v_mov_b32_e32 v3, s24 2805; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 2806; GFX6-NEXT: s_waitcnt expcnt(0) 2807; GFX6-NEXT: v_mov_b32_e32 v0, s53 2808; GFX6-NEXT: v_mov_b32_e32 v1, s23 2809; GFX6-NEXT: v_mov_b32_e32 v2, s54 2810; GFX6-NEXT: v_mov_b32_e32 v3, s22 2811; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 2812; GFX6-NEXT: s_waitcnt expcnt(0) 2813; GFX6-NEXT: v_mov_b32_e32 v0, s52 2814; GFX6-NEXT: v_mov_b32_e32 v1, s21 2815; GFX6-NEXT: v_mov_b32_e32 v2, s51 2816; GFX6-NEXT: v_mov_b32_e32 v3, s20 2817; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2818; GFX6-NEXT: s_waitcnt expcnt(0) 2819; GFX6-NEXT: v_mov_b32_e32 v0, s50 2820; GFX6-NEXT: v_mov_b32_e32 v1, s19 2821; GFX6-NEXT: v_mov_b32_e32 v2, s49 2822; GFX6-NEXT: v_mov_b32_e32 v3, s18 2823; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2824; GFX6-NEXT: s_waitcnt expcnt(0) 2825; GFX6-NEXT: v_mov_b32_e32 v0, s48 2826; GFX6-NEXT: v_mov_b32_e32 v1, s17 2827; GFX6-NEXT: v_mov_b32_e32 v2, s47 2828; GFX6-NEXT: v_mov_b32_e32 v3, s16 2829; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2830; GFX6-NEXT: s_waitcnt expcnt(0) 2831; GFX6-NEXT: v_mov_b32_e32 v0, s46 2832; GFX6-NEXT: v_mov_b32_e32 v1, s15 2833; GFX6-NEXT: v_mov_b32_e32 v2, s45 2834; GFX6-NEXT: v_mov_b32_e32 v3, s14 2835; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2836; GFX6-NEXT: s_waitcnt expcnt(0) 2837; GFX6-NEXT: v_mov_b32_e32 v0, s44 2838; GFX6-NEXT: v_mov_b32_e32 v1, s13 2839; GFX6-NEXT: v_mov_b32_e32 v2, s43 2840; GFX6-NEXT: v_mov_b32_e32 v3, s10 2841; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2842; GFX6-NEXT: s_waitcnt expcnt(0) 2843; GFX6-NEXT: v_mov_b32_e32 v0, s42 2844; GFX6-NEXT: v_mov_b32_e32 v1, s9 2845; GFX6-NEXT: v_mov_b32_e32 v2, s41 2846; GFX6-NEXT: v_mov_b32_e32 v3, s8 2847; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2848; GFX6-NEXT: s_waitcnt expcnt(0) 2849; 
GFX6-NEXT: v_mov_b32_e32 v0, s40 2850; GFX6-NEXT: v_mov_b32_e32 v1, s7 2851; GFX6-NEXT: v_mov_b32_e32 v2, s39 2852; GFX6-NEXT: v_mov_b32_e32 v3, s6 2853; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2854; GFX6-NEXT: s_waitcnt expcnt(0) 2855; GFX6-NEXT: v_mov_b32_e32 v0, s12 2856; GFX6-NEXT: v_mov_b32_e32 v1, s5 2857; GFX6-NEXT: v_mov_b32_e32 v2, s11 2858; GFX6-NEXT: v_mov_b32_e32 v3, s4 2859; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2860; GFX6-NEXT: s_endpgm 2861; 2862; GFX8-LABEL: constant_zextload_v64i1_to_v64i32: 2863; GFX8: ; %bb.0: 2864; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2865; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2866; GFX8-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x0 2867; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2868; GFX8-NEXT: s_bfe_u32 s2, s26, 0x10003 2869; GFX8-NEXT: s_bfe_u32 s3, s26, 0x10001 2870; GFX8-NEXT: s_bfe_u32 s4, s26, 0x10007 2871; GFX8-NEXT: s_bfe_u32 s5, s26, 0x10005 2872; GFX8-NEXT: s_bfe_u32 s6, s26, 0x1000b 2873; GFX8-NEXT: s_bfe_u32 s9, s26, 0x10009 2874; GFX8-NEXT: s_bfe_u32 s11, s26, 0x1000f 2875; GFX8-NEXT: s_bfe_u32 s13, s26, 0x1000d 2876; GFX8-NEXT: s_bfe_u32 s15, s26, 0x10013 2877; GFX8-NEXT: s_bfe_u32 s17, s26, 0x10011 2878; GFX8-NEXT: s_bfe_u32 s19, s26, 0x10017 2879; GFX8-NEXT: s_bfe_u32 s21, s26, 0x1001b 2880; GFX8-NEXT: s_bfe_u32 s23, s26, 0x10019 2881; GFX8-NEXT: s_lshr_b32 s25, s26, 31 2882; GFX8-NEXT: s_bfe_u32 s28, s26, 0x1001d 2883; GFX8-NEXT: s_bfe_u32 s29, s27, 0x10003 2884; GFX8-NEXT: s_bfe_u32 s30, s27, 0x10001 2885; GFX8-NEXT: s_bfe_u32 s31, s27, 0x10007 2886; GFX8-NEXT: s_bfe_u32 s33, s27, 0x10005 2887; GFX8-NEXT: s_bfe_u32 s34, s27, 0x1000b 2888; GFX8-NEXT: s_bfe_u32 s35, s27, 0x10009 2889; GFX8-NEXT: s_bfe_u32 s36, s27, 0x1000f 2890; GFX8-NEXT: s_bfe_u32 s37, s27, 0x1000d 2891; GFX8-NEXT: s_bfe_u32 s38, s27, 0x10013 2892; GFX8-NEXT: s_bfe_u32 s39, s27, 0x10011 2893; GFX8-NEXT: s_bfe_u32 s40, s27, 0x10017 2894; GFX8-NEXT: s_bfe_u32 s41, s27, 0x1001b 2895; GFX8-NEXT: s_bfe_u32 s42, 
s27, 0x10019 2896; GFX8-NEXT: s_lshr_b32 s43, s27, 31 2897; GFX8-NEXT: s_bfe_u32 s44, s27, 0x1001d 2898; GFX8-NEXT: s_and_b32 s8, s26, 1 2899; GFX8-NEXT: s_bfe_u32 s7, s26, 0x10002 2900; GFX8-NEXT: s_bfe_u32 s10, s26, 0x10006 2901; GFX8-NEXT: s_bfe_u32 s12, s26, 0x10004 2902; GFX8-NEXT: s_bfe_u32 s14, s26, 0x1000a 2903; GFX8-NEXT: s_bfe_u32 s16, s26, 0x10008 2904; GFX8-NEXT: s_bfe_u32 s18, s26, 0x1000e 2905; GFX8-NEXT: s_bfe_u32 s20, s26, 0x1000c 2906; GFX8-NEXT: s_bfe_u32 s22, s26, 0x10012 2907; GFX8-NEXT: s_bfe_u32 s24, s26, 0x10010 2908; GFX8-NEXT: s_bfe_u32 s45, s26, 0x10016 2909; GFX8-NEXT: s_bfe_u32 s46, s26, 0x10015 2910; GFX8-NEXT: s_bfe_u32 s47, s26, 0x10014 2911; GFX8-NEXT: s_bfe_u32 s48, s26, 0x1001a 2912; GFX8-NEXT: s_bfe_u32 s49, s26, 0x10018 2913; GFX8-NEXT: s_bfe_u32 s50, s26, 0x1001e 2914; GFX8-NEXT: s_bfe_u32 s51, s26, 0x1001c 2915; GFX8-NEXT: s_and_b32 s52, s27, 1 2916; GFX8-NEXT: s_bfe_u32 s53, s27, 0x10002 2917; GFX8-NEXT: s_bfe_u32 s54, s27, 0x10006 2918; GFX8-NEXT: s_bfe_u32 s55, s27, 0x10004 2919; GFX8-NEXT: s_bfe_u32 s56, s27, 0x1000a 2920; GFX8-NEXT: s_bfe_u32 s57, s27, 0x10008 2921; GFX8-NEXT: s_bfe_u32 s58, s27, 0x1000e 2922; GFX8-NEXT: s_bfe_u32 s59, s27, 0x1000c 2923; GFX8-NEXT: s_bfe_u32 s60, s27, 0x10012 2924; GFX8-NEXT: s_bfe_u32 s61, s27, 0x10010 2925; GFX8-NEXT: s_bfe_u32 s62, s27, 0x10016 2926; GFX8-NEXT: s_bfe_u32 s63, s27, 0x10015 2927; GFX8-NEXT: s_bfe_u32 s64, s27, 0x10014 2928; GFX8-NEXT: s_bfe_u32 s65, s27, 0x1001a 2929; GFX8-NEXT: s_bfe_u32 s66, s27, 0x10018 2930; GFX8-NEXT: s_bfe_u32 s26, s27, 0x1001e 2931; GFX8-NEXT: s_bfe_u32 s27, s27, 0x1001c 2932; GFX8-NEXT: v_mov_b32_e32 v2, s26 2933; GFX8-NEXT: s_add_u32 s26, s0, 0xf0 2934; GFX8-NEXT: v_mov_b32_e32 v0, s27 2935; GFX8-NEXT: s_addc_u32 s27, s1, 0 2936; GFX8-NEXT: v_mov_b32_e32 v4, s26 2937; GFX8-NEXT: v_mov_b32_e32 v1, s44 2938; GFX8-NEXT: v_mov_b32_e32 v3, s43 2939; GFX8-NEXT: v_mov_b32_e32 v5, s27 2940; GFX8-NEXT: s_add_u32 s26, s0, 0xe0 2941; GFX8-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 2942; GFX8-NEXT: s_addc_u32 s27, s1, 0 2943; GFX8-NEXT: v_mov_b32_e32 v4, s26 2944; GFX8-NEXT: v_mov_b32_e32 v0, s66 2945; GFX8-NEXT: v_mov_b32_e32 v1, s42 2946; GFX8-NEXT: v_mov_b32_e32 v2, s65 2947; GFX8-NEXT: v_mov_b32_e32 v3, s41 2948; GFX8-NEXT: v_mov_b32_e32 v5, s27 2949; GFX8-NEXT: s_add_u32 s26, s0, 0xd0 2950; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2951; GFX8-NEXT: s_addc_u32 s27, s1, 0 2952; GFX8-NEXT: v_mov_b32_e32 v4, s26 2953; GFX8-NEXT: v_mov_b32_e32 v0, s64 2954; GFX8-NEXT: v_mov_b32_e32 v1, s63 2955; GFX8-NEXT: v_mov_b32_e32 v2, s62 2956; GFX8-NEXT: v_mov_b32_e32 v3, s40 2957; GFX8-NEXT: v_mov_b32_e32 v5, s27 2958; GFX8-NEXT: s_add_u32 s26, s0, 0xc0 2959; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2960; GFX8-NEXT: s_addc_u32 s27, s1, 0 2961; GFX8-NEXT: v_mov_b32_e32 v4, s26 2962; GFX8-NEXT: v_mov_b32_e32 v0, s61 2963; GFX8-NEXT: v_mov_b32_e32 v1, s39 2964; GFX8-NEXT: v_mov_b32_e32 v2, s60 2965; GFX8-NEXT: v_mov_b32_e32 v3, s38 2966; GFX8-NEXT: v_mov_b32_e32 v5, s27 2967; GFX8-NEXT: s_add_u32 s26, s0, 0xb0 2968; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2969; GFX8-NEXT: s_addc_u32 s27, s1, 0 2970; GFX8-NEXT: v_mov_b32_e32 v4, s26 2971; GFX8-NEXT: v_mov_b32_e32 v0, s59 2972; GFX8-NEXT: v_mov_b32_e32 v1, s37 2973; GFX8-NEXT: v_mov_b32_e32 v2, s58 2974; GFX8-NEXT: v_mov_b32_e32 v3, s36 2975; GFX8-NEXT: v_mov_b32_e32 v5, s27 2976; GFX8-NEXT: s_add_u32 s26, s0, 0xa0 2977; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2978; GFX8-NEXT: s_addc_u32 s27, s1, 0 2979; GFX8-NEXT: v_mov_b32_e32 v4, s26 2980; GFX8-NEXT: v_mov_b32_e32 v0, s57 2981; GFX8-NEXT: v_mov_b32_e32 v1, s35 2982; GFX8-NEXT: v_mov_b32_e32 v2, s56 2983; GFX8-NEXT: v_mov_b32_e32 v3, s34 2984; GFX8-NEXT: v_mov_b32_e32 v5, s27 2985; GFX8-NEXT: s_add_u32 s26, s0, 0x90 2986; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2987; GFX8-NEXT: s_addc_u32 s27, s1, 0 2988; GFX8-NEXT: v_mov_b32_e32 v4, s26 2989; GFX8-NEXT: v_mov_b32_e32 v0, s55 2990; GFX8-NEXT: 
v_mov_b32_e32 v1, s33 2991; GFX8-NEXT: v_mov_b32_e32 v2, s54 2992; GFX8-NEXT: v_mov_b32_e32 v3, s31 2993; GFX8-NEXT: v_mov_b32_e32 v5, s27 2994; GFX8-NEXT: s_add_u32 s26, s0, 0x80 2995; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2996; GFX8-NEXT: s_addc_u32 s27, s1, 0 2997; GFX8-NEXT: v_mov_b32_e32 v4, s26 2998; GFX8-NEXT: v_mov_b32_e32 v0, s52 2999; GFX8-NEXT: v_mov_b32_e32 v1, s30 3000; GFX8-NEXT: v_mov_b32_e32 v2, s53 3001; GFX8-NEXT: v_mov_b32_e32 v3, s29 3002; GFX8-NEXT: v_mov_b32_e32 v5, s27 3003; GFX8-NEXT: s_add_u32 s26, s0, 0x70 3004; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3005; GFX8-NEXT: s_addc_u32 s27, s1, 0 3006; GFX8-NEXT: v_mov_b32_e32 v4, s26 3007; GFX8-NEXT: v_mov_b32_e32 v0, s51 3008; GFX8-NEXT: v_mov_b32_e32 v1, s28 3009; GFX8-NEXT: v_mov_b32_e32 v2, s50 3010; GFX8-NEXT: v_mov_b32_e32 v3, s25 3011; GFX8-NEXT: v_mov_b32_e32 v5, s27 3012; GFX8-NEXT: s_add_u32 s26, s0, 0x60 3013; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3014; GFX8-NEXT: s_addc_u32 s27, s1, 0 3015; GFX8-NEXT: v_mov_b32_e32 v4, s26 3016; GFX8-NEXT: v_mov_b32_e32 v0, s49 3017; GFX8-NEXT: v_mov_b32_e32 v1, s23 3018; GFX8-NEXT: v_mov_b32_e32 v2, s48 3019; GFX8-NEXT: v_mov_b32_e32 v3, s21 3020; GFX8-NEXT: v_mov_b32_e32 v5, s27 3021; GFX8-NEXT: s_add_u32 s26, s0, 0x50 3022; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3023; GFX8-NEXT: s_addc_u32 s27, s1, 0 3024; GFX8-NEXT: v_mov_b32_e32 v4, s26 3025; GFX8-NEXT: v_mov_b32_e32 v0, s47 3026; GFX8-NEXT: v_mov_b32_e32 v1, s46 3027; GFX8-NEXT: v_mov_b32_e32 v2, s45 3028; GFX8-NEXT: v_mov_b32_e32 v3, s19 3029; GFX8-NEXT: v_mov_b32_e32 v5, s27 3030; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3031; GFX8-NEXT: s_nop 0 3032; GFX8-NEXT: v_mov_b32_e32 v2, s22 3033; GFX8-NEXT: s_add_u32 s22, s0, 64 3034; GFX8-NEXT: s_addc_u32 s23, s1, 0 3035; GFX8-NEXT: v_mov_b32_e32 v4, s22 3036; GFX8-NEXT: v_mov_b32_e32 v0, s24 3037; GFX8-NEXT: v_mov_b32_e32 v1, s17 3038; GFX8-NEXT: v_mov_b32_e32 v3, s15 3039; GFX8-NEXT: v_mov_b32_e32 v5, s23 3040; 
GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3041; GFX8-NEXT: s_nop 0 3042; GFX8-NEXT: v_mov_b32_e32 v2, s18 3043; GFX8-NEXT: s_add_u32 s18, s0, 48 3044; GFX8-NEXT: s_addc_u32 s19, s1, 0 3045; GFX8-NEXT: v_mov_b32_e32 v4, s18 3046; GFX8-NEXT: v_mov_b32_e32 v0, s20 3047; GFX8-NEXT: v_mov_b32_e32 v1, s13 3048; GFX8-NEXT: v_mov_b32_e32 v3, s11 3049; GFX8-NEXT: v_mov_b32_e32 v5, s19 3050; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3051; GFX8-NEXT: s_nop 0 3052; GFX8-NEXT: v_mov_b32_e32 v2, s14 3053; GFX8-NEXT: s_add_u32 s14, s0, 32 3054; GFX8-NEXT: s_addc_u32 s15, s1, 0 3055; GFX8-NEXT: v_mov_b32_e32 v4, s14 3056; GFX8-NEXT: v_mov_b32_e32 v0, s16 3057; GFX8-NEXT: v_mov_b32_e32 v1, s9 3058; GFX8-NEXT: v_mov_b32_e32 v3, s6 3059; GFX8-NEXT: v_mov_b32_e32 v5, s15 3060; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3061; GFX8-NEXT: s_nop 0 3062; GFX8-NEXT: v_mov_b32_e32 v3, s4 3063; GFX8-NEXT: s_add_u32 s4, s0, 16 3064; GFX8-NEXT: v_mov_b32_e32 v1, s5 3065; GFX8-NEXT: s_addc_u32 s5, s1, 0 3066; GFX8-NEXT: v_mov_b32_e32 v4, s4 3067; GFX8-NEXT: v_mov_b32_e32 v0, s12 3068; GFX8-NEXT: v_mov_b32_e32 v2, s10 3069; GFX8-NEXT: v_mov_b32_e32 v5, s5 3070; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3071; GFX8-NEXT: v_mov_b32_e32 v5, s1 3072; GFX8-NEXT: v_mov_b32_e32 v0, s8 3073; GFX8-NEXT: v_mov_b32_e32 v1, s3 3074; GFX8-NEXT: v_mov_b32_e32 v2, s7 3075; GFX8-NEXT: v_mov_b32_e32 v3, s2 3076; GFX8-NEXT: v_mov_b32_e32 v4, s0 3077; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3078; GFX8-NEXT: s_endpgm 3079; 3080; EG-LABEL: constant_zextload_v64i1_to_v64i32: 3081; EG: ; %bb.0: 3082; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[] 3083; EG-NEXT: TEX 0 @22 3084; EG-NEXT: ALU 96, @25, KC0[CB0:0-32], KC1[] 3085; EG-NEXT: ALU 57, @122, KC0[CB0:0-32], KC1[] 3086; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0 3087; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T49.X, 0 3088; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T47.X, 0 3089; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T45.X, 
0 3090; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T43.X, 0 3091; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T41.X, 0 3092; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T39.X, 0 3093; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0 3094; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T21.X, 0 3095; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T34.X, 0 3096; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T32.X, 0 3097; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T30.X, 0 3098; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T28.X, 0 3099; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T26.X, 0 3100; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T24.X, 0 3101; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 1 3102; EG-NEXT: CF_END 3103; EG-NEXT: PAD 3104; EG-NEXT: Fetch clause starting at 22: 3105; EG-NEXT: VTX_READ_64 T21.XY, T19.X, 0, #1 3106; EG-NEXT: ALU clause starting at 24: 3107; EG-NEXT: MOV * T19.X, KC0[2].Z, 3108; EG-NEXT: ALU clause starting at 25: 3109; EG-NEXT: BFE_UINT * T19.W, T21.X, literal.x, 1, 3110; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 3111; EG-NEXT: BFE_UINT * T19.Z, T21.X, literal.x, 1, 3112; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3113; EG-NEXT: BFE_UINT T19.Y, T21.X, 1, 1, 3114; EG-NEXT: BFE_UINT * T20.W, T21.X, literal.x, 1, 3115; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 3116; EG-NEXT: AND_INT T19.X, T21.X, 1, 3117; EG-NEXT: BFE_UINT T20.Z, T21.X, literal.x, 1, 3118; EG-NEXT: LSHR * T22.X, KC0[2].Y, literal.y, 3119; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45) 3120; EG-NEXT: BFE_UINT T20.Y, T21.X, literal.x, 1, 3121; EG-NEXT: BFE_UINT * T23.W, T21.X, literal.y, 1, 3122; EG-NEXT: 5(7.006492e-45), 11(1.541428e-44) 3123; EG-NEXT: BFE_UINT T20.X, T21.X, literal.x, 1, 3124; EG-NEXT: BFE_UINT T23.Z, T21.X, literal.y, 1, 3125; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3126; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44) 3127; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3128; EG-NEXT: LSHR T24.X, PV.W, literal.x, 3129; 
EG-NEXT: BFE_UINT T23.Y, T21.X, literal.y, 1, 3130; EG-NEXT: BFE_UINT * T25.W, T21.X, literal.z, 1, 3131; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 3132; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00) 3133; EG-NEXT: BFE_UINT T23.X, T21.X, literal.x, 1, 3134; EG-NEXT: BFE_UINT T25.Z, T21.X, literal.y, 1, 3135; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3136; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44) 3137; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3138; EG-NEXT: LSHR T26.X, PV.W, literal.x, 3139; EG-NEXT: BFE_UINT T25.Y, T21.X, literal.y, 1, 3140; EG-NEXT: BFE_UINT * T27.W, T21.X, literal.z, 1, 3141; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44) 3142; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00) 3143; EG-NEXT: BFE_UINT T25.X, T21.X, literal.x, 1, 3144; EG-NEXT: BFE_UINT T27.Z, T21.X, literal.y, 1, 3145; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3146; EG-NEXT: 12(1.681558e-44), 18(2.522337e-44) 3147; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3148; EG-NEXT: LSHR T28.X, PV.W, literal.x, 3149; EG-NEXT: BFE_UINT T27.Y, T21.X, literal.y, 1, 3150; EG-NEXT: BFE_UINT * T29.W, T21.X, literal.z, 1, 3151; EG-NEXT: 2(2.802597e-45), 17(2.382207e-44) 3152; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) 3153; EG-NEXT: BFE_UINT T27.X, T21.X, literal.x, 1, 3154; EG-NEXT: BFE_UINT T29.Z, T21.X, literal.y, 1, 3155; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3156; EG-NEXT: 16(2.242078e-44), 22(3.082857e-44) 3157; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3158; EG-NEXT: LSHR T30.X, PV.W, literal.x, 3159; EG-NEXT: BFE_UINT T29.Y, T21.X, literal.y, 1, 3160; EG-NEXT: BFE_UINT * T31.W, T21.X, literal.z, 1, 3161; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44) 3162; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00) 3163; EG-NEXT: BFE_UINT T29.X, T21.X, literal.x, 1, 3164; EG-NEXT: BFE_UINT T31.Z, T21.X, literal.y, 1, 3165; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3166; EG-NEXT: 20(2.802597e-44), 26(3.643376e-44) 3167; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 3168; EG-NEXT: LSHR T32.X, PV.W, 
literal.x, 3169; EG-NEXT: BFE_UINT T31.Y, T21.X, literal.y, 1, 3170; EG-NEXT: LSHR * T33.W, T21.X, literal.z, 3171; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44) 3172; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 3173; EG-NEXT: BFE_UINT T31.X, T21.X, literal.x, 1, 3174; EG-NEXT: BFE_UINT T33.Z, T21.X, literal.y, 1, 3175; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3176; EG-NEXT: 24(3.363116e-44), 30(4.203895e-44) 3177; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3178; EG-NEXT: LSHR T34.X, PV.W, literal.x, 3179; EG-NEXT: BFE_UINT T33.Y, T21.X, literal.y, 1, 3180; EG-NEXT: BFE_UINT * T35.W, T21.Y, literal.z, 1, 3181; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44) 3182; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 3183; EG-NEXT: BFE_UINT T33.X, T21.X, literal.x, 1, 3184; EG-NEXT: BFE_UINT T35.Z, T21.Y, literal.y, 1, 3185; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3186; EG-NEXT: 28(3.923636e-44), 2(2.802597e-45) 3187; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 3188; EG-NEXT: LSHR T21.X, PV.W, literal.x, 3189; EG-NEXT: BFE_UINT T35.Y, T21.Y, 1, 1, 3190; EG-NEXT: BFE_UINT T36.W, T21.Y, literal.y, 1, 3191; EG-NEXT: AND_INT * T35.X, T21.Y, 1, 3192; EG-NEXT: 2(2.802597e-45), 7(9.809089e-45) 3193; EG-NEXT: BFE_UINT T36.Z, T21.Y, literal.x, 1, 3194; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3195; EG-NEXT: 6(8.407791e-45), 128(1.793662e-43) 3196; EG-NEXT: LSHR T37.X, PV.W, literal.x, 3197; EG-NEXT: BFE_UINT T36.Y, T21.Y, literal.y, 1, 3198; EG-NEXT: BFE_UINT * T38.W, T21.Y, literal.z, 1, 3199; EG-NEXT: 2(2.802597e-45), 5(7.006492e-45) 3200; EG-NEXT: 11(1.541428e-44), 0(0.000000e+00) 3201; EG-NEXT: BFE_UINT T36.X, T21.Y, literal.x, 1, 3202; EG-NEXT: BFE_UINT T38.Z, T21.Y, literal.y, 1, 3203; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3204; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44) 3205; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00) 3206; EG-NEXT: ALU clause starting at 122: 3207; EG-NEXT: LSHR T39.X, T0.W, literal.x, 3208; EG-NEXT: BFE_UINT T38.Y, T21.Y, literal.y, 1, 3209; 
EG-NEXT: BFE_UINT * T40.W, T21.Y, literal.z, 1, 3210; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 3211; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00) 3212; EG-NEXT: BFE_UINT T38.X, T21.Y, literal.x, 1, 3213; EG-NEXT: BFE_UINT T40.Z, T21.Y, literal.y, 1, 3214; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3215; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44) 3216; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) 3217; EG-NEXT: LSHR T41.X, PV.W, literal.x, 3218; EG-NEXT: BFE_UINT T40.Y, T21.Y, literal.y, 1, 3219; EG-NEXT: BFE_UINT * T42.W, T21.Y, literal.z, 1, 3220; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44) 3221; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00) 3222; EG-NEXT: BFE_UINT T40.X, T21.Y, literal.x, 1, 3223; EG-NEXT: BFE_UINT T42.Z, T21.Y, literal.y, 1, 3224; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3225; EG-NEXT: 12(1.681558e-44), 18(2.522337e-44) 3226; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) 3227; EG-NEXT: LSHR T43.X, PV.W, literal.x, 3228; EG-NEXT: BFE_UINT T42.Y, T21.Y, literal.y, 1, 3229; EG-NEXT: BFE_UINT * T44.W, T21.Y, literal.z, 1, 3230; EG-NEXT: 2(2.802597e-45), 17(2.382207e-44) 3231; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00) 3232; EG-NEXT: BFE_UINT T42.X, T21.Y, literal.x, 1, 3233; EG-NEXT: BFE_UINT T44.Z, T21.Y, literal.y, 1, 3234; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3235; EG-NEXT: 16(2.242078e-44), 22(3.082857e-44) 3236; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 3237; EG-NEXT: LSHR T45.X, PV.W, literal.x, 3238; EG-NEXT: BFE_UINT T44.Y, T21.Y, literal.y, 1, 3239; EG-NEXT: BFE_UINT * T46.W, T21.Y, literal.z, 1, 3240; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44) 3241; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00) 3242; EG-NEXT: BFE_UINT T44.X, T21.Y, literal.x, 1, 3243; EG-NEXT: BFE_UINT T46.Z, T21.Y, literal.y, 1, 3244; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3245; EG-NEXT: 20(2.802597e-44), 26(3.643376e-44) 3246; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 3247; EG-NEXT: LSHR T47.X, PV.W, literal.x, 3248; EG-NEXT: BFE_UINT T46.Y, T21.Y, 
literal.y, 1, 3249; EG-NEXT: LSHR * T48.W, T21.Y, literal.z, 3250; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44) 3251; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 3252; EG-NEXT: BFE_UINT T46.X, T21.Y, literal.x, 1, 3253; EG-NEXT: BFE_UINT T48.Z, T21.Y, literal.y, 1, 3254; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3255; EG-NEXT: 24(3.363116e-44), 30(4.203895e-44) 3256; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 3257; EG-NEXT: LSHR T49.X, PV.W, literal.x, 3258; EG-NEXT: BFE_UINT * T48.Y, T21.Y, literal.y, 1, 3259; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44) 3260; EG-NEXT: BFE_UINT T48.X, T21.Y, literal.x, 1, 3261; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3262; EG-NEXT: 28(3.923636e-44), 240(3.363116e-43) 3263; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 3264; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3265; 3266; GFX12-LABEL: constant_zextload_v64i1_to_v64i32: 3267; GFX12: ; %bb.0: 3268; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3269; GFX12-NEXT: s_wait_kmcnt 0x0 3270; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 3271; GFX12-NEXT: s_wait_kmcnt 0x0 3272; GFX12-NEXT: s_lshr_b32 s33, s3, 31 3273; GFX12-NEXT: s_bfe_u32 s34, s3, 0x1001d 3274; GFX12-NEXT: s_bfe_u32 s65, s3, 0x1001c 3275; GFX12-NEXT: s_bfe_u32 s66, s3, 0x1001e 3276; GFX12-NEXT: s_bfe_u32 s30, s3, 0x1001b 3277; GFX12-NEXT: s_bfe_u32 s31, s3, 0x10019 3278; GFX12-NEXT: s_bfe_u32 s63, s3, 0x1001a 3279; GFX12-NEXT: s_bfe_u32 s64, s3, 0x10018 3280; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s34 3281; GFX12-NEXT: s_bfe_u32 s29, s3, 0x10017 3282; GFX12-NEXT: s_bfe_u32 s60, s3, 0x10016 3283; GFX12-NEXT: s_bfe_u32 s61, s3, 0x10015 3284; GFX12-NEXT: s_bfe_u32 s62, s3, 0x10014 3285; GFX12-NEXT: v_dual_mov_b32 v0, s65 :: v_dual_mov_b32 v3, s33 3286; GFX12-NEXT: v_dual_mov_b32 v2, s66 :: v_dual_mov_b32 v5, s31 3287; GFX12-NEXT: s_bfe_u32 s27, s3, 0x10013 3288; GFX12-NEXT: s_bfe_u32 s28, s3, 0x10011 3289; GFX12-NEXT: s_bfe_u32 s58, s3, 0x10012 3290; GFX12-NEXT: s_bfe_u32 s59, s3, 0x10010 3291; GFX12-NEXT: 
v_dual_mov_b32 v4, s64 :: v_dual_mov_b32 v7, s30 3292; GFX12-NEXT: v_dual_mov_b32 v6, s63 :: v_dual_mov_b32 v9, s61 3293; GFX12-NEXT: v_dual_mov_b32 v8, s62 :: v_dual_mov_b32 v11, s29 3294; GFX12-NEXT: v_dual_mov_b32 v10, s60 :: v_dual_mov_b32 v13, s28 3295; GFX12-NEXT: s_bfe_u32 s19, s3, 0x10003 3296; GFX12-NEXT: s_bfe_u32 s20, s3, 0x10001 3297; GFX12-NEXT: s_bfe_u32 s21, s3, 0x10007 3298; GFX12-NEXT: s_bfe_u32 s22, s3, 0x10005 3299; GFX12-NEXT: s_bfe_u32 s23, s3, 0x1000b 3300; GFX12-NEXT: s_bfe_u32 s24, s3, 0x10009 3301; GFX12-NEXT: s_bfe_u32 s25, s3, 0x1000f 3302; GFX12-NEXT: s_bfe_u32 s26, s3, 0x1000d 3303; GFX12-NEXT: s_and_b32 s51, s3, 1 3304; GFX12-NEXT: s_bfe_u32 s52, s3, 0x10002 3305; GFX12-NEXT: s_bfe_u32 s53, s3, 0x10006 3306; GFX12-NEXT: s_bfe_u32 s54, s3, 0x10004 3307; GFX12-NEXT: s_bfe_u32 s55, s3, 0x1000a 3308; GFX12-NEXT: s_bfe_u32 s56, s3, 0x10008 3309; GFX12-NEXT: s_bfe_u32 s57, s3, 0x1000e 3310; GFX12-NEXT: v_dual_mov_b32 v12, s59 :: v_dual_mov_b32 v15, s27 3311; GFX12-NEXT: v_mov_b32_e32 v14, s58 3312; GFX12-NEXT: s_bfe_u32 s3, s3, 0x1000c 3313; GFX12-NEXT: s_clause 0x3 3314; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240 3315; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224 3316; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208 3317; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192 3318; GFX12-NEXT: v_dual_mov_b32 v1, s26 :: v_dual_mov_b32 v0, s3 3319; GFX12-NEXT: v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s57 3320; GFX12-NEXT: v_dual_mov_b32 v5, s24 :: v_dual_mov_b32 v4, s56 3321; GFX12-NEXT: v_dual_mov_b32 v7, s23 :: v_dual_mov_b32 v6, s55 3322; GFX12-NEXT: v_mov_b32_e32 v9, s22 3323; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003 3324; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001 3325; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10007 3326; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10005 3327; GFX12-NEXT: s_bfe_u32 s8, s2, 0x1000b 3328; GFX12-NEXT: s_bfe_u32 s9, s2, 0x10009 3329; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000f 3330; 
GFX12-NEXT: s_bfe_u32 s11, s2, 0x1000d 3331; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10013 3332; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10011 3333; GFX12-NEXT: s_bfe_u32 s14, s2, 0x10017 3334; GFX12-NEXT: s_bfe_u32 s15, s2, 0x1001b 3335; GFX12-NEXT: s_bfe_u32 s16, s2, 0x10019 3336; GFX12-NEXT: s_lshr_b32 s17, s2, 31 3337; GFX12-NEXT: s_bfe_u32 s18, s2, 0x1001d 3338; GFX12-NEXT: s_and_b32 s35, s2, 1 3339; GFX12-NEXT: s_bfe_u32 s36, s2, 0x10002 3340; GFX12-NEXT: s_bfe_u32 s37, s2, 0x10006 3341; GFX12-NEXT: s_bfe_u32 s38, s2, 0x10004 3342; GFX12-NEXT: s_bfe_u32 s39, s2, 0x1000a 3343; GFX12-NEXT: s_bfe_u32 s40, s2, 0x10008 3344; GFX12-NEXT: s_bfe_u32 s41, s2, 0x1000e 3345; GFX12-NEXT: s_bfe_u32 s42, s2, 0x1000c 3346; GFX12-NEXT: s_bfe_u32 s43, s2, 0x10012 3347; GFX12-NEXT: s_bfe_u32 s44, s2, 0x10010 3348; GFX12-NEXT: s_bfe_u32 s45, s2, 0x10016 3349; GFX12-NEXT: s_bfe_u32 s46, s2, 0x10015 3350; GFX12-NEXT: s_bfe_u32 s47, s2, 0x10014 3351; GFX12-NEXT: s_bfe_u32 s48, s2, 0x1001a 3352; GFX12-NEXT: s_bfe_u32 s49, s2, 0x10018 3353; GFX12-NEXT: s_bfe_u32 s50, s2, 0x1001e 3354; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1001c 3355; GFX12-NEXT: v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s21 3356; GFX12-NEXT: v_dual_mov_b32 v10, s53 :: v_dual_mov_b32 v13, s20 3357; GFX12-NEXT: v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v15, s19 3358; GFX12-NEXT: v_dual_mov_b32 v14, s52 :: v_dual_mov_b32 v17, s18 3359; GFX12-NEXT: s_wait_alu 0xfffe 3360; GFX12-NEXT: v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s17 3361; GFX12-NEXT: v_dual_mov_b32 v18, s50 :: v_dual_mov_b32 v21, s16 3362; GFX12-NEXT: v_dual_mov_b32 v20, s49 :: v_dual_mov_b32 v23, s15 3363; GFX12-NEXT: v_mov_b32_e32 v22, s48 3364; GFX12-NEXT: s_clause 0x5 3365; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176 3366; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160 3367; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144 3368; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128 3369; GFX12-NEXT: 
global_store_b128 v24, v[16:19], s[0:1] offset:112 3370; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96 3371; GFX12-NEXT: v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v0, s47 3372; GFX12-NEXT: v_dual_mov_b32 v3, s14 :: v_dual_mov_b32 v2, s45 3373; GFX12-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v4, s44 3374; GFX12-NEXT: v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v6, s43 3375; GFX12-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v8, s42 3376; GFX12-NEXT: v_dual_mov_b32 v11, s10 :: v_dual_mov_b32 v10, s41 3377; GFX12-NEXT: v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v12, s40 3378; GFX12-NEXT: v_dual_mov_b32 v15, s8 :: v_dual_mov_b32 v14, s39 3379; GFX12-NEXT: v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s38 3380; GFX12-NEXT: v_dual_mov_b32 v19, s6 :: v_dual_mov_b32 v18, s37 3381; GFX12-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v20, s35 3382; GFX12-NEXT: v_dual_mov_b32 v23, s4 :: v_dual_mov_b32 v22, s36 3383; GFX12-NEXT: s_clause 0x5 3384; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80 3385; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64 3386; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48 3387; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 3388; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 3389; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 3390; GFX12-NEXT: s_endpgm 3391 %load = load <64 x i1>, ptr addrspace(4) %in 3392 %ext = zext <64 x i1> %load to <64 x i32> 3393 store <64 x i32> %ext, ptr addrspace(1) %out 3394 ret void 3395} 3396 3397define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 3398; GFX6-LABEL: constant_sextload_v64i1_to_v64i32: 3399; GFX6: ; %bb.0: 3400; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3401; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3402; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 3403; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3404; GFX6-NEXT: s_bfe_i32 s4, s2, 0x10003 3405; GFX6-NEXT: 
s_bfe_i32 s5, s2, 0x10002 3406; GFX6-NEXT: s_bfe_i32 s6, s2, 0x10001 3407; GFX6-NEXT: s_bfe_i32 s7, s2, 0x10000 3408; GFX6-NEXT: s_bfe_i32 s8, s2, 0x10007 3409; GFX6-NEXT: s_bfe_i32 s9, s2, 0x10006 3410; GFX6-NEXT: s_bfe_i32 s10, s2, 0x10005 3411; GFX6-NEXT: s_bfe_i32 s11, s2, 0x10004 3412; GFX6-NEXT: s_bfe_i32 s12, s2, 0x1000b 3413; GFX6-NEXT: s_bfe_i32 s13, s2, 0x1000a 3414; GFX6-NEXT: s_bfe_i32 s14, s2, 0x10009 3415; GFX6-NEXT: s_bfe_i32 s15, s2, 0x10008 3416; GFX6-NEXT: s_bfe_i32 s16, s2, 0x1000f 3417; GFX6-NEXT: s_bfe_i32 s17, s2, 0x1000e 3418; GFX6-NEXT: s_bfe_i32 s18, s2, 0x1000d 3419; GFX6-NEXT: s_bfe_i32 s19, s2, 0x1000c 3420; GFX6-NEXT: s_bfe_i32 s20, s2, 0x10013 3421; GFX6-NEXT: s_bfe_i32 s21, s2, 0x10012 3422; GFX6-NEXT: s_bfe_i32 s22, s2, 0x10011 3423; GFX6-NEXT: s_bfe_i32 s23, s2, 0x10010 3424; GFX6-NEXT: s_bfe_i32 s24, s2, 0x10017 3425; GFX6-NEXT: s_bfe_i32 s25, s2, 0x10016 3426; GFX6-NEXT: s_bfe_i32 s26, s2, 0x10015 3427; GFX6-NEXT: s_bfe_i32 s27, s2, 0x10014 3428; GFX6-NEXT: s_bfe_i32 s28, s2, 0x1001b 3429; GFX6-NEXT: s_bfe_i32 s29, s2, 0x1001a 3430; GFX6-NEXT: s_bfe_i32 s30, s2, 0x10019 3431; GFX6-NEXT: s_bfe_i32 s31, s2, 0x10018 3432; GFX6-NEXT: s_ashr_i32 s33, s2, 31 3433; GFX6-NEXT: s_bfe_i32 s34, s2, 0x1001e 3434; GFX6-NEXT: s_bfe_i32 s35, s2, 0x1001d 3435; GFX6-NEXT: s_bfe_i32 s36, s2, 0x1001c 3436; GFX6-NEXT: s_bfe_i32 s37, s3, 0x10003 3437; GFX6-NEXT: s_bfe_i32 s38, s3, 0x10002 3438; GFX6-NEXT: s_bfe_i32 s39, s3, 0x10001 3439; GFX6-NEXT: s_bfe_i32 s40, s3, 0x10000 3440; GFX6-NEXT: s_bfe_i32 s41, s3, 0x10007 3441; GFX6-NEXT: s_bfe_i32 s42, s3, 0x10006 3442; GFX6-NEXT: s_bfe_i32 s43, s3, 0x10005 3443; GFX6-NEXT: s_bfe_i32 s44, s3, 0x10004 3444; GFX6-NEXT: s_bfe_i32 s45, s3, 0x1000b 3445; GFX6-NEXT: s_bfe_i32 s46, s3, 0x1000a 3446; GFX6-NEXT: s_bfe_i32 s47, s3, 0x10009 3447; GFX6-NEXT: s_bfe_i32 s48, s3, 0x10008 3448; GFX6-NEXT: s_bfe_i32 s49, s3, 0x1000e 3449; GFX6-NEXT: s_bfe_i32 s50, s3, 0x1000d 3450; GFX6-NEXT: s_bfe_i32 s51, s3, 0x1000c 
3451; GFX6-NEXT: s_bfe_i32 s52, s3, 0x10013 3452; GFX6-NEXT: s_bfe_i32 s53, s3, 0x10012 3453; GFX6-NEXT: s_bfe_i32 s54, s3, 0x10011 3454; GFX6-NEXT: s_bfe_i32 s55, s3, 0x10010 3455; GFX6-NEXT: s_bfe_i32 s56, s3, 0x10017 3456; GFX6-NEXT: s_bfe_i32 s57, s3, 0x10016 3457; GFX6-NEXT: s_bfe_i32 s58, s3, 0x10015 3458; GFX6-NEXT: s_bfe_i32 s59, s3, 0x10014 3459; GFX6-NEXT: s_bfe_i32 s60, s3, 0x1001b 3460; GFX6-NEXT: s_bfe_i32 s61, s3, 0x1001a 3461; GFX6-NEXT: s_bfe_i32 s62, s3, 0x10019 3462; GFX6-NEXT: s_bfe_i32 s63, s3, 0x10018 3463; GFX6-NEXT: s_ashr_i32 s64, s3, 31 3464; GFX6-NEXT: s_bfe_i32 s65, s3, 0x1001e 3465; GFX6-NEXT: s_bfe_i32 s66, s3, 0x1001d 3466; GFX6-NEXT: s_bfe_i32 s67, s3, 0x1001c 3467; GFX6-NEXT: s_bfe_i32 s68, s3, 0x1000f 3468; GFX6-NEXT: s_mov_b32 s3, 0xf000 3469; GFX6-NEXT: s_mov_b32 s2, -1 3470; GFX6-NEXT: v_mov_b32_e32 v0, s67 3471; GFX6-NEXT: v_mov_b32_e32 v1, s66 3472; GFX6-NEXT: v_mov_b32_e32 v2, s65 3473; GFX6-NEXT: v_mov_b32_e32 v3, s64 3474; GFX6-NEXT: v_mov_b32_e32 v4, s63 3475; GFX6-NEXT: v_mov_b32_e32 v5, s62 3476; GFX6-NEXT: v_mov_b32_e32 v6, s61 3477; GFX6-NEXT: v_mov_b32_e32 v7, s60 3478; GFX6-NEXT: v_mov_b32_e32 v8, s59 3479; GFX6-NEXT: v_mov_b32_e32 v9, s58 3480; GFX6-NEXT: v_mov_b32_e32 v10, s57 3481; GFX6-NEXT: v_mov_b32_e32 v11, s56 3482; GFX6-NEXT: v_mov_b32_e32 v12, s55 3483; GFX6-NEXT: v_mov_b32_e32 v13, s54 3484; GFX6-NEXT: v_mov_b32_e32 v14, s53 3485; GFX6-NEXT: v_mov_b32_e32 v15, s52 3486; GFX6-NEXT: v_mov_b32_e32 v16, s51 3487; GFX6-NEXT: v_mov_b32_e32 v17, s50 3488; GFX6-NEXT: v_mov_b32_e32 v18, s49 3489; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3490; GFX6-NEXT: s_waitcnt expcnt(0) 3491; GFX6-NEXT: v_mov_b32_e32 v0, s48 3492; GFX6-NEXT: v_mov_b32_e32 v19, s68 3493; GFX6-NEXT: v_mov_b32_e32 v1, s47 3494; GFX6-NEXT: v_mov_b32_e32 v2, s46 3495; GFX6-NEXT: v_mov_b32_e32 v3, s45 3496; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 3497; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, 
s[0:3], 0 offset:208 3498; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 3499; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3500; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 3501; GFX6-NEXT: s_waitcnt expcnt(0) 3502; GFX6-NEXT: v_mov_b32_e32 v0, s44 3503; GFX6-NEXT: v_mov_b32_e32 v1, s43 3504; GFX6-NEXT: v_mov_b32_e32 v2, s42 3505; GFX6-NEXT: v_mov_b32_e32 v3, s41 3506; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3507; GFX6-NEXT: s_waitcnt expcnt(0) 3508; GFX6-NEXT: v_mov_b32_e32 v0, s40 3509; GFX6-NEXT: v_mov_b32_e32 v1, s39 3510; GFX6-NEXT: v_mov_b32_e32 v2, s38 3511; GFX6-NEXT: v_mov_b32_e32 v3, s37 3512; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3513; GFX6-NEXT: s_waitcnt expcnt(0) 3514; GFX6-NEXT: v_mov_b32_e32 v0, s36 3515; GFX6-NEXT: v_mov_b32_e32 v1, s35 3516; GFX6-NEXT: v_mov_b32_e32 v2, s34 3517; GFX6-NEXT: v_mov_b32_e32 v3, s33 3518; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3519; GFX6-NEXT: s_waitcnt expcnt(0) 3520; GFX6-NEXT: v_mov_b32_e32 v0, s31 3521; GFX6-NEXT: v_mov_b32_e32 v1, s30 3522; GFX6-NEXT: v_mov_b32_e32 v2, s29 3523; GFX6-NEXT: v_mov_b32_e32 v3, s28 3524; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3525; GFX6-NEXT: s_waitcnt expcnt(0) 3526; GFX6-NEXT: v_mov_b32_e32 v0, s27 3527; GFX6-NEXT: v_mov_b32_e32 v1, s26 3528; GFX6-NEXT: v_mov_b32_e32 v2, s25 3529; GFX6-NEXT: v_mov_b32_e32 v3, s24 3530; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3531; GFX6-NEXT: s_waitcnt expcnt(0) 3532; GFX6-NEXT: v_mov_b32_e32 v0, s23 3533; GFX6-NEXT: v_mov_b32_e32 v1, s22 3534; GFX6-NEXT: v_mov_b32_e32 v2, s21 3535; GFX6-NEXT: v_mov_b32_e32 v3, s20 3536; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3537; GFX6-NEXT: s_waitcnt expcnt(0) 3538; GFX6-NEXT: v_mov_b32_e32 v0, s19 3539; GFX6-NEXT: v_mov_b32_e32 v1, s18 3540; GFX6-NEXT: v_mov_b32_e32 v2, s17 3541; GFX6-NEXT: 
v_mov_b32_e32 v3, s16 3542; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3543; GFX6-NEXT: s_waitcnt expcnt(0) 3544; GFX6-NEXT: v_mov_b32_e32 v0, s15 3545; GFX6-NEXT: v_mov_b32_e32 v1, s14 3546; GFX6-NEXT: v_mov_b32_e32 v2, s13 3547; GFX6-NEXT: v_mov_b32_e32 v3, s12 3548; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3549; GFX6-NEXT: s_waitcnt expcnt(0) 3550; GFX6-NEXT: v_mov_b32_e32 v0, s11 3551; GFX6-NEXT: v_mov_b32_e32 v1, s10 3552; GFX6-NEXT: v_mov_b32_e32 v2, s9 3553; GFX6-NEXT: v_mov_b32_e32 v3, s8 3554; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3555; GFX6-NEXT: s_waitcnt expcnt(0) 3556; GFX6-NEXT: v_mov_b32_e32 v0, s7 3557; GFX6-NEXT: v_mov_b32_e32 v1, s6 3558; GFX6-NEXT: v_mov_b32_e32 v2, s5 3559; GFX6-NEXT: v_mov_b32_e32 v3, s4 3560; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3561; GFX6-NEXT: s_endpgm 3562; 3563; GFX8-LABEL: constant_sextload_v64i1_to_v64i32: 3564; GFX8: ; %bb.0: 3565; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3566; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3567; GFX8-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x0 3568; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3569; GFX8-NEXT: s_bfe_i32 s2, s26, 0x10003 3570; GFX8-NEXT: s_bfe_i32 s3, s26, 0x10002 3571; GFX8-NEXT: s_bfe_i32 s4, s26, 0x10001 3572; GFX8-NEXT: s_bfe_i32 s5, s26, 0x10000 3573; GFX8-NEXT: s_bfe_i32 s6, s26, 0x10007 3574; GFX8-NEXT: s_bfe_i32 s7, s26, 0x10006 3575; GFX8-NEXT: s_bfe_i32 s8, s26, 0x10005 3576; GFX8-NEXT: s_bfe_i32 s9, s26, 0x10004 3577; GFX8-NEXT: s_bfe_i32 s10, s26, 0x1000b 3578; GFX8-NEXT: s_bfe_i32 s11, s26, 0x1000a 3579; GFX8-NEXT: s_bfe_i32 s12, s26, 0x10009 3580; GFX8-NEXT: s_bfe_i32 s13, s26, 0x10008 3581; GFX8-NEXT: s_bfe_i32 s14, s26, 0x1000f 3582; GFX8-NEXT: s_bfe_i32 s15, s26, 0x1000e 3583; GFX8-NEXT: s_bfe_i32 s16, s26, 0x1000d 3584; GFX8-NEXT: s_bfe_i32 s17, s26, 0x1000c 3585; GFX8-NEXT: s_bfe_i32 s18, s26, 0x10013 3586; GFX8-NEXT: s_bfe_i32 s19, s26, 0x10012 3587; GFX8-NEXT: s_bfe_i32 s20, s26, 
0x10011 3588; GFX8-NEXT: s_bfe_i32 s21, s26, 0x10010 3589; GFX8-NEXT: s_bfe_i32 s22, s26, 0x10017 3590; GFX8-NEXT: s_bfe_i32 s23, s26, 0x10016 3591; GFX8-NEXT: s_bfe_i32 s24, s26, 0x10015 3592; GFX8-NEXT: s_bfe_i32 s25, s26, 0x10014 3593; GFX8-NEXT: s_bfe_i32 s28, s26, 0x1001b 3594; GFX8-NEXT: s_bfe_i32 s29, s26, 0x1001a 3595; GFX8-NEXT: s_bfe_i32 s30, s26, 0x10019 3596; GFX8-NEXT: s_bfe_i32 s31, s26, 0x10018 3597; GFX8-NEXT: s_ashr_i32 s33, s26, 31 3598; GFX8-NEXT: s_bfe_i32 s34, s26, 0x1001e 3599; GFX8-NEXT: s_bfe_i32 s35, s26, 0x1001d 3600; GFX8-NEXT: s_bfe_i32 s36, s26, 0x1001c 3601; GFX8-NEXT: s_bfe_i32 s37, s27, 0x10003 3602; GFX8-NEXT: s_bfe_i32 s38, s27, 0x10002 3603; GFX8-NEXT: s_bfe_i32 s39, s27, 0x10001 3604; GFX8-NEXT: s_bfe_i32 s40, s27, 0x10000 3605; GFX8-NEXT: s_bfe_i32 s41, s27, 0x10007 3606; GFX8-NEXT: s_bfe_i32 s42, s27, 0x10006 3607; GFX8-NEXT: s_bfe_i32 s43, s27, 0x10005 3608; GFX8-NEXT: s_bfe_i32 s44, s27, 0x10004 3609; GFX8-NEXT: s_bfe_i32 s45, s27, 0x1000b 3610; GFX8-NEXT: s_bfe_i32 s46, s27, 0x1000a 3611; GFX8-NEXT: s_bfe_i32 s47, s27, 0x10009 3612; GFX8-NEXT: s_bfe_i32 s48, s27, 0x10008 3613; GFX8-NEXT: s_bfe_i32 s49, s27, 0x1000f 3614; GFX8-NEXT: s_bfe_i32 s50, s27, 0x1000e 3615; GFX8-NEXT: s_bfe_i32 s51, s27, 0x1000d 3616; GFX8-NEXT: s_bfe_i32 s52, s27, 0x1000c 3617; GFX8-NEXT: s_bfe_i32 s53, s27, 0x10013 3618; GFX8-NEXT: s_bfe_i32 s54, s27, 0x10012 3619; GFX8-NEXT: s_bfe_i32 s55, s27, 0x10011 3620; GFX8-NEXT: s_bfe_i32 s56, s27, 0x10010 3621; GFX8-NEXT: s_bfe_i32 s57, s27, 0x10017 3622; GFX8-NEXT: s_bfe_i32 s58, s27, 0x10016 3623; GFX8-NEXT: s_bfe_i32 s59, s27, 0x10015 3624; GFX8-NEXT: s_bfe_i32 s60, s27, 0x10014 3625; GFX8-NEXT: s_bfe_i32 s61, s27, 0x1001b 3626; GFX8-NEXT: s_bfe_i32 s62, s27, 0x1001a 3627; GFX8-NEXT: s_bfe_i32 s63, s27, 0x10019 3628; GFX8-NEXT: s_bfe_i32 s64, s27, 0x10018 3629; GFX8-NEXT: s_ashr_i32 s26, s27, 31 3630; GFX8-NEXT: s_bfe_i32 s65, s27, 0x1001e 3631; GFX8-NEXT: s_bfe_i32 s66, s27, 0x1001d 3632; GFX8-NEXT: 
s_bfe_i32 s27, s27, 0x1001c 3633; GFX8-NEXT: v_mov_b32_e32 v3, s26 3634; GFX8-NEXT: s_add_u32 s26, s0, 0xf0 3635; GFX8-NEXT: v_mov_b32_e32 v0, s27 3636; GFX8-NEXT: s_addc_u32 s27, s1, 0 3637; GFX8-NEXT: v_mov_b32_e32 v4, s26 3638; GFX8-NEXT: v_mov_b32_e32 v1, s66 3639; GFX8-NEXT: v_mov_b32_e32 v2, s65 3640; GFX8-NEXT: v_mov_b32_e32 v5, s27 3641; GFX8-NEXT: s_add_u32 s26, s0, 0xe0 3642; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3643; GFX8-NEXT: s_addc_u32 s27, s1, 0 3644; GFX8-NEXT: v_mov_b32_e32 v4, s26 3645; GFX8-NEXT: v_mov_b32_e32 v0, s64 3646; GFX8-NEXT: v_mov_b32_e32 v1, s63 3647; GFX8-NEXT: v_mov_b32_e32 v2, s62 3648; GFX8-NEXT: v_mov_b32_e32 v3, s61 3649; GFX8-NEXT: v_mov_b32_e32 v5, s27 3650; GFX8-NEXT: s_add_u32 s26, s0, 0xd0 3651; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3652; GFX8-NEXT: s_addc_u32 s27, s1, 0 3653; GFX8-NEXT: v_mov_b32_e32 v4, s26 3654; GFX8-NEXT: v_mov_b32_e32 v0, s60 3655; GFX8-NEXT: v_mov_b32_e32 v1, s59 3656; GFX8-NEXT: v_mov_b32_e32 v2, s58 3657; GFX8-NEXT: v_mov_b32_e32 v3, s57 3658; GFX8-NEXT: v_mov_b32_e32 v5, s27 3659; GFX8-NEXT: s_add_u32 s26, s0, 0xc0 3660; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3661; GFX8-NEXT: s_addc_u32 s27, s1, 0 3662; GFX8-NEXT: v_mov_b32_e32 v4, s26 3663; GFX8-NEXT: v_mov_b32_e32 v0, s56 3664; GFX8-NEXT: v_mov_b32_e32 v1, s55 3665; GFX8-NEXT: v_mov_b32_e32 v2, s54 3666; GFX8-NEXT: v_mov_b32_e32 v3, s53 3667; GFX8-NEXT: v_mov_b32_e32 v5, s27 3668; GFX8-NEXT: s_add_u32 s26, s0, 0xb0 3669; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3670; GFX8-NEXT: s_addc_u32 s27, s1, 0 3671; GFX8-NEXT: v_mov_b32_e32 v4, s26 3672; GFX8-NEXT: v_mov_b32_e32 v0, s52 3673; GFX8-NEXT: v_mov_b32_e32 v1, s51 3674; GFX8-NEXT: v_mov_b32_e32 v2, s50 3675; GFX8-NEXT: v_mov_b32_e32 v3, s49 3676; GFX8-NEXT: v_mov_b32_e32 v5, s27 3677; GFX8-NEXT: s_add_u32 s26, s0, 0xa0 3678; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3679; GFX8-NEXT: s_addc_u32 s27, s1, 0 3680; GFX8-NEXT: v_mov_b32_e32 v4, s26 3681; GFX8-NEXT: 
v_mov_b32_e32 v0, s48 3682; GFX8-NEXT: v_mov_b32_e32 v1, s47 3683; GFX8-NEXT: v_mov_b32_e32 v2, s46 3684; GFX8-NEXT: v_mov_b32_e32 v3, s45 3685; GFX8-NEXT: v_mov_b32_e32 v5, s27 3686; GFX8-NEXT: s_add_u32 s26, s0, 0x90 3687; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3688; GFX8-NEXT: s_addc_u32 s27, s1, 0 3689; GFX8-NEXT: v_mov_b32_e32 v4, s26 3690; GFX8-NEXT: v_mov_b32_e32 v0, s44 3691; GFX8-NEXT: v_mov_b32_e32 v1, s43 3692; GFX8-NEXT: v_mov_b32_e32 v2, s42 3693; GFX8-NEXT: v_mov_b32_e32 v3, s41 3694; GFX8-NEXT: v_mov_b32_e32 v5, s27 3695; GFX8-NEXT: s_add_u32 s26, s0, 0x80 3696; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3697; GFX8-NEXT: s_addc_u32 s27, s1, 0 3698; GFX8-NEXT: v_mov_b32_e32 v4, s26 3699; GFX8-NEXT: v_mov_b32_e32 v0, s40 3700; GFX8-NEXT: v_mov_b32_e32 v1, s39 3701; GFX8-NEXT: v_mov_b32_e32 v2, s38 3702; GFX8-NEXT: v_mov_b32_e32 v3, s37 3703; GFX8-NEXT: v_mov_b32_e32 v5, s27 3704; GFX8-NEXT: s_add_u32 s26, s0, 0x70 3705; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3706; GFX8-NEXT: s_addc_u32 s27, s1, 0 3707; GFX8-NEXT: v_mov_b32_e32 v4, s26 3708; GFX8-NEXT: v_mov_b32_e32 v0, s36 3709; GFX8-NEXT: v_mov_b32_e32 v1, s35 3710; GFX8-NEXT: v_mov_b32_e32 v2, s34 3711; GFX8-NEXT: v_mov_b32_e32 v3, s33 3712; GFX8-NEXT: v_mov_b32_e32 v5, s27 3713; GFX8-NEXT: s_add_u32 s26, s0, 0x60 3714; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3715; GFX8-NEXT: s_addc_u32 s27, s1, 0 3716; GFX8-NEXT: v_mov_b32_e32 v4, s26 3717; GFX8-NEXT: v_mov_b32_e32 v0, s31 3718; GFX8-NEXT: v_mov_b32_e32 v1, s30 3719; GFX8-NEXT: v_mov_b32_e32 v2, s29 3720; GFX8-NEXT: v_mov_b32_e32 v3, s28 3721; GFX8-NEXT: v_mov_b32_e32 v5, s27 3722; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3723; GFX8-NEXT: s_nop 0 3724; GFX8-NEXT: v_mov_b32_e32 v3, s22 3725; GFX8-NEXT: s_add_u32 s22, s0, 0x50 3726; GFX8-NEXT: v_mov_b32_e32 v2, s23 3727; GFX8-NEXT: s_addc_u32 s23, s1, 0 3728; GFX8-NEXT: v_mov_b32_e32 v4, s22 3729; GFX8-NEXT: v_mov_b32_e32 v0, s25 3730; GFX8-NEXT: v_mov_b32_e32 v1, s24 3731; 
GFX8-NEXT: v_mov_b32_e32 v5, s23 3732; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3733; GFX8-NEXT: s_nop 0 3734; GFX8-NEXT: v_mov_b32_e32 v3, s18 3735; GFX8-NEXT: s_add_u32 s18, s0, 64 3736; GFX8-NEXT: v_mov_b32_e32 v2, s19 3737; GFX8-NEXT: s_addc_u32 s19, s1, 0 3738; GFX8-NEXT: v_mov_b32_e32 v4, s18 3739; GFX8-NEXT: v_mov_b32_e32 v0, s21 3740; GFX8-NEXT: v_mov_b32_e32 v1, s20 3741; GFX8-NEXT: v_mov_b32_e32 v5, s19 3742; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3743; GFX8-NEXT: s_nop 0 3744; GFX8-NEXT: v_mov_b32_e32 v3, s14 3745; GFX8-NEXT: s_add_u32 s14, s0, 48 3746; GFX8-NEXT: v_mov_b32_e32 v2, s15 3747; GFX8-NEXT: s_addc_u32 s15, s1, 0 3748; GFX8-NEXT: v_mov_b32_e32 v4, s14 3749; GFX8-NEXT: v_mov_b32_e32 v0, s17 3750; GFX8-NEXT: v_mov_b32_e32 v1, s16 3751; GFX8-NEXT: v_mov_b32_e32 v5, s15 3752; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3753; GFX8-NEXT: s_nop 0 3754; GFX8-NEXT: v_mov_b32_e32 v3, s10 3755; GFX8-NEXT: s_add_u32 s10, s0, 32 3756; GFX8-NEXT: v_mov_b32_e32 v2, s11 3757; GFX8-NEXT: s_addc_u32 s11, s1, 0 3758; GFX8-NEXT: v_mov_b32_e32 v4, s10 3759; GFX8-NEXT: v_mov_b32_e32 v0, s13 3760; GFX8-NEXT: v_mov_b32_e32 v1, s12 3761; GFX8-NEXT: v_mov_b32_e32 v5, s11 3762; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3763; GFX8-NEXT: s_nop 0 3764; GFX8-NEXT: v_mov_b32_e32 v3, s6 3765; GFX8-NEXT: s_add_u32 s6, s0, 16 3766; GFX8-NEXT: v_mov_b32_e32 v2, s7 3767; GFX8-NEXT: s_addc_u32 s7, s1, 0 3768; GFX8-NEXT: v_mov_b32_e32 v4, s6 3769; GFX8-NEXT: v_mov_b32_e32 v0, s9 3770; GFX8-NEXT: v_mov_b32_e32 v1, s8 3771; GFX8-NEXT: v_mov_b32_e32 v5, s7 3772; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3773; GFX8-NEXT: v_mov_b32_e32 v5, s1 3774; GFX8-NEXT: v_mov_b32_e32 v0, s5 3775; GFX8-NEXT: v_mov_b32_e32 v1, s4 3776; GFX8-NEXT: v_mov_b32_e32 v2, s3 3777; GFX8-NEXT: v_mov_b32_e32 v3, s2 3778; GFX8-NEXT: v_mov_b32_e32 v4, s0 3779; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3780; GFX8-NEXT: s_endpgm 3781; 3782; EG-LABEL: constant_sextload_v64i1_to_v64i32: 3783; EG: 
; %bb.0: 3784; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[] 3785; EG-NEXT: TEX 0 @22 3786; EG-NEXT: ALU 99, @25, KC0[CB0:0-32], KC1[] 3787; EG-NEXT: ALU 98, @125, KC0[CB0:0-32], KC1[] 3788; EG-NEXT: ALU 13, @224, KC0[CB0:0-32], KC1[] 3789; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0 3790; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T49.X, 0 3791; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T46.X, 0 3792; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T44.X, 0 3793; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T42.X, 0 3794; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T40.X, 0 3795; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T38.X, 0 3796; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T36.X, 0 3797; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T35.X, 0 3798; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T33.X, 0 3799; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T31.X, 0 3800; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T29.X, 0 3801; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T27.X, 0 3802; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T25.X, 0 3803; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0 3804; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T21.X, 1 3805; EG-NEXT: CF_END 3806; EG-NEXT: Fetch clause starting at 22: 3807; EG-NEXT: VTX_READ_64 T19.XY, T19.X, 0, #1 3808; EG-NEXT: ALU clause starting at 24: 3809; EG-NEXT: MOV * T19.X, KC0[2].Z, 3810; EG-NEXT: ALU clause starting at 25: 3811; EG-NEXT: LSHR * T0.W, T19.X, literal.x, 3812; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 3813; EG-NEXT: BFE_INT T20.W, PV.W, 0.0, 1, 3814; EG-NEXT: LSHR * T0.W, T19.X, literal.x, 3815; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00) 3816; EG-NEXT: BFE_INT T20.Z, PS, 0.0, 1, 3817; EG-NEXT: LSHR T0.W, T19.X, literal.x, 3818; EG-NEXT: LSHR * T1.W, T19.X, literal.y, 3819; EG-NEXT: 11(1.541428e-44), 5(7.006492e-45) 3820; EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 3821; EG-NEXT: BFE_INT T20.Y, PS, 0.0, 1, 3822; EG-NEXT: LSHR T0.Z, T19.X, 
literal.y, 3823; EG-NEXT: BFE_INT T22.W, PV.W, 0.0, 1, 3824; EG-NEXT: LSHR * T0.W, T19.X, literal.z, 3825; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44) 3826; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 3827; EG-NEXT: BFE_INT T20.X, PS, 0.0, 1, 3828; EG-NEXT: LSHR T0.Y, T19.X, literal.x, 3829; EG-NEXT: BFE_INT T22.Z, PV.Z, 0.0, 1, 3830; EG-NEXT: LSHR T0.W, T19.X, literal.y, 3831; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3832; EG-NEXT: 15(2.101948e-44), 9(1.261169e-44) 3833; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3834; EG-NEXT: LSHR T23.X, PS, literal.x, 3835; EG-NEXT: BFE_INT T22.Y, PV.W, 0.0, 1, 3836; EG-NEXT: LSHR T0.Z, T19.X, literal.y, 3837; EG-NEXT: BFE_INT T24.W, PV.Y, 0.0, 1, 3838; EG-NEXT: LSHR * T0.W, T19.X, literal.z, 3839; EG-NEXT: 2(2.802597e-45), 14(1.961818e-44) 3840; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3841; EG-NEXT: BFE_INT T22.X, PS, 0.0, 1, 3842; EG-NEXT: LSHR T0.Y, T19.X, literal.x, 3843; EG-NEXT: BFE_INT T24.Z, PV.Z, 0.0, 1, 3844; EG-NEXT: LSHR T0.W, T19.X, literal.y, 3845; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3846; EG-NEXT: 19(2.662467e-44), 13(1.821688e-44) 3847; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3848; EG-NEXT: LSHR T25.X, PS, literal.x, 3849; EG-NEXT: BFE_INT T24.Y, PV.W, 0.0, 1, 3850; EG-NEXT: LSHR T0.Z, T19.X, literal.y, 3851; EG-NEXT: BFE_INT T26.W, PV.Y, 0.0, 1, 3852; EG-NEXT: LSHR * T0.W, T19.X, literal.z, 3853; EG-NEXT: 2(2.802597e-45), 18(2.522337e-44) 3854; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) 3855; EG-NEXT: BFE_INT T24.X, PS, 0.0, 1, 3856; EG-NEXT: LSHR T0.Y, T19.X, literal.x, 3857; EG-NEXT: BFE_INT T26.Z, PV.Z, 0.0, 1, 3858; EG-NEXT: LSHR T0.W, T19.X, literal.y, 3859; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3860; EG-NEXT: 23(3.222986e-44), 17(2.382207e-44) 3861; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3862; EG-NEXT: LSHR T27.X, PS, literal.x, 3863; EG-NEXT: BFE_INT T26.Y, PV.W, 0.0, 1, 3864; EG-NEXT: LSHR T0.Z, T19.X, literal.y, 3865; EG-NEXT: BFE_INT T28.W, PV.Y, 0.0, 1, 3866; EG-NEXT: 
LSHR * T0.W, T19.X, literal.z, 3867; EG-NEXT: 2(2.802597e-45), 22(3.082857e-44) 3868; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3869; EG-NEXT: BFE_INT T26.X, PS, 0.0, 1, 3870; EG-NEXT: LSHR T0.Y, T19.X, literal.x, 3871; EG-NEXT: BFE_INT T28.Z, PV.Z, 0.0, 1, 3872; EG-NEXT: LSHR T0.W, T19.X, literal.y, 3873; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3874; EG-NEXT: 27(3.783506e-44), 21(2.942727e-44) 3875; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3876; EG-NEXT: LSHR T29.X, PS, literal.x, 3877; EG-NEXT: BFE_INT T28.Y, PV.W, 0.0, 1, 3878; EG-NEXT: LSHR T0.Z, T19.X, literal.y, 3879; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, 1, 3880; EG-NEXT: LSHR * T0.W, T19.X, literal.z, 3881; EG-NEXT: 2(2.802597e-45), 26(3.643376e-44) 3882; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) 3883; EG-NEXT: BFE_INT T28.X, PS, 0.0, 1, 3884; EG-NEXT: BFE_INT T30.Z, PV.Z, 0.0, 1, 3885; EG-NEXT: LSHR T0.W, T19.X, literal.x, 3886; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3887; EG-NEXT: 25(3.503246e-44), 80(1.121039e-43) 3888; EG-NEXT: LSHR T31.X, PS, literal.x, 3889; EG-NEXT: BFE_INT T30.Y, PV.W, 0.0, 1, 3890; EG-NEXT: LSHR T0.Z, T19.X, literal.y, 3891; EG-NEXT: LSHR T0.W, T19.X, literal.z, 3892; EG-NEXT: ASHR * T32.W, T19.X, literal.w, 3893; EG-NEXT: 2(2.802597e-45), 30(4.203895e-44) 3894; EG-NEXT: 24(3.363116e-44), 31(4.344025e-44) 3895; EG-NEXT: BFE_INT T30.X, PV.W, 0.0, 1, 3896; EG-NEXT: BFE_INT T32.Z, PV.Z, 0.0, 1, 3897; EG-NEXT: LSHR T0.W, T19.X, literal.x, 3898; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3899; EG-NEXT: 29(4.063766e-44), 96(1.345247e-43) 3900; EG-NEXT: LSHR T33.X, PS, literal.x, 3901; EG-NEXT: BFE_INT T32.Y, PV.W, 0.0, 1, 3902; EG-NEXT: LSHR T0.W, T19.Y, literal.y, 3903; EG-NEXT: LSHR * T1.W, T19.X, literal.z, 3904; EG-NEXT: 2(2.802597e-45), 7(9.809089e-45) 3905; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) 3906; EG-NEXT: BFE_INT T32.X, PS, 0.0, 1, 3907; EG-NEXT: LSHR T0.Z, T19.Y, literal.x, 3908; EG-NEXT: BFE_INT T34.W, PV.W, 0.0, 1, 3909; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 3910; EG-NEXT: 6(8.407791e-45), 112(1.569454e-43) 3911; EG-NEXT: ALU clause starting at 125: 3912; EG-NEXT: LSHR T35.X, T0.W, literal.x, 3913; EG-NEXT: LSHR T0.Y, T19.Y, literal.y, 3914; EG-NEXT: BFE_INT T34.Z, T0.Z, 0.0, 1, 3915; EG-NEXT: LSHR T0.W, T19.Y, literal.z, 3916; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 3917; EG-NEXT: 2(2.802597e-45), 11(1.541428e-44) 3918; EG-NEXT: 5(7.006492e-45), 128(1.793662e-43) 3919; EG-NEXT: LSHR T36.X, PS, literal.x, 3920; EG-NEXT: BFE_INT T34.Y, PV.W, 0.0, 1, 3921; EG-NEXT: LSHR T0.Z, T19.Y, literal.y, 3922; EG-NEXT: BFE_INT T37.W, PV.Y, 0.0, 1, 3923; EG-NEXT: LSHR * T0.W, T19.Y, literal.z, 3924; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44) 3925; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 3926; EG-NEXT: BFE_INT T34.X, PS, 0.0, 1, 3927; EG-NEXT: LSHR T0.Y, T19.Y, literal.x, 3928; EG-NEXT: BFE_INT T37.Z, PV.Z, 0.0, 1, 3929; EG-NEXT: LSHR T0.W, T19.Y, literal.y, 3930; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3931; EG-NEXT: 15(2.101948e-44), 9(1.261169e-44) 3932; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00) 3933; EG-NEXT: LSHR T38.X, PS, literal.x, 3934; EG-NEXT: BFE_INT T37.Y, PV.W, 0.0, 1, 3935; EG-NEXT: LSHR T0.Z, T19.Y, literal.y, 3936; EG-NEXT: BFE_INT T39.W, PV.Y, 0.0, 1, 3937; EG-NEXT: LSHR * T0.W, T19.Y, literal.z, 3938; EG-NEXT: 2(2.802597e-45), 14(1.961818e-44) 3939; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 3940; EG-NEXT: BFE_INT T37.X, PS, 0.0, 1, 3941; EG-NEXT: LSHR T0.Y, T19.Y, literal.x, 3942; EG-NEXT: BFE_INT T39.Z, PV.Z, 0.0, 1, 3943; EG-NEXT: LSHR T0.W, T19.Y, literal.y, 3944; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3945; EG-NEXT: 19(2.662467e-44), 13(1.821688e-44) 3946; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) 3947; EG-NEXT: LSHR T40.X, PS, literal.x, 3948; EG-NEXT: BFE_INT T39.Y, PV.W, 0.0, 1, 3949; EG-NEXT: LSHR T0.Z, T19.Y, literal.y, 3950; EG-NEXT: BFE_INT T41.W, PV.Y, 0.0, 1, 3951; EG-NEXT: LSHR * T0.W, T19.Y, literal.z, 3952; EG-NEXT: 2(2.802597e-45), 18(2.522337e-44) 
3953; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) 3954; EG-NEXT: BFE_INT T39.X, PS, 0.0, 1, 3955; EG-NEXT: LSHR T0.Y, T19.Y, literal.x, 3956; EG-NEXT: BFE_INT T41.Z, PV.Z, 0.0, 1, 3957; EG-NEXT: LSHR T0.W, T19.Y, literal.y, 3958; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3959; EG-NEXT: 23(3.222986e-44), 17(2.382207e-44) 3960; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) 3961; EG-NEXT: LSHR T42.X, PS, literal.x, 3962; EG-NEXT: BFE_INT T41.Y, PV.W, 0.0, 1, 3963; EG-NEXT: LSHR T0.Z, T19.Y, literal.y, 3964; EG-NEXT: BFE_INT T43.W, PV.Y, 0.0, 1, 3965; EG-NEXT: LSHR * T0.W, T19.Y, literal.z, 3966; EG-NEXT: 2(2.802597e-45), 22(3.082857e-44) 3967; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3968; EG-NEXT: BFE_INT T41.X, PS, 0.0, 1, 3969; EG-NEXT: LSHR T0.Y, T19.Y, literal.x, 3970; EG-NEXT: BFE_INT T43.Z, PV.Z, 0.0, 1, 3971; EG-NEXT: LSHR T0.W, T19.Y, literal.y, 3972; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3973; EG-NEXT: 27(3.783506e-44), 21(2.942727e-44) 3974; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 3975; EG-NEXT: LSHR T44.X, PS, literal.x, 3976; EG-NEXT: BFE_INT T43.Y, PV.W, 0.0, 1, 3977; EG-NEXT: LSHR T0.Z, T19.Y, literal.y, 3978; EG-NEXT: BFE_INT T45.W, PV.Y, 0.0, 1, 3979; EG-NEXT: LSHR * T0.W, T19.Y, literal.z, 3980; EG-NEXT: 2(2.802597e-45), 26(3.643376e-44) 3981; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) 3982; EG-NEXT: BFE_INT T43.X, PS, 0.0, 1, 3983; EG-NEXT: BFE_INT T45.Z, PV.Z, 0.0, 1, 3984; EG-NEXT: LSHR T0.W, T19.Y, literal.x, 3985; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3986; EG-NEXT: 25(3.503246e-44), 208(2.914701e-43) 3987; EG-NEXT: LSHR T46.X, PS, literal.x, 3988; EG-NEXT: BFE_INT T45.Y, PV.W, 0.0, 1, 3989; EG-NEXT: LSHR * T0.W, T19.Y, literal.y, 3990; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 3991; EG-NEXT: BFE_INT T45.X, PV.W, 0.0, 1, 3992; EG-NEXT: LSHR T0.Z, T19.Y, literal.x, 3993; EG-NEXT: LSHR T0.W, T19.X, 1, 3994; EG-NEXT: LSHR * T1.W, T19.Y, literal.y, 3995; EG-NEXT: 2(2.802597e-45), 3(4.203895e-45) 3996; EG-NEXT: BFE_INT T47.X, 
T19.X, 0.0, 1, 3997; EG-NEXT: LSHR T0.Y, T19.X, literal.x, 3998; EG-NEXT: LSHR T1.Z, T19.X, literal.y, 3999; EG-NEXT: LSHR T2.W, T19.Y, literal.z, 4000; EG-NEXT: ASHR * T48.W, T19.Y, literal.w, 4001; EG-NEXT: 2(2.802597e-45), 3(4.203895e-45) 4002; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44) 4003; EG-NEXT: BFE_INT T19.X, T19.Y, 0.0, 1, 4004; EG-NEXT: LSHR T1.Y, T19.Y, literal.x, 4005; EG-NEXT: BFE_INT T48.Z, PV.W, 0.0, 1, 4006; EG-NEXT: BFE_INT T47.W, PV.Z, 0.0, 1, 4007; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, 4008; EG-NEXT: 29(4.063766e-44), 224(3.138909e-43) 4009; EG-NEXT: LSHR * T49.X, PS, literal.x, 4010; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4011; EG-NEXT: ALU clause starting at 224: 4012; EG-NEXT: BFE_INT T48.Y, T1.Y, 0.0, 1, 4013; EG-NEXT: BFE_INT T47.Z, T0.Y, 0.0, 1, BS:VEC_120/SCL_212 4014; EG-NEXT: BFE_INT T19.W, T1.W, 0.0, 1, 4015; EG-NEXT: LSHR * T1.W, T19.Y, literal.x, 4016; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) 4017; EG-NEXT: BFE_INT T48.X, PS, 0.0, 1, 4018; EG-NEXT: BFE_INT T47.Y, T0.W, 0.0, 1, 4019; EG-NEXT: BFE_INT T19.Z, T0.Z, 0.0, 1, 4020; EG-NEXT: LSHR T0.W, T19.Y, 1, 4021; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 4022; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 4023; EG-NEXT: LSHR T50.X, PS, literal.x, 4024; EG-NEXT: BFE_INT * T19.Y, PV.W, 0.0, 1, 4025; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4026; 4027; GFX12-LABEL: constant_sextload_v64i1_to_v64i32: 4028; GFX12: ; %bb.0: 4029; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4030; GFX12-NEXT: s_wait_kmcnt 0x0 4031; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 4032; GFX12-NEXT: s_wait_kmcnt 0x0 4033; GFX12-NEXT: s_ashr_i32 s63, s3, 31 4034; GFX12-NEXT: s_bfe_i32 s64, s3, 0x1001e 4035; GFX12-NEXT: s_bfe_i32 s65, s3, 0x1001c 4036; GFX12-NEXT: s_bfe_i32 s66, s3, 0x1001d 4037; GFX12-NEXT: s_bfe_i32 s59, s3, 0x1001b 4038; GFX12-NEXT: s_bfe_i32 s60, s3, 0x1001a 4039; GFX12-NEXT: s_bfe_i32 s61, s3, 0x10019 4040; GFX12-NEXT: s_bfe_i32 s62, s3, 0x10018 4041; GFX12-NEXT: v_dual_mov_b32 
v24, 0 :: v_dual_mov_b32 v1, s66 4042; GFX12-NEXT: s_bfe_i32 s55, s3, 0x10017 4043; GFX12-NEXT: s_bfe_i32 s56, s3, 0x10016 4044; GFX12-NEXT: s_bfe_i32 s57, s3, 0x10015 4045; GFX12-NEXT: s_bfe_i32 s58, s3, 0x10014 4046; GFX12-NEXT: v_dual_mov_b32 v0, s65 :: v_dual_mov_b32 v3, s63 4047; GFX12-NEXT: v_dual_mov_b32 v2, s64 :: v_dual_mov_b32 v5, s61 4048; GFX12-NEXT: s_bfe_i32 s51, s3, 0x10013 4049; GFX12-NEXT: s_bfe_i32 s52, s3, 0x10012 4050; GFX12-NEXT: s_bfe_i32 s53, s3, 0x10011 4051; GFX12-NEXT: s_bfe_i32 s54, s3, 0x10010 4052; GFX12-NEXT: v_dual_mov_b32 v4, s62 :: v_dual_mov_b32 v7, s59 4053; GFX12-NEXT: v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s57 4054; GFX12-NEXT: v_dual_mov_b32 v8, s58 :: v_dual_mov_b32 v11, s55 4055; GFX12-NEXT: v_dual_mov_b32 v10, s56 :: v_dual_mov_b32 v13, s53 4056; GFX12-NEXT: s_bfe_i32 s36, s3, 0x10003 4057; GFX12-NEXT: s_bfe_i32 s37, s3, 0x10002 4058; GFX12-NEXT: s_bfe_i32 s38, s3, 0x10001 4059; GFX12-NEXT: s_bfe_i32 s39, s3, 0x10000 4060; GFX12-NEXT: s_bfe_i32 s40, s3, 0x10007 4061; GFX12-NEXT: s_bfe_i32 s41, s3, 0x10006 4062; GFX12-NEXT: s_bfe_i32 s42, s3, 0x10005 4063; GFX12-NEXT: s_bfe_i32 s43, s3, 0x10004 4064; GFX12-NEXT: s_bfe_i32 s44, s3, 0x1000b 4065; GFX12-NEXT: s_bfe_i32 s45, s3, 0x1000a 4066; GFX12-NEXT: s_bfe_i32 s46, s3, 0x10009 4067; GFX12-NEXT: s_bfe_i32 s47, s3, 0x10008 4068; GFX12-NEXT: s_bfe_i32 s48, s3, 0x1000f 4069; GFX12-NEXT: s_bfe_i32 s49, s3, 0x1000e 4070; GFX12-NEXT: s_bfe_i32 s50, s3, 0x1000d 4071; GFX12-NEXT: v_dual_mov_b32 v12, s54 :: v_dual_mov_b32 v15, s51 4072; GFX12-NEXT: v_mov_b32_e32 v14, s52 4073; GFX12-NEXT: s_bfe_i32 s3, s3, 0x1000c 4074; GFX12-NEXT: s_clause 0x3 4075; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240 4076; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224 4077; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208 4078; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192 4079; GFX12-NEXT: v_dual_mov_b32 v1, s50 :: v_dual_mov_b32 v0, s3 
4080; GFX12-NEXT: v_dual_mov_b32 v3, s48 :: v_dual_mov_b32 v2, s49 4081; GFX12-NEXT: v_dual_mov_b32 v5, s46 :: v_dual_mov_b32 v4, s47 4082; GFX12-NEXT: v_dual_mov_b32 v7, s44 :: v_dual_mov_b32 v6, s45 4083; GFX12-NEXT: v_mov_b32_e32 v9, s42 4084; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10003 4085; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10002 4086; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10001 4087; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10000 4088; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10007 4089; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10006 4090; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10005 4091; GFX12-NEXT: s_bfe_i32 s11, s2, 0x10004 4092; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000b 4093; GFX12-NEXT: s_bfe_i32 s13, s2, 0x1000a 4094; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10009 4095; GFX12-NEXT: s_bfe_i32 s15, s2, 0x10008 4096; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000f 4097; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000e 4098; GFX12-NEXT: s_bfe_i32 s18, s2, 0x1000d 4099; GFX12-NEXT: s_bfe_i32 s19, s2, 0x1000c 4100; GFX12-NEXT: s_bfe_i32 s20, s2, 0x10013 4101; GFX12-NEXT: s_bfe_i32 s21, s2, 0x10012 4102; GFX12-NEXT: s_bfe_i32 s22, s2, 0x10011 4103; GFX12-NEXT: s_bfe_i32 s23, s2, 0x10010 4104; GFX12-NEXT: s_bfe_i32 s24, s2, 0x10017 4105; GFX12-NEXT: s_bfe_i32 s25, s2, 0x10016 4106; GFX12-NEXT: s_bfe_i32 s26, s2, 0x10015 4107; GFX12-NEXT: s_bfe_i32 s27, s2, 0x10014 4108; GFX12-NEXT: s_bfe_i32 s28, s2, 0x1001b 4109; GFX12-NEXT: s_bfe_i32 s29, s2, 0x1001a 4110; GFX12-NEXT: s_bfe_i32 s30, s2, 0x10019 4111; GFX12-NEXT: s_bfe_i32 s31, s2, 0x10018 4112; GFX12-NEXT: s_ashr_i32 s33, s2, 31 4113; GFX12-NEXT: s_bfe_i32 s34, s2, 0x1001e 4114; GFX12-NEXT: s_bfe_i32 s35, s2, 0x1001d 4115; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1001c 4116; GFX12-NEXT: v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v11, s40 4117; GFX12-NEXT: v_dual_mov_b32 v10, s41 :: v_dual_mov_b32 v13, s38 4118; GFX12-NEXT: v_dual_mov_b32 v12, s39 :: v_dual_mov_b32 v15, s36 4119; GFX12-NEXT: v_dual_mov_b32 v14, s37 :: v_dual_mov_b32 v17, s35 4120; GFX12-NEXT: s_wait_alu 0xfffe 4121; GFX12-NEXT: 
v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s33 4122; GFX12-NEXT: v_dual_mov_b32 v18, s34 :: v_dual_mov_b32 v21, s30 4123; GFX12-NEXT: v_dual_mov_b32 v20, s31 :: v_dual_mov_b32 v23, s28 4124; GFX12-NEXT: v_mov_b32_e32 v22, s29 4125; GFX12-NEXT: s_clause 0x5 4126; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176 4127; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160 4128; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144 4129; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128 4130; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112 4131; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96 4132; GFX12-NEXT: v_dual_mov_b32 v1, s26 :: v_dual_mov_b32 v0, s27 4133; GFX12-NEXT: v_dual_mov_b32 v3, s24 :: v_dual_mov_b32 v2, s25 4134; GFX12-NEXT: v_dual_mov_b32 v5, s22 :: v_dual_mov_b32 v4, s23 4135; GFX12-NEXT: v_dual_mov_b32 v7, s20 :: v_dual_mov_b32 v6, s21 4136; GFX12-NEXT: v_dual_mov_b32 v9, s18 :: v_dual_mov_b32 v8, s19 4137; GFX12-NEXT: v_dual_mov_b32 v11, s16 :: v_dual_mov_b32 v10, s17 4138; GFX12-NEXT: v_dual_mov_b32 v13, s14 :: v_dual_mov_b32 v12, s15 4139; GFX12-NEXT: v_dual_mov_b32 v15, s12 :: v_dual_mov_b32 v14, s13 4140; GFX12-NEXT: v_dual_mov_b32 v17, s10 :: v_dual_mov_b32 v16, s11 4141; GFX12-NEXT: v_dual_mov_b32 v19, s8 :: v_dual_mov_b32 v18, s9 4142; GFX12-NEXT: v_dual_mov_b32 v21, s6 :: v_dual_mov_b32 v20, s7 4143; GFX12-NEXT: v_dual_mov_b32 v23, s4 :: v_dual_mov_b32 v22, s5 4144; GFX12-NEXT: s_clause 0x5 4145; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80 4146; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64 4147; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48 4148; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 4149; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 4150; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 4151; GFX12-NEXT: s_endpgm 4152 %load = load <64 x i1>, ptr addrspace(4) %in 4153 %ext = sext 
<64 x i1> %load to <64 x i32> 4154 store <64 x i32> %ext, ptr addrspace(1) %out 4155 ret void 4156} 4157 4158define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4159; GFX6-LABEL: constant_zextload_i1_to_i64: 4160; GFX6: ; %bb.0: 4161; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4162; GFX6-NEXT: s_mov_b32 s7, 0xf000 4163; GFX6-NEXT: s_mov_b32 s6, -1 4164; GFX6-NEXT: s_mov_b32 s10, s6 4165; GFX6-NEXT: s_mov_b32 s11, s7 4166; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4167; GFX6-NEXT: s_mov_b32 s8, s2 4168; GFX6-NEXT: s_mov_b32 s9, s3 4169; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4170; GFX6-NEXT: s_mov_b32 s4, s0 4171; GFX6-NEXT: s_mov_b32 s5, s1 4172; GFX6-NEXT: s_waitcnt vmcnt(0) 4173; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 4174; GFX6-NEXT: v_mov_b32_e32 v1, 0 4175; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4176; GFX6-NEXT: s_endpgm 4177; 4178; GFX8-LABEL: constant_zextload_i1_to_i64: 4179; GFX8: ; %bb.0: 4180; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4181; GFX8-NEXT: v_mov_b32_e32 v3, 0 4182; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4183; GFX8-NEXT: v_mov_b32_e32 v0, s2 4184; GFX8-NEXT: v_mov_b32_e32 v1, s3 4185; GFX8-NEXT: flat_load_ubyte v2, v[0:1] 4186; GFX8-NEXT: v_mov_b32_e32 v0, s0 4187; GFX8-NEXT: v_mov_b32_e32 v1, s1 4188; GFX8-NEXT: s_waitcnt vmcnt(0) 4189; GFX8-NEXT: v_and_b32_e32 v2, 1, v2 4190; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4191; GFX8-NEXT: s_endpgm 4192; 4193; EG-LABEL: constant_zextload_i1_to_i64: 4194; EG: ; %bb.0: 4195; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4196; EG-NEXT: TEX 0 @6 4197; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 4198; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4199; EG-NEXT: CF_END 4200; EG-NEXT: PAD 4201; EG-NEXT: Fetch clause starting at 6: 4202; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4203; EG-NEXT: ALU clause starting at 8: 4204; EG-NEXT: MOV * T0.X, KC0[2].Z, 4205; EG-NEXT: ALU clause starting at 9: 4206; EG-NEXT: MOV * T0.Y, 0.0, 4207; 
EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4208; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4209; 4210; GFX12-LABEL: constant_zextload_i1_to_i64: 4211; GFX12: ; %bb.0: 4212; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4213; GFX12-NEXT: s_wait_kmcnt 0x0 4214; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 4215; GFX12-NEXT: s_wait_kmcnt 0x0 4216; GFX12-NEXT: s_and_b32 s2, s2, 1 4217; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4218; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 4219; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1] 4220; GFX12-NEXT: s_endpgm 4221 %a = load i1, ptr addrspace(4) %in 4222 %ext = zext i1 %a to i64 4223 store i64 %ext, ptr addrspace(1) %out 4224 ret void 4225} 4226 4227define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4228; GFX6-LABEL: constant_sextload_i1_to_i64: 4229; GFX6: ; %bb.0: 4230; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4231; GFX6-NEXT: s_mov_b32 s7, 0xf000 4232; GFX6-NEXT: s_mov_b32 s6, -1 4233; GFX6-NEXT: s_mov_b32 s10, s6 4234; GFX6-NEXT: s_mov_b32 s11, s7 4235; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4236; GFX6-NEXT: s_mov_b32 s8, s2 4237; GFX6-NEXT: s_mov_b32 s9, s3 4238; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4239; GFX6-NEXT: s_mov_b32 s4, s0 4240; GFX6-NEXT: s_mov_b32 s5, s1 4241; GFX6-NEXT: s_waitcnt vmcnt(0) 4242; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 4243; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4244; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4245; GFX6-NEXT: s_endpgm 4246; 4247; GFX8-LABEL: constant_sextload_i1_to_i64: 4248; GFX8: ; %bb.0: 4249; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4250; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4251; GFX8-NEXT: v_mov_b32_e32 v0, s2 4252; GFX8-NEXT: v_mov_b32_e32 v1, s3 4253; GFX8-NEXT: flat_load_ubyte v2, v[0:1] 4254; GFX8-NEXT: v_mov_b32_e32 v0, s0 4255; GFX8-NEXT: v_mov_b32_e32 v1, s1 4256; GFX8-NEXT: s_waitcnt vmcnt(0) 4257; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 4258; GFX8-NEXT: v_ashrrev_i32_e32 v3, 
31, v2 4259; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4260; GFX8-NEXT: s_endpgm 4261; 4262; EG-LABEL: constant_sextload_i1_to_i64: 4263; EG: ; %bb.0: 4264; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4265; EG-NEXT: TEX 0 @6 4266; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 4267; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4268; EG-NEXT: CF_END 4269; EG-NEXT: PAD 4270; EG-NEXT: Fetch clause starting at 6: 4271; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4272; EG-NEXT: ALU clause starting at 8: 4273; EG-NEXT: MOV * T0.X, KC0[2].Z, 4274; EG-NEXT: ALU clause starting at 9: 4275; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1, 4276; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4277; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4278; EG-NEXT: MOV * T0.Y, PV.X, 4279; 4280; GFX12-LABEL: constant_sextload_i1_to_i64: 4281; GFX12: ; %bb.0: 4282; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4283; GFX12-NEXT: s_wait_kmcnt 0x0 4284; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 4285; GFX12-NEXT: s_wait_kmcnt 0x0 4286; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 4287; GFX12-NEXT: v_mov_b32_e32 v2, 0 4288; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 4289; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 4290; GFX12-NEXT: s_endpgm 4291 %a = load i1, ptr addrspace(4) %in 4292 %ext = sext i1 %a to i64 4293 store i64 %ext, ptr addrspace(1) %out 4294 ret void 4295} 4296 4297define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4298; GFX6-LABEL: constant_zextload_v1i1_to_v1i64: 4299; GFX6: ; %bb.0: 4300; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4301; GFX6-NEXT: s_mov_b32 s7, 0xf000 4302; GFX6-NEXT: s_mov_b32 s6, -1 4303; GFX6-NEXT: s_mov_b32 s10, s6 4304; GFX6-NEXT: s_mov_b32 s11, s7 4305; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4306; GFX6-NEXT: s_mov_b32 s8, s2 4307; GFX6-NEXT: s_mov_b32 s9, s3 4308; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4309; GFX6-NEXT: s_mov_b32 s4, s0 4310; GFX6-NEXT: s_mov_b32 s5, s1 4311; GFX6-NEXT: 
s_waitcnt vmcnt(0) 4312; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 4313; GFX6-NEXT: v_mov_b32_e32 v1, 0 4314; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4315; GFX6-NEXT: s_endpgm 4316; 4317; GFX8-LABEL: constant_zextload_v1i1_to_v1i64: 4318; GFX8: ; %bb.0: 4319; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4320; GFX8-NEXT: v_mov_b32_e32 v3, 0 4321; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4322; GFX8-NEXT: v_mov_b32_e32 v0, s2 4323; GFX8-NEXT: v_mov_b32_e32 v1, s3 4324; GFX8-NEXT: flat_load_ubyte v2, v[0:1] 4325; GFX8-NEXT: v_mov_b32_e32 v0, s0 4326; GFX8-NEXT: v_mov_b32_e32 v1, s1 4327; GFX8-NEXT: s_waitcnt vmcnt(0) 4328; GFX8-NEXT: v_and_b32_e32 v2, 1, v2 4329; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4330; GFX8-NEXT: s_endpgm 4331; 4332; EG-LABEL: constant_zextload_v1i1_to_v1i64: 4333; EG: ; %bb.0: 4334; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4335; EG-NEXT: TEX 0 @6 4336; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 4337; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4338; EG-NEXT: CF_END 4339; EG-NEXT: PAD 4340; EG-NEXT: Fetch clause starting at 6: 4341; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4342; EG-NEXT: ALU clause starting at 8: 4343; EG-NEXT: MOV * T0.X, KC0[2].Z, 4344; EG-NEXT: ALU clause starting at 9: 4345; EG-NEXT: MOV * T0.Y, 0.0, 4346; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4347; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4348; 4349; GFX12-LABEL: constant_zextload_v1i1_to_v1i64: 4350; GFX12: ; %bb.0: 4351; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4352; GFX12-NEXT: s_wait_kmcnt 0x0 4353; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 4354; GFX12-NEXT: s_wait_kmcnt 0x0 4355; GFX12-NEXT: s_and_b32 s2, s2, 1 4356; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4357; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 4358; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1] 4359; GFX12-NEXT: s_endpgm 4360 %load = load <1 x i1>, ptr addrspace(4) %in 4361 %ext = zext <1 x i1> %load to <1 x i64> 4362 store <1 x i64> %ext, ptr addrspace(1) %out 4363 ret 
void 4364} 4365 4366define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4367; GFX6-LABEL: constant_sextload_v1i1_to_v1i64: 4368; GFX6: ; %bb.0: 4369; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4370; GFX6-NEXT: s_mov_b32 s7, 0xf000 4371; GFX6-NEXT: s_mov_b32 s6, -1 4372; GFX6-NEXT: s_mov_b32 s10, s6 4373; GFX6-NEXT: s_mov_b32 s11, s7 4374; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4375; GFX6-NEXT: s_mov_b32 s8, s2 4376; GFX6-NEXT: s_mov_b32 s9, s3 4377; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4378; GFX6-NEXT: s_mov_b32 s4, s0 4379; GFX6-NEXT: s_mov_b32 s5, s1 4380; GFX6-NEXT: s_waitcnt vmcnt(0) 4381; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 4382; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4383; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4384; GFX6-NEXT: s_endpgm 4385; 4386; GFX8-LABEL: constant_sextload_v1i1_to_v1i64: 4387; GFX8: ; %bb.0: 4388; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4389; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4390; GFX8-NEXT: v_mov_b32_e32 v0, s2 4391; GFX8-NEXT: v_mov_b32_e32 v1, s3 4392; GFX8-NEXT: flat_load_ubyte v2, v[0:1] 4393; GFX8-NEXT: v_mov_b32_e32 v0, s0 4394; GFX8-NEXT: v_mov_b32_e32 v1, s1 4395; GFX8-NEXT: s_waitcnt vmcnt(0) 4396; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 4397; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4398; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 4399; GFX8-NEXT: s_endpgm 4400; 4401; EG-LABEL: constant_sextload_v1i1_to_v1i64: 4402; EG: ; %bb.0: 4403; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4404; EG-NEXT: TEX 0 @6 4405; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 4406; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4407; EG-NEXT: CF_END 4408; EG-NEXT: PAD 4409; EG-NEXT: Fetch clause starting at 6: 4410; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4411; EG-NEXT: ALU clause starting at 8: 4412; EG-NEXT: MOV * T0.X, KC0[2].Z, 4413; EG-NEXT: ALU clause starting at 9: 4414; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1, 4415; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 
4416; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4417; EG-NEXT: MOV * T0.Y, PV.X, 4418; 4419; GFX12-LABEL: constant_sextload_v1i1_to_v1i64: 4420; GFX12: ; %bb.0: 4421; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4422; GFX12-NEXT: s_wait_kmcnt 0x0 4423; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0 4424; GFX12-NEXT: s_wait_kmcnt 0x0 4425; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 4426; GFX12-NEXT: v_mov_b32_e32 v2, 0 4427; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 4428; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] 4429; GFX12-NEXT: s_endpgm 4430 %load = load <1 x i1>, ptr addrspace(4) %in 4431 %ext = sext <1 x i1> %load to <1 x i64> 4432 store <1 x i64> %ext, ptr addrspace(1) %out 4433 ret void 4434} 4435 4436define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4437; GFX6-LABEL: constant_zextload_v2i1_to_v2i64: 4438; GFX6: ; %bb.0: 4439; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4440; GFX6-NEXT: s_mov_b32 s7, 0xf000 4441; GFX6-NEXT: s_mov_b32 s6, -1 4442; GFX6-NEXT: s_mov_b32 s10, s6 4443; GFX6-NEXT: s_mov_b32 s11, s7 4444; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4445; GFX6-NEXT: s_mov_b32 s8, s2 4446; GFX6-NEXT: s_mov_b32 s9, s3 4447; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4448; GFX6-NEXT: v_mov_b32_e32 v1, 0 4449; GFX6-NEXT: s_mov_b32 s4, s0 4450; GFX6-NEXT: s_mov_b32 s5, s1 4451; GFX6-NEXT: s_waitcnt vmcnt(0) 4452; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4453; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 4454; GFX6-NEXT: v_mov_b32_e32 v3, v1 4455; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4456; GFX6-NEXT: s_endpgm 4457; 4458; GFX8-LABEL: constant_zextload_v2i1_to_v2i64: 4459; GFX8: ; %bb.0: 4460; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4461; GFX8-NEXT: v_mov_b32_e32 v2, 1 4462; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4463; GFX8-NEXT: v_mov_b32_e32 v0, s2 4464; GFX8-NEXT: v_mov_b32_e32 v1, s3 4465; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 4466; GFX8-NEXT: v_mov_b32_e32 v1, 0 
4467; GFX8-NEXT: v_mov_b32_e32 v4, s0 4468; GFX8-NEXT: v_mov_b32_e32 v5, s1 4469; GFX8-NEXT: v_mov_b32_e32 v3, v1 4470; GFX8-NEXT: s_waitcnt vmcnt(0) 4471; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4472; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 4473; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4474; GFX8-NEXT: s_endpgm 4475; 4476; EG-LABEL: constant_zextload_v2i1_to_v2i64: 4477; EG: ; %bb.0: 4478; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4479; EG-NEXT: TEX 0 @6 4480; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 4481; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 4482; EG-NEXT: CF_END 4483; EG-NEXT: PAD 4484; EG-NEXT: Fetch clause starting at 6: 4485; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4486; EG-NEXT: ALU clause starting at 8: 4487; EG-NEXT: MOV * T0.X, KC0[2].Z, 4488; EG-NEXT: ALU clause starting at 9: 4489; EG-NEXT: BFE_UINT * T0.Z, T0.X, 1, 1, 4490; EG-NEXT: AND_INT T0.X, T0.X, 1, 4491; EG-NEXT: MOV T0.Y, 0.0, 4492; EG-NEXT: MOV T0.W, 0.0, 4493; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4494; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4495; 4496; GFX12-LABEL: constant_zextload_v2i1_to_v2i64: 4497; GFX12: ; %bb.0: 4498; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4499; GFX12-NEXT: v_mov_b32_e32 v1, 0 4500; GFX12-NEXT: s_wait_kmcnt 0x0 4501; GFX12-NEXT: global_load_u8 v0, v1, s[2:3] 4502; GFX12-NEXT: s_wait_loadcnt 0x0 4503; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v0 4504; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 1, v0 4505; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4506; GFX12-NEXT: v_lshrrev_b32_e32 v2, 1, v2 4507; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 4508; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 4509; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2 4510; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 4511; GFX12-NEXT: s_endpgm 4512 %load = load <2 x i1>, ptr addrspace(4) %in 4513 %ext = zext <2 x i1> %load to <2 x i64> 4514 store <2 x i64> 
%ext, ptr addrspace(1) %out 4515 ret void 4516} 4517 4518define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4519; GFX6-LABEL: constant_sextload_v2i1_to_v2i64: 4520; GFX6: ; %bb.0: 4521; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4522; GFX6-NEXT: s_mov_b32 s7, 0xf000 4523; GFX6-NEXT: s_mov_b32 s6, -1 4524; GFX6-NEXT: s_mov_b32 s10, s6 4525; GFX6-NEXT: s_mov_b32 s11, s7 4526; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4527; GFX6-NEXT: s_mov_b32 s8, s2 4528; GFX6-NEXT: s_mov_b32 s9, s3 4529; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4530; GFX6-NEXT: s_mov_b32 s4, s0 4531; GFX6-NEXT: s_mov_b32 s5, s1 4532; GFX6-NEXT: s_waitcnt vmcnt(0) 4533; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4534; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 4535; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4536; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 4537; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4538; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4539; GFX6-NEXT: s_endpgm 4540; 4541; GFX8-LABEL: constant_sextload_v2i1_to_v2i64: 4542; GFX8: ; %bb.0: 4543; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4544; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4545; GFX8-NEXT: v_mov_b32_e32 v0, s2 4546; GFX8-NEXT: v_mov_b32_e32 v1, s3 4547; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 4548; GFX8-NEXT: v_mov_b32_e32 v4, s0 4549; GFX8-NEXT: v_mov_b32_e32 v5, s1 4550; GFX8-NEXT: s_waitcnt vmcnt(0) 4551; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4552; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 4553; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 4554; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4555; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4556; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4557; GFX8-NEXT: s_endpgm 4558; 4559; EG-LABEL: constant_sextload_v2i1_to_v2i64: 4560; EG: ; %bb.0: 4561; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4562; EG-NEXT: TEX 0 @6 4563; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 4564; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 4565; EG-NEXT: CF_END 4566; EG-NEXT: PAD 
4567; EG-NEXT: Fetch clause starting at 6: 4568; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4569; EG-NEXT: ALU clause starting at 8: 4570; EG-NEXT: MOV * T0.X, KC0[2].Z, 4571; EG-NEXT: ALU clause starting at 9: 4572; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1, 4573; EG-NEXT: LSHR * T0.W, T0.X, 1, 4574; EG-NEXT: BFE_INT * T1.Z, PV.W, 0.0, 1, 4575; EG-NEXT: MOV * T1.Y, T1.X, 4576; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 4577; EG-NEXT: MOV * T1.W, T1.Z, 4578; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4579; 4580; GFX12-LABEL: constant_sextload_v2i1_to_v2i64: 4581; GFX12: ; %bb.0: 4582; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4583; GFX12-NEXT: v_mov_b32_e32 v4, 0 4584; GFX12-NEXT: s_wait_kmcnt 0x0 4585; GFX12-NEXT: global_load_u8 v0, v4, s[2:3] 4586; GFX12-NEXT: s_wait_loadcnt 0x0 4587; GFX12-NEXT: v_lshrrev_b32_e32 v1, 1, v0 4588; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1 4589; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 4590; GFX12-NEXT: v_bfe_i32 v2, v1, 0, 1 4591; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4592; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) 4593; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4594; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] 4595; GFX12-NEXT: s_endpgm 4596 %load = load <2 x i1>, ptr addrspace(4) %in 4597 %ext = sext <2 x i1> %load to <2 x i64> 4598 store <2 x i64> %ext, ptr addrspace(1) %out 4599 ret void 4600} 4601 4602define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4603; GFX6-LABEL: constant_zextload_v3i1_to_v3i64: 4604; GFX6: ; %bb.0: 4605; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4606; GFX6-NEXT: s_mov_b32 s7, 0xf000 4607; GFX6-NEXT: s_mov_b32 s6, -1 4608; GFX6-NEXT: s_mov_b32 s10, s6 4609; GFX6-NEXT: s_mov_b32 s11, s7 4610; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4611; GFX6-NEXT: s_mov_b32 s8, s2 4612; GFX6-NEXT: s_mov_b32 s9, s3 4613; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0 4614; GFX6-NEXT: v_mov_b32_e32 v5, 0 4615; GFX6-NEXT: 
v_mov_b32_e32 v1, v5 4616; GFX6-NEXT: v_mov_b32_e32 v3, v5 4617; GFX6-NEXT: s_mov_b32 s4, s0 4618; GFX6-NEXT: s_mov_b32 s5, s1 4619; GFX6-NEXT: s_waitcnt vmcnt(0) 4620; GFX6-NEXT: v_and_b32_e32 v0, 1, v4 4621; GFX6-NEXT: v_bfe_u32 v2, v4, 1, 1 4622; GFX6-NEXT: v_lshrrev_b32_e32 v4, 2, v4 4623; GFX6-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 4624; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4625; GFX6-NEXT: s_endpgm 4626; 4627; GFX8-LABEL: constant_zextload_v3i1_to_v3i64: 4628; GFX8: ; %bb.0: 4629; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4630; GFX8-NEXT: v_mov_b32_e32 v10, 2 4631; GFX8-NEXT: v_mov_b32_e32 v5, 0 4632; GFX8-NEXT: v_mov_b32_e32 v3, v5 4633; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4634; GFX8-NEXT: v_mov_b32_e32 v0, s2 4635; GFX8-NEXT: v_mov_b32_e32 v1, s3 4636; GFX8-NEXT: flat_load_ubyte v4, v[0:1] 4637; GFX8-NEXT: s_add_u32 s2, s0, 16 4638; GFX8-NEXT: s_addc_u32 s3, s1, 0 4639; GFX8-NEXT: v_mov_b32_e32 v9, s3 4640; GFX8-NEXT: v_mov_b32_e32 v7, s1 4641; GFX8-NEXT: v_mov_b32_e32 v8, s2 4642; GFX8-NEXT: v_mov_b32_e32 v1, v5 4643; GFX8-NEXT: v_mov_b32_e32 v6, s0 4644; GFX8-NEXT: s_waitcnt vmcnt(0) 4645; GFX8-NEXT: v_and_b32_e32 v0, 1, v4 4646; GFX8-NEXT: v_bfe_u32 v2, v4, 1, 1 4647; GFX8-NEXT: v_lshrrev_b32_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4648; GFX8-NEXT: flat_store_dwordx2 v[8:9], v[4:5] 4649; GFX8-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 4650; GFX8-NEXT: s_endpgm 4651; 4652; EG-LABEL: constant_zextload_v3i1_to_v3i64: 4653; EG: ; %bb.0: 4654; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4655; EG-NEXT: TEX 0 @6 4656; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 4657; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0 4658; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1 4659; EG-NEXT: CF_END 4660; EG-NEXT: Fetch clause starting at 6: 4661; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4662; EG-NEXT: ALU clause starting at 8: 4663; EG-NEXT: MOV * T0.X, KC0[2].Z, 4664; EG-NEXT: ALU clause 
starting at 9: 4665; EG-NEXT: BFE_UINT * T1.Z, T0.X, 1, 1, 4666; EG-NEXT: AND_INT T1.X, T0.X, 1, 4667; EG-NEXT: MOV T1.Y, 0.0, 4668; EG-NEXT: LSHR * T0.X, T0.X, literal.x, 4669; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4670; EG-NEXT: MOV T0.Y, 0.0, 4671; EG-NEXT: MOV * T1.W, 0.0, 4672; EG-NEXT: LSHR T2.X, KC0[2].Y, literal.x, 4673; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4674; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4675; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 4676; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4677; 4678; GFX12-LABEL: constant_zextload_v3i1_to_v3i64: 4679; GFX12: ; %bb.0: 4680; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4681; GFX12-NEXT: v_mov_b32_e32 v5, 0 4682; GFX12-NEXT: s_wait_kmcnt 0x0 4683; GFX12-NEXT: global_load_u8 v0, v5, s[2:3] 4684; GFX12-NEXT: s_wait_loadcnt 0x0 4685; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0 4686; GFX12-NEXT: v_bfe_u32 v2, v0, 1, 1 4687; GFX12-NEXT: v_and_b32_e32 v0, 1, v0 4688; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 4689; GFX12-NEXT: v_lshrrev_b32_e32 v4, 2, v1 4690; GFX12-NEXT: v_mov_b32_e32 v3, v5 4691; GFX12-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_and_b32 v2, 0xffff, v2 4692; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 4693; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 4694; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v4 4695; GFX12-NEXT: s_clause 0x1 4696; GFX12-NEXT: global_store_b64 v5, v[4:5], s[0:1] offset:16 4697; GFX12-NEXT: global_store_b128 v5, v[0:3], s[0:1] 4698; GFX12-NEXT: s_endpgm 4699 %load = load <3 x i1>, ptr addrspace(4) %in 4700 %ext = zext <3 x i1> %load to <3 x i64> 4701 store <3 x i64> %ext, ptr addrspace(1) %out 4702 ret void 4703} 4704 4705define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4706; GFX6-LABEL: constant_sextload_v3i1_to_v3i64: 4707; GFX6: ; %bb.0: 4708; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4709; GFX6-NEXT: s_mov_b32 s7, 
0xf000 4710; GFX6-NEXT: s_mov_b32 s6, -1 4711; GFX6-NEXT: s_mov_b32 s10, s6 4712; GFX6-NEXT: s_mov_b32 s11, s7 4713; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4714; GFX6-NEXT: s_mov_b32 s8, s2 4715; GFX6-NEXT: s_mov_b32 s9, s3 4716; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4717; GFX6-NEXT: s_mov_b32 s4, s0 4718; GFX6-NEXT: s_mov_b32 s5, s1 4719; GFX6-NEXT: s_waitcnt vmcnt(0) 4720; GFX6-NEXT: v_lshrrev_b32_e32 v3, 2, v0 4721; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4722; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 4723; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4724; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 4725; GFX6-NEXT: v_bfe_i32 v4, v3, 0, 1 4726; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4727; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4 4728; GFX6-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 4729; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4730; GFX6-NEXT: s_endpgm 4731; 4732; GFX8-LABEL: constant_sextload_v3i1_to_v3i64: 4733; GFX8: ; %bb.0: 4734; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4735; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4736; GFX8-NEXT: v_mov_b32_e32 v0, s2 4737; GFX8-NEXT: v_mov_b32_e32 v1, s3 4738; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 4739; GFX8-NEXT: s_add_u32 s2, s0, 16 4740; GFX8-NEXT: s_addc_u32 s3, s1, 0 4741; GFX8-NEXT: v_mov_b32_e32 v7, s3 4742; GFX8-NEXT: v_mov_b32_e32 v5, s1 4743; GFX8-NEXT: v_mov_b32_e32 v6, s2 4744; GFX8-NEXT: v_mov_b32_e32 v4, s0 4745; GFX8-NEXT: s_waitcnt vmcnt(0) 4746; GFX8-NEXT: v_lshrrev_b32_e32 v3, 2, v0 4747; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4748; GFX8-NEXT: v_bfe_i32 v8, v3, 0, 1 4749; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 4750; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 4751; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v8 4752; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4753; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4754; GFX8-NEXT: flat_store_dwordx2 v[6:7], v[8:9] 4755; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4756; GFX8-NEXT: s_endpgm 4757; 4758; EG-LABEL: constant_sextload_v3i1_to_v3i64: 4759; EG: ; %bb.0: 4760; EG-NEXT: 
ALU 0, @8, KC0[CB0:0-32], KC1[] 4761; EG-NEXT: TEX 0 @6 4762; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 4763; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0 4764; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1 4765; EG-NEXT: CF_END 4766; EG-NEXT: Fetch clause starting at 6: 4767; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4768; EG-NEXT: ALU clause starting at 8: 4769; EG-NEXT: MOV * T0.X, KC0[2].Z, 4770; EG-NEXT: ALU clause starting at 9: 4771; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1, 4772; EG-NEXT: LSHR T0.W, T0.X, 1, 4773; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 4774; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4775; EG-NEXT: BFE_INT T1.Z, PV.W, 0.0, 1, 4776; EG-NEXT: LSHR * T0.W, T0.X, literal.x, 4777; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4778; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, 1, 4779; EG-NEXT: MOV T1.Y, T1.X, 4780; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4781; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4782; EG-NEXT: LSHR T3.X, PV.W, literal.x, 4783; EG-NEXT: MOV T0.Y, PV.X, 4784; EG-NEXT: MOV * T1.W, T1.Z, 4785; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4786; 4787; GFX12-LABEL: constant_sextload_v3i1_to_v3i64: 4788; GFX12: ; %bb.0: 4789; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4790; GFX12-NEXT: v_mov_b32_e32 v6, 0 4791; GFX12-NEXT: s_wait_kmcnt 0x0 4792; GFX12-NEXT: global_load_u8 v0, v6, s[2:3] 4793; GFX12-NEXT: s_wait_loadcnt 0x0 4794; GFX12-NEXT: v_lshrrev_b32_e32 v1, 2, v0 4795; GFX12-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4796; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1 4797; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4798; GFX12-NEXT: v_bfe_i32 v4, v1, 0, 1 4799; GFX12-NEXT: v_bfe_i32 v2, v2, 0, 1 4800; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4801; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4802; GFX12-NEXT: v_ashrrev_i32_e32 v5, 31, v4 4803; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) 4804; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4805; GFX12-NEXT: s_clause 0x1 
4806; GFX12-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16 4807; GFX12-NEXT: global_store_b128 v6, v[0:3], s[0:1] 4808; GFX12-NEXT: s_endpgm 4809 %load = load <3 x i1>, ptr addrspace(4) %in 4810 %ext = sext <3 x i1> %load to <3 x i64> 4811 store <3 x i64> %ext, ptr addrspace(1) %out 4812 ret void 4813} 4814 ; Test: load <4 x i1> from ptr addrspace(4) %in, zext to <4 x i64>, store to ptr addrspace(1) %out. Per the file header, the GFX6/GFX8/EG/GFX12 check lines are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of hand-editing. 4815define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4816; GFX6-LABEL: constant_zextload_v4i1_to_v4i64: 4817; GFX6: ; %bb.0: 4818; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4819; GFX6-NEXT: s_mov_b32 s7, 0xf000 4820; GFX6-NEXT: s_mov_b32 s6, -1 4821; GFX6-NEXT: s_mov_b32 s10, s6 4822; GFX6-NEXT: s_mov_b32 s11, s7 4823; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4824; GFX6-NEXT: s_mov_b32 s8, s2 4825; GFX6-NEXT: s_mov_b32 s9, s3 4826; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4827; GFX6-NEXT: v_mov_b32_e32 v1, 0 4828; GFX6-NEXT: v_mov_b32_e32 v3, v1 4829; GFX6-NEXT: v_mov_b32_e32 v5, v1 4830; GFX6-NEXT: v_mov_b32_e32 v7, v1 4831; GFX6-NEXT: s_mov_b32 s4, s0 4832; GFX6-NEXT: s_mov_b32 s5, s1 4833; GFX6-NEXT: s_waitcnt vmcnt(0) 4834; GFX6-NEXT: v_and_b32_e32 v4, 1, v0 4835; GFX6-NEXT: v_bfe_u32 v6, v0, 1, 1 4836; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v0 4837; GFX6-NEXT: v_bfe_u32 v0, v0, 2, 1 4838; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 4839; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 4840; GFX6-NEXT: s_endpgm 4841; 4842; GFX8-LABEL: constant_zextload_v4i1_to_v4i64: 4843; GFX8: ; %bb.0: 4844; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4845; GFX8-NEXT: v_mov_b32_e32 v2, 3 4846; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4847; GFX8-NEXT: v_mov_b32_e32 v0, s2 4848; GFX8-NEXT: v_mov_b32_e32 v1, s3 4849; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 4850; GFX8-NEXT: s_add_u32 s2, s0, 16 4851; GFX8-NEXT: s_addc_u32 s3, s1, 0 4852; GFX8-NEXT: v_mov_b32_e32 v1, 0 4853; GFX8-NEXT: v_mov_b32_e32 v11, s3 4854; GFX8-NEXT: v_mov_b32_e32 v3, v1 4855; GFX8-NEXT: v_mov_b32_e32 v9, s1
4856; GFX8-NEXT: v_mov_b32_e32 v10, s2 4857; GFX8-NEXT: v_mov_b32_e32 v5, v1 4858; GFX8-NEXT: v_mov_b32_e32 v7, v1 4859; GFX8-NEXT: v_mov_b32_e32 v8, s0 4860; GFX8-NEXT: s_waitcnt vmcnt(0) 4861; GFX8-NEXT: v_and_b32_e32 v4, 1, v0 4862; GFX8-NEXT: v_bfe_u32 v6, v0, 1, 1 4863; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4864; GFX8-NEXT: v_bfe_u32 v0, v0, 2, 1 4865; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 4866; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 4867; GFX8-NEXT: s_endpgm 4868; 4869; EG-LABEL: constant_zextload_v4i1_to_v4i64: 4870; EG: ; %bb.0: 4871; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4872; EG-NEXT: TEX 0 @6 4873; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 4874; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0 4875; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 4876; EG-NEXT: CF_END 4877; EG-NEXT: Fetch clause starting at 6: 4878; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 4879; EG-NEXT: ALU clause starting at 8: 4880; EG-NEXT: MOV * T0.X, KC0[2].Z, 4881; EG-NEXT: ALU clause starting at 9: 4882; EG-NEXT: BFE_UINT * T1.Z, T0.X, literal.x, 1, 4883; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 4884; EG-NEXT: BFE_UINT T1.X, T0.X, literal.x, 1, 4885; EG-NEXT: MOV T1.Y, 0.0, 4886; EG-NEXT: BFE_UINT T0.Z, T0.X, 1, 1, 4887; EG-NEXT: AND_INT * T0.X, T0.X, 1, 4888; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4889; EG-NEXT: MOV T0.Y, 0.0, 4890; EG-NEXT: MOV T1.W, 0.0, 4891; EG-NEXT: MOV * T0.W, 0.0, 4892; EG-NEXT: LSHR T2.X, KC0[2].Y, literal.x, 4893; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, 4894; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4895; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 4896; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4897; 4898; GFX12-LABEL: constant_zextload_v4i1_to_v4i64: 4899; GFX12: ; %bb.0: 4900; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4901; GFX12-NEXT: v_mov_b32_e32 v1, 0 4902; GFX12-NEXT: s_wait_kmcnt 0x0 4903; GFX12-NEXT: global_load_u8 v0, v1, s[2:3] 4904;
GFX12-NEXT: s_wait_loadcnt 0x0 4905; GFX12-NEXT: v_readfirstlane_b32 s2, v0 4906; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 4907; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4908; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10002 4909; GFX12-NEXT: v_lshrrev_b32_e32 v2, 3, v0 4910; GFX12-NEXT: s_and_b32 s3, 0xffff, s3 4911; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 4912; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3 4913; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001 4914; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2 4915; GFX12-NEXT: s_and_b32 s2, s2, 1 4916; GFX12-NEXT: s_wait_alu 0xfffe 4917; GFX12-NEXT: s_and_b32 s3, 0xffff, s3 4918; GFX12-NEXT: s_and_b32 s2, 0xffff, s2 4919; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 4920; GFX12-NEXT: s_wait_alu 0xfffe 4921; GFX12-NEXT: v_mov_b32_e32 v0, s2 4922; GFX12-NEXT: v_mov_b32_e32 v2, s3 4923; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 4924; GFX12-NEXT: s_endpgm 4925 %load = load <4 x i1>, ptr addrspace(4) %in 4926 %ext = zext <4 x i1> %load to <4 x i64> 4927 store <4 x i64> %ext, ptr addrspace(1) %out 4928 ret void 4929} 4930 ; Test: load <4 x i1> from ptr addrspace(4) %in, sext to <4 x i64>, store to ptr addrspace(1) %out. Per the file header, the GFX6/GFX8/EG/GFX12 check lines are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of hand-editing. 4931define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 4932; GFX6-LABEL: constant_sextload_v4i1_to_v4i64: 4933; GFX6: ; %bb.0: 4934; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4935; GFX6-NEXT: s_mov_b32 s7, 0xf000 4936; GFX6-NEXT: s_mov_b32 s6, -1 4937; GFX6-NEXT: s_mov_b32 s10, s6 4938; GFX6-NEXT: s_mov_b32 s11, s7 4939; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4940; GFX6-NEXT: s_mov_b32 s8, s2 4941; GFX6-NEXT: s_mov_b32 s9, s3 4942; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4943; GFX6-NEXT: s_mov_b32 s4, s0 4944; GFX6-NEXT: s_mov_b32 s5, s1 4945; GFX6-NEXT: s_waitcnt vmcnt(0) 4946; GFX6-NEXT: v_lshrrev_b32_e32 v3, 2, v0 4947; GFX6-NEXT: v_lshrrev_b32_e32 v4, 3, v0 4948; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4949; GFX6-NEXT: v_bfe_i32
v0, v0, 0, 1 4950; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4951; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 4952; GFX6-NEXT: v_bfe_i32 v6, v4, 0, 1 4953; GFX6-NEXT: v_bfe_i32 v4, v3, 0, 1 4954; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4955; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6 4956; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4 4957; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4958; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4959; GFX6-NEXT: s_endpgm 4960; 4961; GFX8-LABEL: constant_sextload_v4i1_to_v4i64: 4962; GFX8: ; %bb.0: 4963; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4964; GFX8-NEXT: s_waitcnt lgkmcnt(0) 4965; GFX8-NEXT: v_mov_b32_e32 v0, s2 4966; GFX8-NEXT: v_mov_b32_e32 v1, s3 4967; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 4968; GFX8-NEXT: s_add_u32 s2, s0, 16 4969; GFX8-NEXT: s_addc_u32 s3, s1, 0 4970; GFX8-NEXT: v_mov_b32_e32 v11, s3 4971; GFX8-NEXT: v_mov_b32_e32 v9, s1 4972; GFX8-NEXT: v_mov_b32_e32 v10, s2 4973; GFX8-NEXT: v_mov_b32_e32 v8, s0 4974; GFX8-NEXT: s_waitcnt vmcnt(0) 4975; GFX8-NEXT: v_lshrrev_b32_e32 v3, 2, v0 4976; GFX8-NEXT: v_lshrrev_b32_e32 v4, 3, v0 4977; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v0 4978; GFX8-NEXT: v_bfe_i32 v6, v4, 0, 1 4979; GFX8-NEXT: v_bfe_i32 v4, v3, 0, 1 4980; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 4981; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 4982; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v6 4983; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v4 4984; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4985; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 4986; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 4987; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 4988; GFX8-NEXT: s_endpgm 4989; 4990; EG-LABEL: constant_sextload_v4i1_to_v4i64: 4991; EG: ; %bb.0: 4992; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4993; EG-NEXT: TEX 0 @6 4994; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 4995; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T3.X, 0 4996; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 4997; EG-NEXT: CF_END 4998; EG-NEXT: Fetch
clause starting at 6: 4999; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 5000; EG-NEXT: ALU clause starting at 8: 5001; EG-NEXT: MOV * T0.X, KC0[2].Z, 5002; EG-NEXT: ALU clause starting at 9: 5003; EG-NEXT: LSHR * T0.W, T0.X, literal.x, 5004; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 5005; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1, 5006; EG-NEXT: BFE_INT T2.Z, PV.W, 0.0, 1, 5007; EG-NEXT: LSHR * T0.W, T0.X, literal.x, 5008; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5009; EG-NEXT: BFE_INT T2.X, PV.W, 0.0, 1, 5010; EG-NEXT: LSHR * T0.W, T0.X, 1, 5011; EG-NEXT: MOV T2.Y, PV.X, 5012; EG-NEXT: BFE_INT * T1.Z, PV.W, 0.0, 1, 5013; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 5014; EG-NEXT: MOV T1.Y, T1.X, 5015; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5016; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5017; EG-NEXT: LSHR T3.X, PV.W, literal.x, 5018; EG-NEXT: MOV T1.W, T1.Z, 5019; EG-NEXT: MOV * T2.W, T2.Z, 5020; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5021; 5022; GFX12-LABEL: constant_sextload_v4i1_to_v4i64: 5023; GFX12: ; %bb.0: 5024; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5025; GFX12-NEXT: v_mov_b32_e32 v8, 0 5026; GFX12-NEXT: s_wait_kmcnt 0x0 5027; GFX12-NEXT: global_load_u8 v0, v8, s[2:3] 5028; GFX12-NEXT: s_wait_loadcnt 0x0 5029; GFX12-NEXT: v_lshrrev_b32_e32 v1, 3, v0 5030; GFX12-NEXT: v_lshrrev_b32_e32 v2, 2, v0 5031; GFX12-NEXT: v_lshrrev_b32_e32 v3, 1, v0 5032; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1 5033; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5034; GFX12-NEXT: v_bfe_i32 v6, v1, 0, 1 5035; GFX12-NEXT: v_bfe_i32 v4, v2, 0, 1 5036; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5037; GFX12-NEXT: v_bfe_i32 v2, v3, 0, 1 5038; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5039; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5040; GFX12-NEXT: v_ashrrev_i32_e32 v7, 31, v6 5041; GFX12-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5042; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
5043; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5044; GFX12-NEXT: s_clause 0x1 5045; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16 5046; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] 5047; GFX12-NEXT: s_endpgm 5048 %load = load <4 x i1>, ptr addrspace(4) %in 5049 %ext = sext <4 x i1> %load to <4 x i64> 5050 store <4 x i64> %ext, ptr addrspace(1) %out 5051 ret void 5052} 5053 ; Test: load <8 x i1> from ptr addrspace(4) %in, zext to <8 x i64>, store to ptr addrspace(1) %out. Per the file header, the GFX6/GFX8/EG/GFX12 check lines are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of hand-editing. 5054define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 5055; GFX6-LABEL: constant_zextload_v8i1_to_v8i64: 5056; GFX6: ; %bb.0: 5057; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 5058; GFX6-NEXT: s_mov_b32 s3, 0xf000 5059; GFX6-NEXT: s_mov_b32 s2, -1 5060; GFX6-NEXT: s_mov_b32 s10, s2 5061; GFX6-NEXT: s_mov_b32 s11, s3 5062; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5063; GFX6-NEXT: s_mov_b32 s8, s6 5064; GFX6-NEXT: s_mov_b32 s9, s7 5065; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 5066; GFX6-NEXT: v_mov_b32_e32 v1, 0 5067; GFX6-NEXT: v_mov_b32_e32 v3, v1 5068; GFX6-NEXT: v_mov_b32_e32 v5, v1 5069; GFX6-NEXT: v_mov_b32_e32 v7, v1 5070; GFX6-NEXT: v_mov_b32_e32 v9, v1 5071; GFX6-NEXT: v_mov_b32_e32 v11, v1 5072; GFX6-NEXT: v_mov_b32_e32 v13, v1 5073; GFX6-NEXT: v_mov_b32_e32 v15, v1 5074; GFX6-NEXT: s_mov_b32 s0, s4 5075; GFX6-NEXT: s_mov_b32 s1, s5 5076; GFX6-NEXT: s_waitcnt vmcnt(0) 5077; GFX6-NEXT: v_bfe_u32 v14, v0, 1, 1 5078; GFX6-NEXT: v_bfe_u32 v10, v0, 3, 1 5079; GFX6-NEXT: v_bfe_u32 v6, v0, 5, 1 5080; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v0 5081; GFX6-NEXT: v_and_b32_e32 v12, 1, v0 5082; GFX6-NEXT: v_bfe_u32 v8, v0, 2, 1 5083; GFX6-NEXT: v_bfe_u32 v4, v0, 4, 1 5084; GFX6-NEXT: v_bfe_u32 v0, v0, 6, 1 5085; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5086; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 5087; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 5088; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 5089; GFX6-NEXT: s_endpgm 5090; 5091;
GFX8-LABEL: constant_zextload_v8i1_to_v8i64: 5092; GFX8: ; %bb.0: 5093; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5094; GFX8-NEXT: s_waitcnt lgkmcnt(0) 5095; GFX8-NEXT: v_mov_b32_e32 v0, s2 5096; GFX8-NEXT: v_mov_b32_e32 v1, s3 5097; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 5098; GFX8-NEXT: s_add_u32 s2, s0, 48 5099; GFX8-NEXT: s_addc_u32 s3, s1, 0 5100; GFX8-NEXT: s_add_u32 s4, s0, 32 5101; GFX8-NEXT: s_addc_u32 s5, s1, 0 5102; GFX8-NEXT: v_mov_b32_e32 v1, 0 5103; GFX8-NEXT: v_mov_b32_e32 v16, s5 5104; GFX8-NEXT: v_mov_b32_e32 v5, v1 5105; GFX8-NEXT: v_mov_b32_e32 v7, v1 5106; GFX8-NEXT: v_mov_b32_e32 v15, s4 5107; GFX8-NEXT: v_mov_b32_e32 v8, v1 5108; GFX8-NEXT: v_mov_b32_e32 v10, v1 5109; GFX8-NEXT: v_mov_b32_e32 v3, v1 5110; GFX8-NEXT: v_mov_b32_e32 v12, v1 5111; GFX8-NEXT: v_mov_b32_e32 v14, v1 5112; GFX8-NEXT: s_waitcnt vmcnt(0) 5113; GFX8-NEXT: v_bfe_u32 v6, v0, 5, 1 5114; GFX8-NEXT: v_bfe_u32 v4, v0, 4, 1 5115; GFX8-NEXT: flat_store_dwordx4 v[15:16], v[4:7] 5116; GFX8-NEXT: v_mov_b32_e32 v16, s3 5117; GFX8-NEXT: v_mov_b32_e32 v5, s1 5118; GFX8-NEXT: v_mov_b32_e32 v4, s0 5119; GFX8-NEXT: s_add_u32 s0, s0, 16 5120; GFX8-NEXT: s_addc_u32 s1, s1, 0 5121; GFX8-NEXT: v_mov_b32_e32 v18, s1 5122; GFX8-NEXT: v_mov_b32_e32 v17, s0 5123; GFX8-NEXT: v_and_b32_e32 v6, 0xffff, v0 5124; GFX8-NEXT: v_bfe_u32 v9, v0, 3, 1 5125; GFX8-NEXT: v_bfe_u32 v7, v0, 2, 1 5126; GFX8-NEXT: v_mov_b32_e32 v15, s2 5127; GFX8-NEXT: v_bfe_u32 v13, v0, 1, 1 5128; GFX8-NEXT: v_and_b32_e32 v11, 1, v0 5129; GFX8-NEXT: v_lshrrev_b32_e32 v2, 7, v6 5130; GFX8-NEXT: v_bfe_u32 v0, v6, 6, 1 5131; GFX8-NEXT: flat_store_dwordx4 v[17:18], v[7:10] 5132; GFX8-NEXT: flat_store_dwordx4 v[15:16], v[0:3] 5133; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[11:14] 5134; GFX8-NEXT: s_endpgm 5135; 5136; EG-LABEL: constant_zextload_v8i1_to_v8i64: 5137; EG: ; %bb.0: 5138; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5139; EG-NEXT: TEX 0 @8 5140; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 5141; EG-NEXT: MEM_RAT_CACHELESS
STORE_RAW T6.XYZW, T12.X, 0 5142; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 0 5143; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 0 5144; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T9.X, 1 5145; EG-NEXT: CF_END 5146; EG-NEXT: Fetch clause starting at 8: 5147; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1 5148; EG-NEXT: ALU clause starting at 10: 5149; EG-NEXT: MOV * T5.X, KC0[2].Z, 5150; EG-NEXT: ALU clause starting at 11: 5151; EG-NEXT: BFE_UINT * T6.Z, T5.X, literal.x, 1, 5152; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00) 5153; EG-NEXT: BFE_UINT T6.X, T5.X, literal.x, 1, 5154; EG-NEXT: MOV T6.Y, 0.0, 5155; EG-NEXT: BFE_UINT * T7.Z, T5.X, literal.y, 1, 5156; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45) 5157; EG-NEXT: BFE_UINT T7.X, T5.X, literal.x, 1, 5158; EG-NEXT: MOV T7.Y, 0.0, 5159; EG-NEXT: BFE_UINT * T8.Z, T5.X, literal.y, 1, 5160; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45) 5161; EG-NEXT: BFE_UINT T8.X, T5.X, literal.x, 1, 5162; EG-NEXT: MOV T8.Y, 0.0, 5163; EG-NEXT: BFE_UINT T5.Z, T5.X, 1, 1, 5164; EG-NEXT: AND_INT * T5.X, T5.X, 1, 5165; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5166; EG-NEXT: MOV T5.Y, 0.0, 5167; EG-NEXT: MOV T6.W, 0.0, 5168; EG-NEXT: MOV * T7.W, 0.0, 5169; EG-NEXT: MOV T8.W, 0.0, 5170; EG-NEXT: MOV * T5.W, 0.0, 5171; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x, 5172; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5173; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5174; EG-NEXT: LSHR T10.X, PV.W, literal.x, 5175; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5176; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5177; EG-NEXT: LSHR T11.X, PV.W, literal.x, 5178; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5179; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5180; EG-NEXT: LSHR * T12.X, PV.W, literal.x, 5181; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5182; 5183; GFX12-LABEL: constant_zextload_v8i1_to_v8i64: 5184; GFX12: ; %bb.0: 5185; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5186; GFX12-NEXT: v_mov_b32_e32 v1, 0 5187; GFX12-NEXT: s_wait_kmcnt 0x0
5188; GFX12-NEXT: global_load_u8 v12, v1, s[2:3] 5189; GFX12-NEXT: s_wait_loadcnt 0x0 5190; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v12 5191; GFX12-NEXT: v_mov_b32_e32 v5, v1 5192; GFX12-NEXT: v_mov_b32_e32 v7, v1 5193; GFX12-NEXT: v_bfe_u32 v6, v12, 5, 1 5194; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) 5195; GFX12-NEXT: v_lshrrev_b32_e32 v2, 7, v0 5196; GFX12-NEXT: v_bfe_u32 v0, v0, 6, 1 5197; GFX12-NEXT: v_bfe_u32 v4, v12, 4, 1 5198; GFX12-NEXT: v_mov_b32_e32 v9, v1 5199; GFX12-NEXT: v_mov_b32_e32 v11, v1 5200; GFX12-NEXT: v_bfe_u32 v10, v12, 3, 1 5201; GFX12-NEXT: v_bfe_u32 v8, v12, 2, 1 5202; GFX12-NEXT: v_mov_b32_e32 v13, v1 5203; GFX12-NEXT: v_mov_b32_e32 v15, v1 5204; GFX12-NEXT: v_bfe_u32 v14, v12, 1, 1 5205; GFX12-NEXT: v_and_b32_e32 v12, 1, v12 5206; GFX12-NEXT: s_clause 0x3 5207; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 5208; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:32 5209; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:16 5210; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1] 5211; GFX12-NEXT: s_endpgm 5212 %load = load <8 x i1>, ptr addrspace(4) %in 5213 %ext = zext <8 x i1> %load to <8 x i64> 5214 store <8 x i64> %ext, ptr addrspace(1) %out 5215 ret void 5216} 5217 ; Test: load <8 x i1> from ptr addrspace(4) %in, sext to <8 x i64>, store to ptr addrspace(1) %out. Per the file header, the GFX6/GFX8/EG/GFX12 check lines are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of hand-editing. 5218define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 5219; GFX6-LABEL: constant_sextload_v8i1_to_v8i64: 5220; GFX6: ; %bb.0: 5221; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 5222; GFX6-NEXT: s_mov_b32 s3, 0xf000 5223; GFX6-NEXT: s_mov_b32 s2, -1 5224; GFX6-NEXT: s_mov_b32 s10, s2 5225; GFX6-NEXT: s_mov_b32 s11, s3 5226; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5227; GFX6-NEXT: s_mov_b32 s8, s6 5228; GFX6-NEXT: s_mov_b32 s9, s7 5229; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 5230; GFX6-NEXT: s_mov_b32 s0, s4 5231; GFX6-NEXT: s_mov_b32 s1, s5 5232; GFX6-NEXT: s_waitcnt vmcnt(0) 5233; GFX6-NEXT: v_lshrrev_b32_e32 v3, 6, v0 5234; GFX6-NEXT:
v_lshrrev_b32_e32 v5, 7, v0 5235; GFX6-NEXT: v_lshrrev_b32_e32 v7, 4, v0 5236; GFX6-NEXT: v_lshrrev_b32_e32 v8, 5, v0 5237; GFX6-NEXT: v_lshrrev_b32_e32 v4, 2, v0 5238; GFX6-NEXT: v_lshrrev_b32_e32 v6, 3, v0 5239; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0 5240; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 5241; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5242; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 5243; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 1 5244; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 1 5245; GFX6-NEXT: v_bfe_i32 v10, v8, 0, 1 5246; GFX6-NEXT: v_bfe_i32 v8, v7, 0, 1 5247; GFX6-NEXT: v_bfe_i32 v14, v5, 0, 1 5248; GFX6-NEXT: v_bfe_i32 v12, v3, 0, 1 5249; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5250; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6 5251; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5252; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v10 5253; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v8 5254; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v14 5255; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v12 5256; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 5257; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 5258; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 5259; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5260; GFX6-NEXT: s_endpgm 5261; 5262; GFX8-LABEL: constant_sextload_v8i1_to_v8i64: 5263; GFX8: ; %bb.0: 5264; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5265; GFX8-NEXT: s_waitcnt lgkmcnt(0) 5266; GFX8-NEXT: v_mov_b32_e32 v0, s2 5267; GFX8-NEXT: v_mov_b32_e32 v1, s3 5268; GFX8-NEXT: flat_load_ubyte v0, v[0:1] 5269; GFX8-NEXT: v_mov_b32_e32 v17, s1 5270; GFX8-NEXT: v_mov_b32_e32 v16, s0 5271; GFX8-NEXT: s_waitcnt vmcnt(0) 5272; GFX8-NEXT: v_readfirstlane_b32 s3, v0 5273; GFX8-NEXT: s_lshr_b32 s2, s3, 6 5274; GFX8-NEXT: s_lshr_b32 s4, s3, 7 5275; GFX8-NEXT: s_lshr_b32 s6, s3, 4 5276; GFX8-NEXT: s_lshr_b32 s8, s3, 5 5277; GFX8-NEXT: s_lshr_b32 s10, s3, 2 5278; GFX8-NEXT: s_lshr_b32 s12, s3, 3 5279; GFX8-NEXT: s_lshr_b32 s14, s3, 1 5280; GFX8-NEXT: v_mov_b32_e32
v0, s3 5281; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 5282; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 5283; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 5284; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 5285; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 5286; GFX8-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 5287; GFX8-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 5288; GFX8-NEXT: v_mov_b32_e32 v4, s2 5289; GFX8-NEXT: s_add_u32 s2, s0, 48 5290; GFX8-NEXT: v_mov_b32_e32 v5, s3 5291; GFX8-NEXT: s_addc_u32 s3, s1, 0 5292; GFX8-NEXT: v_mov_b32_e32 v19, s3 5293; GFX8-NEXT: v_mov_b32_e32 v18, s2 5294; GFX8-NEXT: s_add_u32 s2, s0, 32 5295; GFX8-NEXT: v_mov_b32_e32 v6, s4 5296; GFX8-NEXT: v_mov_b32_e32 v7, s5 5297; GFX8-NEXT: s_addc_u32 s3, s1, 0 5298; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[4:7] 5299; GFX8-NEXT: s_add_u32 s0, s0, 16 5300; GFX8-NEXT: v_mov_b32_e32 v5, s3 5301; GFX8-NEXT: v_mov_b32_e32 v8, s6 5302; GFX8-NEXT: v_mov_b32_e32 v9, s7 5303; GFX8-NEXT: v_mov_b32_e32 v10, s8 5304; GFX8-NEXT: v_mov_b32_e32 v11, s9 5305; GFX8-NEXT: v_mov_b32_e32 v4, s2 5306; GFX8-NEXT: s_addc_u32 s1, s1, 0 5307; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[8:11] 5308; GFX8-NEXT: v_mov_b32_e32 v5, s1 5309; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 5310; GFX8-NEXT: v_mov_b32_e32 v12, s10 5311; GFX8-NEXT: v_mov_b32_e32 v13, s11 5312; GFX8-NEXT: v_mov_b32_e32 v14, s12 5313; GFX8-NEXT: v_mov_b32_e32 v15, s13 5314; GFX8-NEXT: v_mov_b32_e32 v4, s0 5315; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5316; GFX8-NEXT: v_mov_b32_e32 v2, s14 5317; GFX8-NEXT: v_mov_b32_e32 v3, s15 5318; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[12:15] 5319; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 5320; GFX8-NEXT: s_endpgm 5321; 5322; EG-LABEL: constant_sextload_v8i1_to_v8i64: 5323; EG: ; %bb.0: 5324; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5325; EG-NEXT: TEX 0 @8 5326; EG-NEXT: ALU 37, @11, KC0[CB0:0-32], KC1[] 5327; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T12.X, 0 5328; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW
T5.XYZW, T11.X, 0 5329; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 5330; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T6.X, 1 5331; EG-NEXT: CF_END 5332; EG-NEXT: Fetch clause starting at 8: 5333; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1 5334; EG-NEXT: ALU clause starting at 10: 5335; EG-NEXT: MOV * T5.X, KC0[2].Z, 5336; EG-NEXT: ALU clause starting at 11: 5337; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5338; EG-NEXT: LSHR * T0.W, T5.X, literal.y, 5339; EG-NEXT: 2(2.802597e-45), 7(9.809089e-45) 5340; EG-NEXT: BFE_INT T7.X, T5.X, 0.0, 1, 5341; EG-NEXT: BFE_INT T8.Z, PV.W, 0.0, 1, 5342; EG-NEXT: LSHR T0.W, T5.X, literal.x, 5343; EG-NEXT: LSHR * T1.W, T5.X, literal.y, 5344; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45) 5345; EG-NEXT: BFE_INT T8.X, PS, 0.0, 1, 5346; EG-NEXT: BFE_INT T9.Z, PV.W, 0.0, 1, 5347; EG-NEXT: LSHR T0.W, T5.X, 1, 5348; EG-NEXT: LSHR * T1.W, T5.X, literal.x, 5349; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5350; EG-NEXT: BFE_INT T9.X, PS, 0.0, 1, 5351; EG-NEXT: MOV T8.Y, PV.X, 5352; EG-NEXT: BFE_INT T7.Z, PV.W, 0.0, 1, 5353; EG-NEXT: LSHR T0.W, T5.X, literal.x, 5354; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 5355; EG-NEXT: 5(7.006492e-45), 16(2.242078e-44) 5356; EG-NEXT: LSHR T10.X, PS, literal.x, 5357; EG-NEXT: MOV T9.Y, PV.X, 5358; EG-NEXT: BFE_INT T5.Z, PV.W, 0.0, 1, 5359; EG-NEXT: LSHR * T0.W, T5.X, literal.y, 5360; EG-NEXT: 2(2.802597e-45), 4(5.605194e-45) 5361; EG-NEXT: BFE_INT T5.X, PV.W, 0.0, 1, 5362; EG-NEXT: MOV T7.Y, T7.X, 5363; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5364; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 5365; EG-NEXT: LSHR T11.X, PV.W, literal.x, 5366; EG-NEXT: MOV T5.Y, PV.X, 5367; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 5368; EG-NEXT: MOV T7.W, T7.Z, 5369; EG-NEXT: MOV * T9.W, T9.Z, 5370; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5371; EG-NEXT: LSHR T12.X, PV.Z, literal.x, 5372; EG-NEXT: MOV T5.W, T5.Z, 5373; EG-NEXT: MOV * T8.W, T8.Z, 5374; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5375; 5376;
GFX12-LABEL: constant_sextload_v8i1_to_v8i64: 5377; GFX12: ; %bb.0: 5378; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5379; GFX12-NEXT: v_mov_b32_e32 v16, 0 5380; GFX12-NEXT: s_wait_kmcnt 0x0 5381; GFX12-NEXT: global_load_u8 v0, v16, s[2:3] 5382; GFX12-NEXT: s_wait_loadcnt 0x0 5383; GFX12-NEXT: v_readfirstlane_b32 s3, v0 5384; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 5385; GFX12-NEXT: v_mov_b32_e32 v9, s3 5386; GFX12-NEXT: s_lshr_b32 s2, s3, 6 5387; GFX12-NEXT: s_lshr_b32 s4, s3, 7 5388; GFX12-NEXT: s_lshr_b32 s6, s3, 4 5389; GFX12-NEXT: s_lshr_b32 s8, s3, 5 5390; GFX12-NEXT: s_lshr_b32 s10, s3, 2 5391; GFX12-NEXT: s_lshr_b32 s12, s3, 3 5392; GFX12-NEXT: s_lshr_b32 s14, s3, 1 5393; GFX12-NEXT: s_wait_alu 0xfffe 5394; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 5395; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 5396; GFX12-NEXT: v_bfe_i32 v12, v9, 0, 1 5397; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 5398; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 5399; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 5400; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 5401; GFX12-NEXT: s_wait_alu 0xfffe 5402; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 5403; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 5404; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 5405; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7 5406; GFX12-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9 5407; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11 5408; GFX12-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13 5409; GFX12-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 5410; GFX12-NEXT: v_ashrrev_i32_e32 v13, 31, v12 5411; GFX12-NEXT: s_clause 0x3 5412; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 5413; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 5414; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 5415; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1] 5416; GFX12-NEXT:
s_endpgm 5417 %load = load <8 x i1>, ptr addrspace(4) %in 5418 %ext = sext <8 x i1> %load to <8 x i64> 5419 store <8 x i64> %ext, ptr addrspace(1) %out 5420 ret void 5421} 5422 5423define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 5424; GFX6-LABEL: constant_zextload_v16i1_to_v16i64: 5425; GFX6: ; %bb.0: 5426; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 5427; GFX6-NEXT: s_mov_b32 s3, 0xf000 5428; GFX6-NEXT: s_mov_b32 s2, -1 5429; GFX6-NEXT: s_mov_b32 s10, s2 5430; GFX6-NEXT: s_mov_b32 s11, s3 5431; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5432; GFX6-NEXT: s_mov_b32 s8, s6 5433; GFX6-NEXT: s_mov_b32 s9, s7 5434; GFX6-NEXT: buffer_load_ushort v29, off, s[8:11], 0 5435; GFX6-NEXT: v_mov_b32_e32 v1, 0 5436; GFX6-NEXT: v_mov_b32_e32 v3, v1 5437; GFX6-NEXT: v_mov_b32_e32 v4, v1 5438; GFX6-NEXT: v_mov_b32_e32 v6, v1 5439; GFX6-NEXT: v_mov_b32_e32 v7, v1 5440; GFX6-NEXT: v_mov_b32_e32 v9, v1 5441; GFX6-NEXT: v_mov_b32_e32 v10, v1 5442; GFX6-NEXT: v_mov_b32_e32 v12, v1 5443; GFX6-NEXT: v_mov_b32_e32 v14, v1 5444; GFX6-NEXT: v_mov_b32_e32 v16, v1 5445; GFX6-NEXT: v_mov_b32_e32 v18, v1 5446; GFX6-NEXT: v_mov_b32_e32 v20, v1 5447; GFX6-NEXT: v_mov_b32_e32 v22, v1 5448; GFX6-NEXT: v_mov_b32_e32 v24, v1 5449; GFX6-NEXT: v_mov_b32_e32 v26, v1 5450; GFX6-NEXT: v_mov_b32_e32 v28, v1 5451; GFX6-NEXT: s_mov_b32 s0, s4 5452; GFX6-NEXT: s_mov_b32 s1, s5 5453; GFX6-NEXT: s_waitcnt vmcnt(0) 5454; GFX6-NEXT: v_bfe_u32 v2, v29, 11, 1 5455; GFX6-NEXT: v_bfe_u32 v0, v29, 10, 1 5456; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5457; GFX6-NEXT: v_bfe_u32 v5, v29, 9, 1 5458; GFX6-NEXT: s_waitcnt expcnt(0) 5459; GFX6-NEXT: v_bfe_u32 v3, v29, 8, 1 5460; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64 5461; GFX6-NEXT: v_lshrrev_b32_e32 v8, 15, v29 5462; GFX6-NEXT: s_waitcnt expcnt(0) 5463; GFX6-NEXT: v_bfe_u32 v6, v29, 14, 1 5464; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
5465; GFX6-NEXT: v_bfe_u32 v27, v29, 5, 1 5466; GFX6-NEXT: v_bfe_u32 v23, v29, 7, 1 5467; GFX6-NEXT: v_bfe_u32 v19, v29, 1, 1 5468; GFX6-NEXT: v_bfe_u32 v15, v29, 3, 1 5469; GFX6-NEXT: v_bfe_u32 v11, v29, 13, 1 5470; GFX6-NEXT: v_bfe_u32 v25, v29, 4, 1 5471; GFX6-NEXT: v_bfe_u32 v21, v29, 6, 1 5472; GFX6-NEXT: v_and_b32_e32 v17, 1, v29 5473; GFX6-NEXT: v_bfe_u32 v13, v29, 2, 1 5474; GFX6-NEXT: s_waitcnt expcnt(0) 5475; GFX6-NEXT: v_bfe_u32 v9, v29, 12, 1 5476; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96 5477; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16 5478; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 5479; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48 5480; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32 5481; GFX6-NEXT: s_endpgm 5482; 5483; GFX8-LABEL: constant_zextload_v16i1_to_v16i64: 5484; GFX8: ; %bb.0: 5485; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5486; GFX8-NEXT: s_waitcnt lgkmcnt(0) 5487; GFX8-NEXT: v_mov_b32_e32 v0, s2 5488; GFX8-NEXT: v_mov_b32_e32 v1, s3 5489; GFX8-NEXT: flat_load_ushort v0, v[0:1] 5490; GFX8-NEXT: v_mov_b32_e32 v1, 0 5491; GFX8-NEXT: v_mov_b32_e32 v3, v1 5492; GFX8-NEXT: v_mov_b32_e32 v5, v1 5493; GFX8-NEXT: v_mov_b32_e32 v7, v1 5494; GFX8-NEXT: v_mov_b32_e32 v9, v1 5495; GFX8-NEXT: v_mov_b32_e32 v11, v1 5496; GFX8-NEXT: s_waitcnt vmcnt(0) 5497; GFX8-NEXT: v_readfirstlane_b32 s2, v0 5498; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10009 5499; GFX8-NEXT: s_bfe_u32 s4, s2, 0x1000d 5500; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007 5501; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10003 5502; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10001 5503; GFX8-NEXT: s_and_b32 s8, s2, 1 5504; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10002 5505; GFX8-NEXT: s_bfe_u32 s10, s2, 0x10004 5506; GFX8-NEXT: s_bfe_u32 s11, s2, 0x10006 5507; GFX8-NEXT: s_bfe_u32 s12, s2, 0x1000c 5508; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000a 5509; GFX8-NEXT: v_and_b32_e32 v4, 0xffff, v0 5510; GFX8-NEXT: v_mov_b32_e32 v0, s2 5511; 
GFX8-NEXT: s_add_u32 s2, s0, 0x50 5512; GFX8-NEXT: v_mov_b32_e32 v6, s3 5513; GFX8-NEXT: s_addc_u32 s3, s1, 0 5514; GFX8-NEXT: v_mov_b32_e32 v13, s3 5515; GFX8-NEXT: v_mov_b32_e32 v12, s2 5516; GFX8-NEXT: s_add_u32 s2, s0, 64 5517; GFX8-NEXT: v_bfe_u32 v2, v4, 11, 1 5518; GFX8-NEXT: s_addc_u32 s3, s1, 0 5519; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 5520; GFX8-NEXT: v_mov_b32_e32 v13, s3 5521; GFX8-NEXT: v_mov_b32_e32 v12, s2 5522; GFX8-NEXT: s_add_u32 s2, s0, 0x70 5523; GFX8-NEXT: v_lshrrev_b32_e32 v10, 15, v4 5524; GFX8-NEXT: v_bfe_u32 v14, v4, 5, 1 5525; GFX8-NEXT: v_bfe_u32 v8, v4, 14, 1 5526; GFX8-NEXT: v_bfe_u32 v4, v4, 8, 1 5527; GFX8-NEXT: s_addc_u32 s3, s1, 0 5528; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 5529; GFX8-NEXT: v_mov_b32_e32 v0, s12 5530; GFX8-NEXT: v_mov_b32_e32 v5, s3 5531; GFX8-NEXT: v_mov_b32_e32 v4, s2 5532; GFX8-NEXT: s_add_u32 s2, s0, 0x60 5533; GFX8-NEXT: s_addc_u32 s3, s1, 0 5534; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[8:11] 5535; GFX8-NEXT: v_mov_b32_e32 v5, s3 5536; GFX8-NEXT: v_mov_b32_e32 v4, s2 5537; GFX8-NEXT: s_add_u32 s2, s0, 48 5538; GFX8-NEXT: v_mov_b32_e32 v2, s4 5539; GFX8-NEXT: s_addc_u32 s3, s1, 0 5540; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5541; GFX8-NEXT: v_mov_b32_e32 v5, s3 5542; GFX8-NEXT: v_mov_b32_e32 v4, s2 5543; GFX8-NEXT: s_add_u32 s2, s0, 32 5544; GFX8-NEXT: v_mov_b32_e32 v0, s11 5545; GFX8-NEXT: v_mov_b32_e32 v2, s5 5546; GFX8-NEXT: s_addc_u32 s3, s1, 0 5547; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5548; GFX8-NEXT: v_mov_b32_e32 v5, s3 5549; GFX8-NEXT: v_mov_b32_e32 v4, s2 5550; GFX8-NEXT: s_add_u32 s2, s0, 16 5551; GFX8-NEXT: v_mov_b32_e32 v0, s10 5552; GFX8-NEXT: v_mov_b32_e32 v2, v14 5553; GFX8-NEXT: s_addc_u32 s3, s1, 0 5554; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5555; GFX8-NEXT: v_mov_b32_e32 v5, s3 5556; GFX8-NEXT: v_mov_b32_e32 v0, s9 5557; GFX8-NEXT: v_mov_b32_e32 v2, s6 5558; GFX8-NEXT: v_mov_b32_e32 v4, s2 5559; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5560; 
GFX8-NEXT: v_mov_b32_e32 v5, s1 5561; GFX8-NEXT: v_mov_b32_e32 v0, s8 5562; GFX8-NEXT: v_mov_b32_e32 v2, s7 5563; GFX8-NEXT: v_mov_b32_e32 v4, s0 5564; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5565; GFX8-NEXT: s_endpgm 5566; 5567; EG-LABEL: constant_zextload_v16i1_to_v16i64: 5568; EG: ; %bb.0: 5569; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] 5570; EG-NEXT: TEX 0 @12 5571; EG-NEXT: ALU 62, @15, KC0[CB0:0-32], KC1[] 5572; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T22.X, 0 5573; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T21.X, 0 5574; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T20.X, 0 5575; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 0 5576; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T18.X, 0 5577; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T17.X, 0 5578; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 5579; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T15.X, 1 5580; EG-NEXT: CF_END 5581; EG-NEXT: Fetch clause starting at 12: 5582; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1 5583; EG-NEXT: ALU clause starting at 14: 5584; EG-NEXT: MOV * T7.X, KC0[2].Z, 5585; EG-NEXT: ALU clause starting at 15: 5586; EG-NEXT: LSHR * T8.Z, T7.X, literal.x, 5587; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00) 5588; EG-NEXT: BFE_UINT T8.X, T7.X, literal.x, 1, 5589; EG-NEXT: MOV T8.Y, 0.0, 5590; EG-NEXT: BFE_UINT * T9.Z, T7.X, literal.y, 1, 5591; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44) 5592; EG-NEXT: BFE_UINT T9.X, T7.X, literal.x, 1, 5593; EG-NEXT: MOV T9.Y, 0.0, 5594; EG-NEXT: BFE_UINT * T10.Z, T7.X, literal.y, 1, 5595; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44) 5596; EG-NEXT: BFE_UINT T10.X, T7.X, literal.x, 1, 5597; EG-NEXT: MOV T10.Y, 0.0, 5598; EG-NEXT: BFE_UINT * T11.Z, T7.X, literal.y, 1, 5599; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44) 5600; EG-NEXT: BFE_UINT T11.X, T7.X, literal.x, 1, 5601; EG-NEXT: MOV T11.Y, 0.0, 5602; EG-NEXT: BFE_UINT * T12.Z, T7.X, literal.y, 1, 5603; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45) 5604; EG-NEXT: BFE_UINT 
T12.X, T7.X, literal.x, 1, 5605; EG-NEXT: MOV T12.Y, 0.0, 5606; EG-NEXT: BFE_UINT * T13.Z, T7.X, literal.y, 1, 5607; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45) 5608; EG-NEXT: BFE_UINT T13.X, T7.X, literal.x, 1, 5609; EG-NEXT: MOV T13.Y, 0.0, 5610; EG-NEXT: BFE_UINT * T14.Z, T7.X, literal.y, 1, 5611; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45) 5612; EG-NEXT: BFE_UINT T14.X, T7.X, literal.x, 1, 5613; EG-NEXT: MOV T14.Y, 0.0, 5614; EG-NEXT: BFE_UINT T7.Z, T7.X, 1, 1, 5615; EG-NEXT: AND_INT * T7.X, T7.X, 1, 5616; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5617; EG-NEXT: MOV T7.Y, 0.0, 5618; EG-NEXT: MOV T8.W, 0.0, 5619; EG-NEXT: MOV * T9.W, 0.0, 5620; EG-NEXT: MOV T10.W, 0.0, 5621; EG-NEXT: MOV * T11.W, 0.0, 5622; EG-NEXT: MOV T12.W, 0.0, 5623; EG-NEXT: MOV * T13.W, 0.0, 5624; EG-NEXT: MOV T14.W, 0.0, 5625; EG-NEXT: MOV * T7.W, 0.0, 5626; EG-NEXT: LSHR T15.X, KC0[2].Y, literal.x, 5627; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5628; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5629; EG-NEXT: LSHR T16.X, PV.W, literal.x, 5630; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5631; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5632; EG-NEXT: LSHR T17.X, PV.W, literal.x, 5633; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5634; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5635; EG-NEXT: LSHR T18.X, PV.W, literal.x, 5636; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5637; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 5638; EG-NEXT: LSHR T19.X, PV.W, literal.x, 5639; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5640; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 5641; EG-NEXT: LSHR T20.X, PV.W, literal.x, 5642; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5643; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 5644; EG-NEXT: LSHR T21.X, PV.W, literal.x, 5645; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5646; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 5647; EG-NEXT: LSHR * T22.X, PV.W, literal.x, 5648; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5649; 5650; GFX12-LABEL: 
constant_zextload_v16i1_to_v16i64: 5651; GFX12: ; %bb.0: 5652; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 5653; GFX12-NEXT: v_mov_b32_e32 v1, 0 5654; GFX12-NEXT: s_wait_kmcnt 0x0 5655; GFX12-NEXT: global_load_u16 v0, v1, s[2:3] 5656; GFX12-NEXT: s_wait_loadcnt 0x0 5657; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v0 5658; GFX12-NEXT: v_readfirstlane_b32 s2, v0 5659; GFX12-NEXT: v_mov_b32_e32 v7, v1 5660; GFX12-NEXT: v_mov_b32_e32 v11, v1 5661; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5662; GFX12-NEXT: v_bfe_u32 v2, v4, 11, 1 5663; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000a 5664; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5665; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3 5666; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d 5667; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c 5668; GFX12-NEXT: v_mov_b32_e32 v5, v1 5669; GFX12-NEXT: v_bfe_u32 v6, v4, 5, 1 5670; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80 5671; GFX12-NEXT: v_mov_b32_e32 v0, s4 5672; GFX12-NEXT: s_wait_alu 0xfffe 5673; GFX12-NEXT: v_mov_b32_e32 v2, s3 5674; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007 5675; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006 5676; GFX12-NEXT: v_mov_b32_e32 v9, v1 5677; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10002 5678; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96 5679; GFX12-NEXT: s_wait_alu 0xfffe 5680; GFX12-NEXT: v_mov_b32_e32 v0, s4 5681; GFX12-NEXT: v_mov_b32_e32 v2, s3 5682; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004 5683; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009 5684; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001 5685; GFX12-NEXT: v_lshrrev_b32_e32 v10, 15, v4 5686; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 5687; GFX12-NEXT: s_wait_alu 0xfffe 5688; GFX12-NEXT: v_mov_b32_e32 v0, s4 5689; GFX12-NEXT: v_mov_b32_e32 v2, v6 5690; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003 5691; GFX12-NEXT: s_and_b32 s2, s2, 1 5692; GFX12-NEXT: v_bfe_u32 v8, v4, 14, 1 5693; GFX12-NEXT: v_bfe_u32 v4, v4, 8, 1 5694; GFX12-NEXT: global_store_b128 v1, v[0:3], 
s[0:1] offset:32 5695; GFX12-NEXT: v_mov_b32_e32 v0, s6 5696; GFX12-NEXT: s_wait_alu 0xfffe 5697; GFX12-NEXT: v_mov_b32_e32 v2, s4 5698; GFX12-NEXT: v_mov_b32_e32 v6, s3 5699; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 5700; GFX12-NEXT: v_mov_b32_e32 v0, s2 5701; GFX12-NEXT: v_mov_b32_e32 v2, s5 5702; GFX12-NEXT: s_clause 0x2 5703; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:112 5704; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:64 5705; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 5706; GFX12-NEXT: s_endpgm 5707 %load = load <16 x i1>, ptr addrspace(4) %in 5708 %ext = zext <16 x i1> %load to <16 x i64> 5709 store <16 x i64> %ext, ptr addrspace(1) %out 5710 ret void 5711} 5712; Sign-extending variant of the test above - loads a <16 x i1> vector through ptr addrspace(4) %in, sign-extends every lane to i64 and stores the <16 x i64> result through ptr addrspace(1) %out. The per-target check lines that follow are autogenerated by utils/update_llc_test_checks.py, so regenerate them with that script rather than editing them by hand. 5713define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 5714; GFX6-LABEL: constant_sextload_v16i1_to_v16i64: 5715; GFX6: ; %bb.0: 5716; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 5717; GFX6-NEXT: s_mov_b32 s3, 0xf000 5718; GFX6-NEXT: s_mov_b32 s2, -1 5719; GFX6-NEXT: s_mov_b32 s10, s2 5720; GFX6-NEXT: s_mov_b32 s11, s3 5721; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5722; GFX6-NEXT: s_mov_b32 s8, s6 5723; GFX6-NEXT: s_mov_b32 s9, s7 5724; GFX6-NEXT: buffer_load_ushort v1, off, s[8:11], 0 5725; GFX6-NEXT: s_mov_b32 s0, s4 5726; GFX6-NEXT: s_mov_b32 s1, s5 5727; GFX6-NEXT: s_waitcnt vmcnt(0) 5728; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v1 5729; GFX6-NEXT: v_lshrrev_b32_e32 v4, 15, v1 5730; GFX6-NEXT: v_lshrrev_b32_e32 v7, 12, v1 5731; GFX6-NEXT: v_lshrrev_b32_e32 v8, 13, v1 5732; GFX6-NEXT: v_lshrrev_b32_e32 v11, 10, v1 5733; GFX6-NEXT: v_lshrrev_b32_e32 v12, 11, v1 5734; GFX6-NEXT: v_lshrrev_b32_e32 v14, 8, v1 5735; GFX6-NEXT: v_lshrrev_b32_e32 v16, 9, v1 5736; GFX6-NEXT: v_lshrrev_b32_e32 v15, 6, v1 5737; GFX6-NEXT: v_lshrrev_b32_e32 v9, 4, v1 5738; GFX6-NEXT: v_lshrrev_b32_e32 v10, 5, v1 5739; GFX6-NEXT: v_lshrrev_b32_e32 v0, 2, v1 5740; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v1 5741;
GFX6-NEXT: v_lshrrev_b32_e32 v13, 1, v1 5742; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 5743; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 5744; GFX6-NEXT: v_bfe_i32 v5, v4, 0, 1 5745; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 1 5746; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v5 5747; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v3 5748; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:112 5749; GFX6-NEXT: s_waitcnt expcnt(0) 5750; GFX6-NEXT: v_bfe_i32 v6, v10, 0, 1 5751; GFX6-NEXT: v_bfe_i32 v4, v9, 0, 1 5752; GFX6-NEXT: v_bfe_i32 v9, v8, 0, 1 5753; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 1 5754; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v9 5755; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v7 5756; GFX6-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96 5757; GFX6-NEXT: s_waitcnt expcnt(0) 5758; GFX6-NEXT: v_bfe_i32 v9, v12, 0, 1 5759; GFX6-NEXT: v_bfe_i32 v7, v11, 0, 1 5760; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 1 5761; GFX6-NEXT: v_bfe_i32 v11, v1, 0, 1 5762; GFX6-NEXT: v_lshrrev_b32_e32 v1, 7, v1 5763; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v9 5764; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v7 5765; GFX6-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80 5766; GFX6-NEXT: v_bfe_i32 v17, v1, 0, 1 5767; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 1 5768; GFX6-NEXT: v_bfe_i32 v21, v16, 0, 1 5769; GFX6-NEXT: v_bfe_i32 v19, v14, 0, 1 5770; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v11 5771; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v13 5772; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5773; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5774; GFX6-NEXT: s_waitcnt expcnt(0) 5775; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6 5776; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5777; GFX6-NEXT: v_ashrrev_i32_e32 v18, 31, v17 5778; GFX6-NEXT: v_ashrrev_i32_e32 v16, 31, v15 5779; GFX6-NEXT: v_ashrrev_i32_e32 v22, 31, v21 5780; GFX6-NEXT: v_ashrrev_i32_e32 v20, 31, v19 5781; GFX6-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:64 5782; GFX6-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48 5783; GFX6-NEXT: buffer_store_dwordx4
v[4:7], off, s[0:3], 0 offset:32 5784; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5785; GFX6-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 5786; GFX6-NEXT: s_endpgm 5787; 5788; GFX8-LABEL: constant_sextload_v16i1_to_v16i64: 5789; GFX8: ; %bb.0: 5790; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 5791; GFX8-NEXT: s_waitcnt lgkmcnt(0) 5792; GFX8-NEXT: v_mov_b32_e32 v0, s2 5793; GFX8-NEXT: v_mov_b32_e32 v1, s3 5794; GFX8-NEXT: flat_load_ushort v0, v[0:1] 5795; GFX8-NEXT: v_mov_b32_e32 v19, s1 5796; GFX8-NEXT: v_mov_b32_e32 v18, s0 5797; GFX8-NEXT: s_waitcnt vmcnt(0) 5798; GFX8-NEXT: v_readfirstlane_b32 s3, v0 5799; GFX8-NEXT: s_lshr_b32 s2, s3, 14 5800; GFX8-NEXT: s_lshr_b32 s4, s3, 15 5801; GFX8-NEXT: s_lshr_b32 s6, s3, 12 5802; GFX8-NEXT: s_lshr_b32 s8, s3, 13 5803; GFX8-NEXT: s_lshr_b32 s10, s3, 10 5804; GFX8-NEXT: s_lshr_b32 s12, s3, 11 5805; GFX8-NEXT: s_lshr_b32 s14, s3, 8 5806; GFX8-NEXT: s_lshr_b32 s16, s3, 9 5807; GFX8-NEXT: s_lshr_b32 s18, s3, 6 5808; GFX8-NEXT: s_lshr_b32 s20, s3, 7 5809; GFX8-NEXT: s_lshr_b32 s22, s3, 4 5810; GFX8-NEXT: s_lshr_b32 s24, s3, 5 5811; GFX8-NEXT: s_lshr_b32 s26, s3, 2 5812; GFX8-NEXT: s_lshr_b32 s28, s3, 3 5813; GFX8-NEXT: s_lshr_b32 s30, s3, 1 5814; GFX8-NEXT: v_mov_b32_e32 v0, s3 5815; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 5816; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 5817; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 5818; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 5819; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 5820; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 5821; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 5822; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 5823; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 5824; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 5825; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 5826; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 5827; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 5828; GFX8-NEXT:
s_bfe_i64 s[4:5], s[4:5], 0x10000 5829; GFX8-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 5830; GFX8-NEXT: v_mov_b32_e32 v2, s2 5831; GFX8-NEXT: s_add_u32 s2, s0, 0x70 5832; GFX8-NEXT: v_mov_b32_e32 v3, s3 5833; GFX8-NEXT: s_addc_u32 s3, s1, 0 5834; GFX8-NEXT: v_mov_b32_e32 v15, s3 5835; GFX8-NEXT: v_mov_b32_e32 v14, s2 5836; GFX8-NEXT: s_add_u32 s2, s0, 0x60 5837; GFX8-NEXT: v_mov_b32_e32 v4, s4 5838; GFX8-NEXT: v_mov_b32_e32 v5, s5 5839; GFX8-NEXT: s_addc_u32 s3, s1, 0 5840; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[2:5] 5841; GFX8-NEXT: v_mov_b32_e32 v15, s3 5842; GFX8-NEXT: v_mov_b32_e32 v14, s2 5843; GFX8-NEXT: s_add_u32 s2, s0, 0x50 5844; GFX8-NEXT: v_mov_b32_e32 v6, s6 5845; GFX8-NEXT: v_mov_b32_e32 v7, s7 5846; GFX8-NEXT: v_mov_b32_e32 v8, s8 5847; GFX8-NEXT: v_mov_b32_e32 v9, s9 5848; GFX8-NEXT: s_addc_u32 s3, s1, 0 5849; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[6:9] 5850; GFX8-NEXT: v_mov_b32_e32 v15, s3 5851; GFX8-NEXT: v_mov_b32_e32 v14, s2 5852; GFX8-NEXT: s_add_u32 s2, s0, 64 5853; GFX8-NEXT: v_mov_b32_e32 v10, s10 5854; GFX8-NEXT: v_mov_b32_e32 v11, s11 5855; GFX8-NEXT: v_mov_b32_e32 v12, s12 5856; GFX8-NEXT: v_mov_b32_e32 v13, s13 5857; GFX8-NEXT: s_addc_u32 s3, s1, 0 5858; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[10:13] 5859; GFX8-NEXT: v_mov_b32_e32 v15, s3 5860; GFX8-NEXT: v_mov_b32_e32 v14, s2 5861; GFX8-NEXT: s_add_u32 s2, s0, 48 5862; GFX8-NEXT: v_mov_b32_e32 v2, s14 5863; GFX8-NEXT: v_mov_b32_e32 v3, s15 5864; GFX8-NEXT: v_mov_b32_e32 v4, s16 5865; GFX8-NEXT: v_mov_b32_e32 v5, s17 5866; GFX8-NEXT: s_addc_u32 s3, s1, 0 5867; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[2:5] 5868; GFX8-NEXT: v_mov_b32_e32 v6, s18 5869; GFX8-NEXT: v_mov_b32_e32 v5, s3 5870; GFX8-NEXT: v_mov_b32_e32 v4, s2 5871; GFX8-NEXT: s_add_u32 s2, s0, 32 5872; GFX8-NEXT: v_mov_b32_e32 v7, s19 5873; GFX8-NEXT: v_mov_b32_e32 v8, s20 5874; GFX8-NEXT: v_mov_b32_e32 v9, s21 5875; GFX8-NEXT: s_addc_u32 s3, s1, 0 5876; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[6:9] 5877; GFX8-NEXT:
v_mov_b32_e32 v5, s3 5878; GFX8-NEXT: s_add_u32 s0, s0, 16 5879; GFX8-NEXT: v_mov_b32_e32 v10, s22 5880; GFX8-NEXT: v_mov_b32_e32 v11, s23 5881; GFX8-NEXT: v_mov_b32_e32 v12, s24 5882; GFX8-NEXT: v_mov_b32_e32 v13, s25 5883; GFX8-NEXT: v_mov_b32_e32 v4, s2 5884; GFX8-NEXT: s_addc_u32 s1, s1, 0 5885; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[10:13] 5886; GFX8-NEXT: v_mov_b32_e32 v5, s1 5887; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1 5888; GFX8-NEXT: v_mov_b32_e32 v14, s26 5889; GFX8-NEXT: v_mov_b32_e32 v15, s27 5890; GFX8-NEXT: v_mov_b32_e32 v16, s28 5891; GFX8-NEXT: v_mov_b32_e32 v17, s29 5892; GFX8-NEXT: v_mov_b32_e32 v4, s0 5893; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5894; GFX8-NEXT: v_mov_b32_e32 v2, s30 5895; GFX8-NEXT: v_mov_b32_e32 v3, s31 5896; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[14:17] 5897; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 5898; GFX8-NEXT: s_endpgm 5899; 5900; EG-LABEL: constant_sextload_v16i1_to_v16i64: 5901; EG: ; %bb.0: 5902; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] 5903; EG-NEXT: TEX 0 @12 5904; EG-NEXT: ALU 78, @15, KC0[CB0:0-32], KC1[] 5905; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T22.X, 0 5906; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 0 5907; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0 5908; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0 5909; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T11.X, 0 5910; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T10.X, 0 5911; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T9.X, 0 5912; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T8.X, 1 5913; EG-NEXT: CF_END 5914; EG-NEXT: Fetch clause starting at 12: 5915; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1 5916; EG-NEXT: ALU clause starting at 14: 5917; EG-NEXT: MOV * T7.X, KC0[2].Z, 5918; EG-NEXT: ALU clause starting at 15: 5919; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 5920; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5921; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5922; EG-NEXT: LSHR T9.X, PV.W, literal.x, 5923;
EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5924; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5925; EG-NEXT: LSHR T10.X, PV.W, literal.x, 5926; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5927; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5928; EG-NEXT: LSHR T11.X, PV.W, literal.x, 5929; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5930; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 5931; EG-NEXT: LSHR T12.X, PV.W, literal.x, 5932; EG-NEXT: LSHR * T0.W, T7.X, literal.y, 5933; EG-NEXT: 2(2.802597e-45), 15(2.101948e-44) 5934; EG-NEXT: BFE_INT T13.X, T7.X, 0.0, 1, 5935; EG-NEXT: BFE_INT T14.Z, PV.W, 0.0, 1, 5936; EG-NEXT: LSHR T0.W, T7.X, literal.x, 5937; EG-NEXT: LSHR * T1.W, T7.X, literal.y, 5938; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44) 5939; EG-NEXT: BFE_INT T14.X, PS, 0.0, 1, 5940; EG-NEXT: LSHR T0.Y, T7.X, literal.x, 5941; EG-NEXT: BFE_INT T15.Z, PV.W, 0.0, 1, 5942; EG-NEXT: LSHR T0.W, T7.X, literal.y, 5943; EG-NEXT: LSHR * T1.W, T7.X, literal.z, 5944; EG-NEXT: 12(1.681558e-44), 7(9.809089e-45) 5945; EG-NEXT: 10(1.401298e-44), 0(0.000000e+00) 5946; EG-NEXT: BFE_INT T15.X, PS, 0.0, 1, 5947; EG-NEXT: MOV T14.Y, PV.X, 5948; EG-NEXT: BFE_INT T16.Z, PV.W, 0.0, 1, 5949; EG-NEXT: LSHR T0.W, T7.X, literal.x, 5950; EG-NEXT: LSHR * T1.W, T7.X, literal.y, 5951; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45) 5952; EG-NEXT: BFE_INT T16.X, PS, 0.0, 1, 5953; EG-NEXT: MOV T15.Y, PV.X, 5954; EG-NEXT: BFE_INT T17.Z, PV.W, 0.0, 1, 5955; EG-NEXT: LSHR T0.W, T7.X, 1, 5956; EG-NEXT: LSHR * T1.W, T7.X, literal.x, 5957; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5958; EG-NEXT: BFE_INT T17.X, PS, 0.0, 1, 5959; EG-NEXT: MOV T16.Y, PV.X, 5960; EG-NEXT: BFE_INT T13.Z, PV.W, 0.0, 1, 5961; EG-NEXT: LSHR T0.W, T7.X, literal.x, 5962; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 5963; EG-NEXT: 5(7.006492e-45), 80(1.121039e-43) 5964; EG-NEXT: LSHR T18.X, PS, literal.x, 5965; EG-NEXT: MOV T17.Y, PV.X, 5966; EG-NEXT: BFE_INT T19.Z, PV.W, 0.0, 1, 5967; EG-NEXT: LSHR T0.W, T7.X, literal.y, 5968;
EG-NEXT: LSHR * T1.W, T7.X, literal.z, 5969; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 5970; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 5971; EG-NEXT: BFE_INT T19.X, PS, 0.0, 1, 5972; EG-NEXT: MOV T13.Y, T13.X, 5973; EG-NEXT: BFE_INT T7.Z, PV.W, 0.0, 1, 5974; EG-NEXT: LSHR T0.W, T7.X, literal.x, BS:VEC_120/SCL_212 5975; EG-NEXT: LSHR * T1.W, T7.X, literal.y, 5976; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44) 5977; EG-NEXT: BFE_INT T7.X, PS, 0.0, 1, 5978; EG-NEXT: MOV T19.Y, PV.X, 5979; EG-NEXT: BFE_INT T20.Z, PV.W, 0.0, 1, 5980; EG-NEXT: MOV T13.W, T13.Z, 5981; EG-NEXT: MOV * T17.W, T17.Z, 5982; EG-NEXT: BFE_INT T20.X, T0.Y, 0.0, 1, 5983; EG-NEXT: MOV T7.Y, PV.X, 5984; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 5985; EG-NEXT: MOV T19.W, T19.Z, 5986; EG-NEXT: MOV * T16.W, T16.Z, 5987; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 5988; EG-NEXT: LSHR T21.X, PV.Z, literal.x, 5989; EG-NEXT: MOV T20.Y, PV.X, 5990; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 5991; EG-NEXT: MOV T7.W, T7.Z, 5992; EG-NEXT: MOV * T15.W, T15.Z, 5993; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 5994; EG-NEXT: LSHR T22.X, PV.Z, literal.x, 5995; EG-NEXT: MOV T20.W, T20.Z, 5996; EG-NEXT: MOV * T14.W, T14.Z, 5997; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5998; 5999; GFX12-LABEL: constant_sextload_v16i1_to_v16i64: 6000; GFX12: ; %bb.0: 6001; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 6002; GFX12-NEXT: v_mov_b32_e32 v32, 0 6003; GFX12-NEXT: s_wait_kmcnt 0x0 6004; GFX12-NEXT: global_load_u16 v0, v32, s[2:3] 6005; GFX12-NEXT: s_wait_loadcnt 0x0 6006; GFX12-NEXT: v_readfirstlane_b32 s3, v0 6007; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 6008; GFX12-NEXT: s_lshr_b32 s4, s3, 15 6009; GFX12-NEXT: s_lshr_b32 s2, s3, 14 6010; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 6011; GFX12-NEXT: v_dual_mov_b32 v28, s3 :: v_dual_mov_b32 v3, s5 6012; GFX12-NEXT: s_lshr_b32 s6, s3, 12 6013; GFX12-NEXT: s_lshr_b32 s8, s3, 13 6014; GFX12-NEXT: s_lshr_b32 s10, s3, 10 6015;
GFX12-NEXT: s_lshr_b32 s12, s3, 11 6016; GFX12-NEXT: s_lshr_b32 s14, s3, 8 6017; GFX12-NEXT: s_lshr_b32 s16, s3, 9 6018; GFX12-NEXT: s_lshr_b32 s18, s3, 6 6019; GFX12-NEXT: s_lshr_b32 s20, s3, 7 6020; GFX12-NEXT: s_lshr_b32 s22, s3, 4 6021; GFX12-NEXT: s_lshr_b32 s24, s3, 5 6022; GFX12-NEXT: s_lshr_b32 s26, s3, 2 6023; GFX12-NEXT: s_lshr_b32 s28, s3, 3 6024; GFX12-NEXT: s_lshr_b32 s30, s3, 1 6025; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 6026; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 6027; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 6028; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 6029; GFX12-NEXT: s_wait_alu 0xfffe 6030; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 6031; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 6032; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 6033; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 6034; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 6035; GFX12-NEXT: s_wait_alu 0xfffe 6036; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s7 6037; GFX12-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s4 6038; GFX12-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v4, s6 6039; GFX12-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v6, s8 6040; GFX12-NEXT: v_dual_mov_b32 v11, s13 :: v_dual_mov_b32 v8, s10 6041; GFX12-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v10, s12 6042; GFX12-NEXT: v_mov_b32_e32 v15, s17 6043; GFX12-NEXT: v_bfe_i32 v28, v28, 0, 1 6044; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 6045; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 6046; GFX12-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v17, s19 6047; GFX12-NEXT: v_dual_mov_b32 v14, s16 :: v_dual_mov_b32 v19, s21 6048; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 6049; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 6050; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v21, s23 6051; GFX12-NEXT: v_dual_mov_b32 v18, s20 :: v_dual_mov_b32 v23, s25 6052; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
6053; GFX12-NEXT: v_dual_mov_b32 v20, s22 :: v_dual_mov_b32 v25, s27 6054; GFX12-NEXT: v_dual_mov_b32 v22, s24 :: v_dual_mov_b32 v27, s29 6055; GFX12-NEXT: v_dual_mov_b32 v24, s26 :: v_dual_mov_b32 v31, s31 6056; GFX12-NEXT: v_mov_b32_e32 v26, s28 6057; GFX12-NEXT: v_mov_b32_e32 v30, s30 6058; GFX12-NEXT: s_clause 0x1 6059; GFX12-NEXT: global_store_b128 v32, v[0:3], s[0:1] offset:112 6060; GFX12-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:96 6061; GFX12-NEXT: v_ashrrev_i32_e32 v29, 31, v28 6062; GFX12-NEXT: s_clause 0x5 6063; GFX12-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:80 6064; GFX12-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:64 6065; GFX12-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:48 6066; GFX12-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:32 6067; GFX12-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:16 6068; GFX12-NEXT: global_store_b128 v32, v[28:31], s[0:1] 6069; GFX12-NEXT: s_endpgm 6070 %load = load <16 x i1>, ptr addrspace(4) %in 6071 %ext = sext <16 x i1> %load to <16 x i64> 6072 store <16 x i64> %ext, ptr addrspace(1) %out 6073 ret void 6074} 6075 6076define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 6077; GFX6-LABEL: constant_zextload_v32i1_to_v32i64: 6078; GFX6: ; %bb.0: 6079; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 6080; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6081; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0 6082; GFX6-NEXT: s_mov_b32 s3, 0xf000 6083; GFX6-NEXT: v_mov_b32_e32 v1, 0 6084; GFX6-NEXT: s_mov_b32 s2, -1 6085; GFX6-NEXT: v_mov_b32_e32 v3, v1 6086; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6087; GFX6-NEXT: s_bfe_u32 s5, s4, 0x10001 6088; GFX6-NEXT: s_bfe_u32 s6, s4, 0x10003 6089; GFX6-NEXT: s_bfe_u32 s7, s4, 0x10005 6090; GFX6-NEXT: s_bfe_u32 s8, s4, 0x10007 6091; GFX6-NEXT: s_bfe_u32 s9, s4, 0x10009 6092; GFX6-NEXT: s_bfe_u32 s10, s4, 0x1000b 6093; GFX6-NEXT: s_bfe_u32 s11, s4, 0x1000d 6094; GFX6-NEXT: s_bfe_u32 s12, s4, 0x1000f
6095; GFX6-NEXT: s_bfe_u32 s13, s4, 0x10011 6096; GFX6-NEXT: s_bfe_u32 s14, s4, 0x10013 6097; GFX6-NEXT: s_bfe_u32 s15, s4, 0x10015 6098; GFX6-NEXT: s_bfe_u32 s16, s4, 0x10017 6099; GFX6-NEXT: s_bfe_u32 s17, s4, 0x10019 6100; GFX6-NEXT: s_bfe_u32 s18, s4, 0x1001b 6101; GFX6-NEXT: s_bfe_u32 s19, s4, 0x1001d 6102; GFX6-NEXT: s_lshr_b32 s20, s4, 31 6103; GFX6-NEXT: s_and_b32 s21, s4, 1 6104; GFX6-NEXT: s_bfe_u32 s22, s4, 0x10002 6105; GFX6-NEXT: s_bfe_u32 s23, s4, 0x10004 6106; GFX6-NEXT: s_bfe_u32 s24, s4, 0x10006 6107; GFX6-NEXT: s_bfe_u32 s25, s4, 0x10008 6108; GFX6-NEXT: s_bfe_u32 s26, s4, 0x1000a 6109; GFX6-NEXT: s_bfe_u32 s27, s4, 0x1000c 6110; GFX6-NEXT: s_bfe_u32 s28, s4, 0x1000e 6111; GFX6-NEXT: s_bfe_u32 s29, s4, 0x10010 6112; GFX6-NEXT: s_bfe_u32 s30, s4, 0x10012 6113; GFX6-NEXT: s_bfe_u32 s31, s4, 0x10014 6114; GFX6-NEXT: s_bfe_u32 s33, s4, 0x10016 6115; GFX6-NEXT: s_bfe_u32 s34, s4, 0x10018 6116; GFX6-NEXT: s_bfe_u32 s35, s4, 0x1001a 6117; GFX6-NEXT: s_bfe_u32 s36, s4, 0x1001e 6118; GFX6-NEXT: s_bfe_u32 s4, s4, 0x1001c 6119; GFX6-NEXT: v_mov_b32_e32 v0, s36 6120; GFX6-NEXT: v_mov_b32_e32 v2, s20 6121; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6122; GFX6-NEXT: s_waitcnt expcnt(0) 6123; GFX6-NEXT: v_mov_b32_e32 v0, s4 6124; GFX6-NEXT: v_mov_b32_e32 v2, s19 6125; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6126; GFX6-NEXT: s_waitcnt expcnt(0) 6127; GFX6-NEXT: v_mov_b32_e32 v0, s35 6128; GFX6-NEXT: v_mov_b32_e32 v2, s18 6129; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 6130; GFX6-NEXT: s_waitcnt expcnt(0) 6131; GFX6-NEXT: v_mov_b32_e32 v0, s34 6132; GFX6-NEXT: v_mov_b32_e32 v2, s17 6133; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 6134; GFX6-NEXT: s_waitcnt expcnt(0) 6135; GFX6-NEXT: v_mov_b32_e32 v0, s33 6136; GFX6-NEXT: v_mov_b32_e32 v2, s16 6137; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 6138; GFX6-NEXT: s_waitcnt expcnt(0) 6139; GFX6-NEXT: 
v_mov_b32_e32 v0, s31 6140; GFX6-NEXT: v_mov_b32_e32 v2, s15 6141; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 6142; GFX6-NEXT: s_waitcnt expcnt(0) 6143; GFX6-NEXT: v_mov_b32_e32 v0, s30 6144; GFX6-NEXT: v_mov_b32_e32 v2, s14 6145; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 6146; GFX6-NEXT: s_waitcnt expcnt(0) 6147; GFX6-NEXT: v_mov_b32_e32 v0, s29 6148; GFX6-NEXT: v_mov_b32_e32 v2, s13 6149; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 6150; GFX6-NEXT: s_waitcnt expcnt(0) 6151; GFX6-NEXT: v_mov_b32_e32 v0, s28 6152; GFX6-NEXT: v_mov_b32_e32 v2, s12 6153; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6154; GFX6-NEXT: s_waitcnt expcnt(0) 6155; GFX6-NEXT: v_mov_b32_e32 v0, s27 6156; GFX6-NEXT: v_mov_b32_e32 v2, s11 6157; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 6158; GFX6-NEXT: s_waitcnt expcnt(0) 6159; GFX6-NEXT: v_mov_b32_e32 v0, s26 6160; GFX6-NEXT: v_mov_b32_e32 v2, s10 6161; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6162; GFX6-NEXT: s_waitcnt expcnt(0) 6163; GFX6-NEXT: v_mov_b32_e32 v0, s25 6164; GFX6-NEXT: v_mov_b32_e32 v2, s9 6165; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 6166; GFX6-NEXT: s_waitcnt expcnt(0) 6167; GFX6-NEXT: v_mov_b32_e32 v0, s24 6168; GFX6-NEXT: v_mov_b32_e32 v2, s8 6169; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6170; GFX6-NEXT: s_waitcnt expcnt(0) 6171; GFX6-NEXT: v_mov_b32_e32 v0, s23 6172; GFX6-NEXT: v_mov_b32_e32 v2, s7 6173; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6174; GFX6-NEXT: s_waitcnt expcnt(0) 6175; GFX6-NEXT: v_mov_b32_e32 v0, s22 6176; GFX6-NEXT: v_mov_b32_e32 v2, s6 6177; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6178; GFX6-NEXT: s_waitcnt expcnt(0) 6179; GFX6-NEXT: v_mov_b32_e32 v0, s21 6180; GFX6-NEXT: v_mov_b32_e32 v2, s5 6181; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6182; GFX6-NEXT: 
s_endpgm 6183; 6184; GFX8-LABEL: constant_zextload_v32i1_to_v32i64: 6185; GFX8: ; %bb.0: 6186; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 6187; GFX8-NEXT: v_mov_b32_e32 v1, 0 6188; GFX8-NEXT: v_mov_b32_e32 v3, v1 6189; GFX8-NEXT: s_waitcnt lgkmcnt(0) 6190; GFX8-NEXT: s_load_dword s6, s[2:3], 0x0 6191; GFX8-NEXT: s_waitcnt lgkmcnt(0) 6192; GFX8-NEXT: s_lshr_b32 s7, s6, 31 6193; GFX8-NEXT: s_bfe_u32 s8, s6, 0x1001d 6194; GFX8-NEXT: s_bfe_u32 s9, s6, 0x1001b 6195; GFX8-NEXT: s_bfe_u32 s10, s6, 0x10019 6196; GFX8-NEXT: s_bfe_u32 s11, s6, 0x10017 6197; GFX8-NEXT: s_bfe_u32 s12, s6, 0x10013 6198; GFX8-NEXT: s_bfe_u32 s13, s6, 0x10011 6199; GFX8-NEXT: s_bfe_u32 s14, s6, 0x1000f 6200; GFX8-NEXT: s_bfe_u32 s15, s6, 0x1000d 6201; GFX8-NEXT: s_bfe_u32 s16, s6, 0x1000b 6202; GFX8-NEXT: s_bfe_u32 s17, s6, 0x10009 6203; GFX8-NEXT: s_bfe_u32 s18, s6, 0x10007 6204; GFX8-NEXT: s_bfe_u32 s19, s6, 0x10005 6205; GFX8-NEXT: s_bfe_u32 s4, s6, 0x10003 6206; GFX8-NEXT: s_bfe_u32 s2, s6, 0x10001 6207; GFX8-NEXT: s_and_b32 s3, s6, 1 6208; GFX8-NEXT: s_bfe_u32 s5, s6, 0x10002 6209; GFX8-NEXT: s_bfe_u32 s20, s6, 0x10004 6210; GFX8-NEXT: s_bfe_u32 s21, s6, 0x10006 6211; GFX8-NEXT: s_bfe_u32 s22, s6, 0x10008 6212; GFX8-NEXT: s_bfe_u32 s23, s6, 0x1000a 6213; GFX8-NEXT: s_bfe_u32 s24, s6, 0x1000c 6214; GFX8-NEXT: s_bfe_u32 s25, s6, 0x1000e 6215; GFX8-NEXT: s_bfe_u32 s26, s6, 0x10010 6216; GFX8-NEXT: s_bfe_u32 s27, s6, 0x10012 6217; GFX8-NEXT: s_bfe_u32 s28, s6, 0x10014 6218; GFX8-NEXT: s_bfe_u32 s29, s6, 0x10015 6219; GFX8-NEXT: s_bfe_u32 s30, s6, 0x10016 6220; GFX8-NEXT: s_bfe_u32 s31, s6, 0x10018 6221; GFX8-NEXT: s_bfe_u32 s33, s6, 0x1001a 6222; GFX8-NEXT: s_bfe_u32 s34, s6, 0x1001c 6223; GFX8-NEXT: s_bfe_u32 s6, s6, 0x1001e 6224; GFX8-NEXT: v_mov_b32_e32 v0, s6 6225; GFX8-NEXT: s_add_u32 s6, s0, 0xf0 6226; GFX8-NEXT: v_mov_b32_e32 v2, s7 6227; GFX8-NEXT: s_addc_u32 s7, s1, 0 6228; GFX8-NEXT: v_mov_b32_e32 v4, s6 6229; GFX8-NEXT: v_mov_b32_e32 v5, s7 6230; GFX8-NEXT: s_add_u32 s6, s0, 0xe0 
6231; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6232; GFX8-NEXT: s_addc_u32 s7, s1, 0 6233; GFX8-NEXT: v_mov_b32_e32 v4, s6 6234; GFX8-NEXT: v_mov_b32_e32 v0, s34 6235; GFX8-NEXT: v_mov_b32_e32 v2, s8 6236; GFX8-NEXT: v_mov_b32_e32 v5, s7 6237; GFX8-NEXT: s_add_u32 s6, s0, 0xd0 6238; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6239; GFX8-NEXT: s_addc_u32 s7, s1, 0 6240; GFX8-NEXT: v_mov_b32_e32 v4, s6 6241; GFX8-NEXT: v_mov_b32_e32 v0, s33 6242; GFX8-NEXT: v_mov_b32_e32 v2, s9 6243; GFX8-NEXT: v_mov_b32_e32 v5, s7 6244; GFX8-NEXT: s_add_u32 s6, s0, 0xc0 6245; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6246; GFX8-NEXT: s_addc_u32 s7, s1, 0 6247; GFX8-NEXT: v_mov_b32_e32 v4, s6 6248; GFX8-NEXT: v_mov_b32_e32 v0, s31 6249; GFX8-NEXT: v_mov_b32_e32 v2, s10 6250; GFX8-NEXT: v_mov_b32_e32 v5, s7 6251; GFX8-NEXT: s_add_u32 s6, s0, 0xb0 6252; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6253; GFX8-NEXT: s_addc_u32 s7, s1, 0 6254; GFX8-NEXT: v_mov_b32_e32 v4, s6 6255; GFX8-NEXT: v_mov_b32_e32 v0, s30 6256; GFX8-NEXT: v_mov_b32_e32 v2, s11 6257; GFX8-NEXT: v_mov_b32_e32 v5, s7 6258; GFX8-NEXT: s_add_u32 s6, s0, 0xa0 6259; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6260; GFX8-NEXT: s_addc_u32 s7, s1, 0 6261; GFX8-NEXT: v_mov_b32_e32 v4, s6 6262; GFX8-NEXT: v_mov_b32_e32 v0, s28 6263; GFX8-NEXT: v_mov_b32_e32 v2, s29 6264; GFX8-NEXT: v_mov_b32_e32 v5, s7 6265; GFX8-NEXT: s_add_u32 s6, s0, 0x90 6266; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6267; GFX8-NEXT: s_addc_u32 s7, s1, 0 6268; GFX8-NEXT: v_mov_b32_e32 v4, s6 6269; GFX8-NEXT: v_mov_b32_e32 v0, s27 6270; GFX8-NEXT: v_mov_b32_e32 v2, s12 6271; GFX8-NEXT: v_mov_b32_e32 v5, s7 6272; GFX8-NEXT: s_add_u32 s6, s0, 0x80 6273; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6274; GFX8-NEXT: s_addc_u32 s7, s1, 0 6275; GFX8-NEXT: v_mov_b32_e32 v4, s6 6276; GFX8-NEXT: v_mov_b32_e32 v0, s26 6277; GFX8-NEXT: v_mov_b32_e32 v2, s13 6278; GFX8-NEXT: v_mov_b32_e32 v5, s7 6279; GFX8-NEXT: s_add_u32 s6, s0, 0x70 6280; GFX8-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 6281; GFX8-NEXT: s_addc_u32 s7, s1, 0 6282; GFX8-NEXT: v_mov_b32_e32 v4, s6 6283; GFX8-NEXT: v_mov_b32_e32 v0, s25 6284; GFX8-NEXT: v_mov_b32_e32 v2, s14 6285; GFX8-NEXT: v_mov_b32_e32 v5, s7 6286; GFX8-NEXT: s_add_u32 s6, s0, 0x60 6287; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6288; GFX8-NEXT: s_addc_u32 s7, s1, 0 6289; GFX8-NEXT: v_mov_b32_e32 v4, s6 6290; GFX8-NEXT: v_mov_b32_e32 v0, s24 6291; GFX8-NEXT: v_mov_b32_e32 v2, s15 6292; GFX8-NEXT: v_mov_b32_e32 v5, s7 6293; GFX8-NEXT: s_add_u32 s6, s0, 0x50 6294; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6295; GFX8-NEXT: s_addc_u32 s7, s1, 0 6296; GFX8-NEXT: v_mov_b32_e32 v4, s6 6297; GFX8-NEXT: v_mov_b32_e32 v0, s23 6298; GFX8-NEXT: v_mov_b32_e32 v2, s16 6299; GFX8-NEXT: v_mov_b32_e32 v5, s7 6300; GFX8-NEXT: s_add_u32 s6, s0, 64 6301; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6302; GFX8-NEXT: s_addc_u32 s7, s1, 0 6303; GFX8-NEXT: v_mov_b32_e32 v4, s6 6304; GFX8-NEXT: v_mov_b32_e32 v0, s22 6305; GFX8-NEXT: v_mov_b32_e32 v2, s17 6306; GFX8-NEXT: v_mov_b32_e32 v5, s7 6307; GFX8-NEXT: s_add_u32 s6, s0, 48 6308; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6309; GFX8-NEXT: s_addc_u32 s7, s1, 0 6310; GFX8-NEXT: v_mov_b32_e32 v4, s6 6311; GFX8-NEXT: v_mov_b32_e32 v0, s21 6312; GFX8-NEXT: v_mov_b32_e32 v2, s18 6313; GFX8-NEXT: v_mov_b32_e32 v5, s7 6314; GFX8-NEXT: s_add_u32 s6, s0, 32 6315; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6316; GFX8-NEXT: s_addc_u32 s7, s1, 0 6317; GFX8-NEXT: v_mov_b32_e32 v4, s6 6318; GFX8-NEXT: v_mov_b32_e32 v0, s20 6319; GFX8-NEXT: v_mov_b32_e32 v2, s19 6320; GFX8-NEXT: v_mov_b32_e32 v5, s7 6321; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6322; GFX8-NEXT: s_nop 0 6323; GFX8-NEXT: v_mov_b32_e32 v2, s4 6324; GFX8-NEXT: s_add_u32 s4, s0, 16 6325; GFX8-NEXT: v_mov_b32_e32 v0, s5 6326; GFX8-NEXT: s_addc_u32 s5, s1, 0 6327; GFX8-NEXT: v_mov_b32_e32 v4, s4 6328; GFX8-NEXT: v_mov_b32_e32 v5, s5 6329; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6330; 
GFX8-NEXT: v_mov_b32_e32 v5, s1 6331; GFX8-NEXT: v_mov_b32_e32 v0, s3 6332; GFX8-NEXT: v_mov_b32_e32 v2, s2 6333; GFX8-NEXT: v_mov_b32_e32 v4, s0 6334; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6335; GFX8-NEXT: s_endpgm 6336; 6337; EG-LABEL: constant_zextload_v32i1_to_v32i64: 6338; EG: ; %bb.0: 6339; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[] 6340; EG-NEXT: TEX 0 @22 6341; EG-NEXT: ALU 96, @25, KC0[CB0:0-32], KC1[] 6342; EG-NEXT: ALU 30, @122, KC0[CB0:0-32], KC1[] 6343; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T42.X, 0 6344; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T41.X, 0 6345; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T40.X, 0 6346; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T39.X, 0 6347; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T38.X, 0 6348; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T37.X, 0 6349; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T36.X, 0 6350; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T35.X, 0 6351; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T34.X, 0 6352; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T33.X, 0 6353; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0 6354; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T31.X, 0 6355; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T30.X, 0 6356; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T29.X, 0 6357; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T28.X, 0 6358; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T27.X, 1 6359; EG-NEXT: CF_END 6360; EG-NEXT: PAD 6361; EG-NEXT: Fetch clause starting at 22: 6362; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1 6363; EG-NEXT: ALU clause starting at 24: 6364; EG-NEXT: MOV * T11.X, KC0[2].Z, 6365; EG-NEXT: ALU clause starting at 25: 6366; EG-NEXT: LSHR * T12.Z, T11.X, literal.x, 6367; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6368; EG-NEXT: BFE_UINT T12.X, T11.X, literal.x, 1, 6369; EG-NEXT: MOV T12.Y, 0.0, 6370; EG-NEXT: BFE_UINT * T13.Z, T11.X, literal.y, 1, 6371; EG-NEXT: 30(4.203895e-44), 29(4.063766e-44) 
6372; EG-NEXT: BFE_UINT T13.X, T11.X, literal.x, 1, 6373; EG-NEXT: MOV T13.Y, 0.0, 6374; EG-NEXT: BFE_UINT * T14.Z, T11.X, literal.y, 1, 6375; EG-NEXT: 28(3.923636e-44), 27(3.783506e-44) 6376; EG-NEXT: BFE_UINT T14.X, T11.X, literal.x, 1, 6377; EG-NEXT: MOV T14.Y, 0.0, 6378; EG-NEXT: BFE_UINT * T15.Z, T11.X, literal.y, 1, 6379; EG-NEXT: 26(3.643376e-44), 25(3.503246e-44) 6380; EG-NEXT: BFE_UINT T15.X, T11.X, literal.x, 1, 6381; EG-NEXT: MOV T15.Y, 0.0, 6382; EG-NEXT: BFE_UINT * T16.Z, T11.X, literal.y, 1, 6383; EG-NEXT: 24(3.363116e-44), 23(3.222986e-44) 6384; EG-NEXT: BFE_UINT T16.X, T11.X, literal.x, 1, 6385; EG-NEXT: MOV T16.Y, 0.0, 6386; EG-NEXT: BFE_UINT * T17.Z, T11.X, literal.y, 1, 6387; EG-NEXT: 22(3.082857e-44), 21(2.942727e-44) 6388; EG-NEXT: BFE_UINT T17.X, T11.X, literal.x, 1, 6389; EG-NEXT: MOV T17.Y, 0.0, 6390; EG-NEXT: BFE_UINT * T18.Z, T11.X, literal.y, 1, 6391; EG-NEXT: 20(2.802597e-44), 19(2.662467e-44) 6392; EG-NEXT: BFE_UINT T18.X, T11.X, literal.x, 1, 6393; EG-NEXT: MOV T18.Y, 0.0, 6394; EG-NEXT: BFE_UINT * T19.Z, T11.X, literal.y, 1, 6395; EG-NEXT: 18(2.522337e-44), 17(2.382207e-44) 6396; EG-NEXT: BFE_UINT T19.X, T11.X, literal.x, 1, 6397; EG-NEXT: MOV T19.Y, 0.0, 6398; EG-NEXT: BFE_UINT * T20.Z, T11.X, literal.y, 1, 6399; EG-NEXT: 16(2.242078e-44), 15(2.101948e-44) 6400; EG-NEXT: BFE_UINT T20.X, T11.X, literal.x, 1, 6401; EG-NEXT: MOV T20.Y, 0.0, 6402; EG-NEXT: BFE_UINT * T21.Z, T11.X, literal.y, 1, 6403; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44) 6404; EG-NEXT: BFE_UINT T21.X, T11.X, literal.x, 1, 6405; EG-NEXT: MOV T21.Y, 0.0, 6406; EG-NEXT: BFE_UINT * T22.Z, T11.X, literal.y, 1, 6407; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44) 6408; EG-NEXT: BFE_UINT T22.X, T11.X, literal.x, 1, 6409; EG-NEXT: MOV T22.Y, 0.0, 6410; EG-NEXT: BFE_UINT * T23.Z, T11.X, literal.y, 1, 6411; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44) 6412; EG-NEXT: BFE_UINT T23.X, T11.X, literal.x, 1, 6413; EG-NEXT: MOV T23.Y, 0.0, 6414; EG-NEXT: BFE_UINT * T24.Z, T11.X, 
literal.y, 1, 6415; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45) 6416; EG-NEXT: BFE_UINT T24.X, T11.X, literal.x, 1, 6417; EG-NEXT: MOV T24.Y, 0.0, 6418; EG-NEXT: BFE_UINT * T25.Z, T11.X, literal.y, 1, 6419; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45) 6420; EG-NEXT: BFE_UINT T25.X, T11.X, literal.x, 1, 6421; EG-NEXT: MOV T25.Y, 0.0, 6422; EG-NEXT: BFE_UINT * T26.Z, T11.X, literal.y, 1, 6423; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45) 6424; EG-NEXT: BFE_UINT T26.X, T11.X, literal.x, 1, 6425; EG-NEXT: MOV T26.Y, 0.0, 6426; EG-NEXT: BFE_UINT T11.Z, T11.X, 1, 1, 6427; EG-NEXT: AND_INT * T11.X, T11.X, 1, 6428; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6429; EG-NEXT: MOV T11.Y, 0.0, 6430; EG-NEXT: MOV T12.W, 0.0, 6431; EG-NEXT: MOV * T13.W, 0.0, 6432; EG-NEXT: MOV T14.W, 0.0, 6433; EG-NEXT: MOV * T15.W, 0.0, 6434; EG-NEXT: MOV T16.W, 0.0, 6435; EG-NEXT: MOV * T17.W, 0.0, 6436; EG-NEXT: MOV T18.W, 0.0, 6437; EG-NEXT: MOV * T19.W, 0.0, 6438; EG-NEXT: MOV T20.W, 0.0, 6439; EG-NEXT: MOV * T21.W, 0.0, 6440; EG-NEXT: MOV T22.W, 0.0, 6441; EG-NEXT: MOV * T23.W, 0.0, 6442; EG-NEXT: MOV T24.W, 0.0, 6443; EG-NEXT: MOV * T25.W, 0.0, 6444; EG-NEXT: MOV T26.W, 0.0, 6445; EG-NEXT: MOV * T11.W, 0.0, 6446; EG-NEXT: LSHR T27.X, KC0[2].Y, literal.x, 6447; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6448; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6449; EG-NEXT: LSHR T28.X, PV.W, literal.x, 6450; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6451; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6452; EG-NEXT: LSHR T29.X, PV.W, literal.x, 6453; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6454; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6455; EG-NEXT: LSHR T30.X, PV.W, literal.x, 6456; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6457; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6458; EG-NEXT: LSHR T31.X, PV.W, literal.x, 6459; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6460; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6461; EG-NEXT: LSHR * T32.X, PV.W, literal.x, 6462; EG-NEXT: 2(2.802597e-45), 
0(0.000000e+00) 6463; EG-NEXT: ALU clause starting at 122: 6464; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6465; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 6466; EG-NEXT: LSHR T33.X, PV.W, literal.x, 6467; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6468; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6469; EG-NEXT: LSHR T34.X, PV.W, literal.x, 6470; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6471; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 6472; EG-NEXT: LSHR T35.X, PV.W, literal.x, 6473; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6474; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 6475; EG-NEXT: LSHR T36.X, PV.W, literal.x, 6476; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6477; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 6478; EG-NEXT: LSHR T37.X, PV.W, literal.x, 6479; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6480; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 6481; EG-NEXT: LSHR T38.X, PV.W, literal.x, 6482; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6483; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 6484; EG-NEXT: LSHR T39.X, PV.W, literal.x, 6485; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6486; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 6487; EG-NEXT: LSHR T40.X, PV.W, literal.x, 6488; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6489; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 6490; EG-NEXT: LSHR T41.X, PV.W, literal.x, 6491; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6492; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 6493; EG-NEXT: LSHR * T42.X, PV.W, literal.x, 6494; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6495; 6496; GFX12-LABEL: constant_zextload_v32i1_to_v32i64: 6497; GFX12: ; %bb.0: 6498; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 6499; GFX12-NEXT: s_wait_kmcnt 0x0 6500; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0 6501; GFX12-NEXT: s_wait_kmcnt 0x0 6502; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001e 6503; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 6504; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: 
v_dual_mov_b32 v0, s3 6505; GFX12-NEXT: s_lshr_b32 s4, s2, 31 6506; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001d 6507; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, v1 6508; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001c 6509; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:240 6510; GFX12-NEXT: s_wait_alu 0xfffe 6511; GFX12-NEXT: v_mov_b32_e32 v0, s4 6512; GFX12-NEXT: v_mov_b32_e32 v2, s3 6513; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001b 6514; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001a 6515; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224 6516; GFX12-NEXT: s_wait_alu 0xfffe 6517; GFX12-NEXT: v_mov_b32_e32 v0, s4 6518; GFX12-NEXT: v_mov_b32_e32 v2, s3 6519; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10019 6520; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10018 6521; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208 6522; GFX12-NEXT: s_wait_alu 0xfffe 6523; GFX12-NEXT: v_mov_b32_e32 v0, s4 6524; GFX12-NEXT: v_mov_b32_e32 v2, s3 6525; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10017 6526; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10016 6527; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192 6528; GFX12-NEXT: s_wait_alu 0xfffe 6529; GFX12-NEXT: v_mov_b32_e32 v0, s4 6530; GFX12-NEXT: v_mov_b32_e32 v2, s3 6531; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014 6532; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10015 6533; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176 6534; GFX12-NEXT: s_wait_alu 0xfffe 6535; GFX12-NEXT: v_mov_b32_e32 v0, s3 6536; GFX12-NEXT: v_mov_b32_e32 v2, s4 6537; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10013 6538; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10012 6539; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160 6540; GFX12-NEXT: s_wait_alu 0xfffe 6541; GFX12-NEXT: v_mov_b32_e32 v0, s4 6542; GFX12-NEXT: v_mov_b32_e32 v2, s3 6543; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10011 6544; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10010 6545; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144 6546; GFX12-NEXT: s_wait_alu 0xfffe 6547; GFX12-NEXT: v_mov_b32_e32 v0, s4 6548; GFX12-NEXT: v_mov_b32_e32 v2, 
s3 6549; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000f 6550; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000e 6551; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128 6552; GFX12-NEXT: s_wait_alu 0xfffe 6553; GFX12-NEXT: v_mov_b32_e32 v0, s4 6554; GFX12-NEXT: v_mov_b32_e32 v2, s3 6555; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d 6556; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c 6557; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112 6558; GFX12-NEXT: s_wait_alu 0xfffe 6559; GFX12-NEXT: v_mov_b32_e32 v0, s4 6560; GFX12-NEXT: v_mov_b32_e32 v2, s3 6561; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000b 6562; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000a 6563; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96 6564; GFX12-NEXT: s_wait_alu 0xfffe 6565; GFX12-NEXT: v_mov_b32_e32 v0, s4 6566; GFX12-NEXT: v_mov_b32_e32 v2, s3 6567; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009 6568; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10008 6569; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80 6570; GFX12-NEXT: s_wait_alu 0xfffe 6571; GFX12-NEXT: v_mov_b32_e32 v0, s4 6572; GFX12-NEXT: v_mov_b32_e32 v2, s3 6573; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007 6574; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006 6575; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64 6576; GFX12-NEXT: s_wait_alu 0xfffe 6577; GFX12-NEXT: v_mov_b32_e32 v0, s4 6578; GFX12-NEXT: v_mov_b32_e32 v2, s3 6579; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10005 6580; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004 6581; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 6582; GFX12-NEXT: s_wait_alu 0xfffe 6583; GFX12-NEXT: v_mov_b32_e32 v0, s4 6584; GFX12-NEXT: v_mov_b32_e32 v2, s3 6585; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003 6586; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002 6587; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32 6588; GFX12-NEXT: s_wait_alu 0xfffe 6589; GFX12-NEXT: v_mov_b32_e32 v0, s4 6590; GFX12-NEXT: v_mov_b32_e32 v2, s3 6591; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001 6592; GFX12-NEXT: s_and_b32 s2, s2, 1 6593; GFX12-NEXT: global_store_b128 v1, 
v[0:3], s[0:1] offset:16 6594; GFX12-NEXT: s_wait_alu 0xfffe 6595; GFX12-NEXT: v_mov_b32_e32 v0, s2 6596; GFX12-NEXT: v_mov_b32_e32 v2, s3 6597; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 6598; GFX12-NEXT: s_endpgm 6599 %load = load <32 x i1>, ptr addrspace(4) %in 6600 %ext = zext <32 x i1> %load to <32 x i64> 6601 store <32 x i64> %ext, ptr addrspace(1) %out 6602 ret void 6603} 6604 6605define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 6606; GFX6-LABEL: constant_sextload_v32i1_to_v32i64: 6607; GFX6: ; %bb.0: 6608; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 6609; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6610; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0 6611; GFX6-NEXT: s_mov_b32 s3, 0xf000 6612; GFX6-NEXT: s_mov_b32 s2, -1 6613; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6614; GFX6-NEXT: s_lshr_b32 s38, s4, 30 6615; GFX6-NEXT: s_lshr_b32 s40, s4, 31 6616; GFX6-NEXT: s_lshr_b32 s34, s4, 28 6617; GFX6-NEXT: s_lshr_b32 s36, s4, 29 6618; GFX6-NEXT: s_lshr_b32 s28, s4, 26 6619; GFX6-NEXT: s_lshr_b32 s30, s4, 27 6620; GFX6-NEXT: s_lshr_b32 s24, s4, 24 6621; GFX6-NEXT: s_lshr_b32 s26, s4, 25 6622; GFX6-NEXT: s_lshr_b32 s20, s4, 22 6623; GFX6-NEXT: s_lshr_b32 s22, s4, 23 6624; GFX6-NEXT: s_lshr_b32 s18, s4, 20 6625; GFX6-NEXT: s_lshr_b32 s6, s4, 21 6626; GFX6-NEXT: s_lshr_b32 s8, s4, 18 6627; GFX6-NEXT: s_lshr_b32 s10, s4, 19 6628; GFX6-NEXT: s_lshr_b32 s12, s4, 16 6629; GFX6-NEXT: s_lshr_b32 s14, s4, 17 6630; GFX6-NEXT: s_lshr_b32 s16, s4, 14 6631; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000 6632; GFX6-NEXT: s_lshr_b32 s42, s4, 15 6633; GFX6-NEXT: v_mov_b32_e32 v0, s44 6634; GFX6-NEXT: v_mov_b32_e32 v1, s45 6635; GFX6-NEXT: s_lshr_b32 s44, s4, 12 6636; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 6637; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 6638; GFX6-NEXT: v_mov_b32_e32 v2, s38 6639; GFX6-NEXT: v_mov_b32_e32 v3, s39 6640; GFX6-NEXT: s_lshr_b32 s38, s4, 13 6641; GFX6-NEXT: v_mov_b32_e32 v4, s40 6642; 
GFX6-NEXT: v_mov_b32_e32 v5, s41 6643; GFX6-NEXT: s_lshr_b32 s40, s4, 10 6644; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 6645; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 6646; GFX6-NEXT: v_mov_b32_e32 v6, s34 6647; GFX6-NEXT: v_mov_b32_e32 v7, s35 6648; GFX6-NEXT: s_lshr_b32 s34, s4, 11 6649; GFX6-NEXT: v_mov_b32_e32 v8, s36 6650; GFX6-NEXT: v_mov_b32_e32 v9, s37 6651; GFX6-NEXT: s_lshr_b32 s36, s4, 8 6652; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 6653; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 6654; GFX6-NEXT: v_mov_b32_e32 v10, s28 6655; GFX6-NEXT: v_mov_b32_e32 v11, s29 6656; GFX6-NEXT: s_lshr_b32 s28, s4, 9 6657; GFX6-NEXT: v_mov_b32_e32 v12, s30 6658; GFX6-NEXT: v_mov_b32_e32 v13, s31 6659; GFX6-NEXT: s_lshr_b32 s30, s4, 6 6660; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 6661; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 6662; GFX6-NEXT: v_mov_b32_e32 v14, s24 6663; GFX6-NEXT: v_mov_b32_e32 v15, s25 6664; GFX6-NEXT: s_lshr_b32 s24, s4, 7 6665; GFX6-NEXT: v_mov_b32_e32 v16, s26 6666; GFX6-NEXT: v_mov_b32_e32 v17, s27 6667; GFX6-NEXT: s_lshr_b32 s26, s4, 4 6668; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 6669; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 6670; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:240 6671; GFX6-NEXT: s_waitcnt expcnt(0) 6672; GFX6-NEXT: v_mov_b32_e32 v2, s20 6673; GFX6-NEXT: v_mov_b32_e32 v3, s21 6674; GFX6-NEXT: s_lshr_b32 s20, s4, 5 6675; GFX6-NEXT: v_mov_b32_e32 v4, s22 6676; GFX6-NEXT: v_mov_b32_e32 v5, s23 6677; GFX6-NEXT: s_lshr_b32 s22, s4, 2 6678; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 6679; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224 6680; GFX6-NEXT: s_waitcnt expcnt(0) 6681; GFX6-NEXT: v_mov_b32_e32 v6, s18 6682; GFX6-NEXT: v_mov_b32_e32 v7, s19 6683; GFX6-NEXT: s_lshr_b32 s18, s4, 3 6684; GFX6-NEXT: s_lshr_b32 s4, s4, 1 6685; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 6686; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 6687; 
GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 6688; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 6689; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 6690; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 6691; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 6692; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 6693; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 6694; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 6695; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 6696; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 6697; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 6698; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 6699; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 6700; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 6701; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 6702; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 6703; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 6704; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 6705; GFX6-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:208 6706; GFX6-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192 6707; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:176 6708; GFX6-NEXT: v_mov_b32_e32 v8, s6 6709; GFX6-NEXT: v_mov_b32_e32 v9, s7 6710; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:160 6711; GFX6-NEXT: s_waitcnt expcnt(1) 6712; GFX6-NEXT: v_mov_b32_e32 v2, s8 6713; GFX6-NEXT: v_mov_b32_e32 v3, s9 6714; GFX6-NEXT: v_mov_b32_e32 v4, s10 6715; GFX6-NEXT: v_mov_b32_e32 v5, s11 6716; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:144 6717; GFX6-NEXT: s_waitcnt expcnt(0) 6718; GFX6-NEXT: v_mov_b32_e32 v2, s12 6719; GFX6-NEXT: v_mov_b32_e32 v3, s13 6720; GFX6-NEXT: v_mov_b32_e32 v4, s14 6721; GFX6-NEXT: v_mov_b32_e32 v5, s15 6722; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:128 6723; GFX6-NEXT: s_waitcnt expcnt(0) 6724; GFX6-NEXT: v_mov_b32_e32 v2, s16 6725; GFX6-NEXT: v_mov_b32_e32 v3, s17 6726; 
GFX6-NEXT: v_mov_b32_e32 v4, s42 6727; GFX6-NEXT: v_mov_b32_e32 v5, s43 6728; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112 6729; GFX6-NEXT: s_waitcnt expcnt(0) 6730; GFX6-NEXT: v_mov_b32_e32 v2, s44 6731; GFX6-NEXT: v_mov_b32_e32 v3, s45 6732; GFX6-NEXT: v_mov_b32_e32 v4, s38 6733; GFX6-NEXT: v_mov_b32_e32 v5, s39 6734; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96 6735; GFX6-NEXT: s_waitcnt expcnt(0) 6736; GFX6-NEXT: v_mov_b32_e32 v2, s40 6737; GFX6-NEXT: v_mov_b32_e32 v3, s41 6738; GFX6-NEXT: v_mov_b32_e32 v4, s34 6739; GFX6-NEXT: v_mov_b32_e32 v5, s35 6740; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80 6741; GFX6-NEXT: s_waitcnt expcnt(0) 6742; GFX6-NEXT: v_mov_b32_e32 v2, s36 6743; GFX6-NEXT: v_mov_b32_e32 v3, s37 6744; GFX6-NEXT: v_mov_b32_e32 v4, s28 6745; GFX6-NEXT: v_mov_b32_e32 v5, s29 6746; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64 6747; GFX6-NEXT: s_waitcnt expcnt(0) 6748; GFX6-NEXT: v_mov_b32_e32 v2, s30 6749; GFX6-NEXT: v_mov_b32_e32 v3, s31 6750; GFX6-NEXT: v_mov_b32_e32 v4, s24 6751; GFX6-NEXT: v_mov_b32_e32 v5, s25 6752; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48 6753; GFX6-NEXT: s_waitcnt expcnt(0) 6754; GFX6-NEXT: v_mov_b32_e32 v2, s26 6755; GFX6-NEXT: v_mov_b32_e32 v3, s27 6756; GFX6-NEXT: v_mov_b32_e32 v4, s20 6757; GFX6-NEXT: v_mov_b32_e32 v5, s21 6758; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32 6759; GFX6-NEXT: s_waitcnt expcnt(0) 6760; GFX6-NEXT: v_mov_b32_e32 v2, s22 6761; GFX6-NEXT: v_mov_b32_e32 v3, s23 6762; GFX6-NEXT: v_mov_b32_e32 v4, s18 6763; GFX6-NEXT: v_mov_b32_e32 v5, s19 6764; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16 6765; GFX6-NEXT: s_waitcnt expcnt(0) 6766; GFX6-NEXT: v_mov_b32_e32 v2, s4 6767; GFX6-NEXT: v_mov_b32_e32 v3, s5 6768; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6769; GFX6-NEXT: s_endpgm 6770; 6771; GFX8-LABEL: constant_sextload_v32i1_to_v32i64: 6772; GFX8: 
; %bb.0: 6773; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 6774; GFX8-NEXT: s_waitcnt lgkmcnt(0) 6775; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0 6776; GFX8-NEXT: s_waitcnt lgkmcnt(0) 6777; GFX8-NEXT: s_lshr_b32 s44, s2, 30 6778; GFX8-NEXT: s_lshr_b32 s46, s2, 31 6779; GFX8-NEXT: s_lshr_b32 s48, s2, 28 6780; GFX8-NEXT: s_lshr_b32 s50, s2, 29 6781; GFX8-NEXT: s_lshr_b32 s52, s2, 26 6782; GFX8-NEXT: s_lshr_b32 s54, s2, 27 6783; GFX8-NEXT: s_lshr_b32 s56, s2, 24 6784; GFX8-NEXT: s_lshr_b32 s58, s2, 25 6785; GFX8-NEXT: s_lshr_b32 s60, s2, 22 6786; GFX8-NEXT: s_lshr_b32 s62, s2, 23 6787; GFX8-NEXT: s_lshr_b32 s64, s2, 20 6788; GFX8-NEXT: s_lshr_b32 s66, s2, 21 6789; GFX8-NEXT: s_lshr_b32 s42, s2, 18 6790; GFX8-NEXT: s_lshr_b32 s40, s2, 19 6791; GFX8-NEXT: s_lshr_b32 s38, s2, 16 6792; GFX8-NEXT: s_lshr_b32 s36, s2, 17 6793; GFX8-NEXT: s_lshr_b32 s34, s2, 14 6794; GFX8-NEXT: s_lshr_b32 s30, s2, 15 6795; GFX8-NEXT: s_lshr_b32 s28, s2, 12 6796; GFX8-NEXT: s_lshr_b32 s26, s2, 13 6797; GFX8-NEXT: s_lshr_b32 s24, s2, 10 6798; GFX8-NEXT: s_lshr_b32 s22, s2, 11 6799; GFX8-NEXT: s_lshr_b32 s20, s2, 8 6800; GFX8-NEXT: s_lshr_b32 s18, s2, 9 6801; GFX8-NEXT: s_lshr_b32 s16, s2, 6 6802; GFX8-NEXT: s_lshr_b32 s14, s2, 7 6803; GFX8-NEXT: s_lshr_b32 s12, s2, 4 6804; GFX8-NEXT: s_lshr_b32 s10, s2, 5 6805; GFX8-NEXT: s_lshr_b32 s8, s2, 2 6806; GFX8-NEXT: s_lshr_b32 s6, s2, 3 6807; GFX8-NEXT: s_lshr_b32 s68, s2, 1 6808; GFX8-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000 6809; GFX8-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000 6810; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 6811; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 6812; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 6813; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 6814; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 6815; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 6816; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 6817; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 6818; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 
0x10000 6819; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 6820; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 6821; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 6822; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 6823; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 6824; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 6825; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 6826; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 6827; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 6828; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 6829; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 6830; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 6831; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 6832; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 6833; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 6834; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 6835; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 6836; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 6837; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 6838; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 6839; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 6840; GFX8-NEXT: v_mov_b32_e32 v0, s44 6841; GFX8-NEXT: s_add_u32 s44, s0, 0xf0 6842; GFX8-NEXT: v_mov_b32_e32 v1, s45 6843; GFX8-NEXT: s_addc_u32 s45, s1, 0 6844; GFX8-NEXT: v_mov_b32_e32 v4, s44 6845; GFX8-NEXT: v_mov_b32_e32 v2, s46 6846; GFX8-NEXT: v_mov_b32_e32 v3, s47 6847; GFX8-NEXT: v_mov_b32_e32 v5, s45 6848; GFX8-NEXT: s_add_u32 s44, s0, 0xe0 6849; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6850; GFX8-NEXT: s_addc_u32 s45, s1, 0 6851; GFX8-NEXT: v_mov_b32_e32 v4, s44 6852; GFX8-NEXT: v_mov_b32_e32 v0, s48 6853; GFX8-NEXT: v_mov_b32_e32 v1, s49 6854; GFX8-NEXT: v_mov_b32_e32 v2, s50 6855; GFX8-NEXT: v_mov_b32_e32 v3, s51 6856; GFX8-NEXT: v_mov_b32_e32 v5, s45 6857; GFX8-NEXT: s_add_u32 s44, s0, 0xd0 6858; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6859; GFX8-NEXT: s_addc_u32 s45, s1, 0 6860; GFX8-NEXT: 
v_mov_b32_e32 v4, s44 6861; GFX8-NEXT: v_mov_b32_e32 v0, s52 6862; GFX8-NEXT: v_mov_b32_e32 v1, s53 6863; GFX8-NEXT: v_mov_b32_e32 v2, s54 6864; GFX8-NEXT: v_mov_b32_e32 v3, s55 6865; GFX8-NEXT: v_mov_b32_e32 v5, s45 6866; GFX8-NEXT: s_add_u32 s44, s0, 0xc0 6867; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6868; GFX8-NEXT: s_addc_u32 s45, s1, 0 6869; GFX8-NEXT: v_mov_b32_e32 v4, s44 6870; GFX8-NEXT: v_mov_b32_e32 v0, s56 6871; GFX8-NEXT: v_mov_b32_e32 v1, s57 6872; GFX8-NEXT: v_mov_b32_e32 v2, s58 6873; GFX8-NEXT: v_mov_b32_e32 v3, s59 6874; GFX8-NEXT: v_mov_b32_e32 v5, s45 6875; GFX8-NEXT: s_add_u32 s44, s0, 0xb0 6876; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6877; GFX8-NEXT: s_addc_u32 s45, s1, 0 6878; GFX8-NEXT: v_mov_b32_e32 v4, s44 6879; GFX8-NEXT: v_mov_b32_e32 v0, s60 6880; GFX8-NEXT: v_mov_b32_e32 v1, s61 6881; GFX8-NEXT: v_mov_b32_e32 v2, s62 6882; GFX8-NEXT: v_mov_b32_e32 v3, s63 6883; GFX8-NEXT: v_mov_b32_e32 v5, s45 6884; GFX8-NEXT: s_add_u32 s44, s0, 0xa0 6885; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6886; GFX8-NEXT: s_addc_u32 s45, s1, 0 6887; GFX8-NEXT: v_mov_b32_e32 v4, s44 6888; GFX8-NEXT: v_mov_b32_e32 v0, s64 6889; GFX8-NEXT: v_mov_b32_e32 v1, s65 6890; GFX8-NEXT: v_mov_b32_e32 v2, s66 6891; GFX8-NEXT: v_mov_b32_e32 v3, s67 6892; GFX8-NEXT: v_mov_b32_e32 v5, s45 6893; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6894; GFX8-NEXT: s_nop 0 6895; GFX8-NEXT: v_mov_b32_e32 v2, s40 6896; GFX8-NEXT: s_add_u32 s40, s0, 0x90 6897; GFX8-NEXT: v_mov_b32_e32 v3, s41 6898; GFX8-NEXT: s_addc_u32 s41, s1, 0 6899; GFX8-NEXT: v_mov_b32_e32 v4, s40 6900; GFX8-NEXT: v_mov_b32_e32 v0, s42 6901; GFX8-NEXT: v_mov_b32_e32 v1, s43 6902; GFX8-NEXT: v_mov_b32_e32 v5, s41 6903; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6904; GFX8-NEXT: s_nop 0 6905; GFX8-NEXT: v_mov_b32_e32 v2, s36 6906; GFX8-NEXT: s_add_u32 s36, s0, 0x80 6907; GFX8-NEXT: v_mov_b32_e32 v3, s37 6908; GFX8-NEXT: s_addc_u32 s37, s1, 0 6909; GFX8-NEXT: v_mov_b32_e32 v4, s36 6910; GFX8-NEXT: 
v_mov_b32_e32 v0, s38 6911; GFX8-NEXT: v_mov_b32_e32 v1, s39 6912; GFX8-NEXT: v_mov_b32_e32 v5, s37 6913; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6914; GFX8-NEXT: s_nop 0 6915; GFX8-NEXT: v_mov_b32_e32 v2, s30 6916; GFX8-NEXT: s_add_u32 s30, s0, 0x70 6917; GFX8-NEXT: v_mov_b32_e32 v3, s31 6918; GFX8-NEXT: s_addc_u32 s31, s1, 0 6919; GFX8-NEXT: v_mov_b32_e32 v4, s30 6920; GFX8-NEXT: v_mov_b32_e32 v0, s34 6921; GFX8-NEXT: v_mov_b32_e32 v1, s35 6922; GFX8-NEXT: v_mov_b32_e32 v5, s31 6923; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6924; GFX8-NEXT: s_nop 0 6925; GFX8-NEXT: v_mov_b32_e32 v2, s26 6926; GFX8-NEXT: s_add_u32 s26, s0, 0x60 6927; GFX8-NEXT: v_mov_b32_e32 v3, s27 6928; GFX8-NEXT: s_addc_u32 s27, s1, 0 6929; GFX8-NEXT: v_mov_b32_e32 v4, s26 6930; GFX8-NEXT: v_mov_b32_e32 v0, s28 6931; GFX8-NEXT: v_mov_b32_e32 v1, s29 6932; GFX8-NEXT: v_mov_b32_e32 v5, s27 6933; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6934; GFX8-NEXT: s_nop 0 6935; GFX8-NEXT: v_mov_b32_e32 v2, s22 6936; GFX8-NEXT: s_add_u32 s22, s0, 0x50 6937; GFX8-NEXT: v_mov_b32_e32 v3, s23 6938; GFX8-NEXT: s_addc_u32 s23, s1, 0 6939; GFX8-NEXT: v_mov_b32_e32 v4, s22 6940; GFX8-NEXT: v_mov_b32_e32 v0, s24 6941; GFX8-NEXT: v_mov_b32_e32 v1, s25 6942; GFX8-NEXT: v_mov_b32_e32 v5, s23 6943; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6944; GFX8-NEXT: s_nop 0 6945; GFX8-NEXT: v_mov_b32_e32 v2, s18 6946; GFX8-NEXT: s_add_u32 s18, s0, 64 6947; GFX8-NEXT: v_mov_b32_e32 v3, s19 6948; GFX8-NEXT: s_addc_u32 s19, s1, 0 6949; GFX8-NEXT: v_mov_b32_e32 v4, s18 6950; GFX8-NEXT: v_mov_b32_e32 v0, s20 6951; GFX8-NEXT: v_mov_b32_e32 v1, s21 6952; GFX8-NEXT: v_mov_b32_e32 v5, s19 6953; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6954; GFX8-NEXT: s_nop 0 6955; GFX8-NEXT: v_mov_b32_e32 v2, s14 6956; GFX8-NEXT: s_add_u32 s14, s0, 48 6957; GFX8-NEXT: v_mov_b32_e32 v3, s15 6958; GFX8-NEXT: s_addc_u32 s15, s1, 0 6959; GFX8-NEXT: v_mov_b32_e32 v4, s14 6960; GFX8-NEXT: v_mov_b32_e32 v0, s16 6961; GFX8-NEXT: v_mov_b32_e32 
v1, s17 6962; GFX8-NEXT: v_mov_b32_e32 v5, s15 6963; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6964; GFX8-NEXT: s_nop 0 6965; GFX8-NEXT: v_mov_b32_e32 v2, s10 6966; GFX8-NEXT: s_add_u32 s10, s0, 32 6967; GFX8-NEXT: v_mov_b32_e32 v3, s11 6968; GFX8-NEXT: s_addc_u32 s11, s1, 0 6969; GFX8-NEXT: v_mov_b32_e32 v4, s10 6970; GFX8-NEXT: v_mov_b32_e32 v0, s12 6971; GFX8-NEXT: v_mov_b32_e32 v1, s13 6972; GFX8-NEXT: v_mov_b32_e32 v5, s11 6973; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6974; GFX8-NEXT: s_nop 0 6975; GFX8-NEXT: v_mov_b32_e32 v2, s6 6976; GFX8-NEXT: s_add_u32 s6, s0, 16 6977; GFX8-NEXT: v_mov_b32_e32 v3, s7 6978; GFX8-NEXT: s_addc_u32 s7, s1, 0 6979; GFX8-NEXT: v_mov_b32_e32 v4, s6 6980; GFX8-NEXT: v_mov_b32_e32 v0, s8 6981; GFX8-NEXT: v_mov_b32_e32 v1, s9 6982; GFX8-NEXT: v_mov_b32_e32 v5, s7 6983; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6984; GFX8-NEXT: v_mov_b32_e32 v5, s1 6985; GFX8-NEXT: v_mov_b32_e32 v0, s4 6986; GFX8-NEXT: v_mov_b32_e32 v1, s5 6987; GFX8-NEXT: v_mov_b32_e32 v2, s2 6988; GFX8-NEXT: v_mov_b32_e32 v3, s3 6989; GFX8-NEXT: v_mov_b32_e32 v4, s0 6990; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6991; GFX8-NEXT: s_endpgm 6992; 6993; EG-LABEL: constant_sextload_v32i1_to_v32i64: 6994; EG: ; %bb.0: 6995; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[] 6996; EG-NEXT: TEX 0 @22 6997; EG-NEXT: ALU 92, @25, KC0[CB0:0-32], KC1[] 6998; EG-NEXT: ALU 65, @118, KC0[CB0:0-32], KC1[] 6999; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T42.X, 0 7000; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 0 7001; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T34.X, 0 7002; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T24.X, 0 7003; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T23.X, 0 7004; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T22.X, 0 7005; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T21.X, 0 7006; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T20.X, 0 7007; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T19.X, 0 7008; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T18.X, 0 7009; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T17.X, 0 7010; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T16.X, 0 7011; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T15.X, 0 7012; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T14.X, 0 7013; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T13.X, 0 7014; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T12.X, 1 7015; EG-NEXT: CF_END 7016; EG-NEXT: PAD 7017; EG-NEXT: Fetch clause starting at 22: 7018; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1 7019; EG-NEXT: ALU clause starting at 24: 7020; EG-NEXT: MOV * T11.X, KC0[2].Z, 7021; EG-NEXT: ALU clause starting at 25: 7022; EG-NEXT: LSHR T12.X, KC0[2].Y, literal.x, 7023; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7024; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7025; EG-NEXT: LSHR T13.X, PV.W, literal.x, 7026; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7027; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7028; EG-NEXT: LSHR T14.X, PV.W, literal.x, 7029; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7030; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7031; EG-NEXT: LSHR T15.X, PV.W, literal.x, 7032; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7033; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7034; EG-NEXT: LSHR T16.X, PV.W, literal.x, 7035; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7036; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7037; EG-NEXT: LSHR T17.X, PV.W, literal.x, 7038; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7039; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7040; EG-NEXT: LSHR T18.X, PV.W, literal.x, 7041; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7042; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7043; EG-NEXT: LSHR T19.X, PV.W, literal.x, 7044; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7045; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7046; EG-NEXT: LSHR T20.X, PV.W, literal.x, 7047; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7048; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7049; EG-NEXT: 
LSHR T21.X, PV.W, literal.x, 7050; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7051; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7052; EG-NEXT: LSHR T22.X, PV.W, literal.x, 7053; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7054; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7055; EG-NEXT: LSHR T23.X, PV.W, literal.x, 7056; EG-NEXT: LSHR T0.Y, T11.X, literal.y, 7057; EG-NEXT: LSHR T0.Z, T11.X, literal.z, 7058; EG-NEXT: LSHR * T0.W, T11.X, literal.w, 7059; EG-NEXT: 2(2.802597e-45), 28(3.923636e-44) 7060; EG-NEXT: 29(4.063766e-44), 24(3.363116e-44) 7061; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 7062; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 7063; EG-NEXT: LSHR T24.X, PV.W, literal.x, 7064; EG-NEXT: LSHR T1.Y, T11.X, literal.y, 7065; EG-NEXT: LSHR T1.Z, T11.X, literal.z, 7066; EG-NEXT: LSHR * T1.W, T11.X, literal.w, 7067; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44) 7068; EG-NEXT: 20(2.802597e-44), 21(2.942727e-44) 7069; EG-NEXT: LSHR * T2.W, T11.X, literal.x, 7070; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7071; EG-NEXT: BFE_INT T25.X, T11.X, 0.0, 1, 7072; EG-NEXT: LSHR T2.Y, T11.X, literal.x, 7073; EG-NEXT: ASHR T26.Z, T11.X, literal.y, 7074; EG-NEXT: LSHR T3.W, T11.X, literal.z, 7075; EG-NEXT: LSHR * T4.W, T11.X, literal.w, 7076; EG-NEXT: 17(2.382207e-44), 31(4.344025e-44) 7077; EG-NEXT: 27(3.783506e-44), 30(4.203895e-44) 7078; EG-NEXT: BFE_INT T26.X, PS, 0.0, 1, 7079; EG-NEXT: LSHR T3.Y, T11.X, literal.x, 7080; EG-NEXT: BFE_INT T27.Z, PV.W, 0.0, 1, 7081; EG-NEXT: LSHR T3.W, T11.X, literal.y, 7082; EG-NEXT: LSHR * T4.W, T11.X, literal.z, 7083; EG-NEXT: 12(1.681558e-44), 23(3.222986e-44) 7084; EG-NEXT: 26(3.643376e-44), 0(0.000000e+00) 7085; EG-NEXT: BFE_INT T27.X, PS, 0.0, 1, 7086; EG-NEXT: MOV T26.Y, PV.X, 7087; EG-NEXT: BFE_INT T28.Z, PV.W, 0.0, 1, 7088; EG-NEXT: LSHR T3.W, T11.X, literal.x, 7089; EG-NEXT: LSHR * T4.W, T11.X, literal.y, 7090; EG-NEXT: 19(2.662467e-44), 22(3.082857e-44) 7091; EG-NEXT: BFE_INT T28.X, PS, 0.0, 1, 7092; EG-NEXT: MOV T27.Y, 
PV.X, 7093; EG-NEXT: BFE_INT T29.Z, PV.W, 0.0, 1, 7094; EG-NEXT: LSHR T3.W, T11.X, literal.x, 7095; EG-NEXT: LSHR * T4.W, T11.X, literal.y, 7096; EG-NEXT: 15(2.101948e-44), 18(2.522337e-44) 7097; EG-NEXT: BFE_INT T29.X, PS, 0.0, 1, 7098; EG-NEXT: MOV T28.Y, PV.X, 7099; EG-NEXT: BFE_INT T30.Z, PV.W, 0.0, 1, 7100; EG-NEXT: LSHR T3.W, T11.X, literal.x, 7101; EG-NEXT: LSHR * T4.W, T11.X, literal.y, 7102; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44) 7103; EG-NEXT: BFE_INT T30.X, PS, 0.0, 1, 7104; EG-NEXT: MOV T29.Y, PV.X, 7105; EG-NEXT: BFE_INT T31.Z, PV.W, 0.0, 1, 7106; EG-NEXT: LSHR T3.W, T11.X, literal.x, 7107; EG-NEXT: LSHR * T4.W, T11.X, literal.y, 7108; EG-NEXT: 7(9.809089e-45), 10(1.401298e-44) 7109; EG-NEXT: BFE_INT T31.X, PS, 0.0, 1, 7110; EG-NEXT: MOV T30.Y, PV.X, 7111; EG-NEXT: BFE_INT T32.Z, PV.W, 0.0, 1, 7112; EG-NEXT: LSHR T3.W, T11.X, literal.x, 7113; EG-NEXT: LSHR * T4.W, T11.X, literal.y, 7114; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45) 7115; EG-NEXT: ALU clause starting at 118: 7116; EG-NEXT: BFE_INT T32.X, T4.W, 0.0, 1, 7117; EG-NEXT: MOV T31.Y, T31.X, 7118; EG-NEXT: BFE_INT T33.Z, T3.W, 0.0, 1, BS:VEC_120/SCL_212 7119; EG-NEXT: LSHR T3.W, T11.X, 1, BS:VEC_120/SCL_212 7120; EG-NEXT: LSHR * T4.W, T11.X, literal.x, 7121; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7122; EG-NEXT: BFE_INT T33.X, PS, 0.0, 1, 7123; EG-NEXT: MOV T32.Y, PV.X, 7124; EG-NEXT: BFE_INT T25.Z, PV.W, 0.0, 1, 7125; EG-NEXT: LSHR T3.W, T11.X, literal.x, 7126; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, 7127; EG-NEXT: 5(7.006492e-45), 208(2.914701e-43) 7128; EG-NEXT: LSHR T34.X, PS, literal.x, 7129; EG-NEXT: MOV T33.Y, PV.X, 7130; EG-NEXT: BFE_INT T35.Z, PV.W, 0.0, 1, 7131; EG-NEXT: LSHR T3.W, T11.X, literal.y, 7132; EG-NEXT: LSHR * T4.W, T11.X, literal.z, 7133; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 7134; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 7135; EG-NEXT: BFE_INT T35.X, PS, 0.0, 1, 7136; EG-NEXT: MOV T25.Y, T25.X, 7137; EG-NEXT: BFE_INT T11.Z, PV.W, 0.0, 1, 7138; EG-NEXT: LSHR 
T3.W, T11.X, literal.x, BS:VEC_120/SCL_212 7139; EG-NEXT: LSHR * T4.W, T11.X, literal.y, 7140; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44) 7141; EG-NEXT: BFE_INT T11.X, PS, 0.0, 1, 7142; EG-NEXT: MOV T35.Y, PV.X, 7143; EG-NEXT: BFE_INT T36.Z, PV.W, 0.0, 1, 7144; EG-NEXT: MOV T25.W, T25.Z, 7145; EG-NEXT: MOV * T33.W, T33.Z, 7146; EG-NEXT: BFE_INT T36.X, T3.Y, 0.0, 1, 7147; EG-NEXT: MOV T11.Y, PV.X, 7148; EG-NEXT: BFE_INT T37.Z, T2.Y, 0.0, 1, BS:VEC_120/SCL_212 7149; EG-NEXT: MOV T35.W, T35.Z, 7150; EG-NEXT: MOV * T32.W, T32.Z, 7151; EG-NEXT: BFE_INT T37.X, T2.W, 0.0, 1, 7152; EG-NEXT: MOV T36.Y, PV.X, 7153; EG-NEXT: BFE_INT T38.Z, T1.W, 0.0, 1, BS:VEC_120/SCL_212 7154; EG-NEXT: MOV T11.W, T11.Z, 7155; EG-NEXT: MOV * T31.W, T31.Z, 7156; EG-NEXT: BFE_INT T38.X, T1.Z, 0.0, 1, 7157; EG-NEXT: MOV T37.Y, PV.X, 7158; EG-NEXT: BFE_INT T39.Z, T1.Y, 0.0, 1, 7159; EG-NEXT: MOV T36.W, T36.Z, BS:VEC_120/SCL_212 7160; EG-NEXT: MOV * T30.W, T30.Z, 7161; EG-NEXT: BFE_INT T39.X, T0.W, 0.0, 1, 7162; EG-NEXT: MOV T38.Y, PV.X, 7163; EG-NEXT: BFE_INT T40.Z, T0.Z, 0.0, 1, 7164; EG-NEXT: MOV T37.W, T37.Z, BS:VEC_120/SCL_212 7165; EG-NEXT: MOV * T29.W, T29.Z, 7166; EG-NEXT: BFE_INT T40.X, T0.Y, 0.0, 1, 7167; EG-NEXT: MOV T39.Y, PV.X, 7168; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 7169; EG-NEXT: MOV T38.W, T38.Z, 7170; EG-NEXT: MOV * T28.W, T28.Z, 7171; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 7172; EG-NEXT: LSHR T41.X, PV.Z, literal.x, 7173; EG-NEXT: MOV T40.Y, PV.X, 7174; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 7175; EG-NEXT: MOV T39.W, T39.Z, 7176; EG-NEXT: MOV * T27.W, T27.Z, 7177; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7178; EG-NEXT: LSHR T42.X, PV.Z, literal.x, 7179; EG-NEXT: MOV T40.W, T40.Z, 7180; EG-NEXT: MOV * T26.W, T26.Z, 7181; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7182; 7183; GFX12-LABEL: constant_sextload_v32i1_to_v32i64: 7184; GFX12: ; %bb.0: 7185; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7186; GFX12-NEXT: s_wait_kmcnt 0x0 7187; GFX12-NEXT: s_load_b32 
s2, s[2:3], 0x0 7188; GFX12-NEXT: s_wait_kmcnt 0x0 7189; GFX12-NEXT: s_lshr_b32 s34, s2, 30 7190; GFX12-NEXT: s_lshr_b32 s36, s2, 31 7191; GFX12-NEXT: s_lshr_b32 s38, s2, 28 7192; GFX12-NEXT: s_lshr_b32 s40, s2, 29 7193; GFX12-NEXT: s_lshr_b32 s42, s2, 26 7194; GFX12-NEXT: s_lshr_b32 s44, s2, 27 7195; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 7196; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 7197; GFX12-NEXT: s_lshr_b32 s46, s2, 24 7198; GFX12-NEXT: s_lshr_b32 s48, s2, 25 7199; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 7200; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 7201; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35 7202; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 7203; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 7204; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s37 7205; GFX12-NEXT: v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v5, s39 7206; GFX12-NEXT: s_lshr_b32 s26, s2, 22 7207; GFX12-NEXT: s_lshr_b32 s50, s2, 23 7208; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 7209; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 7210; GFX12-NEXT: v_dual_mov_b32 v4, s38 :: v_dual_mov_b32 v7, s41 7211; GFX12-NEXT: v_dual_mov_b32 v6, s40 :: v_dual_mov_b32 v9, s43 7212; GFX12-NEXT: s_lshr_b32 s52, s2, 20 7213; GFX12-NEXT: s_lshr_b32 s54, s2, 21 7214; GFX12-NEXT: v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45 7215; GFX12-NEXT: v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s47 7216; GFX12-NEXT: s_lshr_b32 s56, s2, 18 7217; GFX12-NEXT: s_lshr_b32 s58, s2, 19 7218; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 7219; GFX12-NEXT: v_dual_mov_b32 v12, s46 :: v_dual_mov_b32 v15, s49 7220; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 7221; GFX12-NEXT: v_mov_b32_e32 v14, s48 7222; GFX12-NEXT: s_lshr_b32 s60, s2, 16 7223; GFX12-NEXT: s_lshr_b32 s62, s2, 17 7224; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 7225; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 7226; GFX12-NEXT: s_lshr_b32 s64, s2, 
14 7227; GFX12-NEXT: s_lshr_b32 s66, s2, 15 7228; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 7229; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 7230; GFX12-NEXT: s_clause 0x3 7231; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240 7232; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224 7233; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208 7234; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192 7235; GFX12-NEXT: v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v0, s26 7236; GFX12-NEXT: v_dual_mov_b32 v3, s51 :: v_dual_mov_b32 v2, s50 7237; GFX12-NEXT: v_mov_b32_e32 v5, s53 7238; GFX12-NEXT: s_lshr_b32 s30, s2, 12 7239; GFX12-NEXT: s_lshr_b32 s28, s2, 13 7240; GFX12-NEXT: s_lshr_b32 s24, s2, 10 7241; GFX12-NEXT: s_lshr_b32 s22, s2, 11 7242; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 7243; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 7244; GFX12-NEXT: v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s55 7245; GFX12-NEXT: v_dual_mov_b32 v6, s54 :: v_dual_mov_b32 v9, s57 7246; GFX12-NEXT: s_lshr_b32 s20, s2, 8 7247; GFX12-NEXT: s_lshr_b32 s18, s2, 9 7248; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 7249; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 7250; GFX12-NEXT: v_dual_mov_b32 v8, s56 :: v_dual_mov_b32 v11, s59 7251; GFX12-NEXT: v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s61 7252; GFX12-NEXT: s_lshr_b32 s16, s2, 6 7253; GFX12-NEXT: s_lshr_b32 s14, s2, 7 7254; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 7255; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 7256; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 7257; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 7258; GFX12-NEXT: v_dual_mov_b32 v12, s60 :: v_dual_mov_b32 v15, s63 7259; GFX12-NEXT: v_dual_mov_b32 v14, s62 :: v_dual_mov_b32 v17, s65 7260; GFX12-NEXT: s_lshr_b32 s12, s2, 4 7261; GFX12-NEXT: s_lshr_b32 s10, s2, 5 7262; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 7263; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 
0x10000 7264; GFX12-NEXT: v_dual_mov_b32 v16, s64 :: v_dual_mov_b32 v19, s67 7265; GFX12-NEXT: v_dual_mov_b32 v18, s66 :: v_dual_mov_b32 v21, s31 7266; GFX12-NEXT: s_lshr_b32 s8, s2, 2 7267; GFX12-NEXT: s_lshr_b32 s6, s2, 3 7268; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 7269; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 7270; GFX12-NEXT: v_dual_mov_b32 v20, s30 :: v_dual_mov_b32 v23, s29 7271; GFX12-NEXT: v_mov_b32_e32 v22, s28 7272; GFX12-NEXT: s_clause 0x5 7273; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176 7274; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160 7275; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144 7276; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128 7277; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112 7278; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96 7279; GFX12-NEXT: v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v0, s24 7280; GFX12-NEXT: v_dual_mov_b32 v3, s23 :: v_dual_mov_b32 v2, s22 7281; GFX12-NEXT: v_mov_b32_e32 v5, s21 7282; GFX12-NEXT: s_lshr_b32 s68, s2, 1 7283; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 7284; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 7285; GFX12-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v7, s19 7286; GFX12-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v9, s17 7287; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 7288; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 7289; GFX12-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v11, s15 7290; GFX12-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v13, s13 7291; GFX12-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000 7292; GFX12-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000 7293; GFX12-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s11 7294; GFX12-NEXT: v_dual_mov_b32 v14, s10 :: v_dual_mov_b32 v17, s9 7295; GFX12-NEXT: v_dual_mov_b32 v16, s8 :: v_dual_mov_b32 v19, s7 7296; GFX12-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v21, s5 7297; GFX12-NEXT: v_dual_mov_b32 
v20, s4 :: v_dual_mov_b32 v23, s3 7298; GFX12-NEXT: v_mov_b32_e32 v22, s2 7299; GFX12-NEXT: s_clause 0x5 7300; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80 7301; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64 7302; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48 7303; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32 7304; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16 7305; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] 7306; GFX12-NEXT: s_endpgm 7307 %load = load <32 x i1>, ptr addrspace(4) %in 7308 %ext = sext <32 x i1> %load to <32 x i64> 7309 store <32 x i64> %ext, ptr addrspace(1) %out 7310 ret void 7311} 7312 7313define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 7314; GFX6-LABEL: constant_zextload_v64i1_to_v64i64: 7315; GFX6: ; %bb.0: 7316; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 7317; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7318; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 7319; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7320; GFX6-NEXT: s_bfe_u32 s4, s2, 0x10003 7321; GFX6-NEXT: s_bfe_u32 s5, s2, 0x10005 7322; GFX6-NEXT: s_bfe_u32 s8, s2, 0x10007 7323; GFX6-NEXT: s_bfe_u32 s11, s2, 0x10009 7324; GFX6-NEXT: s_bfe_u32 s13, s2, 0x1000b 7325; GFX6-NEXT: s_bfe_u32 s15, s2, 0x1000d 7326; GFX6-NEXT: s_bfe_u32 s17, s2, 0x1000f 7327; GFX6-NEXT: s_bfe_u32 s19, s2, 0x10011 7328; GFX6-NEXT: s_bfe_u32 s21, s2, 0x10013 7329; GFX6-NEXT: s_bfe_u32 s23, s2, 0x10015 7330; GFX6-NEXT: s_bfe_u32 s25, s2, 0x10017 7331; GFX6-NEXT: s_bfe_u32 s27, s2, 0x10019 7332; GFX6-NEXT: s_bfe_u32 s29, s2, 0x1001b 7333; GFX6-NEXT: s_bfe_u32 s31, s2, 0x1001d 7334; GFX6-NEXT: s_lshr_b32 s34, s2, 31 7335; GFX6-NEXT: s_bfe_u32 s35, s3, 0x10003 7336; GFX6-NEXT: s_bfe_u32 s36, s3, 0x10005 7337; GFX6-NEXT: s_bfe_u32 s37, s3, 0x10007 7338; GFX6-NEXT: s_bfe_u32 s38, s3, 0x10009 7339; GFX6-NEXT: s_bfe_u32 s39, s3, 0x1000b 7340; GFX6-NEXT: s_bfe_u32 s40, s3, 0x1000d 7341; 
GFX6-NEXT: s_bfe_u32 s41, s3, 0x1000f 7342; GFX6-NEXT: s_bfe_u32 s42, s3, 0x10011 7343; GFX6-NEXT: s_bfe_u32 s43, s3, 0x10013 7344; GFX6-NEXT: s_bfe_u32 s44, s3, 0x10015 7345; GFX6-NEXT: s_bfe_u32 s45, s3, 0x10017 7346; GFX6-NEXT: s_bfe_u32 s46, s3, 0x10019 7347; GFX6-NEXT: s_bfe_u32 s47, s3, 0x1001b 7348; GFX6-NEXT: s_bfe_u32 s48, s3, 0x1001d 7349; GFX6-NEXT: s_lshr_b32 s49, s3, 31 7350; GFX6-NEXT: s_bfe_u32 s9, s3, 0x10001 7351; GFX6-NEXT: s_bfe_u32 s6, s2, 0x10001 7352; GFX6-NEXT: s_and_b32 s7, s2, 1 7353; GFX6-NEXT: s_and_b32 s10, s3, 1 7354; GFX6-NEXT: s_bfe_u32 s12, s2, 0x10002 7355; GFX6-NEXT: s_bfe_u32 s14, s2, 0x10004 7356; GFX6-NEXT: s_bfe_u32 s16, s2, 0x10006 7357; GFX6-NEXT: s_bfe_u32 s18, s2, 0x10008 7358; GFX6-NEXT: s_bfe_u32 s20, s2, 0x1000a 7359; GFX6-NEXT: s_bfe_u32 s22, s2, 0x1000c 7360; GFX6-NEXT: s_bfe_u32 s24, s2, 0x1000e 7361; GFX6-NEXT: s_bfe_u32 s26, s2, 0x10010 7362; GFX6-NEXT: s_bfe_u32 s28, s2, 0x10012 7363; GFX6-NEXT: s_bfe_u32 s30, s2, 0x10014 7364; GFX6-NEXT: s_bfe_u32 s33, s2, 0x10016 7365; GFX6-NEXT: s_bfe_u32 s50, s2, 0x10018 7366; GFX6-NEXT: s_bfe_u32 s51, s2, 0x1001a 7367; GFX6-NEXT: s_bfe_u32 s52, s2, 0x1001c 7368; GFX6-NEXT: s_bfe_u32 s53, s2, 0x1001e 7369; GFX6-NEXT: s_bfe_u32 s54, s3, 0x10002 7370; GFX6-NEXT: s_bfe_u32 s55, s3, 0x10004 7371; GFX6-NEXT: s_bfe_u32 s56, s3, 0x10006 7372; GFX6-NEXT: s_bfe_u32 s57, s3, 0x10008 7373; GFX6-NEXT: s_bfe_u32 s58, s3, 0x1000a 7374; GFX6-NEXT: s_bfe_u32 s59, s3, 0x1000c 7375; GFX6-NEXT: s_bfe_u32 s60, s3, 0x1000e 7376; GFX6-NEXT: s_bfe_u32 s61, s3, 0x10010 7377; GFX6-NEXT: s_bfe_u32 s62, s3, 0x10012 7378; GFX6-NEXT: s_bfe_u32 s63, s3, 0x10014 7379; GFX6-NEXT: s_bfe_u32 s64, s3, 0x10016 7380; GFX6-NEXT: s_bfe_u32 s65, s3, 0x10018 7381; GFX6-NEXT: s_bfe_u32 s66, s3, 0x1001a 7382; GFX6-NEXT: s_bfe_u32 s67, s3, 0x1001e 7383; GFX6-NEXT: s_bfe_u32 s68, s3, 0x1001c 7384; GFX6-NEXT: s_mov_b32 s3, 0xf000 7385; GFX6-NEXT: v_mov_b32_e32 v1, 0 7386; GFX6-NEXT: s_mov_b32 s2, -1 7387; GFX6-NEXT: 
v_mov_b32_e32 v3, v1 7388; GFX6-NEXT: v_mov_b32_e32 v0, s67 7389; GFX6-NEXT: v_mov_b32_e32 v2, s49 7390; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:496 7391; GFX6-NEXT: s_waitcnt expcnt(0) 7392; GFX6-NEXT: v_mov_b32_e32 v0, s68 7393; GFX6-NEXT: v_mov_b32_e32 v2, s48 7394; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:480 7395; GFX6-NEXT: s_waitcnt expcnt(0) 7396; GFX6-NEXT: v_mov_b32_e32 v0, s66 7397; GFX6-NEXT: v_mov_b32_e32 v2, s47 7398; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:464 7399; GFX6-NEXT: s_waitcnt expcnt(0) 7400; GFX6-NEXT: v_mov_b32_e32 v0, s65 7401; GFX6-NEXT: v_mov_b32_e32 v2, s46 7402; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:448 7403; GFX6-NEXT: s_waitcnt expcnt(0) 7404; GFX6-NEXT: v_mov_b32_e32 v0, s64 7405; GFX6-NEXT: v_mov_b32_e32 v2, s45 7406; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432 7407; GFX6-NEXT: s_waitcnt expcnt(0) 7408; GFX6-NEXT: v_mov_b32_e32 v0, s63 7409; GFX6-NEXT: v_mov_b32_e32 v2, s44 7410; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:416 7411; GFX6-NEXT: s_waitcnt expcnt(0) 7412; GFX6-NEXT: v_mov_b32_e32 v0, s62 7413; GFX6-NEXT: v_mov_b32_e32 v2, s43 7414; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400 7415; GFX6-NEXT: s_waitcnt expcnt(0) 7416; GFX6-NEXT: v_mov_b32_e32 v0, s61 7417; GFX6-NEXT: v_mov_b32_e32 v2, s42 7418; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384 7419; GFX6-NEXT: s_waitcnt expcnt(0) 7420; GFX6-NEXT: v_mov_b32_e32 v0, s60 7421; GFX6-NEXT: v_mov_b32_e32 v2, s41 7422; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368 7423; GFX6-NEXT: s_waitcnt expcnt(0) 7424; GFX6-NEXT: v_mov_b32_e32 v0, s59 7425; GFX6-NEXT: v_mov_b32_e32 v2, s40 7426; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352 7427; GFX6-NEXT: s_waitcnt expcnt(0) 7428; GFX6-NEXT: v_mov_b32_e32 v0, s58 7429; GFX6-NEXT: v_mov_b32_e32 v2, s39 7430; GFX6-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336 7431; GFX6-NEXT: s_waitcnt expcnt(0) 7432; GFX6-NEXT: v_mov_b32_e32 v0, s57 7433; GFX6-NEXT: v_mov_b32_e32 v2, s38 7434; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320 7435; GFX6-NEXT: s_waitcnt expcnt(0) 7436; GFX6-NEXT: v_mov_b32_e32 v0, s56 7437; GFX6-NEXT: v_mov_b32_e32 v2, s37 7438; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304 7439; GFX6-NEXT: s_waitcnt expcnt(0) 7440; GFX6-NEXT: v_mov_b32_e32 v0, s55 7441; GFX6-NEXT: v_mov_b32_e32 v2, s36 7442; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288 7443; GFX6-NEXT: s_waitcnt expcnt(0) 7444; GFX6-NEXT: v_mov_b32_e32 v0, s54 7445; GFX6-NEXT: v_mov_b32_e32 v2, s35 7446; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272 7447; GFX6-NEXT: s_waitcnt expcnt(0) 7448; GFX6-NEXT: v_mov_b32_e32 v0, s53 7449; GFX6-NEXT: v_mov_b32_e32 v2, s34 7450; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 7451; GFX6-NEXT: s_waitcnt expcnt(0) 7452; GFX6-NEXT: v_mov_b32_e32 v0, s52 7453; GFX6-NEXT: v_mov_b32_e32 v2, s31 7454; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 7455; GFX6-NEXT: s_waitcnt expcnt(0) 7456; GFX6-NEXT: v_mov_b32_e32 v0, s51 7457; GFX6-NEXT: v_mov_b32_e32 v2, s29 7458; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 7459; GFX6-NEXT: s_waitcnt expcnt(0) 7460; GFX6-NEXT: v_mov_b32_e32 v0, s50 7461; GFX6-NEXT: v_mov_b32_e32 v2, s27 7462; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 7463; GFX6-NEXT: s_waitcnt expcnt(0) 7464; GFX6-NEXT: v_mov_b32_e32 v0, s33 7465; GFX6-NEXT: v_mov_b32_e32 v2, s25 7466; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 7467; GFX6-NEXT: s_waitcnt expcnt(0) 7468; GFX6-NEXT: v_mov_b32_e32 v0, s30 7469; GFX6-NEXT: v_mov_b32_e32 v2, s23 7470; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 7471; GFX6-NEXT: s_waitcnt expcnt(0) 7472; GFX6-NEXT: v_mov_b32_e32 v0, 
s28 7473; GFX6-NEXT: v_mov_b32_e32 v2, s21 7474; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 7475; GFX6-NEXT: s_waitcnt expcnt(0) 7476; GFX6-NEXT: v_mov_b32_e32 v0, s26 7477; GFX6-NEXT: v_mov_b32_e32 v2, s19 7478; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 7479; GFX6-NEXT: s_waitcnt expcnt(0) 7480; GFX6-NEXT: v_mov_b32_e32 v0, s24 7481; GFX6-NEXT: v_mov_b32_e32 v2, s17 7482; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 7483; GFX6-NEXT: s_waitcnt expcnt(0) 7484; GFX6-NEXT: v_mov_b32_e32 v0, s22 7485; GFX6-NEXT: v_mov_b32_e32 v2, s15 7486; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 7487; GFX6-NEXT: s_waitcnt expcnt(0) 7488; GFX6-NEXT: v_mov_b32_e32 v0, s20 7489; GFX6-NEXT: v_mov_b32_e32 v2, s13 7490; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 7491; GFX6-NEXT: s_waitcnt expcnt(0) 7492; GFX6-NEXT: v_mov_b32_e32 v0, s18 7493; GFX6-NEXT: v_mov_b32_e32 v2, s11 7494; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 7495; GFX6-NEXT: s_waitcnt expcnt(0) 7496; GFX6-NEXT: v_mov_b32_e32 v0, s16 7497; GFX6-NEXT: v_mov_b32_e32 v2, s8 7498; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 7499; GFX6-NEXT: s_waitcnt expcnt(0) 7500; GFX6-NEXT: v_mov_b32_e32 v0, s14 7501; GFX6-NEXT: v_mov_b32_e32 v2, s5 7502; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 7503; GFX6-NEXT: s_waitcnt expcnt(0) 7504; GFX6-NEXT: v_mov_b32_e32 v0, s12 7505; GFX6-NEXT: v_mov_b32_e32 v2, s4 7506; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7507; GFX6-NEXT: s_waitcnt expcnt(0) 7508; GFX6-NEXT: v_mov_b32_e32 v0, s10 7509; GFX6-NEXT: v_mov_b32_e32 v2, s9 7510; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256 7511; GFX6-NEXT: s_waitcnt expcnt(0) 7512; GFX6-NEXT: v_mov_b32_e32 v0, s7 7513; GFX6-NEXT: v_mov_b32_e32 v2, s6 7514; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 7515; GFX6-NEXT: s_endpgm 7516; 7517; 
GFX8-LABEL: constant_zextload_v64i1_to_v64i64: 7518; GFX8: ; %bb.0: 7519; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7520; GFX8-NEXT: v_mov_b32_e32 v1, 0 7521; GFX8-NEXT: v_mov_b32_e32 v3, v1 7522; GFX8-NEXT: s_waitcnt lgkmcnt(0) 7523; GFX8-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x0 7524; GFX8-NEXT: s_waitcnt lgkmcnt(0) 7525; GFX8-NEXT: s_lshr_b32 s44, s43, 31 7526; GFX8-NEXT: s_bfe_u32 s45, s43, 0x1001d 7527; GFX8-NEXT: s_bfe_u32 s46, s43, 0x1001b 7528; GFX8-NEXT: s_bfe_u32 s47, s43, 0x10019 7529; GFX8-NEXT: s_bfe_u32 s48, s43, 0x10017 7530; GFX8-NEXT: s_bfe_u32 s49, s43, 0x10013 7531; GFX8-NEXT: s_bfe_u32 s50, s43, 0x10011 7532; GFX8-NEXT: s_bfe_u32 s51, s43, 0x1000f 7533; GFX8-NEXT: s_bfe_u32 s52, s43, 0x1000d 7534; GFX8-NEXT: s_bfe_u32 s53, s43, 0x1000b 7535; GFX8-NEXT: s_bfe_u32 s40, s43, 0x10009 7536; GFX8-NEXT: s_bfe_u32 s38, s43, 0x10007 7537; GFX8-NEXT: s_bfe_u32 s37, s43, 0x10005 7538; GFX8-NEXT: s_bfe_u32 s35, s43, 0x10003 7539; GFX8-NEXT: s_bfe_u32 s33, s43, 0x10001 7540; GFX8-NEXT: s_lshr_b32 s30, s42, 31 7541; GFX8-NEXT: s_bfe_u32 s28, s42, 0x1001d 7542; GFX8-NEXT: s_bfe_u32 s26, s42, 0x1001b 7543; GFX8-NEXT: s_bfe_u32 s25, s42, 0x10019 7544; GFX8-NEXT: s_bfe_u32 s22, s42, 0x10017 7545; GFX8-NEXT: s_bfe_u32 s19, s42, 0x10013 7546; GFX8-NEXT: s_bfe_u32 s17, s42, 0x10011 7547; GFX8-NEXT: s_bfe_u32 s15, s42, 0x1000f 7548; GFX8-NEXT: s_bfe_u32 s13, s42, 0x1000d 7549; GFX8-NEXT: s_bfe_u32 s12, s42, 0x1000b 7550; GFX8-NEXT: s_bfe_u32 s10, s42, 0x10009 7551; GFX8-NEXT: s_bfe_u32 s8, s42, 0x10007 7552; GFX8-NEXT: s_bfe_u32 s6, s42, 0x10005 7553; GFX8-NEXT: s_bfe_u32 s4, s42, 0x10003 7554; GFX8-NEXT: s_bfe_u32 s2, s42, 0x10001 7555; GFX8-NEXT: s_and_b32 s3, s42, 1 7556; GFX8-NEXT: s_bfe_u32 s5, s42, 0x10002 7557; GFX8-NEXT: s_bfe_u32 s7, s42, 0x10004 7558; GFX8-NEXT: s_bfe_u32 s9, s42, 0x10006 7559; GFX8-NEXT: s_bfe_u32 s11, s42, 0x10008 7560; GFX8-NEXT: s_bfe_u32 s14, s42, 0x1000a 7561; GFX8-NEXT: s_bfe_u32 s16, s42, 0x1000c 7562; GFX8-NEXT: s_bfe_u32 
s18, s42, 0x1000e 7563; GFX8-NEXT: s_bfe_u32 s20, s42, 0x10010 7564; GFX8-NEXT: s_bfe_u32 s21, s42, 0x10012 7565; GFX8-NEXT: s_bfe_u32 s23, s42, 0x10014 7566; GFX8-NEXT: s_bfe_u32 s24, s42, 0x10015 7567; GFX8-NEXT: s_bfe_u32 s27, s42, 0x10016 7568; GFX8-NEXT: s_bfe_u32 s29, s42, 0x10018 7569; GFX8-NEXT: s_bfe_u32 s31, s42, 0x1001a 7570; GFX8-NEXT: s_bfe_u32 s34, s42, 0x1001c 7571; GFX8-NEXT: s_bfe_u32 s36, s42, 0x1001e 7572; GFX8-NEXT: s_and_b32 s39, s43, 1 7573; GFX8-NEXT: s_bfe_u32 s41, s43, 0x10002 7574; GFX8-NEXT: s_bfe_u32 s54, s43, 0x10004 7575; GFX8-NEXT: s_bfe_u32 s55, s43, 0x10006 7576; GFX8-NEXT: s_bfe_u32 s56, s43, 0x10008 7577; GFX8-NEXT: s_bfe_u32 s57, s43, 0x1000a 7578; GFX8-NEXT: s_bfe_u32 s58, s43, 0x1000c 7579; GFX8-NEXT: s_bfe_u32 s59, s43, 0x1000e 7580; GFX8-NEXT: s_bfe_u32 s60, s43, 0x10010 7581; GFX8-NEXT: s_bfe_u32 s61, s43, 0x10012 7582; GFX8-NEXT: s_bfe_u32 s62, s43, 0x10016 7583; GFX8-NEXT: s_bfe_u32 s63, s43, 0x10018 7584; GFX8-NEXT: s_bfe_u32 s64, s43, 0x1001a 7585; GFX8-NEXT: s_bfe_u32 s65, s43, 0x1001c 7586; GFX8-NEXT: s_bfe_u32 s66, s43, 0x1001e 7587; GFX8-NEXT: s_bfe_u32 s42, s43, 0x10015 7588; GFX8-NEXT: s_bfe_u32 s43, s43, 0x10014 7589; GFX8-NEXT: v_mov_b32_e32 v2, s42 7590; GFX8-NEXT: s_add_u32 s42, s0, 0x1a0 7591; GFX8-NEXT: v_mov_b32_e32 v0, s43 7592; GFX8-NEXT: s_addc_u32 s43, s1, 0 7593; GFX8-NEXT: v_mov_b32_e32 v4, s42 7594; GFX8-NEXT: v_mov_b32_e32 v5, s43 7595; GFX8-NEXT: s_add_u32 s42, s0, 0x1f0 7596; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7597; GFX8-NEXT: s_addc_u32 s43, s1, 0 7598; GFX8-NEXT: v_mov_b32_e32 v4, s42 7599; GFX8-NEXT: v_mov_b32_e32 v0, s66 7600; GFX8-NEXT: v_mov_b32_e32 v2, s44 7601; GFX8-NEXT: v_mov_b32_e32 v5, s43 7602; GFX8-NEXT: s_add_u32 s42, s0, 0x1e0 7603; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7604; GFX8-NEXT: s_addc_u32 s43, s1, 0 7605; GFX8-NEXT: v_mov_b32_e32 v4, s42 7606; GFX8-NEXT: v_mov_b32_e32 v0, s65 7607; GFX8-NEXT: v_mov_b32_e32 v2, s45 7608; GFX8-NEXT: v_mov_b32_e32 v5, s43 
7609; GFX8-NEXT: s_add_u32 s42, s0, 0x1d0 7610; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7611; GFX8-NEXT: s_addc_u32 s43, s1, 0 7612; GFX8-NEXT: v_mov_b32_e32 v4, s42 7613; GFX8-NEXT: v_mov_b32_e32 v0, s64 7614; GFX8-NEXT: v_mov_b32_e32 v2, s46 7615; GFX8-NEXT: v_mov_b32_e32 v5, s43 7616; GFX8-NEXT: s_add_u32 s42, s0, 0x1c0 7617; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7618; GFX8-NEXT: s_addc_u32 s43, s1, 0 7619; GFX8-NEXT: v_mov_b32_e32 v4, s42 7620; GFX8-NEXT: v_mov_b32_e32 v0, s63 7621; GFX8-NEXT: v_mov_b32_e32 v2, s47 7622; GFX8-NEXT: v_mov_b32_e32 v5, s43 7623; GFX8-NEXT: s_add_u32 s42, s0, 0x1b0 7624; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7625; GFX8-NEXT: s_addc_u32 s43, s1, 0 7626; GFX8-NEXT: v_mov_b32_e32 v4, s42 7627; GFX8-NEXT: v_mov_b32_e32 v0, s62 7628; GFX8-NEXT: v_mov_b32_e32 v2, s48 7629; GFX8-NEXT: v_mov_b32_e32 v5, s43 7630; GFX8-NEXT: s_add_u32 s42, s0, 0x190 7631; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7632; GFX8-NEXT: s_addc_u32 s43, s1, 0 7633; GFX8-NEXT: v_mov_b32_e32 v4, s42 7634; GFX8-NEXT: v_mov_b32_e32 v0, s61 7635; GFX8-NEXT: v_mov_b32_e32 v2, s49 7636; GFX8-NEXT: v_mov_b32_e32 v5, s43 7637; GFX8-NEXT: s_add_u32 s42, s0, 0x180 7638; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7639; GFX8-NEXT: s_addc_u32 s43, s1, 0 7640; GFX8-NEXT: v_mov_b32_e32 v4, s42 7641; GFX8-NEXT: v_mov_b32_e32 v0, s60 7642; GFX8-NEXT: v_mov_b32_e32 v2, s50 7643; GFX8-NEXT: v_mov_b32_e32 v5, s43 7644; GFX8-NEXT: s_add_u32 s42, s0, 0x170 7645; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7646; GFX8-NEXT: s_addc_u32 s43, s1, 0 7647; GFX8-NEXT: v_mov_b32_e32 v4, s42 7648; GFX8-NEXT: v_mov_b32_e32 v0, s59 7649; GFX8-NEXT: v_mov_b32_e32 v2, s51 7650; GFX8-NEXT: v_mov_b32_e32 v5, s43 7651; GFX8-NEXT: s_add_u32 s42, s0, 0x160 7652; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7653; GFX8-NEXT: s_addc_u32 s43, s1, 0 7654; GFX8-NEXT: v_mov_b32_e32 v4, s42 7655; GFX8-NEXT: v_mov_b32_e32 v0, s58 7656; GFX8-NEXT: v_mov_b32_e32 v2, s52 7657; GFX8-NEXT: 
v_mov_b32_e32 v5, s43 7658; GFX8-NEXT: s_add_u32 s42, s0, 0x150 7659; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7660; GFX8-NEXT: s_addc_u32 s43, s1, 0 7661; GFX8-NEXT: v_mov_b32_e32 v4, s42 7662; GFX8-NEXT: v_mov_b32_e32 v0, s57 7663; GFX8-NEXT: v_mov_b32_e32 v2, s53 7664; GFX8-NEXT: v_mov_b32_e32 v5, s43 7665; GFX8-NEXT: s_add_u32 s42, s0, 0x140 7666; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7667; GFX8-NEXT: s_addc_u32 s43, s1, 0 7668; GFX8-NEXT: v_mov_b32_e32 v4, s42 7669; GFX8-NEXT: v_mov_b32_e32 v0, s56 7670; GFX8-NEXT: v_mov_b32_e32 v2, s40 7671; GFX8-NEXT: v_mov_b32_e32 v5, s43 7672; GFX8-NEXT: s_add_u32 s42, s0, 0x130 7673; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7674; GFX8-NEXT: s_addc_u32 s43, s1, 0 7675; GFX8-NEXT: v_mov_b32_e32 v4, s42 7676; GFX8-NEXT: v_mov_b32_e32 v0, s55 7677; GFX8-NEXT: v_mov_b32_e32 v2, s38 7678; GFX8-NEXT: v_mov_b32_e32 v5, s43 7679; GFX8-NEXT: s_add_u32 s42, s0, 0x120 7680; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7681; GFX8-NEXT: s_addc_u32 s43, s1, 0 7682; GFX8-NEXT: v_mov_b32_e32 v4, s42 7683; GFX8-NEXT: v_mov_b32_e32 v0, s54 7684; GFX8-NEXT: v_mov_b32_e32 v2, s37 7685; GFX8-NEXT: v_mov_b32_e32 v5, s43 7686; GFX8-NEXT: s_add_u32 s40, s0, 0x110 7687; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7688; GFX8-NEXT: s_nop 0 7689; GFX8-NEXT: v_mov_b32_e32 v0, s41 7690; GFX8-NEXT: s_addc_u32 s41, s1, 0 7691; GFX8-NEXT: v_mov_b32_e32 v4, s40 7692; GFX8-NEXT: v_mov_b32_e32 v2, s35 7693; GFX8-NEXT: v_mov_b32_e32 v5, s41 7694; GFX8-NEXT: s_add_u32 s38, s0, 0x100 7695; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7696; GFX8-NEXT: s_nop 0 7697; GFX8-NEXT: v_mov_b32_e32 v0, s39 7698; GFX8-NEXT: s_addc_u32 s39, s1, 0 7699; GFX8-NEXT: v_mov_b32_e32 v4, s38 7700; GFX8-NEXT: v_mov_b32_e32 v2, s33 7701; GFX8-NEXT: v_mov_b32_e32 v5, s39 7702; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7703; GFX8-NEXT: s_nop 0 7704; GFX8-NEXT: v_mov_b32_e32 v0, s36 7705; GFX8-NEXT: s_add_u32 s36, s0, 0xf0 7706; GFX8-NEXT: s_addc_u32 s37, s1, 0 
7707; GFX8-NEXT: v_mov_b32_e32 v4, s36 7708; GFX8-NEXT: v_mov_b32_e32 v2, s30 7709; GFX8-NEXT: v_mov_b32_e32 v5, s37 7710; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7711; GFX8-NEXT: s_nop 0 7712; GFX8-NEXT: v_mov_b32_e32 v0, s34 7713; GFX8-NEXT: s_add_u32 s34, s0, 0xe0 7714; GFX8-NEXT: s_addc_u32 s35, s1, 0 7715; GFX8-NEXT: v_mov_b32_e32 v4, s34 7716; GFX8-NEXT: v_mov_b32_e32 v2, s28 7717; GFX8-NEXT: v_mov_b32_e32 v5, s35 7718; GFX8-NEXT: s_add_u32 s30, s0, 0xd0 7719; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7720; GFX8-NEXT: s_nop 0 7721; GFX8-NEXT: v_mov_b32_e32 v0, s31 7722; GFX8-NEXT: s_addc_u32 s31, s1, 0 7723; GFX8-NEXT: v_mov_b32_e32 v4, s30 7724; GFX8-NEXT: v_mov_b32_e32 v2, s26 7725; GFX8-NEXT: v_mov_b32_e32 v5, s31 7726; GFX8-NEXT: s_add_u32 s28, s0, 0xc0 7727; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7728; GFX8-NEXT: s_nop 0 7729; GFX8-NEXT: v_mov_b32_e32 v0, s29 7730; GFX8-NEXT: s_addc_u32 s29, s1, 0 7731; GFX8-NEXT: v_mov_b32_e32 v4, s28 7732; GFX8-NEXT: v_mov_b32_e32 v2, s25 7733; GFX8-NEXT: v_mov_b32_e32 v5, s29 7734; GFX8-NEXT: s_add_u32 s26, s0, 0xb0 7735; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7736; GFX8-NEXT: s_nop 0 7737; GFX8-NEXT: v_mov_b32_e32 v0, s27 7738; GFX8-NEXT: s_addc_u32 s27, s1, 0 7739; GFX8-NEXT: v_mov_b32_e32 v4, s26 7740; GFX8-NEXT: v_mov_b32_e32 v2, s22 7741; GFX8-NEXT: v_mov_b32_e32 v5, s27 7742; GFX8-NEXT: s_add_u32 s22, s0, 0xa0 7743; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7744; GFX8-NEXT: s_nop 0 7745; GFX8-NEXT: v_mov_b32_e32 v0, s23 7746; GFX8-NEXT: s_addc_u32 s23, s1, 0 7747; GFX8-NEXT: v_mov_b32_e32 v4, s22 7748; GFX8-NEXT: v_mov_b32_e32 v2, s24 7749; GFX8-NEXT: v_mov_b32_e32 v5, s23 7750; GFX8-NEXT: s_add_u32 s22, s0, 0x90 7751; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7752; GFX8-NEXT: s_addc_u32 s23, s1, 0 7753; GFX8-NEXT: v_mov_b32_e32 v4, s22 7754; GFX8-NEXT: v_mov_b32_e32 v0, s21 7755; GFX8-NEXT: v_mov_b32_e32 v2, s19 7756; GFX8-NEXT: v_mov_b32_e32 v5, s23 7757; GFX8-NEXT: flat_store_dwordx4 
v[4:5], v[0:3] 7758; GFX8-NEXT: s_nop 0 7759; GFX8-NEXT: v_mov_b32_e32 v0, s20 7760; GFX8-NEXT: s_add_u32 s20, s0, 0x80 7761; GFX8-NEXT: s_addc_u32 s21, s1, 0 7762; GFX8-NEXT: v_mov_b32_e32 v4, s20 7763; GFX8-NEXT: v_mov_b32_e32 v2, s17 7764; GFX8-NEXT: v_mov_b32_e32 v5, s21 7765; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7766; GFX8-NEXT: s_nop 0 7767; GFX8-NEXT: v_mov_b32_e32 v0, s18 7768; GFX8-NEXT: s_add_u32 s18, s0, 0x70 7769; GFX8-NEXT: s_addc_u32 s19, s1, 0 7770; GFX8-NEXT: v_mov_b32_e32 v4, s18 7771; GFX8-NEXT: v_mov_b32_e32 v2, s15 7772; GFX8-NEXT: v_mov_b32_e32 v5, s19 7773; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7774; GFX8-NEXT: s_nop 0 7775; GFX8-NEXT: v_mov_b32_e32 v0, s16 7776; GFX8-NEXT: s_add_u32 s16, s0, 0x60 7777; GFX8-NEXT: s_addc_u32 s17, s1, 0 7778; GFX8-NEXT: v_mov_b32_e32 v4, s16 7779; GFX8-NEXT: v_mov_b32_e32 v2, s13 7780; GFX8-NEXT: v_mov_b32_e32 v5, s17 7781; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7782; GFX8-NEXT: s_nop 0 7783; GFX8-NEXT: v_mov_b32_e32 v2, s12 7784; GFX8-NEXT: s_add_u32 s12, s0, 0x50 7785; GFX8-NEXT: s_addc_u32 s13, s1, 0 7786; GFX8-NEXT: v_mov_b32_e32 v4, s12 7787; GFX8-NEXT: v_mov_b32_e32 v0, s14 7788; GFX8-NEXT: v_mov_b32_e32 v5, s13 7789; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7790; GFX8-NEXT: s_nop 0 7791; GFX8-NEXT: v_mov_b32_e32 v2, s10 7792; GFX8-NEXT: s_add_u32 s10, s0, 64 7793; GFX8-NEXT: v_mov_b32_e32 v0, s11 7794; GFX8-NEXT: s_addc_u32 s11, s1, 0 7795; GFX8-NEXT: v_mov_b32_e32 v4, s10 7796; GFX8-NEXT: v_mov_b32_e32 v5, s11 7797; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7798; GFX8-NEXT: s_nop 0 7799; GFX8-NEXT: v_mov_b32_e32 v2, s8 7800; GFX8-NEXT: s_add_u32 s8, s0, 48 7801; GFX8-NEXT: v_mov_b32_e32 v0, s9 7802; GFX8-NEXT: s_addc_u32 s9, s1, 0 7803; GFX8-NEXT: v_mov_b32_e32 v4, s8 7804; GFX8-NEXT: v_mov_b32_e32 v5, s9 7805; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7806; GFX8-NEXT: s_nop 0 7807; GFX8-NEXT: v_mov_b32_e32 v2, s6 7808; GFX8-NEXT: s_add_u32 s6, s0, 32 7809; GFX8-NEXT: 
v_mov_b32_e32 v0, s7 7810; GFX8-NEXT: s_addc_u32 s7, s1, 0 7811; GFX8-NEXT: v_mov_b32_e32 v4, s6 7812; GFX8-NEXT: v_mov_b32_e32 v5, s7 7813; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7814; GFX8-NEXT: s_nop 0 7815; GFX8-NEXT: v_mov_b32_e32 v2, s4 7816; GFX8-NEXT: s_add_u32 s4, s0, 16 7817; GFX8-NEXT: v_mov_b32_e32 v0, s5 7818; GFX8-NEXT: s_addc_u32 s5, s1, 0 7819; GFX8-NEXT: v_mov_b32_e32 v4, s4 7820; GFX8-NEXT: v_mov_b32_e32 v5, s5 7821; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7822; GFX8-NEXT: v_mov_b32_e32 v5, s1 7823; GFX8-NEXT: v_mov_b32_e32 v0, s3 7824; GFX8-NEXT: v_mov_b32_e32 v2, s2 7825; GFX8-NEXT: v_mov_b32_e32 v4, s0 7826; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 7827; GFX8-NEXT: s_endpgm 7828; 7829; EG-LABEL: constant_zextload_v64i1_to_v64i64: 7830; EG: ; %bb.0: 7831; EG-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[] 7832; EG-NEXT: TEX 0 @38 7833; EG-NEXT: ALU 95, @41, KC0[], KC1[] 7834; EG-NEXT: ALU 99, @137, KC0[CB0:0-32], KC1[] 7835; EG-NEXT: ALU 60, @237, KC0[CB0:0-32], KC1[] 7836; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T82.X, 0 7837; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T81.X, 0 7838; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T80.X, 0 7839; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T79.X, 0 7840; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T78.X, 0 7841; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T77.X, 0 7842; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T76.X, 0 7843; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T75.X, 0 7844; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T74.X, 0 7845; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T73.X, 0 7846; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T72.X, 0 7847; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T71.X, 0 7848; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T70.X, 0 7849; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T69.X, 0 7850; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T68.X, 0 7851; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T67.X, 
0 7852; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T66.X, 0 7853; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T65.X, 0 7854; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T64.X, 0 7855; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T63.X, 0 7856; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T62.X, 0 7857; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T61.X, 0 7858; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T60.X, 0 7859; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T59.X, 0 7860; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T58.X, 0 7861; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T57.X, 0 7862; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T56.X, 0 7863; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T55.X, 0 7864; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T54.X, 0 7865; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T53.X, 0 7866; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T52.X, 0 7867; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T51.X, 1 7868; EG-NEXT: CF_END 7869; EG-NEXT: Fetch clause starting at 38: 7870; EG-NEXT: VTX_READ_64 T25.XY, T19.X, 0, #1 7871; EG-NEXT: ALU clause starting at 40: 7872; EG-NEXT: MOV * T19.X, KC0[2].Z, 7873; EG-NEXT: ALU clause starting at 41: 7874; EG-NEXT: LSHR * T19.Z, T25.Y, literal.x, 7875; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7876; EG-NEXT: BFE_UINT T19.X, T25.Y, literal.x, 1, 7877; EG-NEXT: MOV T19.Y, 0.0, 7878; EG-NEXT: BFE_UINT * T20.Z, T25.Y, literal.y, 1, 7879; EG-NEXT: 30(4.203895e-44), 29(4.063766e-44) 7880; EG-NEXT: BFE_UINT T20.X, T25.Y, literal.x, 1, 7881; EG-NEXT: MOV T20.Y, 0.0, 7882; EG-NEXT: BFE_UINT * T21.Z, T25.Y, literal.y, 1, 7883; EG-NEXT: 28(3.923636e-44), 27(3.783506e-44) 7884; EG-NEXT: BFE_UINT T21.X, T25.Y, literal.x, 1, 7885; EG-NEXT: MOV T21.Y, 0.0, 7886; EG-NEXT: BFE_UINT * T22.Z, T25.Y, literal.y, 1, 7887; EG-NEXT: 26(3.643376e-44), 25(3.503246e-44) 7888; EG-NEXT: BFE_UINT T22.X, T25.Y, literal.x, 1, 7889; EG-NEXT: MOV T22.Y, 0.0, 7890; EG-NEXT: BFE_UINT * 
T23.Z, T25.Y, literal.y, 1, 7891; EG-NEXT: 24(3.363116e-44), 23(3.222986e-44) 7892; EG-NEXT: BFE_UINT T23.X, T25.Y, literal.x, 1, 7893; EG-NEXT: MOV T23.Y, 0.0, 7894; EG-NEXT: BFE_UINT * T24.Z, T25.Y, literal.y, 1, 7895; EG-NEXT: 22(3.082857e-44), 21(2.942727e-44) 7896; EG-NEXT: BFE_UINT T24.X, T25.Y, literal.x, 1, 7897; EG-NEXT: MOV T24.Y, 0.0, 7898; EG-NEXT: BFE_UINT * T26.Z, T25.Y, literal.y, 1, 7899; EG-NEXT: 20(2.802597e-44), 19(2.662467e-44) 7900; EG-NEXT: BFE_UINT T26.X, T25.Y, literal.x, 1, 7901; EG-NEXT: MOV T26.Y, 0.0, 7902; EG-NEXT: BFE_UINT * T27.Z, T25.Y, literal.y, 1, 7903; EG-NEXT: 18(2.522337e-44), 17(2.382207e-44) 7904; EG-NEXT: BFE_UINT T27.X, T25.Y, literal.x, 1, 7905; EG-NEXT: MOV T27.Y, 0.0, 7906; EG-NEXT: BFE_UINT * T28.Z, T25.Y, literal.y, 1, 7907; EG-NEXT: 16(2.242078e-44), 15(2.101948e-44) 7908; EG-NEXT: BFE_UINT T28.X, T25.Y, literal.x, 1, 7909; EG-NEXT: MOV T28.Y, 0.0, 7910; EG-NEXT: BFE_UINT * T29.Z, T25.Y, literal.y, 1, 7911; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44) 7912; EG-NEXT: BFE_UINT T29.X, T25.Y, literal.x, 1, 7913; EG-NEXT: MOV T29.Y, 0.0, 7914; EG-NEXT: BFE_UINT * T30.Z, T25.Y, literal.y, 1, 7915; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44) 7916; EG-NEXT: BFE_UINT T30.X, T25.Y, literal.x, 1, 7917; EG-NEXT: MOV T30.Y, 0.0, 7918; EG-NEXT: BFE_UINT * T31.Z, T25.Y, literal.y, 1, 7919; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44) 7920; EG-NEXT: BFE_UINT T31.X, T25.Y, literal.x, 1, 7921; EG-NEXT: MOV T31.Y, 0.0, 7922; EG-NEXT: BFE_UINT * T32.Z, T25.Y, literal.y, 1, 7923; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45) 7924; EG-NEXT: BFE_UINT T32.X, T25.Y, literal.x, 1, 7925; EG-NEXT: MOV T32.Y, 0.0, 7926; EG-NEXT: BFE_UINT * T33.Z, T25.Y, literal.y, 1, 7927; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45) 7928; EG-NEXT: BFE_UINT T33.X, T25.Y, literal.x, 1, 7929; EG-NEXT: MOV T33.Y, 0.0, 7930; EG-NEXT: BFE_UINT * T34.Z, T25.Y, literal.y, 1, 7931; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45) 7932; EG-NEXT: BFE_UINT T34.X, T25.Y, literal.x, 1, 7933; 
EG-NEXT: MOV T34.Y, 0.0, 7934; EG-NEXT: BFE_UINT T35.Z, T25.Y, 1, 1, 7935; EG-NEXT: AND_INT * T35.X, T25.Y, 1, 7936; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7937; EG-NEXT: MOV T35.Y, 0.0, 7938; EG-NEXT: LSHR * T36.Z, T25.X, literal.x, 7939; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7940; EG-NEXT: BFE_UINT T36.X, T25.X, literal.x, 1, 7941; EG-NEXT: MOV T36.Y, 0.0, 7942; EG-NEXT: BFE_UINT * T37.Z, T25.X, literal.y, 1, 7943; EG-NEXT: 30(4.203895e-44), 29(4.063766e-44) 7944; EG-NEXT: BFE_UINT T37.X, T25.X, literal.x, 1, 7945; EG-NEXT: MOV T37.Y, 0.0, 7946; EG-NEXT: BFE_UINT * T38.Z, T25.X, literal.y, 1, 7947; EG-NEXT: 28(3.923636e-44), 27(3.783506e-44) 7948; EG-NEXT: BFE_UINT T38.X, T25.X, literal.x, 1, 7949; EG-NEXT: MOV T38.Y, 0.0, 7950; EG-NEXT: BFE_UINT * T39.Z, T25.X, literal.y, 1, 7951; EG-NEXT: 26(3.643376e-44), 25(3.503246e-44) 7952; EG-NEXT: BFE_UINT T39.X, T25.X, literal.x, 1, 7953; EG-NEXT: MOV T39.Y, 0.0, 7954; EG-NEXT: BFE_UINT * T40.Z, T25.X, literal.y, 1, 7955; EG-NEXT: 24(3.363116e-44), 23(3.222986e-44) 7956; EG-NEXT: BFE_UINT T40.X, T25.X, literal.x, 1, 7957; EG-NEXT: MOV T40.Y, 0.0, 7958; EG-NEXT: BFE_UINT * T41.Z, T25.X, literal.y, 1, 7959; EG-NEXT: 22(3.082857e-44), 21(2.942727e-44) 7960; EG-NEXT: BFE_UINT T41.X, T25.X, literal.x, 1, 7961; EG-NEXT: MOV T41.Y, 0.0, 7962; EG-NEXT: BFE_UINT * T42.Z, T25.X, literal.y, 1, 7963; EG-NEXT: 20(2.802597e-44), 19(2.662467e-44) 7964; EG-NEXT: BFE_UINT T42.X, T25.X, literal.x, 1, 7965; EG-NEXT: MOV T42.Y, 0.0, 7966; EG-NEXT: BFE_UINT * T43.Z, T25.X, literal.y, 1, 7967; EG-NEXT: 18(2.522337e-44), 17(2.382207e-44) 7968; EG-NEXT: BFE_UINT * T43.X, T25.X, literal.x, 1, 7969; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7970; EG-NEXT: ALU clause starting at 137: 7971; EG-NEXT: MOV T43.Y, 0.0, 7972; EG-NEXT: BFE_UINT * T44.Z, T25.X, literal.x, 1, 7973; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00) 7974; EG-NEXT: BFE_UINT T44.X, T25.X, literal.x, 1, 7975; EG-NEXT: MOV T44.Y, 0.0, 7976; EG-NEXT: BFE_UINT * T45.Z, T25.X, 
literal.y, 1, 7977; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44) 7978; EG-NEXT: BFE_UINT T45.X, T25.X, literal.x, 1, 7979; EG-NEXT: MOV T45.Y, 0.0, 7980; EG-NEXT: BFE_UINT * T46.Z, T25.X, literal.y, 1, 7981; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44) 7982; EG-NEXT: BFE_UINT T46.X, T25.X, literal.x, 1, 7983; EG-NEXT: MOV T46.Y, 0.0, 7984; EG-NEXT: BFE_UINT * T47.Z, T25.X, literal.y, 1, 7985; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44) 7986; EG-NEXT: BFE_UINT T47.X, T25.X, literal.x, 1, 7987; EG-NEXT: MOV T47.Y, 0.0, 7988; EG-NEXT: BFE_UINT * T48.Z, T25.X, literal.y, 1, 7989; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45) 7990; EG-NEXT: BFE_UINT T48.X, T25.X, literal.x, 1, 7991; EG-NEXT: MOV T48.Y, 0.0, 7992; EG-NEXT: BFE_UINT * T49.Z, T25.X, literal.y, 1, 7993; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45) 7994; EG-NEXT: BFE_UINT T49.X, T25.X, literal.x, 1, 7995; EG-NEXT: MOV T49.Y, 0.0, 7996; EG-NEXT: BFE_UINT * T50.Z, T25.X, literal.y, 1, 7997; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45) 7998; EG-NEXT: BFE_UINT T50.X, T25.X, literal.x, 1, 7999; EG-NEXT: MOV T50.Y, 0.0, 8000; EG-NEXT: BFE_UINT T25.Z, T25.X, 1, 1, 8001; EG-NEXT: AND_INT * T25.X, T25.X, 1, 8002; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8003; EG-NEXT: MOV T25.Y, 0.0, 8004; EG-NEXT: MOV T19.W, 0.0, 8005; EG-NEXT: MOV * T20.W, 0.0, 8006; EG-NEXT: MOV T21.W, 0.0, 8007; EG-NEXT: MOV * T22.W, 0.0, 8008; EG-NEXT: MOV T23.W, 0.0, 8009; EG-NEXT: MOV * T24.W, 0.0, 8010; EG-NEXT: MOV T26.W, 0.0, 8011; EG-NEXT: MOV * T27.W, 0.0, 8012; EG-NEXT: MOV T28.W, 0.0, 8013; EG-NEXT: MOV * T29.W, 0.0, 8014; EG-NEXT: MOV T30.W, 0.0, 8015; EG-NEXT: MOV * T31.W, 0.0, 8016; EG-NEXT: MOV T32.W, 0.0, 8017; EG-NEXT: MOV * T33.W, 0.0, 8018; EG-NEXT: MOV T34.W, 0.0, 8019; EG-NEXT: MOV * T35.W, 0.0, 8020; EG-NEXT: MOV T36.W, 0.0, 8021; EG-NEXT: MOV * T37.W, 0.0, 8022; EG-NEXT: MOV T38.W, 0.0, 8023; EG-NEXT: MOV * T39.W, 0.0, 8024; EG-NEXT: MOV T40.W, 0.0, 8025; EG-NEXT: MOV * T41.W, 0.0, 8026; EG-NEXT: MOV T42.W, 0.0, 8027; EG-NEXT: MOV * 
T43.W, 0.0, 8028; EG-NEXT: MOV T44.W, 0.0, 8029; EG-NEXT: MOV * T45.W, 0.0, 8030; EG-NEXT: MOV T46.W, 0.0, 8031; EG-NEXT: MOV * T47.W, 0.0, 8032; EG-NEXT: MOV T48.W, 0.0, 8033; EG-NEXT: MOV * T49.W, 0.0, 8034; EG-NEXT: MOV T50.W, 0.0, 8035; EG-NEXT: MOV * T25.W, 0.0, 8036; EG-NEXT: LSHR T51.X, KC0[2].Y, literal.x, 8037; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8038; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8039; EG-NEXT: LSHR T52.X, PV.W, literal.x, 8040; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8041; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8042; EG-NEXT: LSHR T53.X, PV.W, literal.x, 8043; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8044; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8045; EG-NEXT: LSHR T54.X, PV.W, literal.x, 8046; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8047; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8048; EG-NEXT: LSHR T55.X, PV.W, literal.x, 8049; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8050; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8051; EG-NEXT: LSHR T56.X, PV.W, literal.x, 8052; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8053; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8054; EG-NEXT: LSHR T57.X, PV.W, literal.x, 8055; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8056; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8057; EG-NEXT: LSHR T58.X, PV.W, literal.x, 8058; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8059; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8060; EG-NEXT: LSHR T59.X, PV.W, literal.x, 8061; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8062; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8063; EG-NEXT: LSHR T60.X, PV.W, literal.x, 8064; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8065; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8066; EG-NEXT: LSHR T61.X, PV.W, literal.x, 8067; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8068; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8069; EG-NEXT: LSHR * T62.X, PV.W, literal.x, 8070; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8071; EG-NEXT: ALU clause 
starting at 237: 8072; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8073; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 8074; EG-NEXT: LSHR T63.X, PV.W, literal.x, 8075; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8076; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8077; EG-NEXT: LSHR T64.X, PV.W, literal.x, 8078; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8079; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 8080; EG-NEXT: LSHR T65.X, PV.W, literal.x, 8081; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8082; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 8083; EG-NEXT: LSHR T66.X, PV.W, literal.x, 8084; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8085; EG-NEXT: 2(2.802597e-45), 256(3.587324e-43) 8086; EG-NEXT: LSHR T67.X, PV.W, literal.x, 8087; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8088; EG-NEXT: 2(2.802597e-45), 272(3.811532e-43) 8089; EG-NEXT: LSHR T68.X, PV.W, literal.x, 8090; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8091; EG-NEXT: 2(2.802597e-45), 288(4.035740e-43) 8092; EG-NEXT: LSHR T69.X, PV.W, literal.x, 8093; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8094; EG-NEXT: 2(2.802597e-45), 304(4.259947e-43) 8095; EG-NEXT: LSHR T70.X, PV.W, literal.x, 8096; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8097; EG-NEXT: 2(2.802597e-45), 320(4.484155e-43) 8098; EG-NEXT: LSHR T71.X, PV.W, literal.x, 8099; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8100; EG-NEXT: 2(2.802597e-45), 336(4.708363e-43) 8101; EG-NEXT: LSHR T72.X, PV.W, literal.x, 8102; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8103; EG-NEXT: 2(2.802597e-45), 352(4.932571e-43) 8104; EG-NEXT: LSHR T73.X, PV.W, literal.x, 8105; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8106; EG-NEXT: 2(2.802597e-45), 368(5.156778e-43) 8107; EG-NEXT: LSHR T74.X, PV.W, literal.x, 8108; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8109; EG-NEXT: 2(2.802597e-45), 384(5.380986e-43) 8110; EG-NEXT: LSHR T75.X, PV.W, literal.x, 8111; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8112; EG-NEXT: 2(2.802597e-45), 
400(5.605194e-43) 8113; EG-NEXT: LSHR T76.X, PV.W, literal.x, 8114; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8115; EG-NEXT: 2(2.802597e-45), 416(5.829402e-43) 8116; EG-NEXT: LSHR T77.X, PV.W, literal.x, 8117; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8118; EG-NEXT: 2(2.802597e-45), 432(6.053609e-43) 8119; EG-NEXT: LSHR T78.X, PV.W, literal.x, 8120; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8121; EG-NEXT: 2(2.802597e-45), 448(6.277817e-43) 8122; EG-NEXT: LSHR T79.X, PV.W, literal.x, 8123; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8124; EG-NEXT: 2(2.802597e-45), 464(6.502025e-43) 8125; EG-NEXT: LSHR T80.X, PV.W, literal.x, 8126; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8127; EG-NEXT: 2(2.802597e-45), 480(6.726233e-43) 8128; EG-NEXT: LSHR T81.X, PV.W, literal.x, 8129; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8130; EG-NEXT: 2(2.802597e-45), 496(6.950440e-43) 8131; EG-NEXT: LSHR * T82.X, PV.W, literal.x, 8132; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8133; 8134; GFX12-LABEL: constant_zextload_v64i1_to_v64i64: 8135; GFX12: ; %bb.0: 8136; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 8137; GFX12-NEXT: s_wait_kmcnt 0x0 8138; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 8139; GFX12-NEXT: s_wait_kmcnt 0x0 8140; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10014 8141; GFX12-NEXT: s_wait_alu 0xfffe 8142; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4 8143; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10015 8144; GFX12-NEXT: s_lshr_b32 s4, s3, 31 8145; GFX12-NEXT: s_wait_alu 0xfffe 8146; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 8147; GFX12-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1 8148; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001e 8149; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416 8150; GFX12-NEXT: s_wait_alu 0xfffe 8151; GFX12-NEXT: v_mov_b32_e32 v0, s5 8152; GFX12-NEXT: v_mov_b32_e32 v2, s4 8153; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1001d 8154; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001c 8155; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496 
8156; GFX12-NEXT: s_wait_alu 0xfffe 8157; GFX12-NEXT: v_mov_b32_e32 v0, s5 8158; GFX12-NEXT: v_mov_b32_e32 v2, s4 8159; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1001b 8160; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001a 8161; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:480 8162; GFX12-NEXT: s_wait_alu 0xfffe 8163; GFX12-NEXT: v_mov_b32_e32 v0, s5 8164; GFX12-NEXT: v_mov_b32_e32 v2, s4 8165; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10019 8166; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10018 8167; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:464 8168; GFX12-NEXT: s_wait_alu 0xfffe 8169; GFX12-NEXT: v_mov_b32_e32 v0, s5 8170; GFX12-NEXT: v_mov_b32_e32 v2, s4 8171; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10017 8172; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10016 8173; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:448 8174; GFX12-NEXT: s_wait_alu 0xfffe 8175; GFX12-NEXT: v_mov_b32_e32 v0, s5 8176; GFX12-NEXT: v_mov_b32_e32 v2, s4 8177; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10013 8178; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10012 8179; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432 8180; GFX12-NEXT: s_wait_alu 0xfffe 8181; GFX12-NEXT: v_mov_b32_e32 v0, s5 8182; GFX12-NEXT: v_mov_b32_e32 v2, s4 8183; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10011 8184; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10010 8185; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:400 8186; GFX12-NEXT: s_wait_alu 0xfffe 8187; GFX12-NEXT: v_mov_b32_e32 v0, s5 8188; GFX12-NEXT: v_mov_b32_e32 v2, s4 8189; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000f 8190; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000e 8191; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:384 8192; GFX12-NEXT: s_wait_alu 0xfffe 8193; GFX12-NEXT: v_mov_b32_e32 v0, s5 8194; GFX12-NEXT: v_mov_b32_e32 v2, s4 8195; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000d 8196; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000c 8197; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:368 8198; GFX12-NEXT: s_wait_alu 0xfffe 8199; GFX12-NEXT: v_mov_b32_e32 v0, s5 8200; GFX12-NEXT: v_mov_b32_e32 v2, s4 8201; 
GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000b 8202; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000a 8203; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:352 8204; GFX12-NEXT: s_wait_alu 0xfffe 8205; GFX12-NEXT: v_mov_b32_e32 v0, s5 8206; GFX12-NEXT: v_mov_b32_e32 v2, s4 8207; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10009 8208; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10008 8209; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:336 8210; GFX12-NEXT: s_wait_alu 0xfffe 8211; GFX12-NEXT: v_mov_b32_e32 v0, s5 8212; GFX12-NEXT: v_mov_b32_e32 v2, s4 8213; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10007 8214; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10006 8215; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:320 8216; GFX12-NEXT: s_wait_alu 0xfffe 8217; GFX12-NEXT: v_mov_b32_e32 v0, s5 8218; GFX12-NEXT: v_mov_b32_e32 v2, s4 8219; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10005 8220; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10004 8221; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:304 8222; GFX12-NEXT: s_wait_alu 0xfffe 8223; GFX12-NEXT: v_mov_b32_e32 v0, s5 8224; GFX12-NEXT: v_mov_b32_e32 v2, s4 8225; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10003 8226; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10002 8227; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:288 8228; GFX12-NEXT: s_wait_alu 0xfffe 8229; GFX12-NEXT: v_mov_b32_e32 v0, s5 8230; GFX12-NEXT: v_mov_b32_e32 v2, s4 8231; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10001 8232; GFX12-NEXT: s_and_b32 s3, s3, 1 8233; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:272 8234; GFX12-NEXT: s_wait_alu 0xfffe 8235; GFX12-NEXT: v_mov_b32_e32 v0, s3 8236; GFX12-NEXT: v_mov_b32_e32 v2, s4 8237; GFX12-NEXT: s_lshr_b32 s3, s2, 31 8238; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001e 8239; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:256 8240; GFX12-NEXT: s_wait_alu 0xfffe 8241; GFX12-NEXT: v_mov_b32_e32 v0, s4 8242; GFX12-NEXT: v_mov_b32_e32 v2, s3 8243; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001d 8244; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001c 8245; GFX12-NEXT: global_store_b128 v1, v[0:3], 
s[0:1] offset:240 8246; GFX12-NEXT: s_wait_alu 0xfffe 8247; GFX12-NEXT: v_mov_b32_e32 v0, s4 8248; GFX12-NEXT: v_mov_b32_e32 v2, s3 8249; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001b 8250; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001a 8251; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224 8252; GFX12-NEXT: s_wait_alu 0xfffe 8253; GFX12-NEXT: v_mov_b32_e32 v0, s4 8254; GFX12-NEXT: v_mov_b32_e32 v2, s3 8255; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10019 8256; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10018 8257; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208 8258; GFX12-NEXT: s_wait_alu 0xfffe 8259; GFX12-NEXT: v_mov_b32_e32 v0, s4 8260; GFX12-NEXT: v_mov_b32_e32 v2, s3 8261; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10017 8262; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10016 8263; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192 8264; GFX12-NEXT: s_wait_alu 0xfffe 8265; GFX12-NEXT: v_mov_b32_e32 v0, s4 8266; GFX12-NEXT: v_mov_b32_e32 v2, s3 8267; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014 8268; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10015 8269; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176 8270; GFX12-NEXT: s_wait_alu 0xfffe 8271; GFX12-NEXT: v_mov_b32_e32 v0, s3 8272; GFX12-NEXT: v_mov_b32_e32 v2, s4 8273; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10013 8274; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10012 8275; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160 8276; GFX12-NEXT: s_wait_alu 0xfffe 8277; GFX12-NEXT: v_mov_b32_e32 v0, s4 8278; GFX12-NEXT: v_mov_b32_e32 v2, s3 8279; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10011 8280; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10010 8281; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144 8282; GFX12-NEXT: s_wait_alu 0xfffe 8283; GFX12-NEXT: v_mov_b32_e32 v0, s4 8284; GFX12-NEXT: v_mov_b32_e32 v2, s3 8285; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000f 8286; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000e 8287; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128 8288; GFX12-NEXT: s_wait_alu 0xfffe 8289; GFX12-NEXT: v_mov_b32_e32 v0, s4 8290; GFX12-NEXT: 
v_mov_b32_e32 v2, s3 8291; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d 8292; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c 8293; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112 8294; GFX12-NEXT: s_wait_alu 0xfffe 8295; GFX12-NEXT: v_mov_b32_e32 v0, s4 8296; GFX12-NEXT: v_mov_b32_e32 v2, s3 8297; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000b 8298; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000a 8299; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96 8300; GFX12-NEXT: s_wait_alu 0xfffe 8301; GFX12-NEXT: v_mov_b32_e32 v0, s4 8302; GFX12-NEXT: v_mov_b32_e32 v2, s3 8303; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009 8304; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10008 8305; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80 8306; GFX12-NEXT: s_wait_alu 0xfffe 8307; GFX12-NEXT: v_mov_b32_e32 v0, s4 8308; GFX12-NEXT: v_mov_b32_e32 v2, s3 8309; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007 8310; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006 8311; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64 8312; GFX12-NEXT: s_wait_alu 0xfffe 8313; GFX12-NEXT: v_mov_b32_e32 v0, s4 8314; GFX12-NEXT: v_mov_b32_e32 v2, s3 8315; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10005 8316; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004 8317; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 8318; GFX12-NEXT: s_wait_alu 0xfffe 8319; GFX12-NEXT: v_mov_b32_e32 v0, s4 8320; GFX12-NEXT: v_mov_b32_e32 v2, s3 8321; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003 8322; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002 8323; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32 8324; GFX12-NEXT: s_wait_alu 0xfffe 8325; GFX12-NEXT: v_mov_b32_e32 v0, s4 8326; GFX12-NEXT: v_mov_b32_e32 v2, s3 8327; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001 8328; GFX12-NEXT: s_and_b32 s2, s2, 1 8329; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 8330; GFX12-NEXT: s_wait_alu 0xfffe 8331; GFX12-NEXT: v_mov_b32_e32 v0, s2 8332; GFX12-NEXT: v_mov_b32_e32 v2, s3 8333; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] 8334; GFX12-NEXT: s_endpgm 8335 %load = load <64 x i1>, ptr 
addrspace(4) %in 8336 %ext = zext <64 x i1> %load to <64 x i64> 8337 store <64 x i64> %ext, ptr addrspace(1) %out 8338 ret void 8339} 8340 8341define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 { 8342; GFX6-LABEL: constant_sextload_v64i1_to_v64i64: 8343; GFX6: ; %bb.0: 8344; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8345; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8346; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 8347; GFX6-NEXT: s_mov_b32 s3, 0xf000 8348; GFX6-NEXT: s_mov_b32 s2, -1 8349; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8350; GFX6-NEXT: s_lshr_b32 s42, s5, 30 8351; GFX6-NEXT: s_lshr_b32 s36, s5, 28 8352; GFX6-NEXT: s_lshr_b32 s38, s5, 29 8353; GFX6-NEXT: s_lshr_b32 s30, s5, 26 8354; GFX6-NEXT: s_lshr_b32 s34, s5, 27 8355; GFX6-NEXT: s_lshr_b32 s26, s5, 24 8356; GFX6-NEXT: s_lshr_b32 s28, s5, 25 8357; GFX6-NEXT: s_lshr_b32 s22, s5, 22 8358; GFX6-NEXT: s_lshr_b32 s24, s5, 23 8359; GFX6-NEXT: s_lshr_b32 s18, s5, 20 8360; GFX6-NEXT: s_lshr_b32 s20, s5, 21 8361; GFX6-NEXT: s_lshr_b32 s14, s5, 18 8362; GFX6-NEXT: s_lshr_b32 s16, s5, 19 8363; GFX6-NEXT: s_lshr_b32 s10, s5, 16 8364; GFX6-NEXT: s_lshr_b32 s12, s5, 17 8365; GFX6-NEXT: s_lshr_b32 s6, s5, 14 8366; GFX6-NEXT: s_lshr_b32 s8, s5, 15 8367; GFX6-NEXT: s_mov_b32 s40, s5 8368; GFX6-NEXT: s_ashr_i32 s7, s5, 31 8369; GFX6-NEXT: s_bfe_i64 s[44:45], s[40:41], 0x10000 8370; GFX6-NEXT: v_mov_b32_e32 v4, s7 8371; GFX6-NEXT: s_lshr_b32 s40, s5, 12 8372; GFX6-NEXT: v_mov_b32_e32 v0, s44 8373; GFX6-NEXT: v_mov_b32_e32 v1, s45 8374; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000 8375; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 8376; GFX6-NEXT: v_mov_b32_e32 v6, s44 8377; GFX6-NEXT: v_mov_b32_e32 v7, s45 8378; GFX6-NEXT: s_lshr_b32 s44, s5, 13 8379; GFX6-NEXT: v_mov_b32_e32 v2, s42 8380; GFX6-NEXT: v_mov_b32_e32 v3, s43 8381; GFX6-NEXT: s_lshr_b32 s42, s5, 10 8382; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 8383; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 
0x10000 8384; GFX6-NEXT: v_mov_b32_e32 v8, s36 8385; GFX6-NEXT: v_mov_b32_e32 v9, s37 8386; GFX6-NEXT: s_lshr_b32 s36, s5, 11 8387; GFX6-NEXT: v_mov_b32_e32 v10, s38 8388; GFX6-NEXT: v_mov_b32_e32 v11, s39 8389; GFX6-NEXT: s_lshr_b32 s38, s5, 8 8390; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 8391; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 8392; GFX6-NEXT: v_mov_b32_e32 v12, s30 8393; GFX6-NEXT: v_mov_b32_e32 v13, s31 8394; GFX6-NEXT: s_lshr_b32 s30, s5, 9 8395; GFX6-NEXT: v_mov_b32_e32 v14, s34 8396; GFX6-NEXT: v_mov_b32_e32 v15, s35 8397; GFX6-NEXT: s_lshr_b32 s34, s5, 6 8398; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 8399; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 8400; GFX6-NEXT: v_mov_b32_e32 v5, s7 8401; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:496 8402; GFX6-NEXT: s_waitcnt expcnt(0) 8403; GFX6-NEXT: v_mov_b32_e32 v2, s26 8404; GFX6-NEXT: v_mov_b32_e32 v3, s27 8405; GFX6-NEXT: s_lshr_b32 s26, s5, 7 8406; GFX6-NEXT: v_mov_b32_e32 v4, s28 8407; GFX6-NEXT: v_mov_b32_e32 v5, s29 8408; GFX6-NEXT: s_lshr_b32 s28, s5, 4 8409; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 8410; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 8411; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480 8412; GFX6-NEXT: s_waitcnt expcnt(0) 8413; GFX6-NEXT: v_mov_b32_e32 v8, s22 8414; GFX6-NEXT: v_mov_b32_e32 v9, s23 8415; GFX6-NEXT: s_lshr_b32 s22, s5, 5 8416; GFX6-NEXT: v_mov_b32_e32 v10, s24 8417; GFX6-NEXT: v_mov_b32_e32 v11, s25 8418; GFX6-NEXT: s_lshr_b32 s24, s5, 2 8419; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 8420; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 8421; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464 8422; GFX6-NEXT: s_waitcnt expcnt(0) 8423; GFX6-NEXT: v_mov_b32_e32 v12, s18 8424; GFX6-NEXT: v_mov_b32_e32 v13, s19 8425; GFX6-NEXT: s_lshr_b32 s18, s5, 3 8426; GFX6-NEXT: v_mov_b32_e32 v14, s20 8427; GFX6-NEXT: v_mov_b32_e32 v15, s21 8428; GFX6-NEXT: s_lshr_b32 s20, s5, 1 
8429; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 8430; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 8431; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:448 8432; GFX6-NEXT: s_waitcnt expcnt(0) 8433; GFX6-NEXT: v_mov_b32_e32 v2, s14 8434; GFX6-NEXT: v_mov_b32_e32 v3, s15 8435; GFX6-NEXT: s_lshr_b32 s14, s4, 30 8436; GFX6-NEXT: v_mov_b32_e32 v4, s16 8437; GFX6-NEXT: v_mov_b32_e32 v5, s17 8438; GFX6-NEXT: s_lshr_b32 s16, s4, 31 8439; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 8440; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 8441; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:432 8442; GFX6-NEXT: s_waitcnt expcnt(0) 8443; GFX6-NEXT: v_mov_b32_e32 v8, s10 8444; GFX6-NEXT: v_mov_b32_e32 v9, s11 8445; GFX6-NEXT: s_lshr_b32 s10, s4, 28 8446; GFX6-NEXT: v_mov_b32_e32 v10, s12 8447; GFX6-NEXT: v_mov_b32_e32 v11, s13 8448; GFX6-NEXT: s_lshr_b32 s12, s4, 29 8449; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 8450; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 8451; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:416 8452; GFX6-NEXT: s_waitcnt expcnt(0) 8453; GFX6-NEXT: v_mov_b32_e32 v12, s6 8454; GFX6-NEXT: v_mov_b32_e32 v13, s7 8455; GFX6-NEXT: s_lshr_b32 s46, s4, 26 8456; GFX6-NEXT: v_mov_b32_e32 v14, s8 8457; GFX6-NEXT: v_mov_b32_e32 v15, s9 8458; GFX6-NEXT: s_lshr_b32 s8, s4, 27 8459; GFX6-NEXT: s_bfe_i64 s[6:7], s[44:45], 0x10000 8460; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 8461; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:400 8462; GFX6-NEXT: s_waitcnt expcnt(0) 8463; GFX6-NEXT: v_mov_b32_e32 v2, s40 8464; GFX6-NEXT: v_mov_b32_e32 v3, s41 8465; GFX6-NEXT: s_lshr_b32 s40, s4, 24 8466; GFX6-NEXT: v_mov_b32_e32 v4, s6 8467; GFX6-NEXT: v_mov_b32_e32 v5, s7 8468; GFX6-NEXT: s_lshr_b32 s44, s4, 25 8469; GFX6-NEXT: s_bfe_i64 s[6:7], s[36:37], 0x10000 8470; GFX6-NEXT: s_bfe_i64 s[36:37], s[42:43], 0x10000 8471; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:384 8472; 
GFX6-NEXT: s_waitcnt expcnt(0) 8473; GFX6-NEXT: v_mov_b32_e32 v8, s36 8474; GFX6-NEXT: v_mov_b32_e32 v9, s37 8475; GFX6-NEXT: s_lshr_b32 s36, s4, 22 8476; GFX6-NEXT: v_mov_b32_e32 v10, s6 8477; GFX6-NEXT: v_mov_b32_e32 v11, s7 8478; GFX6-NEXT: s_lshr_b32 s42, s4, 23 8479; GFX6-NEXT: s_bfe_i64 s[6:7], s[30:31], 0x10000 8480; GFX6-NEXT: s_bfe_i64 s[30:31], s[38:39], 0x10000 8481; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:368 8482; GFX6-NEXT: s_waitcnt expcnt(0) 8483; GFX6-NEXT: v_mov_b32_e32 v12, s30 8484; GFX6-NEXT: v_mov_b32_e32 v13, s31 8485; GFX6-NEXT: s_lshr_b32 s30, s4, 20 8486; GFX6-NEXT: v_mov_b32_e32 v14, s6 8487; GFX6-NEXT: v_mov_b32_e32 v15, s7 8488; GFX6-NEXT: s_lshr_b32 s6, s4, 21 8489; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 8490; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 8491; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:352 8492; GFX6-NEXT: v_mov_b32_e32 v16, s34 8493; GFX6-NEXT: v_mov_b32_e32 v17, s35 8494; GFX6-NEXT: s_lshr_b32 s34, s4, 18 8495; GFX6-NEXT: v_mov_b32_e32 v18, s26 8496; GFX6-NEXT: v_mov_b32_e32 v19, s27 8497; GFX6-NEXT: s_lshr_b32 s26, s4, 19 8498; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 8499; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 8500; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:336 8501; GFX6-NEXT: s_waitcnt expcnt(0) 8502; GFX6-NEXT: v_mov_b32_e32 v8, s28 8503; GFX6-NEXT: v_mov_b32_e32 v9, s29 8504; GFX6-NEXT: s_lshr_b32 s28, s4, 16 8505; GFX6-NEXT: v_mov_b32_e32 v10, s22 8506; GFX6-NEXT: v_mov_b32_e32 v11, s23 8507; GFX6-NEXT: s_lshr_b32 s22, s4, 17 8508; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 8509; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:320 8510; GFX6-NEXT: s_waitcnt expcnt(0) 8511; GFX6-NEXT: v_mov_b32_e32 v12, s24 8512; GFX6-NEXT: v_mov_b32_e32 v13, s25 8513; GFX6-NEXT: s_lshr_b32 s24, s4, 14 8514; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 8515; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 
8516; GFX6-NEXT: v_mov_b32_e32 v14, s18 8517; GFX6-NEXT: v_mov_b32_e32 v15, s19 8518; GFX6-NEXT: s_lshr_b32 s18, s4, 15 8519; GFX6-NEXT: v_mov_b32_e32 v2, s20 8520; GFX6-NEXT: v_mov_b32_e32 v3, s21 8521; GFX6-NEXT: s_lshr_b32 s20, s4, 12 8522; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 8523; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 8524; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:304 8525; GFX6-NEXT: s_waitcnt expcnt(0) 8526; GFX6-NEXT: v_mov_b32_e32 v16, s14 8527; GFX6-NEXT: v_mov_b32_e32 v17, s15 8528; GFX6-NEXT: s_lshr_b32 s14, s4, 13 8529; GFX6-NEXT: v_mov_b32_e32 v18, s16 8530; GFX6-NEXT: v_mov_b32_e32 v19, s17 8531; GFX6-NEXT: s_lshr_b32 s16, s4, 10 8532; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 8533; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 8534; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:288 8535; GFX6-NEXT: s_waitcnt expcnt(0) 8536; GFX6-NEXT: v_mov_b32_e32 v8, s10 8537; GFX6-NEXT: v_mov_b32_e32 v9, s11 8538; GFX6-NEXT: s_lshr_b32 s10, s4, 11 8539; GFX6-NEXT: v_mov_b32_e32 v10, s12 8540; GFX6-NEXT: v_mov_b32_e32 v11, s13 8541; GFX6-NEXT: s_lshr_b32 s12, s4, 8 8542; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 8543; GFX6-NEXT: s_bfe_i64 s[38:39], s[46:47], 0x10000 8544; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:272 8545; GFX6-NEXT: s_waitcnt expcnt(0) 8546; GFX6-NEXT: v_mov_b32_e32 v12, s38 8547; GFX6-NEXT: v_mov_b32_e32 v13, s39 8548; GFX6-NEXT: s_lshr_b32 s38, s4, 9 8549; GFX6-NEXT: v_mov_b32_e32 v14, s8 8550; GFX6-NEXT: v_mov_b32_e32 v15, s9 8551; GFX6-NEXT: s_lshr_b32 s8, s4, 6 8552; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 8553; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 8554; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256 8555; GFX6-NEXT: s_waitcnt expcnt(0) 8556; GFX6-NEXT: v_mov_b32_e32 v0, s40 8557; GFX6-NEXT: v_mov_b32_e32 v1, s41 8558; GFX6-NEXT: s_lshr_b32 s40, s4, 7 8559; GFX6-NEXT: v_mov_b32_e32 v2, s44 8560; GFX6-NEXT: 
v_mov_b32_e32 v3, s45 8561; GFX6-NEXT: s_lshr_b32 s44, s4, 4 8562; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 8563; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 8564; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240 8565; GFX6-NEXT: s_waitcnt expcnt(0) 8566; GFX6-NEXT: v_mov_b32_e32 v16, s36 8567; GFX6-NEXT: v_mov_b32_e32 v17, s37 8568; GFX6-NEXT: s_lshr_b32 s36, s4, 5 8569; GFX6-NEXT: v_mov_b32_e32 v18, s42 8570; GFX6-NEXT: v_mov_b32_e32 v19, s43 8571; GFX6-NEXT: s_lshr_b32 s42, s4, 2 8572; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 8573; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224 8574; GFX6-NEXT: s_waitcnt expcnt(0) 8575; GFX6-NEXT: v_mov_b32_e32 v8, s30 8576; GFX6-NEXT: v_mov_b32_e32 v9, s31 8577; GFX6-NEXT: s_lshr_b32 s30, s4, 3 8578; GFX6-NEXT: s_lshr_b32 s4, s4, 1 8579; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 8580; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 8581; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 8582; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 8583; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 8584; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 8585; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 8586; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 8587; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 8588; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 8589; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 8590; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 8591; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 8592; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 8593; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 8594; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 8595; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 8596; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 8597; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 8598; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 8599; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 
offset:208 8600; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 8601; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 8602; GFX6-NEXT: v_mov_b32_e32 v10, s6 8603; GFX6-NEXT: v_mov_b32_e32 v11, s7 8604; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 8605; GFX6-NEXT: s_waitcnt expcnt(2) 8606; GFX6-NEXT: v_mov_b32_e32 v0, s34 8607; GFX6-NEXT: v_mov_b32_e32 v1, s35 8608; GFX6-NEXT: v_mov_b32_e32 v2, s26 8609; GFX6-NEXT: v_mov_b32_e32 v3, s27 8610; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 8611; GFX6-NEXT: s_waitcnt expcnt(0) 8612; GFX6-NEXT: v_mov_b32_e32 v0, s28 8613; GFX6-NEXT: v_mov_b32_e32 v1, s29 8614; GFX6-NEXT: v_mov_b32_e32 v2, s22 8615; GFX6-NEXT: v_mov_b32_e32 v3, s23 8616; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 8617; GFX6-NEXT: s_waitcnt expcnt(0) 8618; GFX6-NEXT: v_mov_b32_e32 v0, s24 8619; GFX6-NEXT: v_mov_b32_e32 v1, s25 8620; GFX6-NEXT: v_mov_b32_e32 v2, s18 8621; GFX6-NEXT: v_mov_b32_e32 v3, s19 8622; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 8623; GFX6-NEXT: s_waitcnt expcnt(0) 8624; GFX6-NEXT: v_mov_b32_e32 v0, s20 8625; GFX6-NEXT: v_mov_b32_e32 v1, s21 8626; GFX6-NEXT: v_mov_b32_e32 v2, s14 8627; GFX6-NEXT: v_mov_b32_e32 v3, s15 8628; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 8629; GFX6-NEXT: s_waitcnt expcnt(0) 8630; GFX6-NEXT: v_mov_b32_e32 v0, s16 8631; GFX6-NEXT: v_mov_b32_e32 v1, s17 8632; GFX6-NEXT: v_mov_b32_e32 v2, s10 8633; GFX6-NEXT: v_mov_b32_e32 v3, s11 8634; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 8635; GFX6-NEXT: s_waitcnt expcnt(0) 8636; GFX6-NEXT: v_mov_b32_e32 v0, s12 8637; GFX6-NEXT: v_mov_b32_e32 v1, s13 8638; GFX6-NEXT: v_mov_b32_e32 v2, s38 8639; GFX6-NEXT: v_mov_b32_e32 v3, s39 8640; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 8641; GFX6-NEXT: s_waitcnt expcnt(0) 8642; GFX6-NEXT: v_mov_b32_e32 v0, s8 8643; GFX6-NEXT: 
v_mov_b32_e32 v1, s9 8644; GFX6-NEXT: v_mov_b32_e32 v2, s40 8645; GFX6-NEXT: v_mov_b32_e32 v3, s41 8646; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 8647; GFX6-NEXT: s_waitcnt expcnt(0) 8648; GFX6-NEXT: v_mov_b32_e32 v0, s44 8649; GFX6-NEXT: v_mov_b32_e32 v1, s45 8650; GFX6-NEXT: v_mov_b32_e32 v2, s36 8651; GFX6-NEXT: v_mov_b32_e32 v3, s37 8652; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 8653; GFX6-NEXT: s_waitcnt expcnt(0) 8654; GFX6-NEXT: v_mov_b32_e32 v0, s42 8655; GFX6-NEXT: v_mov_b32_e32 v1, s43 8656; GFX6-NEXT: v_mov_b32_e32 v2, s30 8657; GFX6-NEXT: v_mov_b32_e32 v3, s31 8658; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 8659; GFX6-NEXT: v_mov_b32_e32 v8, s4 8660; GFX6-NEXT: v_mov_b32_e32 v9, s5 8661; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 8662; GFX6-NEXT: s_endpgm 8663; 8664; GFX8-LABEL: constant_sextload_v64i1_to_v64i64: 8665; GFX8: ; %bb.0: 8666; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 8667; GFX8-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane 8668; GFX8-NEXT: s_waitcnt lgkmcnt(0) 8669; GFX8-NEXT: s_load_dwordx2 s[2:3], s[10:11], 0x0 8670; GFX8-NEXT: s_waitcnt lgkmcnt(0) 8671; GFX8-NEXT: s_lshr_b32 s0, s3, 8 8672; GFX8-NEXT: s_lshr_b32 s48, s3, 15 8673; GFX8-NEXT: v_writelane_b32 v62, s0, 0 8674; GFX8-NEXT: s_lshr_b32 s74, s3, 30 8675; GFX8-NEXT: s_lshr_b32 s30, s3, 31 8676; GFX8-NEXT: s_lshr_b32 s72, s3, 28 8677; GFX8-NEXT: s_lshr_b32 s34, s3, 29 8678; GFX8-NEXT: s_lshr_b32 s70, s3, 26 8679; GFX8-NEXT: s_lshr_b32 s36, s3, 27 8680; GFX8-NEXT: s_lshr_b32 s68, s3, 24 8681; GFX8-NEXT: s_lshr_b32 s38, s3, 25 8682; GFX8-NEXT: s_lshr_b32 s64, s3, 22 8683; GFX8-NEXT: s_lshr_b32 s40, s3, 23 8684; GFX8-NEXT: s_lshr_b32 s60, s3, 20 8685; GFX8-NEXT: s_lshr_b32 s42, s3, 21 8686; GFX8-NEXT: s_lshr_b32 s66, s3, 18 8687; GFX8-NEXT: s_lshr_b32 s44, s3, 19 8688; GFX8-NEXT: s_lshr_b32 s56, s3, 16 8689; GFX8-NEXT: s_lshr_b32 s46, s3, 17 8690; GFX8-NEXT: s_lshr_b32 s58, s3, 14 8691; 
GFX8-NEXT: s_lshr_b32 s62, s3, 12 8692; GFX8-NEXT: s_lshr_b32 s54, s3, 10 8693; GFX8-NEXT: v_writelane_b32 v62, s1, 1 8694; GFX8-NEXT: s_lshr_b32 s0, s3, 9 8695; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 8696; GFX8-NEXT: s_lshr_b32 s52, s3, 11 8697; GFX8-NEXT: v_writelane_b32 v62, s0, 2 8698; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 8699; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 8700; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000 8701; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000 8702; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000 8703; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000 8704; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 8705; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 8706; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 8707; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 8708; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 8709; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 8710; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 8711; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 8712; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 8713; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 8714; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 8715; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 8716; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 8717; GFX8-NEXT: v_mov_b32_e32 v34, s48 8718; GFX8-NEXT: s_lshr_b32 s48, s2, 1 8719; GFX8-NEXT: s_lshr_b32 s50, s3, 13 8720; GFX8-NEXT: v_writelane_b32 v62, s1, 3 8721; GFX8-NEXT: s_lshr_b32 s6, s3, 6 8722; GFX8-NEXT: s_lshr_b32 s10, s3, 7 8723; GFX8-NEXT: s_lshr_b32 s12, s3, 4 8724; GFX8-NEXT: s_lshr_b32 s14, s3, 5 8725; GFX8-NEXT: s_lshr_b32 s16, s3, 2 8726; GFX8-NEXT: s_lshr_b32 s18, s3, 3 8727; GFX8-NEXT: s_lshr_b32 s20, s3, 1 8728; GFX8-NEXT: s_mov_b32 s22, s3 8729; GFX8-NEXT: s_lshr_b32 s24, s2, 30 8730; GFX8-NEXT: s_lshr_b32 s26, s2, 31 8731; GFX8-NEXT: s_lshr_b32 s28, s2, 28 8732; GFX8-NEXT: v_mov_b32_e32 v4, s74 8733; GFX8-NEXT: v_mov_b32_e32 
v12, s72 8734; GFX8-NEXT: v_mov_b32_e32 v0, s70 8735; GFX8-NEXT: v_mov_b32_e32 v8, s68 8736; GFX8-NEXT: v_mov_b32_e32 v16, s64 8737; GFX8-NEXT: v_mov_b32_e32 v20, s60 8738; GFX8-NEXT: v_mov_b32_e32 v24, s66 8739; GFX8-NEXT: v_mov_b32_e32 v28, s56 8740; GFX8-NEXT: v_mov_b32_e32 v32, s58 8741; GFX8-NEXT: v_mov_b32_e32 v36, s62 8742; GFX8-NEXT: s_lshr_b32 s86, s2, 29 8743; GFX8-NEXT: v_mov_b32_e32 v40, s54 8744; GFX8-NEXT: s_lshr_b32 s84, s2, 26 8745; GFX8-NEXT: s_lshr_b32 s82, s2, 27 8746; GFX8-NEXT: s_bfe_i64 vcc, s[52:53], 0x10000 8747; GFX8-NEXT: s_lshr_b32 s80, s2, 24 8748; GFX8-NEXT: v_mov_b32_e32 v6, s30 8749; GFX8-NEXT: v_mov_b32_e32 v7, s31 8750; GFX8-NEXT: s_lshr_b32 s78, s2, 25 8751; GFX8-NEXT: s_lshr_b32 s76, s2, 22 8752; GFX8-NEXT: v_mov_b32_e32 v14, s34 8753; GFX8-NEXT: s_lshr_b32 s74, s2, 23 8754; GFX8-NEXT: s_lshr_b32 s72, s2, 20 8755; GFX8-NEXT: v_mov_b32_e32 v2, s36 8756; GFX8-NEXT: s_lshr_b32 s70, s2, 21 8757; GFX8-NEXT: s_lshr_b32 s68, s2, 18 8758; GFX8-NEXT: v_mov_b32_e32 v10, s38 8759; GFX8-NEXT: s_lshr_b32 s66, s2, 19 8760; GFX8-NEXT: s_lshr_b32 s64, s2, 16 8761; GFX8-NEXT: v_mov_b32_e32 v18, s40 8762; GFX8-NEXT: s_lshr_b32 s62, s2, 17 8763; GFX8-NEXT: s_lshr_b32 s60, s2, 14 8764; GFX8-NEXT: v_mov_b32_e32 v22, s42 8765; GFX8-NEXT: s_lshr_b32 s58, s2, 15 8766; GFX8-NEXT: s_lshr_b32 s56, s2, 12 8767; GFX8-NEXT: v_mov_b32_e32 v26, s44 8768; GFX8-NEXT: s_lshr_b32 s54, s2, 13 8769; GFX8-NEXT: s_lshr_b32 s52, s2, 10 8770; GFX8-NEXT: v_mov_b32_e32 v30, s46 8771; GFX8-NEXT: s_lshr_b32 s4, s2, 11 8772; GFX8-NEXT: s_lshr_b32 s0, s2, 8 8773; GFX8-NEXT: s_lshr_b32 s46, s2, 9 8774; GFX8-NEXT: s_lshr_b32 s44, s2, 6 8775; GFX8-NEXT: s_lshr_b32 s42, s2, 7 8776; GFX8-NEXT: s_lshr_b32 s40, s2, 4 8777; GFX8-NEXT: s_lshr_b32 s38, s2, 5 8778; GFX8-NEXT: s_lshr_b32 s36, s2, 2 8779; GFX8-NEXT: s_lshr_b32 s34, s2, 3 8780; GFX8-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x10000 8781; GFX8-NEXT: s_bfe_i64 s[2:3], s[48:49], 0x10000 8782; GFX8-NEXT: v_writelane_b32 v62, s2, 4 8783; 
GFX8-NEXT: v_writelane_b32 v62, s3, 5 8784; GFX8-NEXT: v_readlane_b32 s2, v62, 2 8785; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 8786; GFX8-NEXT: v_readlane_b32 s3, v62, 3 8787; GFX8-NEXT: v_mov_b32_e32 v38, s50 8788; GFX8-NEXT: v_mov_b32_e32 v39, s51 8789; GFX8-NEXT: s_bfe_i64 s[50:51], s[4:5], 0x10000 8790; GFX8-NEXT: s_bfe_i64 s[4:5], s[6:7], 0x10000 8791; GFX8-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x10000 8792; GFX8-NEXT: v_readlane_b32 s2, v62, 0 8793; GFX8-NEXT: v_readlane_b32 s3, v62, 1 8794; GFX8-NEXT: v_mov_b32_e32 v5, s75 8795; GFX8-NEXT: v_mov_b32_e32 v13, s73 8796; GFX8-NEXT: v_mov_b32_e32 v15, s35 8797; GFX8-NEXT: v_mov_b32_e32 v1, s71 8798; GFX8-NEXT: v_mov_b32_e32 v3, s37 8799; GFX8-NEXT: v_mov_b32_e32 v9, s69 8800; GFX8-NEXT: v_mov_b32_e32 v11, s39 8801; GFX8-NEXT: v_mov_b32_e32 v17, s65 8802; GFX8-NEXT: v_mov_b32_e32 v19, s41 8803; GFX8-NEXT: v_mov_b32_e32 v21, s61 8804; GFX8-NEXT: v_mov_b32_e32 v23, s43 8805; GFX8-NEXT: v_mov_b32_e32 v25, s67 8806; GFX8-NEXT: v_mov_b32_e32 v27, s45 8807; GFX8-NEXT: v_mov_b32_e32 v29, s57 8808; GFX8-NEXT: v_mov_b32_e32 v31, s47 8809; GFX8-NEXT: v_mov_b32_e32 v33, s59 8810; GFX8-NEXT: v_mov_b32_e32 v35, s49 8811; GFX8-NEXT: v_mov_b32_e32 v37, s63 8812; GFX8-NEXT: v_mov_b32_e32 v41, s55 8813; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 8814; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 8815; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 8816; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 8817; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 8818; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 8819; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 8820; GFX8-NEXT: s_bfe_i64 s[48:49], s[0:1], 0x10000 8821; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 8822; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 8823; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 8824; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 8825; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 8826; GFX8-NEXT: s_bfe_i64 
s[62:63], s[62:63], 0x10000 8827; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 8828; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 8829; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000 8830; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000 8831; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000 8832; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000 8833; GFX8-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000 8834; GFX8-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000 8835; GFX8-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000 8836; GFX8-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000 8837; GFX8-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000 8838; GFX8-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000 8839; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 8840; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 8841; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 8842; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 8843; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 8844; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 8845; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 8846; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 8847; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 8848; GFX8-NEXT: s_bfe_i64 s[0:1], s[10:11], 0x10000 8849; GFX8-NEXT: s_bfe_i64 s[10:11], s[2:3], 0x10000 8850; GFX8-NEXT: s_add_u32 s2, s8, 0x1f0 8851; GFX8-NEXT: s_addc_u32 s3, s9, 0 8852; GFX8-NEXT: v_mov_b32_e32 v43, s3 8853; GFX8-NEXT: v_mov_b32_e32 v42, s2 8854; GFX8-NEXT: s_add_u32 s2, s8, 0x1e0 8855; GFX8-NEXT: s_addc_u32 s3, s9, 0 8856; GFX8-NEXT: v_mov_b32_e32 v45, s3 8857; GFX8-NEXT: v_mov_b32_e32 v44, s2 8858; GFX8-NEXT: s_add_u32 s2, s8, 0x1d0 8859; GFX8-NEXT: s_addc_u32 s3, s9, 0 8860; GFX8-NEXT: v_mov_b32_e32 v47, s3 8861; GFX8-NEXT: v_mov_b32_e32 v46, s2 8862; GFX8-NEXT: s_add_u32 s2, s8, 0x1c0 8863; GFX8-NEXT: s_addc_u32 s3, s9, 0 8864; GFX8-NEXT: v_mov_b32_e32 v49, s3 8865; GFX8-NEXT: v_mov_b32_e32 v48, s2 8866; GFX8-NEXT: s_add_u32 s2, s8, 0x1b0 8867; GFX8-NEXT: s_addc_u32 s3, s9, 0 
8868; GFX8-NEXT: v_mov_b32_e32 v51, s3 8869; GFX8-NEXT: v_mov_b32_e32 v50, s2 8870; GFX8-NEXT: s_add_u32 s2, s8, 0x1a0 8871; GFX8-NEXT: s_addc_u32 s3, s9, 0 8872; GFX8-NEXT: v_mov_b32_e32 v53, s3 8873; GFX8-NEXT: v_mov_b32_e32 v52, s2 8874; GFX8-NEXT: s_add_u32 s2, s8, 0x190 8875; GFX8-NEXT: s_addc_u32 s3, s9, 0 8876; GFX8-NEXT: v_mov_b32_e32 v55, s3 8877; GFX8-NEXT: v_mov_b32_e32 v54, s2 8878; GFX8-NEXT: s_add_u32 s2, s8, 0x180 8879; GFX8-NEXT: s_addc_u32 s3, s9, 0 8880; GFX8-NEXT: v_mov_b32_e32 v57, s3 8881; GFX8-NEXT: v_mov_b32_e32 v56, s2 8882; GFX8-NEXT: s_add_u32 s2, s8, 0x170 8883; GFX8-NEXT: s_addc_u32 s3, s9, 0 8884; GFX8-NEXT: v_mov_b32_e32 v59, s3 8885; GFX8-NEXT: v_mov_b32_e32 v58, s2 8886; GFX8-NEXT: s_add_u32 s2, s8, 0x160 8887; GFX8-NEXT: s_addc_u32 s3, s9, 0 8888; GFX8-NEXT: v_mov_b32_e32 v61, s3 8889; GFX8-NEXT: v_mov_b32_e32 v60, s2 8890; GFX8-NEXT: s_add_u32 s2, s8, 0x150 8891; GFX8-NEXT: s_addc_u32 s3, s9, 0 8892; GFX8-NEXT: flat_store_dwordx4 v[44:45], v[12:15] 8893; GFX8-NEXT: flat_store_dwordx4 v[46:47], v[0:3] 8894; GFX8-NEXT: v_mov_b32_e32 v13, s3 8895; GFX8-NEXT: v_mov_b32_e32 v12, s2 8896; GFX8-NEXT: s_add_u32 s2, s8, 0x140 8897; GFX8-NEXT: s_addc_u32 s3, s9, 0 8898; GFX8-NEXT: v_mov_b32_e32 v2, s0 8899; GFX8-NEXT: s_add_u32 s0, s8, 0x130 8900; GFX8-NEXT: v_mov_b32_e32 v3, s1 8901; GFX8-NEXT: s_addc_u32 s1, s9, 0 8902; GFX8-NEXT: flat_store_dwordx4 v[42:43], v[4:7] 8903; GFX8-NEXT: flat_store_dwordx4 v[48:49], v[8:11] 8904; GFX8-NEXT: flat_store_dwordx4 v[50:51], v[16:19] 8905; GFX8-NEXT: v_mov_b32_e32 v4, s10 8906; GFX8-NEXT: v_mov_b32_e32 v17, s1 8907; GFX8-NEXT: v_mov_b32_e32 v16, s0 8908; GFX8-NEXT: s_add_u32 s0, s8, 0x120 8909; GFX8-NEXT: s_addc_u32 s1, s9, 0 8910; GFX8-NEXT: v_mov_b32_e32 v19, s1 8911; GFX8-NEXT: v_mov_b32_e32 v18, s0 8912; GFX8-NEXT: s_add_u32 s0, s8, 0x110 8913; GFX8-NEXT: v_mov_b32_e32 v5, s11 8914; GFX8-NEXT: v_mov_b32_e32 v15, s3 8915; GFX8-NEXT: s_addc_u32 s1, s9, 0 8916; GFX8-NEXT: v_mov_b32_e32 v42, vcc_lo 
8917; GFX8-NEXT: v_mov_b32_e32 v43, vcc_hi 8918; GFX8-NEXT: v_mov_b32_e32 v14, s2 8919; GFX8-NEXT: v_mov_b32_e32 v6, s6 8920; GFX8-NEXT: v_mov_b32_e32 v7, s7 8921; GFX8-NEXT: v_mov_b32_e32 v0, s4 8922; GFX8-NEXT: v_mov_b32_e32 v1, s5 8923; GFX8-NEXT: v_mov_b32_e32 v8, s12 8924; GFX8-NEXT: flat_store_dwordx4 v[52:53], v[20:23] 8925; GFX8-NEXT: v_mov_b32_e32 v9, s13 8926; GFX8-NEXT: flat_store_dwordx4 v[54:55], v[24:27] 8927; GFX8-NEXT: v_mov_b32_e32 v10, s14 8928; GFX8-NEXT: v_mov_b32_e32 v11, s15 8929; GFX8-NEXT: flat_store_dwordx4 v[56:57], v[28:31] 8930; GFX8-NEXT: flat_store_dwordx4 v[58:59], v[32:35] 8931; GFX8-NEXT: flat_store_dwordx4 v[60:61], v[36:39] 8932; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[40:43] 8933; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 8934; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 8935; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[8:11] 8936; GFX8-NEXT: v_mov_b32_e32 v5, s1 8937; GFX8-NEXT: v_mov_b32_e32 v4, s0 8938; GFX8-NEXT: s_add_u32 s0, s8, 0x100 8939; GFX8-NEXT: v_mov_b32_e32 v0, s16 8940; GFX8-NEXT: v_mov_b32_e32 v1, s17 8941; GFX8-NEXT: v_mov_b32_e32 v2, s18 8942; GFX8-NEXT: v_mov_b32_e32 v3, s19 8943; GFX8-NEXT: s_addc_u32 s1, s9, 0 8944; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8945; GFX8-NEXT: v_mov_b32_e32 v5, s1 8946; GFX8-NEXT: v_mov_b32_e32 v4, s0 8947; GFX8-NEXT: s_add_u32 s0, s8, 0xf0 8948; GFX8-NEXT: v_mov_b32_e32 v0, s22 8949; GFX8-NEXT: v_mov_b32_e32 v1, s23 8950; GFX8-NEXT: v_mov_b32_e32 v2, s20 8951; GFX8-NEXT: v_mov_b32_e32 v3, s21 8952; GFX8-NEXT: s_addc_u32 s1, s9, 0 8953; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8954; GFX8-NEXT: v_mov_b32_e32 v5, s1 8955; GFX8-NEXT: v_mov_b32_e32 v4, s0 8956; GFX8-NEXT: s_add_u32 s0, s8, 0xe0 8957; GFX8-NEXT: v_mov_b32_e32 v0, s24 8958; GFX8-NEXT: v_mov_b32_e32 v1, s25 8959; GFX8-NEXT: v_mov_b32_e32 v2, s26 8960; GFX8-NEXT: v_mov_b32_e32 v3, s27 8961; GFX8-NEXT: s_addc_u32 s1, s9, 0 8962; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8963; GFX8-NEXT: v_mov_b32_e32 
v5, s1 8964; GFX8-NEXT: v_mov_b32_e32 v4, s0 8965; GFX8-NEXT: s_add_u32 s0, s8, 0xd0 8966; GFX8-NEXT: v_mov_b32_e32 v0, s28 8967; GFX8-NEXT: v_mov_b32_e32 v1, s29 8968; GFX8-NEXT: v_mov_b32_e32 v2, s86 8969; GFX8-NEXT: v_mov_b32_e32 v3, s87 8970; GFX8-NEXT: s_addc_u32 s1, s9, 0 8971; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8972; GFX8-NEXT: v_mov_b32_e32 v5, s1 8973; GFX8-NEXT: v_mov_b32_e32 v4, s0 8974; GFX8-NEXT: s_add_u32 s0, s8, 0xc0 8975; GFX8-NEXT: v_mov_b32_e32 v0, s84 8976; GFX8-NEXT: v_mov_b32_e32 v1, s85 8977; GFX8-NEXT: v_mov_b32_e32 v2, s82 8978; GFX8-NEXT: v_mov_b32_e32 v3, s83 8979; GFX8-NEXT: s_addc_u32 s1, s9, 0 8980; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8981; GFX8-NEXT: v_mov_b32_e32 v5, s1 8982; GFX8-NEXT: v_mov_b32_e32 v4, s0 8983; GFX8-NEXT: s_add_u32 s0, s8, 0xb0 8984; GFX8-NEXT: v_mov_b32_e32 v0, s80 8985; GFX8-NEXT: v_mov_b32_e32 v1, s81 8986; GFX8-NEXT: v_mov_b32_e32 v2, s78 8987; GFX8-NEXT: v_mov_b32_e32 v3, s79 8988; GFX8-NEXT: s_addc_u32 s1, s9, 0 8989; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8990; GFX8-NEXT: v_mov_b32_e32 v5, s1 8991; GFX8-NEXT: v_mov_b32_e32 v4, s0 8992; GFX8-NEXT: s_add_u32 s0, s8, 0xa0 8993; GFX8-NEXT: v_mov_b32_e32 v0, s76 8994; GFX8-NEXT: v_mov_b32_e32 v1, s77 8995; GFX8-NEXT: v_mov_b32_e32 v2, s74 8996; GFX8-NEXT: v_mov_b32_e32 v3, s75 8997; GFX8-NEXT: s_addc_u32 s1, s9, 0 8998; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8999; GFX8-NEXT: v_mov_b32_e32 v5, s1 9000; GFX8-NEXT: v_mov_b32_e32 v4, s0 9001; GFX8-NEXT: s_add_u32 s0, s8, 0x90 9002; GFX8-NEXT: v_mov_b32_e32 v0, s72 9003; GFX8-NEXT: v_mov_b32_e32 v1, s73 9004; GFX8-NEXT: v_mov_b32_e32 v2, s70 9005; GFX8-NEXT: v_mov_b32_e32 v3, s71 9006; GFX8-NEXT: s_addc_u32 s1, s9, 0 9007; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9008; GFX8-NEXT: v_mov_b32_e32 v5, s1 9009; GFX8-NEXT: v_mov_b32_e32 v4, s0 9010; GFX8-NEXT: s_add_u32 s0, s8, 0x80 9011; GFX8-NEXT: v_mov_b32_e32 v0, s68 9012; GFX8-NEXT: v_mov_b32_e32 v1, s69 9013; GFX8-NEXT: v_mov_b32_e32 
v2, s66 9014; GFX8-NEXT: v_mov_b32_e32 v3, s67 9015; GFX8-NEXT: s_addc_u32 s1, s9, 0 9016; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9017; GFX8-NEXT: v_mov_b32_e32 v5, s1 9018; GFX8-NEXT: v_mov_b32_e32 v4, s0 9019; GFX8-NEXT: s_add_u32 s0, s8, 0x70 9020; GFX8-NEXT: v_mov_b32_e32 v0, s64 9021; GFX8-NEXT: v_mov_b32_e32 v1, s65 9022; GFX8-NEXT: v_mov_b32_e32 v2, s62 9023; GFX8-NEXT: v_mov_b32_e32 v3, s63 9024; GFX8-NEXT: s_addc_u32 s1, s9, 0 9025; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9026; GFX8-NEXT: v_mov_b32_e32 v5, s1 9027; GFX8-NEXT: v_mov_b32_e32 v4, s0 9028; GFX8-NEXT: s_add_u32 s0, s8, 0x60 9029; GFX8-NEXT: v_mov_b32_e32 v0, s60 9030; GFX8-NEXT: v_mov_b32_e32 v1, s61 9031; GFX8-NEXT: v_mov_b32_e32 v2, s58 9032; GFX8-NEXT: v_mov_b32_e32 v3, s59 9033; GFX8-NEXT: s_addc_u32 s1, s9, 0 9034; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9035; GFX8-NEXT: v_mov_b32_e32 v5, s1 9036; GFX8-NEXT: v_mov_b32_e32 v4, s0 9037; GFX8-NEXT: s_add_u32 s0, s8, 0x50 9038; GFX8-NEXT: v_mov_b32_e32 v0, s56 9039; GFX8-NEXT: v_mov_b32_e32 v1, s57 9040; GFX8-NEXT: v_mov_b32_e32 v2, s54 9041; GFX8-NEXT: v_mov_b32_e32 v3, s55 9042; GFX8-NEXT: s_addc_u32 s1, s9, 0 9043; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9044; GFX8-NEXT: v_mov_b32_e32 v5, s1 9045; GFX8-NEXT: v_mov_b32_e32 v4, s0 9046; GFX8-NEXT: s_add_u32 s0, s8, 64 9047; GFX8-NEXT: v_mov_b32_e32 v0, s52 9048; GFX8-NEXT: v_mov_b32_e32 v1, s53 9049; GFX8-NEXT: v_mov_b32_e32 v2, s50 9050; GFX8-NEXT: v_mov_b32_e32 v3, s51 9051; GFX8-NEXT: s_addc_u32 s1, s9, 0 9052; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9053; GFX8-NEXT: v_mov_b32_e32 v5, s1 9054; GFX8-NEXT: v_mov_b32_e32 v4, s0 9055; GFX8-NEXT: s_add_u32 s0, s8, 48 9056; GFX8-NEXT: v_mov_b32_e32 v0, s48 9057; GFX8-NEXT: v_mov_b32_e32 v1, s49 9058; GFX8-NEXT: v_mov_b32_e32 v2, s46 9059; GFX8-NEXT: v_mov_b32_e32 v3, s47 9060; GFX8-NEXT: s_addc_u32 s1, s9, 0 9061; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9062; GFX8-NEXT: v_mov_b32_e32 v5, s1 9063; GFX8-NEXT: 
v_mov_b32_e32 v4, s0 9064; GFX8-NEXT: s_add_u32 s0, s8, 32 9065; GFX8-NEXT: v_mov_b32_e32 v0, s44 9066; GFX8-NEXT: v_mov_b32_e32 v1, s45 9067; GFX8-NEXT: v_mov_b32_e32 v2, s42 9068; GFX8-NEXT: v_mov_b32_e32 v3, s43 9069; GFX8-NEXT: s_addc_u32 s1, s9, 0 9070; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9071; GFX8-NEXT: v_mov_b32_e32 v5, s1 9072; GFX8-NEXT: v_mov_b32_e32 v4, s0 9073; GFX8-NEXT: s_add_u32 s0, s8, 16 9074; GFX8-NEXT: v_mov_b32_e32 v0, s40 9075; GFX8-NEXT: v_mov_b32_e32 v1, s41 9076; GFX8-NEXT: v_mov_b32_e32 v2, s38 9077; GFX8-NEXT: v_mov_b32_e32 v3, s39 9078; GFX8-NEXT: s_addc_u32 s1, s9, 0 9079; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9080; GFX8-NEXT: v_mov_b32_e32 v5, s1 9081; GFX8-NEXT: v_mov_b32_e32 v0, s36 9082; GFX8-NEXT: v_mov_b32_e32 v1, s37 9083; GFX8-NEXT: v_mov_b32_e32 v2, s34 9084; GFX8-NEXT: v_mov_b32_e32 v3, s35 9085; GFX8-NEXT: v_mov_b32_e32 v4, s0 9086; GFX8-NEXT: v_readlane_b32 s0, v62, 4 9087; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9088; GFX8-NEXT: v_readlane_b32 s1, v62, 5 9089; GFX8-NEXT: v_mov_b32_e32 v4, s8 9090; GFX8-NEXT: v_mov_b32_e32 v0, s30 9091; GFX8-NEXT: v_mov_b32_e32 v1, s31 9092; GFX8-NEXT: v_mov_b32_e32 v2, s0 9093; GFX8-NEXT: v_mov_b32_e32 v3, s1 9094; GFX8-NEXT: v_mov_b32_e32 v5, s9 9095; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 9096; GFX8-NEXT: s_endpgm 9097; 9098; EG-LABEL: constant_sextload_v64i1_to_v64i64: 9099; EG: ; %bb.0: 9100; EG-NEXT: ALU 22, @40, KC0[CB0:0-32], KC1[] 9101; EG-NEXT: TEX 0 @38 9102; EG-NEXT: ALU 89, @63, KC0[CB0:0-32], KC1[] 9103; EG-NEXT: ALU 99, @153, KC0[], KC1[] 9104; EG-NEXT: ALU 107, @253, KC0[CB0:0-32], KC1[] 9105; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T82.X, 0 9106; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T80.XYZW, T81.X, 0 9107; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T73.X, 0 9108; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T79.XYZW, T48.X, 0 9109; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T47.X, 0 9110; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T78.XYZW, 
T46.X, 0 9111; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T45.X, 0 9112; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T77.XYZW, T44.X, 0 9113; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T43.X, 0 9114; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T76.XYZW, T42.X, 0 9115; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T55.XYZW, T41.X, 0 9116; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T75.XYZW, T39.X, 0 9117; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T38.X, 0 9118; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T74.XYZW, T37.X, 0 9119; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T36.X, 0 9120; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T35.X, 0 9121; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T34.X, 0 9122; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T72.XYZW, T33.X, 0 9123; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T32.X, 0 9124; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T71.XYZW, T31.X, 0 9125; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T30.X, 0 9126; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T70.XYZW, T29.X, 0 9127; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T28.X, 0 9128; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T69.XYZW, T27.X, 0 9129; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T26.X, 0 9130; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T68.XYZW, T25.X, 0 9131; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T24.X, 0 9132; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T23.X, 0 9133; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T22.X, 0 9134; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T67.XYZW, T21.X, 0 9135; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T20.X, 0 9136; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T19.X, 1 9137; EG-NEXT: CF_END 9138; EG-NEXT: Fetch clause starting at 38: 9139; EG-NEXT: VTX_READ_64 T40.XY, T26.X, 0, #1 9140; EG-NEXT: ALU clause starting at 40: 9141; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 9142; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9143; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 9144; EG-NEXT: LSHR T20.X, PV.W, literal.x, 9145; EG-NEXT: ADD_INT * 
T0.W, KC0[2].Y, literal.y, 9146; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 9147; EG-NEXT: LSHR T21.X, PV.W, literal.x, 9148; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9149; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 9150; EG-NEXT: LSHR T22.X, PV.W, literal.x, 9151; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9152; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 9153; EG-NEXT: LSHR T23.X, PV.W, literal.x, 9154; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9155; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 9156; EG-NEXT: LSHR T24.X, PV.W, literal.x, 9157; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9158; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 9159; EG-NEXT: LSHR T25.X, PV.W, literal.x, 9160; EG-NEXT: MOV * T26.X, KC0[2].Z, 9161; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9162; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 9163; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 9164; EG-NEXT: ALU clause starting at 63: 9165; EG-NEXT: LSHR T26.X, T0.W, literal.x, 9166; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9167; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 9168; EG-NEXT: LSHR T27.X, PV.W, literal.x, 9169; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9170; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 9171; EG-NEXT: LSHR T28.X, PV.W, literal.x, 9172; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9173; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 9174; EG-NEXT: LSHR T29.X, PV.W, literal.x, 9175; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9176; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 9177; EG-NEXT: LSHR T30.X, PV.W, literal.x, 9178; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9179; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 9180; EG-NEXT: LSHR T31.X, PV.W, literal.x, 9181; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9182; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 9183; EG-NEXT: LSHR T32.X, PV.W, literal.x, 9184; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9185; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 9186; EG-NEXT: LSHR T33.X, PV.W, literal.x, 
9187; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9188; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 9189; EG-NEXT: LSHR T34.X, PV.W, literal.x, 9190; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9191; EG-NEXT: 2(2.802597e-45), 256(3.587324e-43) 9192; EG-NEXT: LSHR T35.X, PV.W, literal.x, 9193; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9194; EG-NEXT: 2(2.802597e-45), 272(3.811532e-43) 9195; EG-NEXT: LSHR T36.X, PV.W, literal.x, 9196; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9197; EG-NEXT: 2(2.802597e-45), 288(4.035740e-43) 9198; EG-NEXT: LSHR T37.X, PV.W, literal.x, 9199; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9200; EG-NEXT: 2(2.802597e-45), 304(4.259947e-43) 9201; EG-NEXT: LSHR T38.X, PV.W, literal.x, 9202; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9203; EG-NEXT: 2(2.802597e-45), 320(4.484155e-43) 9204; EG-NEXT: LSHR T39.X, PV.W, literal.x, 9205; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9206; EG-NEXT: 2(2.802597e-45), 336(4.708363e-43) 9207; EG-NEXT: LSHR T41.X, PV.W, literal.x, 9208; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 9209; EG-NEXT: 2(2.802597e-45), 352(4.932571e-43) 9210; EG-NEXT: LSHR T42.X, PV.W, literal.x, 9211; EG-NEXT: LSHR T0.Z, T40.Y, literal.y, 9212; EG-NEXT: LSHR T0.W, T40.Y, literal.z, 9213; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w, 9214; EG-NEXT: 2(2.802597e-45), 28(3.923636e-44) 9215; EG-NEXT: 29(4.063766e-44), 368(5.156778e-43) 9216; EG-NEXT: LSHR T43.X, PS, literal.x, 9217; EG-NEXT: LSHR T0.Y, T40.Y, literal.y, 9218; EG-NEXT: LSHR T1.Z, T40.Y, literal.z, 9219; EG-NEXT: LSHR * T1.W, T40.Y, literal.w, 9220; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44) 9221; EG-NEXT: 25(3.503246e-44), 20(2.802597e-44) 9222; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, 9223; EG-NEXT: 384(5.380986e-43), 0(0.000000e+00) 9224; EG-NEXT: LSHR T44.X, PV.W, literal.x, 9225; EG-NEXT: LSHR T1.Y, T40.Y, literal.y, 9226; EG-NEXT: LSHR T2.Z, T40.Y, literal.z, 9227; EG-NEXT: LSHR * T2.W, T40.Y, literal.w, 9228; EG-NEXT: 2(2.802597e-45), 
21(2.942727e-44) 9229; EG-NEXT: 16(2.242078e-44), 17(2.382207e-44) 9230; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.x, 9231; EG-NEXT: 400(5.605194e-43), 0(0.000000e+00) 9232; EG-NEXT: LSHR T45.X, PV.W, literal.x, 9233; EG-NEXT: LSHR T2.Y, T40.Y, literal.y, 9234; EG-NEXT: LSHR T3.Z, T40.Y, literal.z, 9235; EG-NEXT: LSHR * T3.W, T40.Y, literal.w, 9236; EG-NEXT: 2(2.802597e-45), 12(1.681558e-44) 9237; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44) 9238; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.x, 9239; EG-NEXT: 416(5.829402e-43), 0(0.000000e+00) 9240; EG-NEXT: LSHR T46.X, PV.W, literal.x, 9241; EG-NEXT: LSHR T3.Y, T40.Y, literal.y, 9242; EG-NEXT: LSHR T4.Z, T40.Y, literal.z, 9243; EG-NEXT: LSHR * T4.W, T40.Y, literal.w, 9244; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44) 9245; EG-NEXT: 4(5.605194e-45), 5(7.006492e-45) 9246; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.x, 9247; EG-NEXT: 432(6.053609e-43), 0(0.000000e+00) 9248; EG-NEXT: LSHR T47.X, PV.W, literal.x, 9249; EG-NEXT: ADD_INT T4.Y, KC0[2].Y, literal.y, 9250; EG-NEXT: LSHR T5.Z, T40.Y, 1, 9251; EG-NEXT: LSHR T5.W, T40.X, literal.z, 9252; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.w, 9253; EG-NEXT: 2(2.802597e-45), 464(6.502025e-43) 9254; EG-NEXT: 28(3.923636e-44), 448(6.277817e-43) 9255; EG-NEXT: ALU clause starting at 153: 9256; EG-NEXT: LSHR T48.X, T6.W, literal.x, 9257; EG-NEXT: LSHR T5.Y, T40.X, literal.y, 9258; EG-NEXT: LSHR T6.Z, T40.X, literal.z, 9259; EG-NEXT: LSHR * T6.W, T40.X, literal.w, 9260; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44) 9261; EG-NEXT: 24(3.363116e-44), 25(3.503246e-44) 9262; EG-NEXT: LSHR * T7.W, T40.X, literal.x, 9263; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) 9264; EG-NEXT: BFE_INT T49.X, T40.X, 0.0, 1, 9265; EG-NEXT: LSHR T6.Y, T40.X, literal.x, 9266; EG-NEXT: ASHR T50.Z, T40.Y, literal.y, 9267; EG-NEXT: LSHR T8.W, T40.Y, literal.z, 9268; EG-NEXT: LSHR * T9.W, T40.Y, literal.w, 9269; EG-NEXT: 21(2.942727e-44), 31(4.344025e-44) 9270; EG-NEXT: 27(3.783506e-44), 30(4.203895e-44) 9271; 
EG-NEXT: BFE_INT T50.X, PS, 0.0, 1, 9272; EG-NEXT: LSHR T7.Y, T40.X, literal.x, 9273; EG-NEXT: BFE_INT T51.Z, PV.W, 0.0, 1, 9274; EG-NEXT: LSHR T8.W, T40.Y, literal.y, 9275; EG-NEXT: LSHR * T9.W, T40.Y, literal.z, 9276; EG-NEXT: 16(2.242078e-44), 23(3.222986e-44) 9277; EG-NEXT: 26(3.643376e-44), 0(0.000000e+00) 9278; EG-NEXT: BFE_INT T51.X, PS, 0.0, 1, 9279; EG-NEXT: MOV T50.Y, PV.X, 9280; EG-NEXT: BFE_INT T52.Z, PV.W, 0.0, 1, 9281; EG-NEXT: LSHR T8.W, T40.Y, literal.x, 9282; EG-NEXT: LSHR * T9.W, T40.Y, literal.y, 9283; EG-NEXT: 19(2.662467e-44), 22(3.082857e-44) 9284; EG-NEXT: BFE_INT T52.X, PS, 0.0, 1, 9285; EG-NEXT: MOV T51.Y, PV.X, 9286; EG-NEXT: BFE_INT T53.Z, PV.W, 0.0, 1, 9287; EG-NEXT: LSHR T8.W, T40.Y, literal.x, 9288; EG-NEXT: LSHR * T9.W, T40.Y, literal.y, 9289; EG-NEXT: 15(2.101948e-44), 18(2.522337e-44) 9290; EG-NEXT: BFE_INT T53.X, PS, 0.0, 1, 9291; EG-NEXT: MOV T52.Y, PV.X, 9292; EG-NEXT: BFE_INT T54.Z, PV.W, 0.0, 1, 9293; EG-NEXT: LSHR T8.W, T40.Y, literal.x, 9294; EG-NEXT: LSHR * T9.W, T40.Y, literal.y, 9295; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44) 9296; EG-NEXT: BFE_INT T54.X, PS, 0.0, 1, 9297; EG-NEXT: MOV T53.Y, PV.X, 9298; EG-NEXT: BFE_INT T55.Z, PV.W, 0.0, 1, 9299; EG-NEXT: LSHR T8.W, T40.Y, literal.x, 9300; EG-NEXT: LSHR * T9.W, T40.Y, literal.y, 9301; EG-NEXT: 7(9.809089e-45), 10(1.401298e-44) 9302; EG-NEXT: BFE_INT T55.X, PS, 0.0, 1, 9303; EG-NEXT: MOV T54.Y, PV.X, 9304; EG-NEXT: BFE_INT T56.Z, PV.W, 0.0, 1, 9305; EG-NEXT: LSHR T8.W, T40.Y, literal.x, 9306; EG-NEXT: LSHR * T9.W, T40.Y, literal.y, 9307; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45) 9308; EG-NEXT: BFE_INT T56.X, PS, 0.0, 1, 9309; EG-NEXT: MOV T55.Y, PV.X, 9310; EG-NEXT: BFE_INT T57.Z, PV.W, 0.0, 1, 9311; EG-NEXT: LSHR T8.W, T40.X, literal.x, 9312; EG-NEXT: LSHR * T9.W, T40.Y, literal.y, 9313; EG-NEXT: 17(2.382207e-44), 2(2.802597e-45) 9314; EG-NEXT: BFE_INT T57.X, PS, 0.0, 1, 9315; EG-NEXT: MOV T56.Y, PV.X, 9316; EG-NEXT: ASHR T58.Z, T40.X, literal.x, 9317; EG-NEXT: LSHR 
T9.W, T40.X, literal.y, 9318; EG-NEXT: LSHR * T10.W, T40.X, literal.z, 9319; EG-NEXT: 31(4.344025e-44), 27(3.783506e-44) 9320; EG-NEXT: 30(4.203895e-44), 0(0.000000e+00) 9321; EG-NEXT: BFE_INT T58.X, PS, 0.0, 1, 9322; EG-NEXT: MOV T57.Y, PV.X, 9323; EG-NEXT: BFE_INT T59.Z, PV.W, 0.0, 1, 9324; EG-NEXT: LSHR T9.W, T40.X, literal.x, 9325; EG-NEXT: LSHR * T10.W, T40.X, literal.y, 9326; EG-NEXT: 23(3.222986e-44), 26(3.643376e-44) 9327; EG-NEXT: BFE_INT T59.X, PS, 0.0, 1, 9328; EG-NEXT: MOV T58.Y, PV.X, 9329; EG-NEXT: BFE_INT T60.Z, PV.W, 0.0, 1, 9330; EG-NEXT: LSHR T9.W, T40.X, literal.x, 9331; EG-NEXT: LSHR * T10.W, T40.X, literal.y, 9332; EG-NEXT: 19(2.662467e-44), 22(3.082857e-44) 9333; EG-NEXT: BFE_INT T60.X, PS, 0.0, 1, 9334; EG-NEXT: MOV T59.Y, PV.X, 9335; EG-NEXT: BFE_INT T61.Z, PV.W, 0.0, 1, 9336; EG-NEXT: LSHR T9.W, T40.X, literal.x, 9337; EG-NEXT: LSHR * T10.W, T40.X, literal.y, 9338; EG-NEXT: 15(2.101948e-44), 18(2.522337e-44) 9339; EG-NEXT: BFE_INT T61.X, PS, 0.0, 1, 9340; EG-NEXT: MOV T60.Y, PV.X, 9341; EG-NEXT: BFE_INT T62.Z, PV.W, 0.0, 1, 9342; EG-NEXT: LSHR T9.W, T40.X, literal.x, 9343; EG-NEXT: LSHR * T10.W, T40.X, literal.y, 9344; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44) 9345; EG-NEXT: BFE_INT T62.X, PS, 0.0, 1, 9346; EG-NEXT: MOV T61.Y, PV.X, 9347; EG-NEXT: BFE_INT T63.Z, PV.W, 0.0, 1, 9348; EG-NEXT: LSHR T9.W, T40.X, literal.x, 9349; EG-NEXT: LSHR * T10.W, T40.X, literal.y, 9350; EG-NEXT: 7(9.809089e-45), 10(1.401298e-44) 9351; EG-NEXT: BFE_INT T63.X, PS, 0.0, 1, 9352; EG-NEXT: MOV T62.Y, PV.X, 9353; EG-NEXT: BFE_INT T64.Z, PV.W, 0.0, 1, 9354; EG-NEXT: LSHR * T9.W, T40.X, literal.x, 9355; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 9356; EG-NEXT: ALU clause starting at 253: 9357; EG-NEXT: LSHR * T10.W, T40.X, literal.x, 9358; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00) 9359; EG-NEXT: BFE_INT T64.X, PV.W, 0.0, 1, 9360; EG-NEXT: MOV T63.Y, T63.X, 9361; EG-NEXT: BFE_INT T65.Z, T9.W, 0.0, 1, 9362; EG-NEXT: LSHR T9.W, T40.X, 1, BS:VEC_120/SCL_212 9363; 
EG-NEXT: LSHR * T10.W, T40.X, literal.x, 9364; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9365; EG-NEXT: BFE_INT T65.X, PS, 0.0, 1, 9366; EG-NEXT: MOV T64.Y, PV.X, 9367; EG-NEXT: BFE_INT T49.Z, PV.W, 0.0, 1, 9368; EG-NEXT: LSHR T9.W, T40.X, literal.x, 9369; EG-NEXT: LSHR * T10.W, T40.X, literal.y, 9370; EG-NEXT: 12(1.681558e-44), 5(7.006492e-45) 9371; EG-NEXT: BFE_INT T66.X, T40.Y, 0.0, 1, 9372; EG-NEXT: MOV T65.Y, PV.X, 9373; EG-NEXT: BFE_INT T67.Z, PS, 0.0, 1, 9374; EG-NEXT: LSHR T10.W, T40.X, literal.x, 9375; EG-NEXT: LSHR * T11.W, T40.X, literal.y, 9376; EG-NEXT: 9(1.261169e-44), 4(5.605194e-45) 9377; EG-NEXT: BFE_INT T67.X, PS, 0.0, 1, 9378; EG-NEXT: MOV T49.Y, T49.X, 9379; EG-NEXT: BFE_INT T40.Z, PV.W, 0.0, 1, 9380; EG-NEXT: LSHR T10.W, T40.X, literal.x, BS:VEC_120/SCL_212 9381; EG-NEXT: LSHR * T11.W, T40.X, literal.y, 9382; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44) 9383; EG-NEXT: BFE_INT T40.X, PS, 0.0, 1, 9384; EG-NEXT: MOV T67.Y, PV.X, 9385; EG-NEXT: BFE_INT T68.Z, PV.W, 0.0, 1, 9386; EG-NEXT: MOV T49.W, T49.Z, 9387; EG-NEXT: MOV * T65.W, T65.Z, 9388; EG-NEXT: BFE_INT T68.X, T9.W, 0.0, 1, 9389; EG-NEXT: MOV T40.Y, PV.X, 9390; EG-NEXT: BFE_INT T69.Z, T8.W, 0.0, 1, BS:VEC_120/SCL_212 9391; EG-NEXT: MOV T67.W, T67.Z, 9392; EG-NEXT: MOV * T64.W, T64.Z, 9393; EG-NEXT: BFE_INT T69.X, T7.Y, 0.0, 1, 9394; EG-NEXT: MOV T68.Y, PV.X, 9395; EG-NEXT: BFE_INT T70.Z, T6.Y, 0.0, 1, BS:VEC_120/SCL_212 9396; EG-NEXT: MOV T40.W, T40.Z, 9397; EG-NEXT: MOV * T63.W, T63.Z, 9398; EG-NEXT: BFE_INT T70.X, T7.W, 0.0, 1, 9399; EG-NEXT: MOV T69.Y, PV.X, 9400; EG-NEXT: BFE_INT T71.Z, T6.W, 0.0, 1, BS:VEC_120/SCL_212 9401; EG-NEXT: MOV T68.W, T68.Z, 9402; EG-NEXT: MOV * T62.W, T62.Z, 9403; EG-NEXT: BFE_INT T71.X, T6.Z, 0.0, 1, 9404; EG-NEXT: MOV T70.Y, PV.X, 9405; EG-NEXT: BFE_INT T72.Z, T5.Y, 0.0, 1, 9406; EG-NEXT: MOV T69.W, T69.Z, BS:VEC_120/SCL_212 9407; EG-NEXT: MOV * T61.W, T61.Z, 9408; EG-NEXT: BFE_INT T72.X, T5.W, 0.0, 1, 9409; EG-NEXT: MOV T71.Y, PV.X, 9410; EG-NEXT: BFE_INT 
T66.Z, T5.Z, 0.0, 1, 9411; EG-NEXT: MOV T70.W, T70.Z, BS:VEC_120/SCL_212 9412; EG-NEXT: MOV * T60.W, T60.Z, 9413; EG-NEXT: LSHR T73.X, T4.Y, literal.x, 9414; EG-NEXT: MOV T72.Y, PV.X, 9415; EG-NEXT: BFE_INT T74.Z, T4.W, 0.0, 1, 9416; EG-NEXT: MOV T71.W, T71.Z, 9417; EG-NEXT: MOV * T59.W, T59.Z, 9418; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9419; EG-NEXT: BFE_INT T74.X, T4.Z, 0.0, 1, 9420; EG-NEXT: MOV T66.Y, T66.X, 9421; EG-NEXT: BFE_INT T75.Z, T3.Y, 0.0, 1, 9422; EG-NEXT: MOV T72.W, T72.Z, BS:VEC_120/SCL_212 9423; EG-NEXT: MOV * T58.W, T58.Z, 9424; EG-NEXT: BFE_INT T75.X, T3.W, 0.0, 1, 9425; EG-NEXT: MOV T74.Y, PV.X, 9426; EG-NEXT: BFE_INT T76.Z, T3.Z, 0.0, 1, 9427; EG-NEXT: MOV T66.W, T66.Z, BS:VEC_120/SCL_212 9428; EG-NEXT: MOV * T57.W, T57.Z, 9429; EG-NEXT: BFE_INT T76.X, T2.Y, 0.0, 1, 9430; EG-NEXT: MOV T75.Y, PV.X, 9431; EG-NEXT: BFE_INT T77.Z, T2.W, 0.0, 1, 9432; EG-NEXT: MOV T74.W, T74.Z, 9433; EG-NEXT: MOV * T56.W, T56.Z, 9434; EG-NEXT: BFE_INT T77.X, T2.Z, 0.0, 1, 9435; EG-NEXT: MOV T76.Y, PV.X, 9436; EG-NEXT: BFE_INT T78.Z, T1.Y, 0.0, 1, 9437; EG-NEXT: MOV T75.W, T75.Z, BS:VEC_120/SCL_212 9438; EG-NEXT: MOV * T55.W, T55.Z, 9439; EG-NEXT: BFE_INT T78.X, T1.W, 0.0, 1, 9440; EG-NEXT: MOV T77.Y, PV.X, 9441; EG-NEXT: BFE_INT T79.Z, T1.Z, 0.0, 1, 9442; EG-NEXT: MOV T76.W, T76.Z, BS:VEC_120/SCL_212 9443; EG-NEXT: MOV * T54.W, T54.Z, 9444; EG-NEXT: BFE_INT T79.X, T0.Y, 0.0, 1, 9445; EG-NEXT: MOV T78.Y, PV.X, 9446; EG-NEXT: BFE_INT T80.Z, T0.W, 0.0, 1, 9447; EG-NEXT: MOV T77.W, T77.Z, 9448; EG-NEXT: MOV * T53.W, T53.Z, 9449; EG-NEXT: BFE_INT T80.X, T0.Z, 0.0, 1, 9450; EG-NEXT: MOV T79.Y, PV.X, 9451; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 9452; EG-NEXT: MOV T78.W, T78.Z, BS:VEC_120/SCL_212 9453; EG-NEXT: MOV * T52.W, T52.Z, 9454; EG-NEXT: 480(6.726233e-43), 0(0.000000e+00) 9455; EG-NEXT: LSHR T81.X, PV.Z, literal.x, 9456; EG-NEXT: MOV T80.Y, PV.X, 9457; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 9458; EG-NEXT: MOV T79.W, T79.Z, 9459; EG-NEXT: MOV * T51.W, 
T51.Z, 9460; EG-NEXT: 2(2.802597e-45), 496(6.950440e-43) 9461; EG-NEXT: LSHR T82.X, PV.Z, literal.x, 9462; EG-NEXT: MOV T80.W, T80.Z, 9463; EG-NEXT: MOV * T50.W, T50.Z, 9464; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 9465; 9466; GFX12-LABEL: constant_sextload_v64i1_to_v64i64: 9467; GFX12: ; %bb.0: 9468; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 9469; GFX12-NEXT: s_wait_kmcnt 0x0 9470; GFX12-NEXT: s_load_b64 s[12:13], s[2:3], 0x0 9471; GFX12-NEXT: s_wait_kmcnt 0x0 9472; GFX12-NEXT: s_lshr_b32 s96, s13, 30 9473; GFX12-NEXT: s_lshr_b32 s98, s13, 31 9474; GFX12-NEXT: s_lshr_b32 s92, s13, 28 9475; GFX12-NEXT: s_lshr_b32 s94, s13, 29 9476; GFX12-NEXT: s_lshr_b32 s78, s13, 26 9477; GFX12-NEXT: s_lshr_b32 s88, s13, 27 9478; GFX12-NEXT: s_wait_alu 0xfffe 9479; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000 9480; GFX12-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000 9481; GFX12-NEXT: s_lshr_b32 s66, s13, 24 9482; GFX12-NEXT: s_lshr_b32 s74, s13, 25 9483; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000 9484; GFX12-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000 9485; GFX12-NEXT: s_wait_alu 0xfffe 9486; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s96 9487; GFX12-NEXT: s_lshr_b32 s56, s13, 22 9488; GFX12-NEXT: s_lshr_b32 s62, s13, 23 9489; GFX12-NEXT: v_dual_mov_b32 v2, s97 :: v_dual_mov_b32 v3, s100 9490; GFX12-NEXT: v_dual_mov_b32 v4, s101 :: v_dual_mov_b32 v5, s92 9491; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000 9492; GFX12-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000 9493; GFX12-NEXT: s_lshr_b32 s44, s13, 20 9494; GFX12-NEXT: s_lshr_b32 s52, s13, 21 9495; GFX12-NEXT: s_lshr_b32 s30, s13, 18 9496; GFX12-NEXT: s_lshr_b32 s40, s13, 19 9497; GFX12-NEXT: s_lshr_b32 s18, s13, 16 9498; GFX12-NEXT: s_lshr_b32 s26, s13, 17 9499; GFX12-NEXT: s_lshr_b32 s2, s13, 14 9500; GFX12-NEXT: s_lshr_b32 s4, s13, 15 9501; GFX12-NEXT: v_dual_mov_b32 v6, s93 :: v_dual_mov_b32 v7, s94 9502; GFX12-NEXT: s_wait_alu 0xfffe 9503; GFX12-NEXT: v_dual_mov_b32 v8, s95 :: v_dual_mov_b32 
v9, s78 9504; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 9505; GFX12-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000 9506; GFX12-NEXT: s_lshr_b32 s6, s13, 12 9507; GFX12-NEXT: s_lshr_b32 s8, s13, 13 9508; GFX12-NEXT: v_dual_mov_b32 v10, s79 :: v_dual_mov_b32 v11, s88 9509; GFX12-NEXT: s_wait_alu 0xfffe 9510; GFX12-NEXT: v_dual_mov_b32 v12, s89 :: v_dual_mov_b32 v13, s66 9511; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 9512; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 9513; GFX12-NEXT: s_lshr_b32 s10, s13, 10 9514; GFX12-NEXT: s_lshr_b32 s14, s13, 11 9515; GFX12-NEXT: v_dual_mov_b32 v14, s67 :: v_dual_mov_b32 v15, s74 9516; GFX12-NEXT: s_wait_alu 0xfffe 9517; GFX12-NEXT: v_dual_mov_b32 v16, s75 :: v_dual_mov_b32 v17, s56 9518; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 9519; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 9520; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 9521; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 9522; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 9523; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 9524; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 9525; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 9526; GFX12-NEXT: s_lshr_b32 s16, s13, 8 9527; GFX12-NEXT: s_lshr_b32 s20, s13, 9 9528; GFX12-NEXT: v_dual_mov_b32 v18, s57 :: v_dual_mov_b32 v19, s62 9529; GFX12-NEXT: s_wait_alu 0xfffe 9530; GFX12-NEXT: v_dual_mov_b32 v20, s63 :: v_dual_mov_b32 v21, s44 9531; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 9532; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 9533; GFX12-NEXT: s_lshr_b32 s22, s13, 6 9534; GFX12-NEXT: s_lshr_b32 s24, s13, 7 9535; GFX12-NEXT: v_dual_mov_b32 v22, s45 :: v_dual_mov_b32 v23, s52 9536; GFX12-NEXT: v_dual_mov_b32 v24, s53 :: v_dual_mov_b32 v25, s30 9537; GFX12-NEXT: v_dual_mov_b32 v26, s31 :: v_dual_mov_b32 v27, s40 9538; GFX12-NEXT: v_dual_mov_b32 v28, s41 :: v_dual_mov_b32 v29, s18 9539; GFX12-NEXT: v_dual_mov_b32 v30, s19 :: v_dual_mov_b32 v31, s26 9540; GFX12-NEXT: 
v_mov_b32_e32 v32, s27 9541; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 9542; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 9543; GFX12-NEXT: s_clause 0x7 9544; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:496 9545; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:480 9546; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:464 9547; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:448 9548; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:432 9549; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:416 9550; GFX12-NEXT: global_store_b128 v0, v[25:28], s[0:1] offset:400 9551; GFX12-NEXT: global_store_b128 v0, v[29:32], s[0:1] offset:384 9552; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3 9553; GFX12-NEXT: v_dual_mov_b32 v3, s4 :: v_dual_mov_b32 v4, s5 9554; GFX12-NEXT: v_mov_b32_e32 v5, s6 9555; GFX12-NEXT: s_lshr_b32 s28, s13, 4 9556; GFX12-NEXT: s_lshr_b32 s34, s13, 5 9557; GFX12-NEXT: s_lshr_b32 s36, s13, 2 9558; GFX12-NEXT: s_lshr_b32 s38, s13, 3 9559; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 9560; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 9561; GFX12-NEXT: v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v7, s8 9562; GFX12-NEXT: v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v9, s10 9563; GFX12-NEXT: s_lshr_b32 s42, s13, 1 9564; GFX12-NEXT: s_mov_b32 s46, s13 9565; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 9566; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 9567; GFX12-NEXT: v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v11, s14 9568; GFX12-NEXT: v_dual_mov_b32 v12, s15 :: v_dual_mov_b32 v13, s16 9569; GFX12-NEXT: s_lshr_b32 s48, s12, 30 9570; GFX12-NEXT: s_lshr_b32 s50, s12, 31 9571; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 9572; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 9573; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 9574; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 9575; GFX12-NEXT: v_dual_mov_b32 v14, s17 :: v_dual_mov_b32 v15, s20 9576; 
GFX12-NEXT: v_dual_mov_b32 v16, s21 :: v_dual_mov_b32 v17, s22 9577; GFX12-NEXT: s_lshr_b32 s54, s12, 28 9578; GFX12-NEXT: s_lshr_b32 s58, s12, 29 9579; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 9580; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 9581; GFX12-NEXT: v_dual_mov_b32 v18, s23 :: v_dual_mov_b32 v19, s24 9582; GFX12-NEXT: v_dual_mov_b32 v20, s25 :: v_dual_mov_b32 v21, s28 9583; GFX12-NEXT: s_lshr_b32 s60, s12, 26 9584; GFX12-NEXT: s_lshr_b32 s64, s12, 27 9585; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 9586; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 9587; GFX12-NEXT: v_dual_mov_b32 v22, s29 :: v_dual_mov_b32 v23, s34 9588; GFX12-NEXT: v_mov_b32_e32 v24, s35 9589; GFX12-NEXT: s_clause 0x5 9590; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:368 9591; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:352 9592; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:336 9593; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:320 9594; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:304 9595; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:288 9596; GFX12-NEXT: v_dual_mov_b32 v1, s36 :: v_dual_mov_b32 v2, s37 9597; GFX12-NEXT: v_dual_mov_b32 v3, s38 :: v_dual_mov_b32 v4, s39 9598; GFX12-NEXT: v_mov_b32_e32 v5, s46 9599; GFX12-NEXT: s_lshr_b32 s68, s12, 24 9600; GFX12-NEXT: s_lshr_b32 s70, s12, 25 9601; GFX12-NEXT: s_lshr_b32 s72, s12, 22 9602; GFX12-NEXT: s_lshr_b32 s76, s12, 23 9603; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 9604; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 9605; GFX12-NEXT: v_dual_mov_b32 v6, s47 :: v_dual_mov_b32 v7, s42 9606; GFX12-NEXT: v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v9, s48 9607; GFX12-NEXT: s_lshr_b32 s80, s12, 20 9608; GFX12-NEXT: s_lshr_b32 s82, s12, 21 9609; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 9610; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 9611; GFX12-NEXT: v_dual_mov_b32 v10, s49 :: v_dual_mov_b32 v11, s50 9612; 
GFX12-NEXT: v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v13, s54 9613; GFX12-NEXT: s_lshr_b32 s84, s12, 18 9614; GFX12-NEXT: s_lshr_b32 s86, s12, 19 9615; GFX12-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000 9616; GFX12-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000 9617; GFX12-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000 9618; GFX12-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000 9619; GFX12-NEXT: v_dual_mov_b32 v14, s55 :: v_dual_mov_b32 v15, s58 9620; GFX12-NEXT: v_dual_mov_b32 v16, s59 :: v_dual_mov_b32 v17, s60 9621; GFX12-NEXT: s_lshr_b32 s90, s12, 16 9622; GFX12-NEXT: s_lshr_b32 s98, s12, 17 9623; GFX12-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000 9624; GFX12-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000 9625; GFX12-NEXT: v_dual_mov_b32 v18, s61 :: v_dual_mov_b32 v19, s64 9626; GFX12-NEXT: v_dual_mov_b32 v20, s65 :: v_dual_mov_b32 v21, s68 9627; GFX12-NEXT: s_lshr_b32 s96, s12, 14 9628; GFX12-NEXT: s_lshr_b32 s100, s12, 15 9629; GFX12-NEXT: s_lshr_b32 s94, s12, 13 9630; GFX12-NEXT: s_lshr_b32 s88, s12, 11 9631; GFX12-NEXT: s_lshr_b32 s74, s12, 9 9632; GFX12-NEXT: s_lshr_b32 s62, s12, 7 9633; GFX12-NEXT: s_lshr_b32 s52, s12, 5 9634; GFX12-NEXT: s_lshr_b32 s40, s12, 3 9635; GFX12-NEXT: s_lshr_b32 s26, s12, 1 9636; GFX12-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000 9637; GFX12-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000 9638; GFX12-NEXT: v_dual_mov_b32 v22, s69 :: v_dual_mov_b32 v23, s70 9639; GFX12-NEXT: v_mov_b32_e32 v24, s71 9640; GFX12-NEXT: s_clause 0x5 9641; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:272 9642; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:256 9643; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:240 9644; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:224 9645; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:208 9646; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:192 9647; GFX12-NEXT: v_dual_mov_b32 v1, s72 :: v_dual_mov_b32 v2, s73 9648; GFX12-NEXT: v_dual_mov_b32 v3, s76 :: v_dual_mov_b32 
v4, s77 9649; GFX12-NEXT: v_mov_b32_e32 v5, s80 9650; GFX12-NEXT: s_lshr_b32 s92, s12, 12 9651; GFX12-NEXT: s_lshr_b32 s78, s12, 10 9652; GFX12-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000 9653; GFX12-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000 9654; GFX12-NEXT: v_dual_mov_b32 v6, s81 :: v_dual_mov_b32 v7, s82 9655; GFX12-NEXT: v_dual_mov_b32 v8, s83 :: v_dual_mov_b32 v9, s84 9656; GFX12-NEXT: s_lshr_b32 s66, s12, 8 9657; GFX12-NEXT: s_lshr_b32 s56, s12, 6 9658; GFX12-NEXT: s_lshr_b32 s44, s12, 4 9659; GFX12-NEXT: s_lshr_b32 s30, s12, 2 9660; GFX12-NEXT: s_bfe_i64 s[18:19], s[12:13], 0x10000 9661; GFX12-NEXT: s_bfe_i64 s[12:13], s[26:27], 0x10000 9662; GFX12-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000 9663; GFX12-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000 9664; GFX12-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000 9665; GFX12-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000 9666; GFX12-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000 9667; GFX12-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000 9668; GFX12-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000 9669; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000 9670; GFX12-NEXT: v_dual_mov_b32 v10, s85 :: v_dual_mov_b32 v11, s86 9671; GFX12-NEXT: v_dual_mov_b32 v12, s87 :: v_dual_mov_b32 v13, s90 9672; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000 9673; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000 9674; GFX12-NEXT: v_dual_mov_b32 v14, s91 :: v_dual_mov_b32 v15, s98 9675; GFX12-NEXT: s_wait_alu 0xfffe 9676; GFX12-NEXT: v_dual_mov_b32 v16, s99 :: v_dual_mov_b32 v17, s96 9677; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 9678; GFX12-NEXT: v_dual_mov_b32 v18, s97 :: v_dual_mov_b32 v19, s94 9679; GFX12-NEXT: v_dual_mov_b32 v20, s95 :: v_dual_mov_b32 v21, s92 9680; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 9681; GFX12-NEXT: v_dual_mov_b32 v22, s93 :: v_dual_mov_b32 v23, s88 9682; GFX12-NEXT: v_mov_b32_e32 v24, s89 9683; GFX12-NEXT: s_clause 0x5 9684; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:176 9685; 
GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:160 9686; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:144 9687; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:128 9688; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:112 9689; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:96 9690; GFX12-NEXT: v_dual_mov_b32 v1, s78 :: v_dual_mov_b32 v2, s79 9691; GFX12-NEXT: v_dual_mov_b32 v3, s74 :: v_dual_mov_b32 v4, s75 9692; GFX12-NEXT: s_wait_alu 0xfffe 9693; GFX12-NEXT: v_mov_b32_e32 v5, s66 9694; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 9695; GFX12-NEXT: v_dual_mov_b32 v6, s67 :: v_dual_mov_b32 v7, s62 9696; GFX12-NEXT: v_dual_mov_b32 v8, s63 :: v_dual_mov_b32 v9, s56 9697; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 9698; GFX12-NEXT: v_dual_mov_b32 v10, s57 :: v_dual_mov_b32 v11, s52 9699; GFX12-NEXT: s_wait_alu 0xfffe 9700; GFX12-NEXT: v_dual_mov_b32 v12, s53 :: v_dual_mov_b32 v13, s44 9701; GFX12-NEXT: v_dual_mov_b32 v14, s45 :: v_dual_mov_b32 v15, s40 9702; GFX12-NEXT: v_dual_mov_b32 v16, s41 :: v_dual_mov_b32 v17, s30 9703; GFX12-NEXT: v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v19, s26 9704; GFX12-NEXT: v_dual_mov_b32 v20, s27 :: v_dual_mov_b32 v21, s18 9705; GFX12-NEXT: v_dual_mov_b32 v22, s19 :: v_dual_mov_b32 v23, s12 9706; GFX12-NEXT: v_mov_b32_e32 v24, s13 9707; GFX12-NEXT: s_clause 0x5 9708; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:80 9709; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:64 9710; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:48 9711; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:32 9712; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:16 9713; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] 9714; GFX12-NEXT: s_endpgm 9715 %load = load <64 x i1>, ptr addrspace(4) %in 9716 %ext = sext <64 x i1> %load to <64 x i64> 9717 store <64 x i64> %ext, ptr addrspace(1) %out 9718 ret void 9719} 9720 9721attributes #0 = { nounwind } 9722