; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX12 %s

; Each kernel below loads a value through the addrspace(4) pointer %in and
; stores it through the addrspace(1) pointer %out. The expectation blocks are
; regenerated by the script named on the first line; do not edit them by hand.

; Loads a single i64 from %in and stores it to %out.
define amdgpu_kernel void @constant_load_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-LABEL: constant_load_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: constant_load_i64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: constant_load_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_mov_b32_e32 v3, s3
; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_i64:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT:    s_endpgm
  %ld = load i64, ptr addrspace(4) %in
  store i64 %ld, ptr addrspace(1) %out
  ret void
}

; Loads a <2 x i64> (16 bytes) from %in and stores it to %out.
define amdgpu_kernel void @constant_load_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-LABEL: constant_load_v2i64:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    v_mov_b32_e32 v2, s6
; GFX6-NEXT:    v_mov_b32_e32 v3, s7
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: constant_load_v2i64:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX7-NEXT:    v_mov_b32_e32 v4, s0
; GFX7-NEXT:    v_mov_b32_e32 v5, s1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    v_mov_b32_e32 v3, s7
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: constant_load_v2i64:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX8-NEXT:    v_mov_b32_e32 v4, s0
; GFX8-NEXT:    v_mov_b32_e32 v5, s1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    v_mov_b32_e32 v2, s6
; GFX8-NEXT:    v_mov_b32_e32 v3, s7
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v2i64:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v2i64:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT:    v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <2 x i64>, ptr addrspace(4) %in
  store <2 x i64> %ld, ptr addrspace(1) %out
  ret void
}

; Loads a <3 x i64> from %in and stores it to %out. The GCN expectations below
; show the 24-byte value handled as a 128-bit piece at offset 0 plus a 64-bit
; piece at byte offset 16.
define amdgpu_kernel void @constant_load_v3i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-LABEL: constant_load_v3i64:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[2:3], 0x4
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s8
; GFX6-NEXT:    v_mov_b32_e32 v1, s9
; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    v_mov_b32_e32 v2, s6
; GFX6-NEXT:    v_mov_b32_e32 v3, s7
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: constant_load_v3i64:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[2:3], 0x4
; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX7-NEXT:    s_add_u32 s2, s0, 16
; GFX7-NEXT:    s_addc_u32 s3, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, s3
; GFX7-NEXT:    v_mov_b32_e32 v3, s2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v5, s8
; GFX7-NEXT:    v_mov_b32_e32 v6, s9
; GFX7-NEXT:    flat_store_dwordx2 v[3:4], v[5:6]
; GFX7-NEXT:    v_mov_b32_e32 v5, s1
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    v_mov_b32_e32 v3, s7
; GFX7-NEXT:    v_mov_b32_e32 v4, s0
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: constant_load_v3i64:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx2 s[8:9], s[2:3], 0x10
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX8-NEXT:    s_add_u32 s2, s0, 16
; GFX8-NEXT:    s_addc_u32 s3, s1, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s3
; GFX8-NEXT:    v_mov_b32_e32 v3, s2
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v5, s8
; GFX8-NEXT:    v_mov_b32_e32 v6, s9
; GFX8-NEXT:    flat_store_dwordx2 v[3:4], v[5:6]
; GFX8-NEXT:    v_mov_b32_e32 v5, s1
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    v_mov_b32_e32 v2, s6
; GFX8-NEXT:    v_mov_b32_e32 v3, s7
; GFX8-NEXT:    v_mov_b32_e32 v4, s0
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v3i64:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 0, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 16, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:    LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 15:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T1.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i64:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    s_load_b64 s[8:9], s[2:3], 0x10
; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v5, s9
; GFX12-NEXT:    v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v1, s5
; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT:    v_mov_b32_e32 v2, s6
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    global_store_b64 v6, v[4:5], s[0:1] offset:16
; GFX12-NEXT:    global_store_b128 v6, v[0:3], s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <3 x i64>, ptr addrspace(4) %in
  store <3 x i64> %ld, ptr addrspace(1) %out
  ret void
}

; Loads a <4 x i64> (32 bytes) from %in and stores it to %out.
define amdgpu_kernel void @constant_load_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-LABEL: constant_load_v4i64:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    v_mov_b32_e32 v2, s6
; GFX6-NEXT:    v_mov_b32_e32 v3, s7
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: constant_load_v4i64:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
; GFX7-NEXT:    s_add_u32 s10, s8, 16
; GFX7-NEXT:    s_addc_u32 s11, s9, 0
; GFX7-NEXT:    v_mov_b32_e32 v6, s10
; GFX7-NEXT:    v_mov_b32_e32 v7, s11
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    v_mov_b32_e32 v3, s7
; GFX7-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
; GFX7-NEXT:    v_mov_b32_e32 v4, s0
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    v_mov_b32_e32 v5, s1
; GFX7-NEXT:    v_mov_b32_e32 v6, s2
; GFX7-NEXT:    v_mov_b32_e32 v7, s3
; GFX7-NEXT:    v_mov_b32_e32 v1, s9
; GFX7-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: constant_load_v4i64:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
; GFX8-NEXT:    s_add_u32 s10, s8, 16
; GFX8-NEXT:    s_addc_u32 s11, s9, 0
; GFX8-NEXT:    v_mov_b32_e32 v6, s10
; GFX8-NEXT:    v_mov_b32_e32 v7, s11
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    v_mov_b32_e32 v2, s6
; GFX8-NEXT:    v_mov_b32_e32 v3, s7
; GFX8-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
; GFX8-NEXT:    v_mov_b32_e32 v4, s0
; GFX8-NEXT:    v_mov_b32_e32 v0, s8
; GFX8-NEXT:    v_mov_b32_e32 v5, s1
; GFX8-NEXT:    v_mov_b32_e32 v6, s2
; GFX8-NEXT:    v_mov_b32_e32 v7, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s9
; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX8-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v4i64:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 3, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 1, @17, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 17:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v4i64:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s5
; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v5, s1
; GFX12-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v7, s3
; GFX12-NEXT:    v_mov_b32_e32 v6, s2
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[8:9] offset:16
; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[8:9]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <4 x i64>, ptr addrspace(4) %in
  store <4 x i64> %ld, ptr addrspace(1) %out
  ret void
}

; Loads a <8 x i64> (64 bytes) from %in and stores it to %out.
define amdgpu_kernel void @constant_load_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-LABEL: constant_load_v8i64:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
; GFX6-NEXT:    s_mov_b32 s19, 0xf000
; GFX6-NEXT:    s_mov_b32 s18, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s12
; GFX6-NEXT:    v_mov_b32_e32 v1, s13
; GFX6-NEXT:    v_mov_b32_e32 v2, s14
; GFX6-NEXT:    v_mov_b32_e32 v3, s15
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s8
; GFX6-NEXT:    v_mov_b32_e32 v1, s9
; GFX6-NEXT:    v_mov_b32_e32 v2, s10
; GFX6-NEXT:    v_mov_b32_e32 v3, s11
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    v_mov_b32_e32 v2, s6
; GFX6-NEXT:    v_mov_b32_e32 v3, s7
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: constant_load_v8i64:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
; GFX7-NEXT:    s_add_u32 s18, s16, 48
; GFX7-NEXT:    s_addc_u32 s19, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v6, s18
; GFX7-NEXT:    v_mov_b32_e32 v7, s19
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s12
; GFX7-NEXT:    v_mov_b32_e32 v1, s13
; GFX7-NEXT:    v_mov_b32_e32 v2, s14
; GFX7-NEXT:    v_mov_b32_e32 v3, s15
; GFX7-NEXT:    v_mov_b32_e32 v4, s8
; GFX7-NEXT:    s_add_u32 s8, s16, 32
; GFX7-NEXT:    v_mov_b32_e32 v5, s9
; GFX7-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
; GFX7-NEXT:    s_addc_u32 s9, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    v_mov_b32_e32 v6, s10
; GFX7-NEXT:    v_mov_b32_e32 v7, s11
; GFX7-NEXT:    v_mov_b32_e32 v1, s9
; GFX7-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_add_u32 s4, s16, 16
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_addc_u32 s5, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    v_mov_b32_e32 v3, s7
; GFX7-NEXT:    v_mov_b32_e32 v5, s5
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    v_mov_b32_e32 v4, s16
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    v_mov_b32_e32 v5, s17
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: constant_load_v8i64:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
; GFX8-NEXT:    s_add_u32 s18, s16, 48
; GFX8-NEXT:    s_addc_u32 s19, s17, 0
; GFX8-NEXT:    v_mov_b32_e32 v6, s18
; GFX8-NEXT:    v_mov_b32_e32 v7, s19
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s12
; GFX8-NEXT:    v_mov_b32_e32 v1, s13
; GFX8-NEXT:    v_mov_b32_e32 v2, s14
; GFX8-NEXT:    v_mov_b32_e32 v3, s15
; GFX8-NEXT:    v_mov_b32_e32 v4, s8
; GFX8-NEXT:    s_add_u32 s8, s16, 32
; GFX8-NEXT:    v_mov_b32_e32 v5, s9
; GFX8-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
; GFX8-NEXT:    s_addc_u32 s9, s17, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s8
; GFX8-NEXT:    v_mov_b32_e32 v6, s10
; GFX8-NEXT:    v_mov_b32_e32 v7, s11
; GFX8-NEXT:    v_mov_b32_e32 v1, s9
; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    s_add_u32 s4, s16, 16
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    s_addc_u32 s5, s17, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s4
; GFX8-NEXT:    v_mov_b32_e32 v2, s6
; GFX8-NEXT:    v_mov_b32_e32 v3, s7
; GFX8-NEXT:    v_mov_b32_e32 v5, s5
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    v_mov_b32_e32 v4, s16
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_mov_b32_e32 v3, s3
; GFX8-NEXT:    v_mov_b32_e32 v5, s17
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v8i64:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @22, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @14
; EG-NEXT:    ALU 3, @23, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @16
; EG-NEXT:    ALU 3, @27, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @18
; EG-NEXT:    ALU 3, @31, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @20
; EG-NEXT:    ALU 1, @35, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 14:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 48, #1
; EG-NEXT:    Fetch clause starting at 16:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 32, #1
; EG-NEXT:    Fetch clause starting at 18:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    Fetch clause starting at 20:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 22:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 23:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 27:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 31:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 35:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v8i64:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s13
; GFX12-NEXT:    v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v3, s15
; GFX12-NEXT:    v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v5, s9
; GFX12-NEXT:    v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s11
; GFX12-NEXT:    v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v9, s5
; GFX12-NEXT:    v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v11, s7
; GFX12-NEXT:    v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v13, s1
; GFX12-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s3
; GFX12-NEXT:    v_mov_b32_e32 v14, s2
; GFX12-NEXT:    s_clause 0x3
; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[16:17] offset:48
; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[16:17] offset:32
; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[16:17] offset:16
; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[16:17]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <8 x i64>, ptr addrspace(4) %in
  store <8 x i64> %ld, ptr addrspace(1) %out
  ret void
}

; Loads a <16 x i64> (128 bytes) from %in and stores it to %out. The GCN
; expectations below show the load split into two 16-dword scalar loads.
define amdgpu_kernel void @constant_load_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-LABEL: constant_load_v16i64:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dwordx16 s[16:31], s[2:3], 0x10
; GFX6-NEXT:    s_mov_b32 s39, 0xf000
; GFX6-NEXT:    s_mov_b32 s38, -1
; GFX6-NEXT:    s_mov_b32 s36, s0
; GFX6-NEXT:    s_mov_b32 s37, s1
; GFX6-NEXT:    s_load_dwordx16 s[0:15], s[2:3], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s28
; GFX6-NEXT:    v_mov_b32_e32 v1, s29
; GFX6-NEXT:    v_mov_b32_e32 v2, s30
; GFX6-NEXT:    v_mov_b32_e32 v3, s31
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:112
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s24
; GFX6-NEXT:    v_mov_b32_e32 v1, s25
; GFX6-NEXT:    v_mov_b32_e32 v2, s26
; GFX6-NEXT:    v_mov_b32_e32 v3, s27
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:96
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s20
; GFX6-NEXT:    v_mov_b32_e32 v1, s21
; GFX6-NEXT:    v_mov_b32_e32 v2, s22
; GFX6-NEXT:    v_mov_b32_e32 v3, s23
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:80
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s16
; GFX6-NEXT:    v_mov_b32_e32 v1, s17
; GFX6-NEXT:    v_mov_b32_e32 v2, s18
; GFX6-NEXT:    v_mov_b32_e32 v3, s19
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:64
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s12
; GFX6-NEXT:    v_mov_b32_e32 v1, s13
; GFX6-NEXT:    v_mov_b32_e32 v2, s14
; GFX6-NEXT:    v_mov_b32_e32 v3, s15
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:48
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s8
; GFX6-NEXT:    v_mov_b32_e32 v1, s9
; GFX6-NEXT:    v_mov_b32_e32 v2, s10
; GFX6-NEXT:    v_mov_b32_e32 v3, s11
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:32
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    v_mov_b32_e32 v2, s6
; GFX6-NEXT:    v_mov_b32_e32 v3, s7
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:16
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: constant_load_v16i64:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x10
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s12
; GFX7-NEXT:    v_mov_b32_e32 v1, s13
; GFX7-NEXT:    v_mov_b32_e32 v2, s14
; GFX7-NEXT:    v_mov_b32_e32 v3, s15
; GFX7-NEXT:    v_mov_b32_e32 v4, s8
; GFX7-NEXT:    v_mov_b32_e32 v5, s9
; GFX7-NEXT:    v_mov_b32_e32 v6, s10
; GFX7-NEXT:    v_mov_b32_e32 v7, s11
; GFX7-NEXT:    v_mov_b32_e32 v8, s4
; GFX7-NEXT:    v_mov_b32_e32 v9, s5
; GFX7-NEXT:    v_mov_b32_e32 v10, s6
; GFX7-NEXT:    v_mov_b32_e32 v11, s7
; GFX7-NEXT:    v_mov_b32_e32 v12, s0
; GFX7-NEXT:    v_mov_b32_e32 v13, s1
; GFX7-NEXT:    v_mov_b32_e32 v14, s2
; GFX7-NEXT:    v_mov_b32_e32 v15, s3
; GFX7-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
; GFX7-NEXT:    s_add_u32 s18, s16, 0x70
; GFX7-NEXT:    s_addc_u32 s19, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v16, s18
; GFX7-NEXT:    v_mov_b32_e32 v17, s19
; GFX7-NEXT:    s_add_u32 s18, s16, 0x60
; GFX7-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
; GFX7-NEXT:    s_addc_u32 s19, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s18
; GFX7-NEXT:    v_mov_b32_e32 v1, s19
; GFX7-NEXT:    s_add_u32 s18, s16, 0x50
; GFX7-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX7-NEXT:    s_addc_u32 s19, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s18
; GFX7-NEXT:    v_mov_b32_e32 v1, s19
; GFX7-NEXT:    s_add_u32 s18, s16, 64
; GFX7-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
; GFX7-NEXT:    s_addc_u32 s19, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s18
; GFX7-NEXT:    v_mov_b32_e32 v1, s19
; GFX7-NEXT:    flat_store_dwordx4 v[0:1], v[12:15]
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s12
; GFX7-NEXT:    s_add_u32 s12, s16, 48
; GFX7-NEXT:    v_mov_b32_e32 v1, s13
; GFX7-NEXT:    s_addc_u32 s13, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, s12
; GFX7-NEXT:    v_mov_b32_e32 v2, s14
; GFX7-NEXT:    v_mov_b32_e32 v3, s15
; GFX7-NEXT:    v_mov_b32_e32 v5, s13
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    s_nop 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    s_add_u32 s8, s16, 32
; GFX7-NEXT:    v_mov_b32_e32 v1, s9
; GFX7-NEXT:    s_addc_u32 s9, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, s8
; GFX7-NEXT:    v_mov_b32_e32 v2, s10
; GFX7-NEXT:    v_mov_b32_e32 v3, s11
; GFX7-NEXT:    v_mov_b32_e32 v5, s9
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    s_nop 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_add_u32 s4, s16, 16
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_addc_u32 s5, s17, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    v_mov_b32_e32 v3, s7
; GFX7-NEXT:    v_mov_b32_e32 v5, s5
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    v_mov_b32_e32 v4, s16
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    v_mov_b32_e32 v5, s17
; GFX7-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: constant_load_v16i64:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[36:39], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x40
; GFX8-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x0
; GFX8-NEXT:    s_add_u32 s34, s36, 0x70
; GFX8-NEXT:    s_addc_u32 s35, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v5, s34
; GFX8-NEXT:    v_mov_b32_e32 v6, s35
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s28
; GFX8-NEXT:    v_mov_b32_e32 v1, s29
; GFX8-NEXT:    v_mov_b32_e32 v2, s30
; GFX8-NEXT:    v_mov_b32_e32 v3, s31
; GFX8-NEXT:    v_mov_b32_e32 v4, s24
; GFX8-NEXT:    s_add_u32 s24, s36, 0x60
; GFX8-NEXT:    flat_store_dwordx4 v[5:6], v[0:3]
; GFX8-NEXT:    v_mov_b32_e32 v5, s25
; GFX8-NEXT:    s_addc_u32 s25, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s24
; GFX8-NEXT:    v_mov_b32_e32 v6, s26
; GFX8-NEXT:    v_mov_b32_e32 v7, s27
; GFX8-NEXT:    v_mov_b32_e32 v1, s25
; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
; GFX8-NEXT:    v_mov_b32_e32 v0, s20
; GFX8-NEXT:    s_add_u32 s20, s36, 0x50
; GFX8-NEXT:    v_mov_b32_e32 v1, s21
; GFX8-NEXT:    s_addc_u32 s21, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s20
; GFX8-NEXT:    v_mov_b32_e32 v2, s22
; GFX8-NEXT:    v_mov_b32_e32 v3, s23
; GFX8-NEXT:    v_mov_b32_e32 v5, s21
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_nop 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s16
; GFX8-NEXT:    s_add_u32 s16, s36, 64
; GFX8-NEXT:    v_mov_b32_e32 v1, s17
; GFX8-NEXT:    s_addc_u32 s17, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s16
; GFX8-NEXT:    v_mov_b32_e32 v2, s18
; GFX8-NEXT:    v_mov_b32_e32 v3, s19
; GFX8-NEXT:    v_mov_b32_e32 v5, s17
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_nop 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s12
; GFX8-NEXT:    s_add_u32 s12, s36, 48
; GFX8-NEXT:    v_mov_b32_e32 v1, s13
; GFX8-NEXT:    s_addc_u32 s13, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s12
; GFX8-NEXT:    v_mov_b32_e32 v2, s14
; GFX8-NEXT:    v_mov_b32_e32 v3, s15
; GFX8-NEXT:    v_mov_b32_e32 v5, s13
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_nop 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s8
; GFX8-NEXT:    s_add_u32 s8, s36, 32
; GFX8-NEXT:    v_mov_b32_e32 v1, s9
; GFX8-NEXT:    s_addc_u32 s9, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s8
; GFX8-NEXT:    v_mov_b32_e32 v2, s10
; GFX8-NEXT:    v_mov_b32_e32 v3, s11
; GFX8-NEXT:    v_mov_b32_e32 v5, s9
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_nop 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    s_add_u32 s4, s36, 16
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    s_addc_u32 s5, s37, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, s4
; GFX8-NEXT:    v_mov_b32_e32 v2, s6
; GFX8-NEXT:    v_mov_b32_e32 v3, s7
; GFX8-NEXT:    v_mov_b32_e32 v5, s5
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    v_mov_b32_e32 v4, s36
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    v_mov_b32_e32 v3, s3
; GFX8-NEXT:    v_mov_b32_e32 v5, s37
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v16i64:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @42, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @26
; EG-NEXT:    ALU 3, @43, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @28
; EG-NEXT:    ALU 3, @47, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @30
; EG-NEXT:    ALU 3, @51, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @32
; EG-NEXT:    ALU 3, @55, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @34
; EG-NEXT:    ALU 3, @59, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @36
; EG-NEXT:    ALU 3, @63, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @38
; EG-NEXT:    ALU 3, @67, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @40
; EG-NEXT:    ALU 1, @71, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 26:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 112, #1
; EG-NEXT:    Fetch clause starting at 28:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 96, #1
; EG-NEXT:    Fetch clause starting at 30:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 80, #1
; EG-NEXT:    Fetch clause starting at 32:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 64, #1
; EG-NEXT:    Fetch clause starting at 34:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 48, #1
; EG-NEXT:    Fetch clause starting at 36:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 32, #1
; EG-NEXT:    Fetch clause starting at 38:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    Fetch clause starting at 40:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 42:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 43:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 47:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 51:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 55:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 59:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 63:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 67:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 71:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v16i64:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[36:39], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    s_load_b512 s[16:31], s[38:39], 0x40
; GFX12-NEXT:    s_load_b512 s[0:15], s[38:39], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v32, 0 :: v_dual_mov_b32 v1, s29
; GFX12-NEXT:    v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v3, s31
; GFX12-NEXT:    v_dual_mov_b32 v2, s30 :: v_dual_mov_b32 v5, s25
; GFX12-NEXT:    v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v7, s27
; GFX12-NEXT:    v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v9, s21
; GFX12-NEXT:    v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v11, s23
; GFX12-NEXT:    v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v13, s17
; GFX12-NEXT:    v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v15, s19
; GFX12-NEXT:    v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v17, s13
; GFX12-NEXT:    v_dual_mov_b32 v16, s12 :: v_dual_mov_b32 v19, s15
; GFX12-NEXT:    v_dual_mov_b32 v18, s14 :: v_dual_mov_b32 v21, s9
; GFX12-NEXT:    v_dual_mov_b32 v20, s8 :: v_dual_mov_b32 v23, s11
; GFX12-NEXT:    v_dual_mov_b32 v22, s10 :: v_dual_mov_b32 v25, s5
; GFX12-NEXT:    v_dual_mov_b32 v24, s4 :: v_dual_mov_b32 v27, s7
; GFX12-NEXT:    v_dual_mov_b32 v26, s6 :: v_dual_mov_b32 v29, s1
; GFX12-NEXT:    v_dual_mov_b32 v28, s0 :: v_dual_mov_b32 v31, s3
; GFX12-NEXT:    v_mov_b32_e32 v30, s2
; GFX12-NEXT:    s_clause 0x7
; GFX12-NEXT:    global_store_b128 v32, v[0:3], s[36:37] offset:112
; GFX12-NEXT:    global_store_b128 v32, v[4:7], s[36:37] offset:96
; GFX12-NEXT:    global_store_b128 v32, v[8:11], s[36:37] offset:80
; GFX12-NEXT:    global_store_b128 v32, v[12:15], s[36:37] offset:64
; GFX12-NEXT:    global_store_b128 v32, v[16:19], s[36:37] offset:48
; GFX12-NEXT:    global_store_b128 v32, v[20:23], s[36:37] offset:32
; GFX12-NEXT:    global_store_b128 v32, v[24:27], s[36:37] offset:16
; GFX12-NEXT:    global_store_b128 v32, v[28:31], s[36:37]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <16 x i64>, ptr addrspace(4) %in
  store <16 x i64> %ld, ptr addrspace(1) %out
  ret void
}

; Attribute group referenced as #0 by the kernels in this file.
attributes #0 = { nounwind }