; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s

; Exercises GlobalISel code generation for an insertelement with a constant
; index (37) into a <64 x i32> vector that is loaded from and stored back to
; global memory. For every -mcpu under test the expected output moves the
; vector with sixteen 128-bit loads and sixteen 128-bit stores and
; materializes the inserted value 999 (0x3e7) with a single v_mov_b32 into
; v37, i.e. the register that the load at byte offset 144 (v[36:39]) fills
; for element 37.
;
; The gfx900 checks wait with vmcnt(15) before each of the last six stores,
; while the gfx1010 and gfx1100 checks count vmcnt down from 5 to 0.
; Presumably this reflects how outstanding stores are counted on each
; target; confirm against the ISA documentation. Regenerate the assertions
; with utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addrspace(1) %ptr.out) #0 {
; GCN-LABEL: v_insert_v64i32_37:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-NEXT: v_lshlrev_b32_e32 v64, 8, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:16
; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:32
; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:48
; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:64
; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:80
; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:96
; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:112
; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:128
; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:144
; GCN-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160
; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176
; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192
; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208
; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224
; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240
; GCN-NEXT: s_waitcnt vmcnt(6)
; GCN-NEXT: v_mov_b32_e32 v37, 0x3e7
; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:176
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:192
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:208
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:224
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:240
; GCN-NEXT: s_endpgm
;
; GFX10-LABEL: v_insert_v64i32_37:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v64, 8, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0xf
; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:16
; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:32
; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:48
; GFX10-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:64
; GFX10-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:80
; GFX10-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:96
; GFX10-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:112
; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:128
; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:144
; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160
; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176
; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192
; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208
; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224
; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240
; GFX10-NEXT: s_waitcnt vmcnt(6)
; GFX10-NEXT: v_mov_b32_e32 v37, 0x3e7
; GFX10-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
; GFX10-NEXT: s_waitcnt vmcnt(5)
; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
; GFX10-NEXT: s_waitcnt vmcnt(4)
; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:176
; GFX10-NEXT: s_waitcnt vmcnt(3)
; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:192
; GFX10-NEXT: s_waitcnt vmcnt(2)
; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:208
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:224
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:240
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_insert_v64i32_37:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0xf
; GFX11-NEXT: global_load_b128 v[0:3], v64, s[0:1]
; GFX11-NEXT: global_load_b128 v[4:7], v64, s[0:1] offset:16
; GFX11-NEXT: global_load_b128 v[8:11], v64, s[0:1] offset:32
; GFX11-NEXT: global_load_b128 v[12:15], v64, s[0:1] offset:48
; GFX11-NEXT: global_load_b128 v[16:19], v64, s[0:1] offset:64
; GFX11-NEXT: global_load_b128 v[20:23], v64, s[0:1] offset:80
; GFX11-NEXT: global_load_b128 v[24:27], v64, s[0:1] offset:96
; GFX11-NEXT: global_load_b128 v[28:31], v64, s[0:1] offset:112
; GFX11-NEXT: global_load_b128 v[32:35], v64, s[0:1] offset:128
; GFX11-NEXT: global_load_b128 v[36:39], v64, s[0:1] offset:144
; GFX11-NEXT: global_load_b128 v[40:43], v64, s[0:1] offset:160
; GFX11-NEXT: global_load_b128 v[44:47], v64, s[0:1] offset:176
; GFX11-NEXT: global_load_b128 v[48:51], v64, s[0:1] offset:192
; GFX11-NEXT: global_load_b128 v[52:55], v64, s[0:1] offset:208
; GFX11-NEXT: global_load_b128 v[56:59], v64, s[0:1] offset:224
; GFX11-NEXT: global_load_b128 v[60:63], v64, s[0:1] offset:240
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: v_mov_b32_e32 v37, 0x3e7
; GFX11-NEXT: s_clause 0x9
; GFX11-NEXT: global_store_b128 v64, v[0:3], s[2:3]
; GFX11-NEXT: global_store_b128 v64, v[4:7], s[2:3] offset:16
; GFX11-NEXT: global_store_b128 v64, v[8:11], s[2:3] offset:32
; GFX11-NEXT: global_store_b128 v64, v[12:15], s[2:3] offset:48
; GFX11-NEXT: global_store_b128 v64, v[16:19], s[2:3] offset:64
; GFX11-NEXT: global_store_b128 v64, v[20:23], s[2:3] offset:80
; GFX11-NEXT: global_store_b128 v64, v[24:27], s[2:3] offset:96
; GFX11-NEXT: global_store_b128 v64, v[28:31], s[2:3] offset:112
; GFX11-NEXT: global_store_b128 v64, v[32:35], s[2:3] offset:128
; GFX11-NEXT: global_store_b128 v64, v[36:39], s[2:3] offset:144
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: global_store_b128 v64, v[40:43], s[2:3] offset:160
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: global_store_b128 v64, v[44:47], s[2:3] offset:176
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: global_store_b128 v64, v[48:51], s[2:3] offset:192
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: global_store_b128 v64, v[52:55], s[2:3] offset:208
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: global_store_b128 v64, v[56:59], s[2:3] offset:224
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b128 v64, v[60:63], s[2:3] offset:240
; GFX11-NEXT: s_endpgm
  ; Each work-item handles one <64 x i32> vector: the x work-item id indexes
  ; both the input and the output pointer.
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr <64 x i32>, ptr addrspace(1) %ptr.in, i32 %id
  %vec = load <64 x i32>, ptr addrspace(1) %gep.in
  ; Replace element 37 with the constant 999; the other 63 elements are
  ; stored back unchanged.
  %insert = insertelement <64 x i32> %vec, i32 999, i32 37
  %gep.out = getelementptr <64 x i32>, ptr addrspace(1) %ptr.out, i32 %id
  store <64 x i32> %insert, ptr addrspace(1) %gep.out
  ret void
}

; Intrinsic used above to obtain the work-item id in the x dimension.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the kernel and sets "amdgpu-flat-work-group-size" to 1,256
; and "amdgpu-waves-per-eu" to 1,10. #1 is attached to the intrinsic
; declaration.
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" }
attributes #1 = { nounwind readnone speculatable willreturn }