; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Check lowering of a large insertelement that uses the stack
; instead of register indexing.

; Kernel under test: loads a <64 x i32> from %ptr, inserts %val at the
; variable index %idx, and stores the resulting vector to %out.ptr.
;
; The expected output below does the following, in order:
;   * loads the 64 source dwords with four s_load_dwordx16 instructions and
;     writes each one to scratch with buffer_store_dword (offsets 0 to 252),
;   * masks the index with 63 (s_and_b32) and shifts it left by 2 (s_lshl_b32)
;     to form a byte offset, then writes the inserted value with a single
;     "offen" buffer_store_dword,
;   * reads all 64 dwords back with buffer_load_dword and writes them out
;     with sixteen global_store_dwordx4 instructions.
;
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script rather than editing them by hand.
define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr addrspace(1) %ptr, i32 %val, i32 %idx) #0 {
; GCN-LABEL: v_insert_v64i32_varidx:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[20:23], s[8:9], 0x0
; GCN-NEXT: s_load_dwordx2 s[24:25], s[8:9], 0x10
; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: v_mov_b32_e32 v64, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0x0
; GCN-NEXT: s_load_dwordx16 s[52:67], s[22:23], 0x40
; GCN-NEXT: s_load_dwordx16 s[4:19], s[22:23], 0x80
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s36
; GCN-NEXT: v_mov_b32_e32 v1, s37
; GCN-NEXT: v_mov_b32_e32 v2, s38
; GCN-NEXT: v_mov_b32_e32 v3, s39
; GCN-NEXT: v_mov_b32_e32 v4, s40
; GCN-NEXT: v_mov_b32_e32 v5, s41
; GCN-NEXT: v_mov_b32_e32 v6, s42
; GCN-NEXT: v_mov_b32_e32 v7, s43
; GCN-NEXT: v_mov_b32_e32 v8, s44
; GCN-NEXT: v_mov_b32_e32 v9, s45
; GCN-NEXT: v_mov_b32_e32 v10, s46
; GCN-NEXT: v_mov_b32_e32 v11, s47
; GCN-NEXT: v_mov_b32_e32 v12, s48
; GCN-NEXT: v_mov_b32_e32 v13, s49
; GCN-NEXT: v_mov_b32_e32 v14, s50
; GCN-NEXT: v_mov_b32_e32 v15, s51
; GCN-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0xc0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:4
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
; GCN-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:12
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:16
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:20
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:24
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:28
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:32
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:36
; GCN-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:40
; GCN-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:44
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:48
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:52
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:56
; GCN-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:60
; GCN-NEXT: v_mov_b32_e32 v0, s52
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:64
; GCN-NEXT: v_mov_b32_e32 v0, s53
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:68
; GCN-NEXT: v_mov_b32_e32 v0, s54
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:72
; GCN-NEXT: v_mov_b32_e32 v0, s55
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:76
; GCN-NEXT: v_mov_b32_e32 v0, s56
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:80
; GCN-NEXT: v_mov_b32_e32 v0, s57
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:84
; GCN-NEXT: v_mov_b32_e32 v0, s58
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:88
; GCN-NEXT: v_mov_b32_e32 v0, s59
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:92
; GCN-NEXT: v_mov_b32_e32 v0, s60
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:96
; GCN-NEXT: v_mov_b32_e32 v0, s61
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:100
; GCN-NEXT: v_mov_b32_e32 v0, s62
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:104
; GCN-NEXT: v_mov_b32_e32 v0, s63
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:108
; GCN-NEXT: v_mov_b32_e32 v0, s64
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:112
; GCN-NEXT: v_mov_b32_e32 v0, s65
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:116
; GCN-NEXT: v_mov_b32_e32 v0, s66
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:120
; GCN-NEXT: v_mov_b32_e32 v0, s67
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:124
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
; GCN-NEXT: v_mov_b32_e32 v0, s5
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:132
; GCN-NEXT: v_mov_b32_e32 v0, s6
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:136
; GCN-NEXT: v_mov_b32_e32 v0, s7
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:140
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:144
; GCN-NEXT: v_mov_b32_e32 v0, s9
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:148
; GCN-NEXT: v_mov_b32_e32 v0, s10
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:152
; GCN-NEXT: v_mov_b32_e32 v0, s11
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:156
; GCN-NEXT: v_mov_b32_e32 v0, s12
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:160
; GCN-NEXT: v_mov_b32_e32 v0, s13
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:164
; GCN-NEXT: v_mov_b32_e32 v0, s14
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:168
; GCN-NEXT: v_mov_b32_e32 v0, s15
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:172
; GCN-NEXT: v_mov_b32_e32 v0, s16
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:176
; GCN-NEXT: v_mov_b32_e32 v0, s17
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:180
; GCN-NEXT: v_mov_b32_e32 v0, s18
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:184
; GCN-NEXT: v_mov_b32_e32 v0, s19
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:188
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s36
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:192
; GCN-NEXT: v_mov_b32_e32 v0, s37
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:196
; GCN-NEXT: v_mov_b32_e32 v0, s38
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:200
; GCN-NEXT: v_mov_b32_e32 v0, s39
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:204
; GCN-NEXT: v_mov_b32_e32 v0, s40
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:208
; GCN-NEXT: v_mov_b32_e32 v0, s41
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:212
; GCN-NEXT: v_mov_b32_e32 v0, s42
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:216
; GCN-NEXT: v_mov_b32_e32 v0, s43
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:220
; GCN-NEXT: v_mov_b32_e32 v0, s44
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:224
; GCN-NEXT: v_mov_b32_e32 v0, s45
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:228
; GCN-NEXT: v_mov_b32_e32 v0, s46
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:232
; GCN-NEXT: v_mov_b32_e32 v0, s47
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:236
; GCN-NEXT: v_mov_b32_e32 v0, s48
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:240
; GCN-NEXT: v_mov_b32_e32 v0, s49
; GCN-NEXT: s_and_b32 s4, s25, 63
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:244
; GCN-NEXT: v_mov_b32_e32 v0, s50
; GCN-NEXT: s_lshl_b32 s4, s4, 2
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:248
; GCN-NEXT: v_mov_b32_e32 v0, s51
; GCN-NEXT: s_add_u32 s4, 0, s4
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:252
; GCN-NEXT: v_mov_b32_e32 v0, s24
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:8
; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:12
; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:16
; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:20
; GCN-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:24
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:28
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:32
; GCN-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:36
; GCN-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:40
; GCN-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:44
; GCN-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:48
; GCN-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:52
; GCN-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:56
; GCN-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:60
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], 0 offset:64
; GCN-NEXT: buffer_load_dword v17, off, s[0:3], 0 offset:68
; GCN-NEXT: buffer_load_dword v18, off, s[0:3], 0 offset:72
; GCN-NEXT: buffer_load_dword v19, off, s[0:3], 0 offset:76
; GCN-NEXT: buffer_load_dword v20, off, s[0:3], 0 offset:80
; GCN-NEXT: buffer_load_dword v21, off, s[0:3], 0 offset:84
; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 offset:88
; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:92
; GCN-NEXT: buffer_load_dword v24, off, s[0:3], 0 offset:96
; GCN-NEXT: buffer_load_dword v25, off, s[0:3], 0 offset:100
; GCN-NEXT: buffer_load_dword v26, off, s[0:3], 0 offset:104
; GCN-NEXT: buffer_load_dword v27, off, s[0:3], 0 offset:108
; GCN-NEXT: buffer_load_dword v28, off, s[0:3], 0 offset:112
; GCN-NEXT: buffer_load_dword v29, off, s[0:3], 0 offset:116
; GCN-NEXT: buffer_load_dword v30, off, s[0:3], 0 offset:120
; GCN-NEXT: buffer_load_dword v31, off, s[0:3], 0 offset:124
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], 0 offset:128
; GCN-NEXT: buffer_load_dword v33, off, s[0:3], 0 offset:132
; GCN-NEXT: buffer_load_dword v34, off, s[0:3], 0 offset:136
; GCN-NEXT: buffer_load_dword v35, off, s[0:3], 0 offset:140
; GCN-NEXT: buffer_load_dword v36, off, s[0:3], 0 offset:144
; GCN-NEXT: buffer_load_dword v37, off, s[0:3], 0 offset:148
; GCN-NEXT: buffer_load_dword v38, off, s[0:3], 0 offset:152
; GCN-NEXT: buffer_load_dword v39, off, s[0:3], 0 offset:156
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], 0 offset:160
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], 0 offset:164
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], 0 offset:168
; GCN-NEXT: buffer_load_dword v43, off, s[0:3], 0 offset:172
; GCN-NEXT: buffer_load_dword v44, off, s[0:3], 0 offset:176
; GCN-NEXT: buffer_load_dword v45, off, s[0:3], 0 offset:180
; GCN-NEXT: buffer_load_dword v46, off, s[0:3], 0 offset:184
; GCN-NEXT: buffer_load_dword v47, off, s[0:3], 0 offset:188
; GCN-NEXT: buffer_load_dword v48, off, s[0:3], 0 offset:192
; GCN-NEXT: buffer_load_dword v49, off, s[0:3], 0 offset:196
; GCN-NEXT: buffer_load_dword v50, off, s[0:3], 0 offset:200
; GCN-NEXT: buffer_load_dword v51, off, s[0:3], 0 offset:204
; GCN-NEXT: buffer_load_dword v52, off, s[0:3], 0 offset:208
; GCN-NEXT: buffer_load_dword v53, off, s[0:3], 0 offset:212
; GCN-NEXT: buffer_load_dword v54, off, s[0:3], 0 offset:216
; GCN-NEXT: buffer_load_dword v55, off, s[0:3], 0 offset:220
; GCN-NEXT: buffer_load_dword v56, off, s[0:3], 0 offset:224
; GCN-NEXT: buffer_load_dword v57, off, s[0:3], 0 offset:228
; GCN-NEXT: buffer_load_dword v58, off, s[0:3], 0 offset:232
; GCN-NEXT: buffer_load_dword v59, off, s[0:3], 0 offset:236
; GCN-NEXT: buffer_load_dword v60, off, s[0:3], 0 offset:240
; GCN-NEXT: buffer_load_dword v61, off, s[0:3], 0 offset:244
; GCN-NEXT: buffer_load_dword v62, off, s[0:3], 0 offset:248
; GCN-NEXT: buffer_load_dword v63, off, s[0:3], 0 offset:252
; GCN-NEXT: s_waitcnt vmcnt(60)
; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[20:21]
; GCN-NEXT: s_waitcnt vmcnt(57)
; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[20:21] offset:16
; GCN-NEXT: s_waitcnt vmcnt(54)
; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[20:21] offset:32
; GCN-NEXT: s_waitcnt vmcnt(51)
; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[20:21] offset:48
; GCN-NEXT: s_waitcnt vmcnt(48)
; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[20:21] offset:64
; GCN-NEXT: s_waitcnt vmcnt(45)
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[20:21] offset:80
; GCN-NEXT: s_waitcnt vmcnt(42)
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[20:21] offset:96
; GCN-NEXT: s_waitcnt vmcnt(39)
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[20:21] offset:112
; GCN-NEXT: s_waitcnt vmcnt(36)
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[20:21] offset:128
; GCN-NEXT: s_waitcnt vmcnt(33)
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[20:21] offset:144
; GCN-NEXT: s_waitcnt vmcnt(30)
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[20:21] offset:160
; GCN-NEXT: s_waitcnt vmcnt(27)
; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[20:21] offset:176
; GCN-NEXT: s_waitcnt vmcnt(24)
; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[20:21] offset:192
; GCN-NEXT: s_waitcnt vmcnt(21)
; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[20:21] offset:208
; GCN-NEXT: s_waitcnt vmcnt(18)
; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[20:21] offset:224
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[20:21] offset:240
; GCN-NEXT: s_endpgm
  ; Read the full 64-element source vector from global memory.
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  ; Insert at a run-time index; %idx is a kernel argument, not a constant.
  %insert = insertelement <64 x i32> %vec, i32 %val, i32 %idx
  ; Write the whole updated vector to the output pointer.
  store <64 x i32> %insert, ptr addrspace(1) %out.ptr
  ret void
}

; Attribute group used by the kernel above: flat work-group size range 1,256
; and waves-per-EU range 1,10.
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" }
