1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -O0 -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s 3 4; FIXME: we should disable sdwa peephole because dead-code elimination, that 5; runs after peephole, ruins this test (different register numbers) 6 7; Spill all SGPRs so multiple VGPRs are required for spilling all of them. 8 9; Ideally we only need 2 VGPRs for all spilling. The VGPRs are 10; allocated per-frame index, so it's possible to end up with more. ; NOTE(review): the kernel below defines seventeen <8 x i32> SGPR tuples with inline asm ahead of the branch and consumes all of them in %bb0; the generated checks show the spills packed into three lane VGPRs (64 + 64 + 8 lanes), each stored to its own scratch offset (8, 4 and 0). 11define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 { 12; GCN-LABEL: spill_sgprs_to_multiple_vgprs: 13; GCN: ; %bb.0: 14; GCN-NEXT: s_mov_b32 s92, SCRATCH_RSRC_DWORD0 15; GCN-NEXT: s_mov_b32 s93, SCRATCH_RSRC_DWORD1 16; GCN-NEXT: s_mov_b32 s94, -1 17; GCN-NEXT: s_mov_b32 s95, 0xe8f000 18; GCN-NEXT: s_add_u32 s92, s92, s11 19; GCN-NEXT: s_addc_u32 s93, s93, 0 20; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 21; GCN-NEXT: ;;#ASMSTART 22; GCN-NEXT: ; def s[4:11] 23; GCN-NEXT: ;;#ASMEND 24; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane 25; GCN-NEXT: v_writelane_b32 v2, s4, 0 26; GCN-NEXT: v_writelane_b32 v2, s5, 1 27; GCN-NEXT: v_writelane_b32 v2, s6, 2 28; GCN-NEXT: v_writelane_b32 v2, s7, 3 29; GCN-NEXT: v_writelane_b32 v2, s8, 4 30; GCN-NEXT: v_writelane_b32 v2, s9, 5 31; GCN-NEXT: v_writelane_b32 v2, s10, 6 32; GCN-NEXT: v_writelane_b32 v2, s11, 7 33; GCN-NEXT: ;;#ASMSTART 34; GCN-NEXT: ; def s[4:11] 35; GCN-NEXT: ;;#ASMEND 36; GCN-NEXT: v_writelane_b32 v2, s4, 8 37; GCN-NEXT: v_writelane_b32 v2, s5, 9 38; GCN-NEXT: v_writelane_b32 v2, s6, 10 39; GCN-NEXT: v_writelane_b32 v2, s7, 11 40; GCN-NEXT: v_writelane_b32 v2, s8, 12 41; GCN-NEXT: v_writelane_b32 v2, s9, 13 42; GCN-NEXT: v_writelane_b32 v2, s10, 14 43; GCN-NEXT: v_writelane_b32 v2, s11, 15 44; GCN-NEXT: ;;#ASMSTART 45; GCN-NEXT: ; def s[4:11] 46; GCN-NEXT: ;;#ASMEND 47; GCN-NEXT: v_writelane_b32 v2, s4, 16 48; GCN-NEXT: 
v_writelane_b32 v2, s5, 17 49; GCN-NEXT: v_writelane_b32 v2, s6, 18 50; GCN-NEXT: v_writelane_b32 v2, s7, 19 51; GCN-NEXT: v_writelane_b32 v2, s8, 20 52; GCN-NEXT: v_writelane_b32 v2, s9, 21 53; GCN-NEXT: v_writelane_b32 v2, s10, 22 54; GCN-NEXT: v_writelane_b32 v2, s11, 23 55; GCN-NEXT: ;;#ASMSTART 56; GCN-NEXT: ; def s[4:11] 57; GCN-NEXT: ;;#ASMEND 58; GCN-NEXT: v_writelane_b32 v2, s4, 24 59; GCN-NEXT: v_writelane_b32 v2, s5, 25 60; GCN-NEXT: v_writelane_b32 v2, s6, 26 61; GCN-NEXT: v_writelane_b32 v2, s7, 27 62; GCN-NEXT: v_writelane_b32 v2, s8, 28 63; GCN-NEXT: v_writelane_b32 v2, s9, 29 64; GCN-NEXT: v_writelane_b32 v2, s10, 30 65; GCN-NEXT: v_writelane_b32 v2, s11, 31 66; GCN-NEXT: ;;#ASMSTART 67; GCN-NEXT: ; def s[4:11] 68; GCN-NEXT: ;;#ASMEND 69; GCN-NEXT: v_writelane_b32 v2, s4, 32 70; GCN-NEXT: v_writelane_b32 v2, s5, 33 71; GCN-NEXT: v_writelane_b32 v2, s6, 34 72; GCN-NEXT: v_writelane_b32 v2, s7, 35 73; GCN-NEXT: v_writelane_b32 v2, s8, 36 74; GCN-NEXT: v_writelane_b32 v2, s9, 37 75; GCN-NEXT: v_writelane_b32 v2, s10, 38 76; GCN-NEXT: v_writelane_b32 v2, s11, 39 77; GCN-NEXT: ;;#ASMSTART 78; GCN-NEXT: ; def s[4:11] 79; GCN-NEXT: ;;#ASMEND 80; GCN-NEXT: v_writelane_b32 v2, s4, 40 81; GCN-NEXT: v_writelane_b32 v2, s5, 41 82; GCN-NEXT: v_writelane_b32 v2, s6, 42 83; GCN-NEXT: v_writelane_b32 v2, s7, 43 84; GCN-NEXT: v_writelane_b32 v2, s8, 44 85; GCN-NEXT: v_writelane_b32 v2, s9, 45 86; GCN-NEXT: v_writelane_b32 v2, s10, 46 87; GCN-NEXT: v_writelane_b32 v2, s11, 47 88; GCN-NEXT: ;;#ASMSTART 89; GCN-NEXT: ; def s[4:11] 90; GCN-NEXT: ;;#ASMEND 91; GCN-NEXT: v_writelane_b32 v2, s4, 48 92; GCN-NEXT: v_writelane_b32 v2, s5, 49 93; GCN-NEXT: v_writelane_b32 v2, s6, 50 94; GCN-NEXT: v_writelane_b32 v2, s7, 51 95; GCN-NEXT: v_writelane_b32 v2, s8, 52 96; GCN-NEXT: v_writelane_b32 v2, s9, 53 97; GCN-NEXT: v_writelane_b32 v2, s10, 54 98; GCN-NEXT: v_writelane_b32 v2, s11, 55 99; GCN-NEXT: ;;#ASMSTART 100; GCN-NEXT: ; def s[4:11] 101; GCN-NEXT: ;;#ASMEND 102; 
GCN-NEXT: v_writelane_b32 v2, s4, 56 103; GCN-NEXT: v_writelane_b32 v2, s5, 57 104; GCN-NEXT: v_writelane_b32 v2, s6, 58 105; GCN-NEXT: v_writelane_b32 v2, s7, 59 106; GCN-NEXT: v_writelane_b32 v2, s8, 60 107; GCN-NEXT: v_writelane_b32 v2, s9, 61 108; GCN-NEXT: v_writelane_b32 v2, s10, 62 109; GCN-NEXT: v_writelane_b32 v2, s11, 63 110; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 111; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Spill 112; GCN-NEXT: s_mov_b64 exec, s[34:35] 113; GCN-NEXT: ;;#ASMSTART 114; GCN-NEXT: ; def s[4:11] 115; GCN-NEXT: ;;#ASMEND 116; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane 117; GCN-NEXT: v_writelane_b32 v2, s4, 0 118; GCN-NEXT: v_writelane_b32 v2, s5, 1 119; GCN-NEXT: v_writelane_b32 v2, s6, 2 120; GCN-NEXT: v_writelane_b32 v2, s7, 3 121; GCN-NEXT: v_writelane_b32 v2, s8, 4 122; GCN-NEXT: v_writelane_b32 v2, s9, 5 123; GCN-NEXT: v_writelane_b32 v2, s10, 6 124; GCN-NEXT: v_writelane_b32 v2, s11, 7 125; GCN-NEXT: ;;#ASMSTART 126; GCN-NEXT: ; def s[4:11] 127; GCN-NEXT: ;;#ASMEND 128; GCN-NEXT: v_writelane_b32 v2, s4, 8 129; GCN-NEXT: v_writelane_b32 v2, s5, 9 130; GCN-NEXT: v_writelane_b32 v2, s6, 10 131; GCN-NEXT: v_writelane_b32 v2, s7, 11 132; GCN-NEXT: v_writelane_b32 v2, s8, 12 133; GCN-NEXT: v_writelane_b32 v2, s9, 13 134; GCN-NEXT: v_writelane_b32 v2, s10, 14 135; GCN-NEXT: v_writelane_b32 v2, s11, 15 136; GCN-NEXT: ;;#ASMSTART 137; GCN-NEXT: ; def s[4:11] 138; GCN-NEXT: ;;#ASMEND 139; GCN-NEXT: v_writelane_b32 v2, s4, 16 140; GCN-NEXT: v_writelane_b32 v2, s5, 17 141; GCN-NEXT: v_writelane_b32 v2, s6, 18 142; GCN-NEXT: v_writelane_b32 v2, s7, 19 143; GCN-NEXT: v_writelane_b32 v2, s8, 20 144; GCN-NEXT: v_writelane_b32 v2, s9, 21 145; GCN-NEXT: v_writelane_b32 v2, s10, 22 146; GCN-NEXT: v_writelane_b32 v2, s11, 23 147; GCN-NEXT: ;;#ASMSTART 148; GCN-NEXT: ; def s[4:11] 149; GCN-NEXT: ;;#ASMEND 150; GCN-NEXT: v_writelane_b32 v2, s4, 24 151; GCN-NEXT: v_writelane_b32 v2, s5, 25 152; GCN-NEXT: 
v_writelane_b32 v2, s6, 26 153; GCN-NEXT: v_writelane_b32 v2, s7, 27 154; GCN-NEXT: v_writelane_b32 v2, s8, 28 155; GCN-NEXT: v_writelane_b32 v2, s9, 29 156; GCN-NEXT: v_writelane_b32 v2, s10, 30 157; GCN-NEXT: v_writelane_b32 v2, s11, 31 158; GCN-NEXT: ;;#ASMSTART 159; GCN-NEXT: ; def s[4:11] 160; GCN-NEXT: ;;#ASMEND 161; GCN-NEXT: v_writelane_b32 v2, s4, 32 162; GCN-NEXT: v_writelane_b32 v2, s5, 33 163; GCN-NEXT: v_writelane_b32 v2, s6, 34 164; GCN-NEXT: v_writelane_b32 v2, s7, 35 165; GCN-NEXT: v_writelane_b32 v2, s8, 36 166; GCN-NEXT: v_writelane_b32 v2, s9, 37 167; GCN-NEXT: v_writelane_b32 v2, s10, 38 168; GCN-NEXT: v_writelane_b32 v2, s11, 39 169; GCN-NEXT: ;;#ASMSTART 170; GCN-NEXT: ; def s[4:11] 171; GCN-NEXT: ;;#ASMEND 172; GCN-NEXT: v_writelane_b32 v2, s4, 40 173; GCN-NEXT: v_writelane_b32 v2, s5, 41 174; GCN-NEXT: v_writelane_b32 v2, s6, 42 175; GCN-NEXT: v_writelane_b32 v2, s7, 43 176; GCN-NEXT: v_writelane_b32 v2, s8, 44 177; GCN-NEXT: v_writelane_b32 v2, s9, 45 178; GCN-NEXT: v_writelane_b32 v2, s10, 46 179; GCN-NEXT: v_writelane_b32 v2, s11, 47 180; GCN-NEXT: ;;#ASMSTART 181; GCN-NEXT: ; def s[4:11] 182; GCN-NEXT: ;;#ASMEND 183; GCN-NEXT: v_writelane_b32 v2, s4, 48 184; GCN-NEXT: v_writelane_b32 v2, s5, 49 185; GCN-NEXT: v_writelane_b32 v2, s6, 50 186; GCN-NEXT: v_writelane_b32 v2, s7, 51 187; GCN-NEXT: v_writelane_b32 v2, s8, 52 188; GCN-NEXT: v_writelane_b32 v2, s9, 53 189; GCN-NEXT: v_writelane_b32 v2, s10, 54 190; GCN-NEXT: v_writelane_b32 v2, s11, 55 191; GCN-NEXT: ;;#ASMSTART 192; GCN-NEXT: ; def s[4:11] 193; GCN-NEXT: ;;#ASMEND 194; GCN-NEXT: v_writelane_b32 v2, s4, 56 195; GCN-NEXT: v_writelane_b32 v2, s5, 57 196; GCN-NEXT: v_writelane_b32 v2, s6, 58 197; GCN-NEXT: v_writelane_b32 v2, s7, 59 198; GCN-NEXT: v_writelane_b32 v2, s8, 60 199; GCN-NEXT: v_writelane_b32 v2, s9, 61 200; GCN-NEXT: v_writelane_b32 v2, s10, 62 201; GCN-NEXT: v_writelane_b32 v2, s11, 63 202; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 203; GCN-NEXT: buffer_store_dword v2, 
off, s[92:95], 0 offset:4 ; 4-byte Folded Spill 204; GCN-NEXT: s_mov_b64 exec, s[34:35] 205; GCN-NEXT: ;;#ASMSTART 206; GCN-NEXT: ; def s[4:11] 207; GCN-NEXT: ;;#ASMEND 208; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane 209; GCN-NEXT: v_writelane_b32 v2, s4, 0 210; GCN-NEXT: v_writelane_b32 v2, s5, 1 211; GCN-NEXT: v_writelane_b32 v2, s6, 2 212; GCN-NEXT: v_writelane_b32 v2, s7, 3 213; GCN-NEXT: v_writelane_b32 v2, s8, 4 214; GCN-NEXT: v_writelane_b32 v2, s9, 5 215; GCN-NEXT: v_writelane_b32 v2, s10, 6 216; GCN-NEXT: v_writelane_b32 v2, s11, 7 217; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 218; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 ; 4-byte Folded Spill 219; GCN-NEXT: s_mov_b64 exec, s[34:35] 220; GCN-NEXT: s_mov_b32 s1, 0 221; GCN-NEXT: s_waitcnt lgkmcnt(0) 222; GCN-NEXT: s_cmp_lg_u32 s0, s1 223; GCN-NEXT: s_cbranch_scc1 .LBB0_2 224; GCN-NEXT: ; %bb.1: ; %bb0 225; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 226; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload 227; GCN-NEXT: s_mov_b64 exec, s[34:35] 228; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 229; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload 230; GCN-NEXT: s_mov_b64 exec, s[34:35] 231; GCN-NEXT: s_waitcnt vmcnt(1) 232; GCN-NEXT: v_readlane_b32 s8, v0, 56 233; GCN-NEXT: v_readlane_b32 s9, v0, 57 234; GCN-NEXT: v_readlane_b32 s10, v0, 58 235; GCN-NEXT: v_readlane_b32 s11, v0, 59 236; GCN-NEXT: v_readlane_b32 s12, v0, 60 237; GCN-NEXT: v_readlane_b32 s13, v0, 61 238; GCN-NEXT: v_readlane_b32 s14, v0, 62 239; GCN-NEXT: v_readlane_b32 s15, v0, 63 240; GCN-NEXT: v_readlane_b32 s16, v0, 48 241; GCN-NEXT: v_readlane_b32 s17, v0, 49 242; GCN-NEXT: v_readlane_b32 s18, v0, 50 243; GCN-NEXT: v_readlane_b32 s19, v0, 51 244; GCN-NEXT: v_readlane_b32 s20, v0, 52 245; GCN-NEXT: v_readlane_b32 s21, v0, 53 246; GCN-NEXT: v_readlane_b32 s22, v0, 54 247; GCN-NEXT: v_readlane_b32 s23, v0, 55 248; GCN-NEXT: v_readlane_b32 s24, v0, 40 249; 
GCN-NEXT: v_readlane_b32 s25, v0, 41 250; GCN-NEXT: v_readlane_b32 s26, v0, 42 251; GCN-NEXT: v_readlane_b32 s27, v0, 43 252; GCN-NEXT: v_readlane_b32 s28, v0, 44 253; GCN-NEXT: v_readlane_b32 s29, v0, 45 254; GCN-NEXT: v_readlane_b32 s30, v0, 46 255; GCN-NEXT: v_readlane_b32 s31, v0, 47 256; GCN-NEXT: v_readlane_b32 s36, v0, 32 257; GCN-NEXT: v_readlane_b32 s37, v0, 33 258; GCN-NEXT: v_readlane_b32 s38, v0, 34 259; GCN-NEXT: v_readlane_b32 s39, v0, 35 260; GCN-NEXT: v_readlane_b32 s40, v0, 36 261; GCN-NEXT: v_readlane_b32 s41, v0, 37 262; GCN-NEXT: v_readlane_b32 s42, v0, 38 263; GCN-NEXT: v_readlane_b32 s43, v0, 39 264; GCN-NEXT: v_readlane_b32 s44, v0, 24 265; GCN-NEXT: v_readlane_b32 s45, v0, 25 266; GCN-NEXT: v_readlane_b32 s46, v0, 26 267; GCN-NEXT: v_readlane_b32 s47, v0, 27 268; GCN-NEXT: v_readlane_b32 s48, v0, 28 269; GCN-NEXT: v_readlane_b32 s49, v0, 29 270; GCN-NEXT: v_readlane_b32 s50, v0, 30 271; GCN-NEXT: v_readlane_b32 s51, v0, 31 272; GCN-NEXT: v_readlane_b32 s52, v0, 16 273; GCN-NEXT: v_readlane_b32 s53, v0, 17 274; GCN-NEXT: v_readlane_b32 s54, v0, 18 275; GCN-NEXT: v_readlane_b32 s55, v0, 19 276; GCN-NEXT: v_readlane_b32 s56, v0, 20 277; GCN-NEXT: v_readlane_b32 s57, v0, 21 278; GCN-NEXT: v_readlane_b32 s58, v0, 22 279; GCN-NEXT: v_readlane_b32 s59, v0, 23 280; GCN-NEXT: v_readlane_b32 s60, v0, 8 281; GCN-NEXT: v_readlane_b32 s61, v0, 9 282; GCN-NEXT: v_readlane_b32 s62, v0, 10 283; GCN-NEXT: v_readlane_b32 s63, v0, 11 284; GCN-NEXT: v_readlane_b32 s64, v0, 12 285; GCN-NEXT: v_readlane_b32 s65, v0, 13 286; GCN-NEXT: v_readlane_b32 s66, v0, 14 287; GCN-NEXT: v_readlane_b32 s67, v0, 15 288; GCN-NEXT: v_readlane_b32 s68, v0, 0 289; GCN-NEXT: v_readlane_b32 s69, v0, 1 290; GCN-NEXT: v_readlane_b32 s70, v0, 2 291; GCN-NEXT: v_readlane_b32 s71, v0, 3 292; GCN-NEXT: v_readlane_b32 s72, v0, 4 293; GCN-NEXT: v_readlane_b32 s73, v0, 5 294; GCN-NEXT: v_readlane_b32 s74, v0, 6 295; GCN-NEXT: v_readlane_b32 s75, v0, 7 296; GCN-NEXT: s_waitcnt vmcnt(0) 297; 
GCN-NEXT: v_readlane_b32 s76, v1, 56 298; GCN-NEXT: v_readlane_b32 s77, v1, 57 299; GCN-NEXT: v_readlane_b32 s78, v1, 58 300; GCN-NEXT: v_readlane_b32 s79, v1, 59 301; GCN-NEXT: v_readlane_b32 s80, v1, 60 302; GCN-NEXT: v_readlane_b32 s81, v1, 61 303; GCN-NEXT: v_readlane_b32 s82, v1, 62 304; GCN-NEXT: v_readlane_b32 s83, v1, 63 305; GCN-NEXT: v_readlane_b32 s84, v1, 48 306; GCN-NEXT: v_readlane_b32 s85, v1, 49 307; GCN-NEXT: v_readlane_b32 s86, v1, 50 308; GCN-NEXT: v_readlane_b32 s87, v1, 51 309; GCN-NEXT: v_readlane_b32 s88, v1, 52 310; GCN-NEXT: v_readlane_b32 s89, v1, 53 311; GCN-NEXT: v_readlane_b32 s90, v1, 54 312; GCN-NEXT: v_readlane_b32 s91, v1, 55 313; GCN-NEXT: v_readlane_b32 s0, v1, 0 314; GCN-NEXT: v_readlane_b32 s1, v1, 1 315; GCN-NEXT: v_readlane_b32 s2, v1, 2 316; GCN-NEXT: v_readlane_b32 s3, v1, 3 317; GCN-NEXT: v_readlane_b32 s4, v1, 4 318; GCN-NEXT: v_readlane_b32 s5, v1, 5 319; GCN-NEXT: v_readlane_b32 s6, v1, 6 320; GCN-NEXT: v_readlane_b32 s7, v1, 7 321; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 322; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 ; 4-byte Folded Reload 323; GCN-NEXT: s_mov_b64 exec, s[34:35] 324; GCN-NEXT: ;;#ASMSTART 325; GCN-NEXT: ; use s[0:7] 326; GCN-NEXT: ;;#ASMEND 327; GCN-NEXT: v_readlane_b32 s0, v1, 8 328; GCN-NEXT: v_readlane_b32 s1, v1, 9 329; GCN-NEXT: v_readlane_b32 s2, v1, 10 330; GCN-NEXT: v_readlane_b32 s3, v1, 11 331; GCN-NEXT: v_readlane_b32 s4, v1, 12 332; GCN-NEXT: v_readlane_b32 s5, v1, 13 333; GCN-NEXT: v_readlane_b32 s6, v1, 14 334; GCN-NEXT: v_readlane_b32 s7, v1, 15 335; GCN-NEXT: ;;#ASMSTART 336; GCN-NEXT: ; use s[0:7] 337; GCN-NEXT: ;;#ASMEND 338; GCN-NEXT: v_readlane_b32 s0, v1, 16 339; GCN-NEXT: v_readlane_b32 s1, v1, 17 340; GCN-NEXT: v_readlane_b32 s2, v1, 18 341; GCN-NEXT: v_readlane_b32 s3, v1, 19 342; GCN-NEXT: v_readlane_b32 s4, v1, 20 343; GCN-NEXT: v_readlane_b32 s5, v1, 21 344; GCN-NEXT: v_readlane_b32 s6, v1, 22 345; GCN-NEXT: v_readlane_b32 s7, v1, 23 346; GCN-NEXT: ;;#ASMSTART 347; 
GCN-NEXT: ; use s[0:7] 348; GCN-NEXT: ;;#ASMEND 349; GCN-NEXT: v_readlane_b32 s0, v1, 24 350; GCN-NEXT: v_readlane_b32 s1, v1, 25 351; GCN-NEXT: v_readlane_b32 s2, v1, 26 352; GCN-NEXT: v_readlane_b32 s3, v1, 27 353; GCN-NEXT: v_readlane_b32 s4, v1, 28 354; GCN-NEXT: v_readlane_b32 s5, v1, 29 355; GCN-NEXT: v_readlane_b32 s6, v1, 30 356; GCN-NEXT: v_readlane_b32 s7, v1, 31 357; GCN-NEXT: ;;#ASMSTART 358; GCN-NEXT: ; use s[0:7] 359; GCN-NEXT: ;;#ASMEND 360; GCN-NEXT: v_readlane_b32 s0, v1, 32 361; GCN-NEXT: v_readlane_b32 s1, v1, 33 362; GCN-NEXT: v_readlane_b32 s2, v1, 34 363; GCN-NEXT: v_readlane_b32 s3, v1, 35 364; GCN-NEXT: v_readlane_b32 s4, v1, 36 365; GCN-NEXT: v_readlane_b32 s5, v1, 37 366; GCN-NEXT: v_readlane_b32 s6, v1, 38 367; GCN-NEXT: v_readlane_b32 s7, v1, 39 368; GCN-NEXT: ;;#ASMSTART 369; GCN-NEXT: ; use s[0:7] 370; GCN-NEXT: ;;#ASMEND 371; GCN-NEXT: v_readlane_b32 s0, v1, 40 372; GCN-NEXT: v_readlane_b32 s1, v1, 41 373; GCN-NEXT: v_readlane_b32 s2, v1, 42 374; GCN-NEXT: v_readlane_b32 s3, v1, 43 375; GCN-NEXT: v_readlane_b32 s4, v1, 44 376; GCN-NEXT: v_readlane_b32 s5, v1, 45 377; GCN-NEXT: v_readlane_b32 s6, v1, 46 378; GCN-NEXT: v_readlane_b32 s7, v1, 47 379; GCN-NEXT: ;;#ASMSTART 380; GCN-NEXT: ; use s[0:7] 381; GCN-NEXT: ;;#ASMEND 382; GCN-NEXT: s_waitcnt vmcnt(0) 383; GCN-NEXT: v_readlane_b32 s0, v2, 0 384; GCN-NEXT: v_readlane_b32 s1, v2, 1 385; GCN-NEXT: v_readlane_b32 s2, v2, 2 386; GCN-NEXT: v_readlane_b32 s3, v2, 3 387; GCN-NEXT: v_readlane_b32 s4, v2, 4 388; GCN-NEXT: v_readlane_b32 s5, v2, 5 389; GCN-NEXT: v_readlane_b32 s6, v2, 6 390; GCN-NEXT: v_readlane_b32 s7, v2, 7 391; GCN-NEXT: ;;#ASMSTART 392; GCN-NEXT: ; use s[84:91] 393; GCN-NEXT: ;;#ASMEND 394; GCN-NEXT: ;;#ASMSTART 395; GCN-NEXT: ; use s[76:83] 396; GCN-NEXT: ;;#ASMEND 397; GCN-NEXT: ;;#ASMSTART 398; GCN-NEXT: ; use s[68:75] 399; GCN-NEXT: ;;#ASMEND 400; GCN-NEXT: ;;#ASMSTART 401; GCN-NEXT: ; use s[60:67] 402; GCN-NEXT: ;;#ASMEND 403; GCN-NEXT: ;;#ASMSTART 404; GCN-NEXT: ; 
use s[52:59] 405; GCN-NEXT: ;;#ASMEND 406; GCN-NEXT: ;;#ASMSTART 407; GCN-NEXT: ; use s[44:51] 408; GCN-NEXT: ;;#ASMEND 409; GCN-NEXT: ;;#ASMSTART 410; GCN-NEXT: ; use s[36:43] 411; GCN-NEXT: ;;#ASMEND 412; GCN-NEXT: ;;#ASMSTART 413; GCN-NEXT: ; use s[24:31] 414; GCN-NEXT: ;;#ASMEND 415; GCN-NEXT: ;;#ASMSTART 416; GCN-NEXT: ; use s[16:23] 417; GCN-NEXT: ;;#ASMEND 418; GCN-NEXT: ;;#ASMSTART 419; GCN-NEXT: ; use s[8:15] 420; GCN-NEXT: ;;#ASMEND 421; GCN-NEXT: ;;#ASMSTART 422; GCN-NEXT: ; use s[0:7] 423; GCN-NEXT: ;;#ASMEND 424; GCN-NEXT: .LBB0_2: ; %ret 425; GCN-NEXT: s_endpgm 426 %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 427 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 428 %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 429 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 430 %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 431 %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 432 %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 433 %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 434 %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 435 %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 436 %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 437 %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 438 %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 439 %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 440 %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 441 %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 442 %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 443 %cmp = icmp eq i32 %in, 0 444 br i1 %cmp, label %bb0, label %ret 445 446bb0: 447 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0 448 call void asm sideeffect "; use $0", "s"(<8 x 
i32> %wide.sgpr1) #0 449 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0 450 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0 451 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0 452 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0 453 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0 454 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0 455 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0 456 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0 457 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0 458 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0 459 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0 460 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0 461 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0 462 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0 463 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0 464 br label %ret 465 466ret: 467 ret void 468} 469 470; Some of the lanes of an SGPR spill are in one VGPR and some forced 471; into the next available VGPR. 
; Kernel overview: four <16 x i32>, one <8 x i32> and one <2 x i32> SGPR tuple are
; defined with inline asm before the branch, and each one is consumed by inline asm
; in %bb0, which is entered only when %in == 0. The defs are listed in the order
; 0, 1, 2, 5, 3, 4 while the uses run 0 through 5.
; NOTE(review): in the checks as currently generated, the four 16-wide tuples fill
; lanes 0-63 of one spill VGPR (saved at scratch offset 4) and the 8-wide and 2-wide
; tuples occupy lanes 0-9 of a second one (saved at scratch offset 0), so no single
; tuple ends up split across two VGPRs here -- confirm this still matches the intent.
; The GCN lines are autogenerated; regenerate them instead of editing by hand.
472define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 { 473; GCN-LABEL: split_sgpr_spill_2_vgprs: 474; GCN: ; %bb.0: 475; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 476; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 477; GCN-NEXT: s_mov_b32 s54, -1 478; GCN-NEXT: s_mov_b32 s55, 0xe8f000 479; GCN-NEXT: s_add_u32 s52, s52, s11 480; GCN-NEXT: s_addc_u32 s53, s53, 0 481; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 482; GCN-NEXT: ;;#ASMSTART 483; GCN-NEXT: ; def s[4:19] 484; GCN-NEXT: ;;#ASMEND 485; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane 486; GCN-NEXT: v_writelane_b32 v1, s4, 0 487; GCN-NEXT: v_writelane_b32 v1, s5, 1 488; GCN-NEXT: v_writelane_b32 v1, s6, 2 489; GCN-NEXT: v_writelane_b32 v1, s7, 3 490; GCN-NEXT: v_writelane_b32 v1, s8, 4 491; GCN-NEXT: v_writelane_b32 v1, s9, 5 492; GCN-NEXT: v_writelane_b32 v1, s10, 6 493; GCN-NEXT: v_writelane_b32 v1, s11, 7 494; GCN-NEXT: v_writelane_b32 v1, s12, 8 495; GCN-NEXT: v_writelane_b32 v1, s13, 9 496; GCN-NEXT: v_writelane_b32 v1, s14, 10 497; GCN-NEXT: v_writelane_b32 v1, s15, 11 498; GCN-NEXT: v_writelane_b32 v1, s16, 12 499; GCN-NEXT: v_writelane_b32 v1, s17, 13 500; GCN-NEXT: v_writelane_b32 v1, s18, 14 501; GCN-NEXT: v_writelane_b32 v1, s19, 15 502; GCN-NEXT: ;;#ASMSTART 503; GCN-NEXT: ; def s[4:19] 504; GCN-NEXT: ;;#ASMEND 505; GCN-NEXT: v_writelane_b32 v1, s4, 16 506; GCN-NEXT: v_writelane_b32 v1, s5, 17 507; GCN-NEXT: v_writelane_b32 v1, s6, 18 508; GCN-NEXT: v_writelane_b32 v1, s7, 19 509; GCN-NEXT: v_writelane_b32 v1, s8, 20 510; GCN-NEXT: v_writelane_b32 v1, s9, 21 511; GCN-NEXT: v_writelane_b32 v1, s10, 22 512; GCN-NEXT: v_writelane_b32 v1, s11, 23 513; GCN-NEXT: v_writelane_b32 v1, s12, 24 514; GCN-NEXT: v_writelane_b32 v1, s13, 25 515; GCN-NEXT: v_writelane_b32 v1, s14, 26 516; GCN-NEXT: v_writelane_b32 v1, s15, 27 517; GCN-NEXT: v_writelane_b32 v1, s16, 28 518; GCN-NEXT: v_writelane_b32 v1, s17, 29 519; GCN-NEXT: v_writelane_b32 v1, s18, 30 520; GCN-NEXT: 
v_writelane_b32 v1, s19, 31 521; GCN-NEXT: ;;#ASMSTART 522; GCN-NEXT: ; def s[4:19] 523; GCN-NEXT: ;;#ASMEND 524; GCN-NEXT: v_writelane_b32 v1, s4, 32 525; GCN-NEXT: v_writelane_b32 v1, s5, 33 526; GCN-NEXT: v_writelane_b32 v1, s6, 34 527; GCN-NEXT: v_writelane_b32 v1, s7, 35 528; GCN-NEXT: v_writelane_b32 v1, s8, 36 529; GCN-NEXT: v_writelane_b32 v1, s9, 37 530; GCN-NEXT: v_writelane_b32 v1, s10, 38 531; GCN-NEXT: v_writelane_b32 v1, s11, 39 532; GCN-NEXT: v_writelane_b32 v1, s12, 40 533; GCN-NEXT: v_writelane_b32 v1, s13, 41 534; GCN-NEXT: v_writelane_b32 v1, s14, 42 535; GCN-NEXT: v_writelane_b32 v1, s15, 43 536; GCN-NEXT: v_writelane_b32 v1, s16, 44 537; GCN-NEXT: v_writelane_b32 v1, s17, 45 538; GCN-NEXT: v_writelane_b32 v1, s18, 46 539; GCN-NEXT: v_writelane_b32 v1, s19, 47 540; GCN-NEXT: ;;#ASMSTART 541; GCN-NEXT: ; def s[4:19] 542; GCN-NEXT: ;;#ASMEND 543; GCN-NEXT: v_writelane_b32 v1, s4, 48 544; GCN-NEXT: v_writelane_b32 v1, s5, 49 545; GCN-NEXT: v_writelane_b32 v1, s6, 50 546; GCN-NEXT: v_writelane_b32 v1, s7, 51 547; GCN-NEXT: v_writelane_b32 v1, s8, 52 548; GCN-NEXT: v_writelane_b32 v1, s9, 53 549; GCN-NEXT: v_writelane_b32 v1, s10, 54 550; GCN-NEXT: v_writelane_b32 v1, s11, 55 551; GCN-NEXT: v_writelane_b32 v1, s12, 56 552; GCN-NEXT: v_writelane_b32 v1, s13, 57 553; GCN-NEXT: v_writelane_b32 v1, s14, 58 554; GCN-NEXT: v_writelane_b32 v1, s15, 59 555; GCN-NEXT: v_writelane_b32 v1, s16, 60 556; GCN-NEXT: v_writelane_b32 v1, s17, 61 557; GCN-NEXT: v_writelane_b32 v1, s18, 62 558; GCN-NEXT: v_writelane_b32 v1, s19, 63 559; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 560; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill 561; GCN-NEXT: s_mov_b64 exec, s[28:29] 562; GCN-NEXT: ;;#ASMSTART 563; GCN-NEXT: ; def s[4:11] 564; GCN-NEXT: ;;#ASMEND 565; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane 566; GCN-NEXT: v_writelane_b32 v1, s4, 0 567; GCN-NEXT: v_writelane_b32 v1, s5, 1 568; GCN-NEXT: v_writelane_b32 v1, s6, 2 569; 
GCN-NEXT: v_writelane_b32 v1, s7, 3 570; GCN-NEXT: v_writelane_b32 v1, s8, 4 571; GCN-NEXT: v_writelane_b32 v1, s9, 5 572; GCN-NEXT: v_writelane_b32 v1, s10, 6 573; GCN-NEXT: v_writelane_b32 v1, s11, 7 574; GCN-NEXT: ;;#ASMSTART 575; GCN-NEXT: ; def s[2:3] 576; GCN-NEXT: ;;#ASMEND 577; GCN-NEXT: v_writelane_b32 v1, s2, 8 578; GCN-NEXT: v_writelane_b32 v1, s3, 9 579; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 580; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 ; 4-byte Folded Spill 581; GCN-NEXT: s_mov_b64 exec, s[28:29] 582; GCN-NEXT: s_mov_b32 s1, 0 583; GCN-NEXT: s_waitcnt lgkmcnt(0) 584; GCN-NEXT: s_cmp_lg_u32 s0, s1 585; GCN-NEXT: s_cbranch_scc1 .LBB1_2 586; GCN-NEXT: ; %bb.1: ; %bb0 587; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 588; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload 589; GCN-NEXT: s_mov_b64 exec, s[28:29] 590; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 591; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload 592; GCN-NEXT: s_mov_b64 exec, s[28:29] 593; GCN-NEXT: s_waitcnt vmcnt(1) 594; GCN-NEXT: v_readlane_b32 s16, v0, 8 595; GCN-NEXT: v_readlane_b32 s17, v0, 9 596; GCN-NEXT: v_readlane_b32 s20, v0, 0 597; GCN-NEXT: v_readlane_b32 s21, v0, 1 598; GCN-NEXT: v_readlane_b32 s22, v0, 2 599; GCN-NEXT: v_readlane_b32 s23, v0, 3 600; GCN-NEXT: v_readlane_b32 s24, v0, 4 601; GCN-NEXT: v_readlane_b32 s25, v0, 5 602; GCN-NEXT: v_readlane_b32 s26, v0, 6 603; GCN-NEXT: v_readlane_b32 s27, v0, 7 604; GCN-NEXT: s_waitcnt vmcnt(0) 605; GCN-NEXT: v_readlane_b32 s36, v1, 32 606; GCN-NEXT: v_readlane_b32 s37, v1, 33 607; GCN-NEXT: v_readlane_b32 s38, v1, 34 608; GCN-NEXT: v_readlane_b32 s39, v1, 35 609; GCN-NEXT: v_readlane_b32 s40, v1, 36 610; GCN-NEXT: v_readlane_b32 s41, v1, 37 611; GCN-NEXT: v_readlane_b32 s42, v1, 38 612; GCN-NEXT: v_readlane_b32 s43, v1, 39 613; GCN-NEXT: v_readlane_b32 s44, v1, 40 614; GCN-NEXT: v_readlane_b32 s45, v1, 41 615; GCN-NEXT: v_readlane_b32 s46, v1, 42 616; GCN-NEXT: 
v_readlane_b32 s47, v1, 43 617; GCN-NEXT: v_readlane_b32 s48, v1, 44 618; GCN-NEXT: v_readlane_b32 s49, v1, 45 619; GCN-NEXT: v_readlane_b32 s50, v1, 46 620; GCN-NEXT: v_readlane_b32 s51, v1, 47 621; GCN-NEXT: v_readlane_b32 s0, v1, 0 622; GCN-NEXT: v_readlane_b32 s1, v1, 1 623; GCN-NEXT: v_readlane_b32 s2, v1, 2 624; GCN-NEXT: v_readlane_b32 s3, v1, 3 625; GCN-NEXT: v_readlane_b32 s4, v1, 4 626; GCN-NEXT: v_readlane_b32 s5, v1, 5 627; GCN-NEXT: v_readlane_b32 s6, v1, 6 628; GCN-NEXT: v_readlane_b32 s7, v1, 7 629; GCN-NEXT: v_readlane_b32 s8, v1, 8 630; GCN-NEXT: v_readlane_b32 s9, v1, 9 631; GCN-NEXT: v_readlane_b32 s10, v1, 10 632; GCN-NEXT: v_readlane_b32 s11, v1, 11 633; GCN-NEXT: v_readlane_b32 s12, v1, 12 634; GCN-NEXT: v_readlane_b32 s13, v1, 13 635; GCN-NEXT: v_readlane_b32 s14, v1, 14 636; GCN-NEXT: v_readlane_b32 s15, v1, 15 637; GCN-NEXT: ;;#ASMSTART 638; GCN-NEXT: ; use s[0:15] 639; GCN-NEXT: ;;#ASMEND 640; GCN-NEXT: v_readlane_b32 s0, v1, 16 641; GCN-NEXT: v_readlane_b32 s1, v1, 17 642; GCN-NEXT: v_readlane_b32 s2, v1, 18 643; GCN-NEXT: v_readlane_b32 s3, v1, 19 644; GCN-NEXT: v_readlane_b32 s4, v1, 20 645; GCN-NEXT: v_readlane_b32 s5, v1, 21 646; GCN-NEXT: v_readlane_b32 s6, v1, 22 647; GCN-NEXT: v_readlane_b32 s7, v1, 23 648; GCN-NEXT: v_readlane_b32 s8, v1, 24 649; GCN-NEXT: v_readlane_b32 s9, v1, 25 650; GCN-NEXT: v_readlane_b32 s10, v1, 26 651; GCN-NEXT: v_readlane_b32 s11, v1, 27 652; GCN-NEXT: v_readlane_b32 s12, v1, 28 653; GCN-NEXT: v_readlane_b32 s13, v1, 29 654; GCN-NEXT: v_readlane_b32 s14, v1, 30 655; GCN-NEXT: v_readlane_b32 s15, v1, 31 656; GCN-NEXT: ;;#ASMSTART 657; GCN-NEXT: ; use s[0:15] 658; GCN-NEXT: ;;#ASMEND 659; GCN-NEXT: v_readlane_b32 s0, v1, 48 660; GCN-NEXT: v_readlane_b32 s1, v1, 49 661; GCN-NEXT: v_readlane_b32 s2, v1, 50 662; GCN-NEXT: v_readlane_b32 s3, v1, 51 663; GCN-NEXT: v_readlane_b32 s4, v1, 52 664; GCN-NEXT: v_readlane_b32 s5, v1, 53 665; GCN-NEXT: v_readlane_b32 s6, v1, 54 666; GCN-NEXT: v_readlane_b32 s7, v1, 55 
667; GCN-NEXT: v_readlane_b32 s8, v1, 56 668; GCN-NEXT: v_readlane_b32 s9, v1, 57 669; GCN-NEXT: v_readlane_b32 s10, v1, 58 670; GCN-NEXT: v_readlane_b32 s11, v1, 59 671; GCN-NEXT: v_readlane_b32 s12, v1, 60 672; GCN-NEXT: v_readlane_b32 s13, v1, 61 673; GCN-NEXT: v_readlane_b32 s14, v1, 62 674; GCN-NEXT: v_readlane_b32 s15, v1, 63 675; GCN-NEXT: ;;#ASMSTART 676; GCN-NEXT: ; use s[36:51] 677; GCN-NEXT: ;;#ASMEND 678; GCN-NEXT: ;;#ASMSTART 679; GCN-NEXT: ; use s[20:27] 680; GCN-NEXT: ;;#ASMEND 681; GCN-NEXT: ;;#ASMSTART 682; GCN-NEXT: ; use s[16:17] 683; GCN-NEXT: ;;#ASMEND 684; GCN-NEXT: ;;#ASMSTART 685; GCN-NEXT: ; use s[0:15] 686; GCN-NEXT: ;;#ASMEND 687; GCN-NEXT: .LBB1_2: ; %ret 688; GCN-NEXT: s_endpgm 689 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 690 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 691 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 692 %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 693 %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 694 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 695 696 %cmp = icmp eq i32 %in, 0 697 br i1 %cmp, label %bb0, label %ret 698 699bb0: 700 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0 701 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0 702 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0 703 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0 704 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0 705 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0 706 br label %ret 707 708ret: 709 ret void 710} 711 712; The first 64 SGPR spills can go to a VGPR, but there isn't a second 713; so some spills must be to memory. The last 16 element spill runs out 714; of lanes at the 15th element. 
715define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 { 716; GCN-LABEL: no_vgprs_last_sgpr_spill: 717; GCN: ; %bb.0: 718; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 719; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 720; GCN-NEXT: s_mov_b32 s54, -1 721; GCN-NEXT: s_mov_b32 s55, 0xe8f000 722; GCN-NEXT: s_add_u32 s52, s52, s11 723; GCN-NEXT: s_addc_u32 s53, s53, 0 724; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 725; GCN-NEXT: ;;#ASMSTART 726; GCN-NEXT: ;;#ASMEND 727; GCN-NEXT: ;;#ASMSTART 728; GCN-NEXT: ;;#ASMEND 729; GCN-NEXT: ;;#ASMSTART 730; GCN-NEXT: ;;#ASMEND 731; GCN-NEXT: ;;#ASMSTART 732; GCN-NEXT: ;;#ASMEND 733; GCN-NEXT: ;;#ASMSTART 734; GCN-NEXT: ;;#ASMEND 735; GCN-NEXT: ;;#ASMSTART 736; GCN-NEXT: ;;#ASMEND 737; GCN-NEXT: ;;#ASMSTART 738; GCN-NEXT: ; def s[4:19] 739; GCN-NEXT: ;;#ASMEND 740; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane 741; GCN-NEXT: v_writelane_b32 v32, s4, 0 742; GCN-NEXT: v_writelane_b32 v32, s5, 1 743; GCN-NEXT: v_writelane_b32 v32, s6, 2 744; GCN-NEXT: v_writelane_b32 v32, s7, 3 745; GCN-NEXT: v_writelane_b32 v32, s8, 4 746; GCN-NEXT: v_writelane_b32 v32, s9, 5 747; GCN-NEXT: v_writelane_b32 v32, s10, 6 748; GCN-NEXT: v_writelane_b32 v32, s11, 7 749; GCN-NEXT: v_writelane_b32 v32, s12, 8 750; GCN-NEXT: v_writelane_b32 v32, s13, 9 751; GCN-NEXT: v_writelane_b32 v32, s14, 10 752; GCN-NEXT: v_writelane_b32 v32, s15, 11 753; GCN-NEXT: v_writelane_b32 v32, s16, 12 754; GCN-NEXT: v_writelane_b32 v32, s17, 13 755; GCN-NEXT: v_writelane_b32 v32, s18, 14 756; GCN-NEXT: v_writelane_b32 v32, s19, 15 757; GCN-NEXT: ;;#ASMSTART 758; GCN-NEXT: ; def s[4:19] 759; GCN-NEXT: ;;#ASMEND 760; GCN-NEXT: v_writelane_b32 v32, s4, 16 761; GCN-NEXT: v_writelane_b32 v32, s5, 17 762; GCN-NEXT: v_writelane_b32 v32, s6, 18 763; GCN-NEXT: v_writelane_b32 v32, s7, 19 764; GCN-NEXT: v_writelane_b32 v32, s8, 20 765; GCN-NEXT: v_writelane_b32 v32, s9, 21 766; GCN-NEXT: v_writelane_b32 v32, s10, 22 767; GCN-NEXT: 
v_writelane_b32 v32, s11, 23 768; GCN-NEXT: v_writelane_b32 v32, s12, 24 769; GCN-NEXT: v_writelane_b32 v32, s13, 25 770; GCN-NEXT: v_writelane_b32 v32, s14, 26 771; GCN-NEXT: v_writelane_b32 v32, s15, 27 772; GCN-NEXT: v_writelane_b32 v32, s16, 28 773; GCN-NEXT: v_writelane_b32 v32, s17, 29 774; GCN-NEXT: v_writelane_b32 v32, s18, 30 775; GCN-NEXT: v_writelane_b32 v32, s19, 31 776; GCN-NEXT: ;;#ASMSTART 777; GCN-NEXT: ; def s[4:19] 778; GCN-NEXT: ;;#ASMEND 779; GCN-NEXT: v_writelane_b32 v32, s4, 32 780; GCN-NEXT: v_writelane_b32 v32, s5, 33 781; GCN-NEXT: v_writelane_b32 v32, s6, 34 782; GCN-NEXT: v_writelane_b32 v32, s7, 35 783; GCN-NEXT: v_writelane_b32 v32, s8, 36 784; GCN-NEXT: v_writelane_b32 v32, s9, 37 785; GCN-NEXT: v_writelane_b32 v32, s10, 38 786; GCN-NEXT: v_writelane_b32 v32, s11, 39 787; GCN-NEXT: v_writelane_b32 v32, s12, 40 788; GCN-NEXT: v_writelane_b32 v32, s13, 41 789; GCN-NEXT: v_writelane_b32 v32, s14, 42 790; GCN-NEXT: v_writelane_b32 v32, s15, 43 791; GCN-NEXT: v_writelane_b32 v32, s16, 44 792; GCN-NEXT: v_writelane_b32 v32, s17, 45 793; GCN-NEXT: v_writelane_b32 v32, s18, 46 794; GCN-NEXT: v_writelane_b32 v32, s19, 47 795; GCN-NEXT: ;;#ASMSTART 796; GCN-NEXT: ; def s[4:19] 797; GCN-NEXT: ;;#ASMEND 798; GCN-NEXT: v_writelane_b32 v32, s4, 48 799; GCN-NEXT: v_writelane_b32 v32, s5, 49 800; GCN-NEXT: v_writelane_b32 v32, s6, 50 801; GCN-NEXT: v_writelane_b32 v32, s7, 51 802; GCN-NEXT: v_writelane_b32 v32, s8, 52 803; GCN-NEXT: v_writelane_b32 v32, s9, 53 804; GCN-NEXT: v_writelane_b32 v32, s10, 54 805; GCN-NEXT: v_writelane_b32 v32, s11, 55 806; GCN-NEXT: v_writelane_b32 v32, s12, 56 807; GCN-NEXT: v_writelane_b32 v32, s13, 57 808; GCN-NEXT: v_writelane_b32 v32, s14, 58 809; GCN-NEXT: v_writelane_b32 v32, s15, 59 810; GCN-NEXT: v_writelane_b32 v32, s16, 60 811; GCN-NEXT: v_writelane_b32 v32, s17, 61 812; GCN-NEXT: v_writelane_b32 v32, s18, 62 813; GCN-NEXT: v_writelane_b32 v32, s19, 63 814; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 815; GCN-NEXT: 
buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill 816; GCN-NEXT: s_mov_b64 exec, s[34:35] 817; GCN-NEXT: ;;#ASMSTART 818; GCN-NEXT: ; def s[2:3] 819; GCN-NEXT: ;;#ASMEND 820; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane 821; GCN-NEXT: v_writelane_b32 v32, s2, 0 822; GCN-NEXT: v_writelane_b32 v32, s3, 1 823; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 824; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill 825; GCN-NEXT: s_mov_b64 exec, s[34:35] 826; GCN-NEXT: s_mov_b32 s1, 0 827; GCN-NEXT: s_waitcnt lgkmcnt(0) 828; GCN-NEXT: s_cmp_lg_u32 s0, s1 829; GCN-NEXT: s_cbranch_scc1 .LBB2_2 830; GCN-NEXT: ; %bb.1: ; %bb0 831; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 832; GCN-NEXT: buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload 833; GCN-NEXT: s_mov_b64 exec, s[34:35] 834; GCN-NEXT: s_waitcnt vmcnt(0) 835; GCN-NEXT: v_readlane_b32 s36, v31, 32 836; GCN-NEXT: v_readlane_b32 s37, v31, 33 837; GCN-NEXT: v_readlane_b32 s38, v31, 34 838; GCN-NEXT: v_readlane_b32 s39, v31, 35 839; GCN-NEXT: v_readlane_b32 s40, v31, 36 840; GCN-NEXT: v_readlane_b32 s41, v31, 37 841; GCN-NEXT: v_readlane_b32 s42, v31, 38 842; GCN-NEXT: v_readlane_b32 s43, v31, 39 843; GCN-NEXT: v_readlane_b32 s44, v31, 40 844; GCN-NEXT: v_readlane_b32 s45, v31, 41 845; GCN-NEXT: v_readlane_b32 s46, v31, 42 846; GCN-NEXT: v_readlane_b32 s47, v31, 43 847; GCN-NEXT: v_readlane_b32 s48, v31, 44 848; GCN-NEXT: v_readlane_b32 s49, v31, 45 849; GCN-NEXT: v_readlane_b32 s50, v31, 46 850; GCN-NEXT: v_readlane_b32 s51, v31, 47 851; GCN-NEXT: v_readlane_b32 s0, v31, 16 852; GCN-NEXT: v_readlane_b32 s1, v31, 17 853; GCN-NEXT: v_readlane_b32 s2, v31, 18 854; GCN-NEXT: v_readlane_b32 s3, v31, 19 855; GCN-NEXT: v_readlane_b32 s4, v31, 20 856; GCN-NEXT: v_readlane_b32 s5, v31, 21 857; GCN-NEXT: v_readlane_b32 s6, v31, 22 858; GCN-NEXT: v_readlane_b32 s7, v31, 23 859; GCN-NEXT: v_readlane_b32 s8, v31, 24 860; GCN-NEXT: v_readlane_b32 s9, v31, 25 861; 
GCN-NEXT: v_readlane_b32 s10, v31, 26 862; GCN-NEXT: v_readlane_b32 s11, v31, 27 863; GCN-NEXT: v_readlane_b32 s12, v31, 28 864; GCN-NEXT: v_readlane_b32 s13, v31, 29 865; GCN-NEXT: v_readlane_b32 s14, v31, 30 866; GCN-NEXT: v_readlane_b32 s15, v31, 31 867; GCN-NEXT: v_readlane_b32 s16, v31, 0 868; GCN-NEXT: v_readlane_b32 s17, v31, 1 869; GCN-NEXT: v_readlane_b32 s18, v31, 2 870; GCN-NEXT: v_readlane_b32 s19, v31, 3 871; GCN-NEXT: v_readlane_b32 s20, v31, 4 872; GCN-NEXT: v_readlane_b32 s21, v31, 5 873; GCN-NEXT: v_readlane_b32 s22, v31, 6 874; GCN-NEXT: v_readlane_b32 s23, v31, 7 875; GCN-NEXT: v_readlane_b32 s24, v31, 8 876; GCN-NEXT: v_readlane_b32 s25, v31, 9 877; GCN-NEXT: v_readlane_b32 s26, v31, 10 878; GCN-NEXT: v_readlane_b32 s27, v31, 11 879; GCN-NEXT: v_readlane_b32 s28, v31, 12 880; GCN-NEXT: v_readlane_b32 s29, v31, 13 881; GCN-NEXT: v_readlane_b32 s30, v31, 14 882; GCN-NEXT: v_readlane_b32 s31, v31, 15 883; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 884; GCN-NEXT: buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload 885; GCN-NEXT: s_mov_b64 exec, s[34:35] 886; GCN-NEXT: ;;#ASMSTART 887; GCN-NEXT: ; use s[16:31] 888; GCN-NEXT: ;;#ASMEND 889; GCN-NEXT: ;;#ASMSTART 890; GCN-NEXT: ; use s[0:15] 891; GCN-NEXT: ;;#ASMEND 892; GCN-NEXT: v_readlane_b32 s4, v31, 48 893; GCN-NEXT: v_readlane_b32 s5, v31, 49 894; GCN-NEXT: v_readlane_b32 s6, v31, 50 895; GCN-NEXT: v_readlane_b32 s7, v31, 51 896; GCN-NEXT: v_readlane_b32 s8, v31, 52 897; GCN-NEXT: v_readlane_b32 s9, v31, 53 898; GCN-NEXT: v_readlane_b32 s10, v31, 54 899; GCN-NEXT: v_readlane_b32 s11, v31, 55 900; GCN-NEXT: v_readlane_b32 s12, v31, 56 901; GCN-NEXT: v_readlane_b32 s13, v31, 57 902; GCN-NEXT: v_readlane_b32 s14, v31, 58 903; GCN-NEXT: v_readlane_b32 s15, v31, 59 904; GCN-NEXT: v_readlane_b32 s16, v31, 60 905; GCN-NEXT: v_readlane_b32 s17, v31, 61 906; GCN-NEXT: v_readlane_b32 s18, v31, 62 907; GCN-NEXT: v_readlane_b32 s19, v31, 63 908; GCN-NEXT: s_waitcnt vmcnt(0) 909; GCN-NEXT: 
v_readlane_b32 s0, v32, 0 910; GCN-NEXT: v_readlane_b32 s1, v32, 1 911; GCN-NEXT: ;;#ASMSTART 912; GCN-NEXT: ; use s[36:51] 913; GCN-NEXT: ;;#ASMEND 914; GCN-NEXT: ;;#ASMSTART 915; GCN-NEXT: ; use s[4:19] 916; GCN-NEXT: ;;#ASMEND 917; GCN-NEXT: ;;#ASMSTART 918; GCN-NEXT: ; use s[0:1] 919; GCN-NEXT: ;;#ASMEND 920; GCN-NEXT: .LBB2_2: ; %ret 921; GCN-NEXT: s_endpgm 922 call void asm sideeffect "", "~{v[0:7]}" () #0 923 call void asm sideeffect "", "~{v[8:15]}" () #0 924 call void asm sideeffect "", "~{v[16:23]}" () #0 925 call void asm sideeffect "", "~{v[24:27]}"() #0 926 call void asm sideeffect "", "~{v[28:29]}"() #0 927 call void asm sideeffect "", "~{v30}"() #0 928 929 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 930 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 931 %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 932 %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 933 %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 934 %cmp = icmp eq i32 %in, 0 935 br i1 %cmp, label %bb0, label %ret 936 937bb0: 938 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0 939 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0 940 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0 941 call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0 942 call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0 943 br label %ret 944 945ret: 946 ret void 947} 948 949; Same as @no_vgprs_last_sgpr_spill, some SGPR spills must go to memory. 950; Additionally, v0 is live throughout the function. 
; Test shape: the IR body clobbers v0-v30, defines 66 SGPR values (four
; <16 x i32> tuples plus one <2 x i32>) ahead of the branch, and in %bb0 keeps
; a VGPR value alive while every SGPR tuple is consumed. The autogenerated
; assertions below show the SGPRs written into lanes of v32, both lane-holding
; VGPRs stored to scratch, and both reloaded (into v31 and v32) inside %bb0.
define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s54, -1
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; GCN-NEXT: v_writelane_b32 v32, s4, 0
; GCN-NEXT: v_writelane_b32 v32, s5, 1
; GCN-NEXT: v_writelane_b32 v32, s6, 2
; GCN-NEXT: v_writelane_b32 v32, s7, 3
; GCN-NEXT: v_writelane_b32 v32, s8, 4
; GCN-NEXT: v_writelane_b32 v32, s9, 5
; GCN-NEXT: v_writelane_b32 v32, s10, 6
; GCN-NEXT: v_writelane_b32 v32, s11, 7
; GCN-NEXT: v_writelane_b32 v32, s12, 8
; GCN-NEXT: v_writelane_b32 v32, s13, 9
; GCN-NEXT: v_writelane_b32 v32, s14, 10
; GCN-NEXT: v_writelane_b32 v32, s15, 11
; GCN-NEXT: v_writelane_b32 v32, s16, 12
; GCN-NEXT: v_writelane_b32 v32, s17, 13
; GCN-NEXT: v_writelane_b32 v32, s18, 14
; GCN-NEXT: v_writelane_b32 v32, s19, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_writelane_b32 v32, s4, 16
; GCN-NEXT: v_writelane_b32 v32, s5, 17
; GCN-NEXT: v_writelane_b32 v32, s6, 18
; GCN-NEXT: v_writelane_b32 v32, s7, 19
; GCN-NEXT: v_writelane_b32 v32, s8, 20
; GCN-NEXT: v_writelane_b32 v32, s9, 21
; GCN-NEXT: v_writelane_b32 v32, s10, 22
; GCN-NEXT: v_writelane_b32 v32, s11, 23
; GCN-NEXT: v_writelane_b32 v32, s12, 24
; GCN-NEXT: v_writelane_b32 v32, s13, 25
; GCN-NEXT: v_writelane_b32 v32, s14, 26
; GCN-NEXT: v_writelane_b32 v32, s15, 27
; GCN-NEXT: v_writelane_b32 v32, s16, 28
; GCN-NEXT: v_writelane_b32 v32, s17, 29
; GCN-NEXT: v_writelane_b32 v32, s18, 30
; GCN-NEXT: v_writelane_b32 v32, s19, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_writelane_b32 v32, s4, 32
; GCN-NEXT: v_writelane_b32 v32, s5, 33
; GCN-NEXT: v_writelane_b32 v32, s6, 34
; GCN-NEXT: v_writelane_b32 v32, s7, 35
; GCN-NEXT: v_writelane_b32 v32, s8, 36
; GCN-NEXT: v_writelane_b32 v32, s9, 37
; GCN-NEXT: v_writelane_b32 v32, s10, 38
; GCN-NEXT: v_writelane_b32 v32, s11, 39
; GCN-NEXT: v_writelane_b32 v32, s12, 40
; GCN-NEXT: v_writelane_b32 v32, s13, 41
; GCN-NEXT: v_writelane_b32 v32, s14, 42
; GCN-NEXT: v_writelane_b32 v32, s15, 43
; GCN-NEXT: v_writelane_b32 v32, s16, 44
; GCN-NEXT: v_writelane_b32 v32, s17, 45
; GCN-NEXT: v_writelane_b32 v32, s18, 46
; GCN-NEXT: v_writelane_b32 v32, s19, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_writelane_b32 v32, s4, 48
; GCN-NEXT: v_writelane_b32 v32, s5, 49
; GCN-NEXT: v_writelane_b32 v32, s6, 50
; GCN-NEXT: v_writelane_b32 v32, s7, 51
; GCN-NEXT: v_writelane_b32 v32, s8, 52
; GCN-NEXT: v_writelane_b32 v32, s9, 53
; GCN-NEXT: v_writelane_b32 v32, s10, 54
; GCN-NEXT: v_writelane_b32 v32, s11, 55
; GCN-NEXT: v_writelane_b32 v32, s12, 56
; GCN-NEXT: v_writelane_b32 v32, s13, 57
; GCN-NEXT: v_writelane_b32 v32, s14, 58
; GCN-NEXT: v_writelane_b32 v32, s15, 59
; GCN-NEXT: v_writelane_b32 v32, s16, 60
; GCN-NEXT: v_writelane_b32 v32, s17, 61
; GCN-NEXT: v_writelane_b32 v32, s18, 62
; GCN-NEXT: v_writelane_b32 v32, s19, 63
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[2:3]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; GCN-NEXT: v_writelane_b32 v32, s2, 0
; GCN-NEXT: v_writelane_b32 v32, s3, 1
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s0, s1
; GCN-NEXT: s_cbranch_scc1 .LBB3_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s36, v31, 32
; GCN-NEXT: v_readlane_b32 s37, v31, 33
; GCN-NEXT: v_readlane_b32 s38, v31, 34
; GCN-NEXT: v_readlane_b32 s39, v31, 35
; GCN-NEXT: v_readlane_b32 s40, v31, 36
; GCN-NEXT: v_readlane_b32 s41, v31, 37
; GCN-NEXT: v_readlane_b32 s42, v31, 38
; GCN-NEXT: v_readlane_b32 s43, v31, 39
; GCN-NEXT: v_readlane_b32 s44, v31, 40
; GCN-NEXT: v_readlane_b32 s45, v31, 41
; GCN-NEXT: v_readlane_b32 s46, v31, 42
; GCN-NEXT: v_readlane_b32 s47, v31, 43
; GCN-NEXT: v_readlane_b32 s48, v31, 44
; GCN-NEXT: v_readlane_b32 s49, v31, 45
; GCN-NEXT: v_readlane_b32 s50, v31, 46
; GCN-NEXT: v_readlane_b32 s51, v31, 47
; GCN-NEXT: v_readlane_b32 s0, v31, 16
; GCN-NEXT: v_readlane_b32 s1, v31, 17
; GCN-NEXT: v_readlane_b32 s2, v31, 18
; GCN-NEXT: v_readlane_b32 s3, v31, 19
; GCN-NEXT: v_readlane_b32 s4, v31, 20
; GCN-NEXT: v_readlane_b32 s5, v31, 21
; GCN-NEXT: v_readlane_b32 s6, v31, 22
; GCN-NEXT: v_readlane_b32 s7, v31, 23
; GCN-NEXT: v_readlane_b32 s8, v31, 24
; GCN-NEXT: v_readlane_b32 s9, v31, 25
; GCN-NEXT: v_readlane_b32 s10, v31, 26
; GCN-NEXT: v_readlane_b32 s11, v31, 27
; GCN-NEXT: v_readlane_b32 s12, v31, 28
; GCN-NEXT: v_readlane_b32 s13, v31, 29
; GCN-NEXT: v_readlane_b32 s14, v31, 30
; GCN-NEXT: v_readlane_b32 s15, v31, 31
; GCN-NEXT: v_readlane_b32 s16, v31, 0
; GCN-NEXT: v_readlane_b32 s17, v31, 1
; GCN-NEXT: v_readlane_b32 s18, v31, 2
; GCN-NEXT: v_readlane_b32 s19, v31, 3
; GCN-NEXT: v_readlane_b32 s20, v31, 4
; GCN-NEXT: v_readlane_b32 s21, v31, 5
; GCN-NEXT: v_readlane_b32 s22, v31, 6
; GCN-NEXT: v_readlane_b32 s23, v31, 7
; GCN-NEXT: v_readlane_b32 s24, v31, 8
; GCN-NEXT: v_readlane_b32 s25, v31, 9
; GCN-NEXT: v_readlane_b32 s26, v31, 10
; GCN-NEXT: v_readlane_b32 s27, v31, 11
; GCN-NEXT: v_readlane_b32 s28, v31, 12
; GCN-NEXT: v_readlane_b32 s29, v31, 13
; GCN-NEXT: v_readlane_b32 s30, v31, 14
; GCN-NEXT: v_readlane_b32 s31, v31, 15
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def v0
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[16:31]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s4, v31, 48
; GCN-NEXT: v_readlane_b32 s5, v31, 49
; GCN-NEXT: v_readlane_b32 s6, v31, 50
; GCN-NEXT: v_readlane_b32 s7, v31, 51
; GCN-NEXT: v_readlane_b32 s8, v31, 52
; GCN-NEXT: v_readlane_b32 s9, v31, 53
; GCN-NEXT: v_readlane_b32 s10, v31, 54
; GCN-NEXT: v_readlane_b32 s11, v31, 55
; GCN-NEXT: v_readlane_b32 s12, v31, 56
; GCN-NEXT: v_readlane_b32 s13, v31, 57
; GCN-NEXT: v_readlane_b32 s14, v31, 58
; GCN-NEXT: v_readlane_b32 s15, v31, 59
; GCN-NEXT: v_readlane_b32 s16, v31, 60
; GCN-NEXT: v_readlane_b32 s17, v31, 61
; GCN-NEXT: v_readlane_b32 s18, v31, 62
; GCN-NEXT: v_readlane_b32 s19, v31, 63
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s0, v32, 0
; GCN-NEXT: v_readlane_b32 s1, v32, 1
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[36:51]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:1]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use v0
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB3_2: ; %ret
; GCN-NEXT: s_endpgm
  ; Six empty asm statements that together list v0 through v30 as clobbered.
  ; NOTE(review): presumably this, combined with attribute group #1, leaves
  ; too few VGPRs for all SGPR lane spills to stay in registers - confirm.
  call void asm sideeffect "", "~{v[0:7]}" () #0
  call void asm sideeffect "", "~{v[8:15]}" () #0
  call void asm sideeffect "", "~{v[16:23]}" () #0
  call void asm sideeffect "", "~{v[24:27]}"() #0
  call void asm sideeffect "", "~{v[28:29]}"() #0
  call void asm sideeffect "", "~{v30}"() #0

  ; Define 4 x 16 + 2 = 66 SGPR values before the branch. In the assertions
  ; above, the first 64 fill lanes 0-63 of one v32 and the final two go to
  ; lanes 0-1 of a second v32; each v32 is stored to scratch afterwards.
  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  ; Only the %in == 0 path (%bb0) consumes the values defined above.
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  ; %vgpr0 is requested in a VGPR ("=v"); the assertions above show it placed
  ; in v0. It is defined before the SGPR uses and consumed after them.
  ; NOTE(review): %vgpr0 is defined and used only inside %bb0, so it is live
  ; across the uses in this block rather than across the whole function.
  %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
  call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
  br label %ret

ret:
  ret void
}

; Attribute group #0: nounwind only. It is the group attached to the inline
; asm calls in @no_vgprs_last_sgpr_spill_live_v0.
attributes #0 = { nounwind }
; Attribute group #1: nounwind plus "amdgpu-waves-per-eu"="7,7". It is the
; group attached to the @no_vgprs_last_sgpr_spill_live_v0 definition.
; NOTE(review): presumably the waves-per-EU request is what limits the VGPR
; budget in that test - confirm against the AMDGPU backend documentation.
attributes #1 = { nounwind "amdgpu-waves-per-eu"="7,7" }

; Module flag setting "amdhsa_code_object_version" to 500.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}