1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s 4 5; Due to high register pressure, regalloc splits the live range of the WWM VGPR used for SGPR spills 6; and introduces a copy. The copy should be a whole-wave copy with exec mask manipulation around it. 7; FIXME: The destination register of the whole-wave copy should have all of its lanes preserved 8; with a spill/restore in the function prolog/epilog; otherwise the copy may clobber its inactive lanes. 9define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { 10; GFX906-LABEL: preserve_wwm_copy_dstreg: 11; GFX906: ; %bb.0: 12; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GFX906-NEXT: s_mov_b32 s16, s33 14; GFX906-NEXT: s_mov_b32 s33, s32 15; GFX906-NEXT: s_xor_saveexec_b64 s[18:19], -1 16; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill 17; GFX906-NEXT: s_mov_b64 exec, -1 18; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill 19; GFX906-NEXT: s_mov_b64 exec, s[18:19] 20; GFX906-NEXT: s_mov_b32 s21, s15 21; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane 22; GFX906-NEXT: s_mov_b32 s22, s14 23; GFX906-NEXT: v_writelane_b32 v39, s21, 0 24; GFX906-NEXT: v_writelane_b32 v39, s22, 1 25; GFX906-NEXT: s_mov_b32 s23, s13 26; GFX906-NEXT: v_writelane_b32 v39, s23, 2 27; GFX906-NEXT: s_mov_b32 s24, s12 28; GFX906-NEXT: v_writelane_b32 v39, s24, 3 29; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11] 30; GFX906-NEXT: v_writelane_b32 v39, s26, 4 31; GFX906-NEXT: v_writelane_b32 v39, s27, 5 32; GFX906-NEXT: v_writelane_b32 v39, s8, 6 33; GFX906-NEXT: v_writelane_b32 v41, s16, 4 34; GFX906-NEXT: v_writelane_b32 v39, s9, 7 35; 
GFX906-NEXT: v_writelane_b32 v41, s34, 2 36; GFX906-NEXT: v_writelane_b32 v39, s6, 8 37; GFX906-NEXT: v_writelane_b32 v41, s35, 3 38; GFX906-NEXT: v_writelane_b32 v39, s7, 9 39; GFX906-NEXT: v_writelane_b32 v41, s30, 0 40; GFX906-NEXT: v_writelane_b32 v39, s4, 10 41; GFX906-NEXT: s_addk_i32 s32, 0x2800 42; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 43; GFX906-NEXT: v_writelane_b32 v41, s31, 1 44; GFX906-NEXT: v_mov_b32_e32 v32, v31 45; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill 46; GFX906-NEXT: s_nop 0 47; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill 48; GFX906-NEXT: v_writelane_b32 v39, s5, 11 49; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 50; GFX906-NEXT: s_mov_b64 exec, s[34:35] 51; GFX906-NEXT: ;;#ASMSTART 52; GFX906-NEXT: ; def v[0:31] 53; GFX906-NEXT: ;;#ASMEND 54; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill 55; GFX906-NEXT: s_nop 0 56; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill 57; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill 58; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill 59; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill 60; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill 61; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill 62; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill 63; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill 64; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill 65; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill 66; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill 67; 
GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill 68; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill 69; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill 70; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill 71; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill 72; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill 73; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill 74; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill 75; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill 76; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill 77; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill 78; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill 79; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill 80; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill 81; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill 82; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill 83; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill 84; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill 85; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill 86; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill 87; GFX906-NEXT: ;;#ASMSTART 88; GFX906-NEXT: ; def v40 89; GFX906-NEXT: ;;#ASMEND 90; GFX906-NEXT: ;;#ASMSTART 91; GFX906-NEXT: ; def s11 92; GFX906-NEXT: ;;#ASMEND 93; GFX906-NEXT: 
v_writelane_b32 v39, s11, 12 94; GFX906-NEXT: ;;#ASMSTART 95; GFX906-NEXT: ; def s12 96; GFX906-NEXT: ;;#ASMEND 97; GFX906-NEXT: v_writelane_b32 v39, s12, 13 98; GFX906-NEXT: ;;#ASMSTART 99; GFX906-NEXT: ; def s13 100; GFX906-NEXT: ;;#ASMEND 101; GFX906-NEXT: v_writelane_b32 v39, s13, 14 102; GFX906-NEXT: ;;#ASMSTART 103; GFX906-NEXT: ; def s14 104; GFX906-NEXT: ;;#ASMEND 105; GFX906-NEXT: v_writelane_b32 v39, s14, 15 106; GFX906-NEXT: ;;#ASMSTART 107; GFX906-NEXT: ; def s15 108; GFX906-NEXT: ;;#ASMEND 109; GFX906-NEXT: v_writelane_b32 v39, s15, 16 110; GFX906-NEXT: s_getpc_b64 s[10:11] 111; GFX906-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 112; GFX906-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 113; GFX906-NEXT: ;;#ASMSTART 114; GFX906-NEXT: ; def s16 115; GFX906-NEXT: ;;#ASMEND 116; GFX906-NEXT: v_writelane_b32 v39, s16, 17 117; GFX906-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 118; GFX906-NEXT: ;;#ASMSTART 119; GFX906-NEXT: ; def s17 120; GFX906-NEXT: ;;#ASMEND 121; GFX906-NEXT: v_writelane_b32 v39, s17, 18 122; GFX906-NEXT: ;;#ASMSTART 123; GFX906-NEXT: ; def s18 124; GFX906-NEXT: ;;#ASMEND 125; GFX906-NEXT: v_writelane_b32 v39, s18, 19 126; GFX906-NEXT: ;;#ASMSTART 127; GFX906-NEXT: ; def s19 128; GFX906-NEXT: ;;#ASMEND 129; GFX906-NEXT: v_writelane_b32 v39, s19, 20 130; GFX906-NEXT: ;;#ASMSTART 131; GFX906-NEXT: ; def s20 132; GFX906-NEXT: ;;#ASMEND 133; GFX906-NEXT: v_writelane_b32 v39, s20, 21 134; GFX906-NEXT: s_waitcnt lgkmcnt(0) 135; GFX906-NEXT: v_writelane_b32 v39, s10, 22 136; GFX906-NEXT: v_writelane_b32 v39, s11, 23 137; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 138; GFX906-NEXT: s_mov_b64 exec, s[34:35] 139; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 140; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 141; GFX906-NEXT: s_mov_b64 exec, s[34:35] 142; GFX906-NEXT: v_readlane_b32 s16, v39, 22 143; GFX906-NEXT: s_mov_b32 s12, s24 144; GFX906-NEXT: s_mov_b32 s13, s23 145; GFX906-NEXT: s_mov_b32 s14, s22 
146; GFX906-NEXT: v_mov_b32_e32 v31, v32 147; GFX906-NEXT: s_mov_b32 s15, s21 148; GFX906-NEXT: s_mov_b64 s[10:11], s[26:27] 149; GFX906-NEXT: v_readlane_b32 s17, v39, 23 150; GFX906-NEXT: v_mov_b32_e32 v40, v32 151; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] 152; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 153; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload 154; GFX906-NEXT: s_mov_b64 exec, s[34:35] 155; GFX906-NEXT: s_waitcnt vmcnt(0) 156; GFX906-NEXT: v_readlane_b32 s11, v39, 12 157; GFX906-NEXT: ;;#ASMSTART 158; GFX906-NEXT: ; use s11 159; GFX906-NEXT: ;;#ASMEND 160; GFX906-NEXT: v_readlane_b32 s12, v39, 13 161; GFX906-NEXT: ;;#ASMSTART 162; GFX906-NEXT: ; use s12 163; GFX906-NEXT: ;;#ASMEND 164; GFX906-NEXT: v_readlane_b32 s13, v39, 14 165; GFX906-NEXT: ;;#ASMSTART 166; GFX906-NEXT: ; use s13 167; GFX906-NEXT: ;;#ASMEND 168; GFX906-NEXT: v_readlane_b32 s14, v39, 15 169; GFX906-NEXT: ;;#ASMSTART 170; GFX906-NEXT: ; use s14 171; GFX906-NEXT: ;;#ASMEND 172; GFX906-NEXT: v_readlane_b32 s15, v39, 16 173; GFX906-NEXT: ;;#ASMSTART 174; GFX906-NEXT: ; use s15 175; GFX906-NEXT: ;;#ASMEND 176; GFX906-NEXT: v_readlane_b32 s16, v39, 17 177; GFX906-NEXT: ;;#ASMSTART 178; GFX906-NEXT: ; use s16 179; GFX906-NEXT: ;;#ASMEND 180; GFX906-NEXT: v_readlane_b32 s17, v39, 18 181; GFX906-NEXT: ;;#ASMSTART 182; GFX906-NEXT: ; use s17 183; GFX906-NEXT: ;;#ASMEND 184; GFX906-NEXT: v_readlane_b32 s18, v39, 19 185; GFX906-NEXT: ;;#ASMSTART 186; GFX906-NEXT: ; use s18 187; GFX906-NEXT: ;;#ASMEND 188; GFX906-NEXT: v_readlane_b32 s19, v39, 20 189; GFX906-NEXT: ;;#ASMSTART 190; GFX906-NEXT: ; use s19 191; GFX906-NEXT: ;;#ASMEND 192; GFX906-NEXT: v_readlane_b32 s20, v39, 21 193; GFX906-NEXT: ;;#ASMSTART 194; GFX906-NEXT: ; use s20 195; GFX906-NEXT: ;;#ASMEND 196; GFX906-NEXT: ;;#ASMSTART 197; GFX906-NEXT: ; def s21 198; GFX906-NEXT: ;;#ASMEND 199; GFX906-NEXT: v_writelane_b32 v39, s21, 12 200; GFX906-NEXT: ;;#ASMSTART 201; GFX906-NEXT: ; def s22 202; 
GFX906-NEXT: ;;#ASMEND 203; GFX906-NEXT: v_writelane_b32 v39, s22, 13 204; GFX906-NEXT: ;;#ASMSTART 205; GFX906-NEXT: ; def s23 206; GFX906-NEXT: ;;#ASMEND 207; GFX906-NEXT: v_writelane_b32 v39, s23, 14 208; GFX906-NEXT: ;;#ASMSTART 209; GFX906-NEXT: ; def s24 210; GFX906-NEXT: ;;#ASMEND 211; GFX906-NEXT: v_writelane_b32 v39, s24, 15 212; GFX906-NEXT: ;;#ASMSTART 213; GFX906-NEXT: ; def s25 214; GFX906-NEXT: ;;#ASMEND 215; GFX906-NEXT: v_writelane_b32 v39, s25, 16 216; GFX906-NEXT: ;;#ASMSTART 217; GFX906-NEXT: ; def s26 218; GFX906-NEXT: ;;#ASMEND 219; GFX906-NEXT: v_writelane_b32 v39, s26, 17 220; GFX906-NEXT: ;;#ASMSTART 221; GFX906-NEXT: ; def s27 222; GFX906-NEXT: ;;#ASMEND 223; GFX906-NEXT: v_writelane_b32 v39, s27, 18 224; GFX906-NEXT: ;;#ASMSTART 225; GFX906-NEXT: ; def s28 226; GFX906-NEXT: ;;#ASMEND 227; GFX906-NEXT: v_writelane_b32 v39, s28, 19 228; GFX906-NEXT: ;;#ASMSTART 229; GFX906-NEXT: ; def s29 230; GFX906-NEXT: ;;#ASMEND 231; GFX906-NEXT: v_writelane_b32 v39, s29, 20 232; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 233; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 234; GFX906-NEXT: s_mov_b64 exec, s[34:35] 235; GFX906-NEXT: v_readlane_b32 s4, v39, 10 236; GFX906-NEXT: v_readlane_b32 s6, v39, 8 237; GFX906-NEXT: v_readlane_b32 s8, v39, 6 238; GFX906-NEXT: v_readlane_b32 s10, v39, 4 239; GFX906-NEXT: v_readlane_b32 s16, v39, 22 240; GFX906-NEXT: v_readlane_b32 s12, v39, 3 241; GFX906-NEXT: v_mov_b32_e32 v31, v40 242; GFX906-NEXT: v_readlane_b32 s13, v39, 2 243; GFX906-NEXT: v_readlane_b32 s14, v39, 1 244; GFX906-NEXT: v_readlane_b32 s15, v39, 0 245; GFX906-NEXT: v_readlane_b32 s5, v39, 11 246; GFX906-NEXT: v_readlane_b32 s7, v39, 9 247; GFX906-NEXT: v_readlane_b32 s9, v39, 7 248; GFX906-NEXT: v_readlane_b32 s11, v39, 5 249; GFX906-NEXT: v_readlane_b32 s17, v39, 23 250; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] 251; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 252; GFX906-NEXT: buffer_load_dword v39, off, 
s[0:3], s33 offset:4 ; 4-byte Folded Reload 253; GFX906-NEXT: s_mov_b64 exec, s[34:35] 254; GFX906-NEXT: s_waitcnt vmcnt(0) 255; GFX906-NEXT: v_readlane_b32 s4, v39, 10 256; GFX906-NEXT: v_readlane_b32 s6, v39, 8 257; GFX906-NEXT: v_readlane_b32 s8, v39, 6 258; GFX906-NEXT: v_readlane_b32 s10, v39, 4 259; GFX906-NEXT: v_readlane_b32 s16, v39, 22 260; GFX906-NEXT: v_readlane_b32 s5, v39, 11 261; GFX906-NEXT: v_readlane_b32 s7, v39, 9 262; GFX906-NEXT: v_readlane_b32 s9, v39, 7 263; GFX906-NEXT: v_readlane_b32 s11, v39, 5 264; GFX906-NEXT: v_readlane_b32 s12, v39, 3 265; GFX906-NEXT: v_readlane_b32 s13, v39, 2 266; GFX906-NEXT: v_readlane_b32 s14, v39, 1 267; GFX906-NEXT: v_readlane_b32 s15, v39, 0 268; GFX906-NEXT: v_mov_b32_e32 v31, v40 269; GFX906-NEXT: v_readlane_b32 s17, v39, 23 270; GFX906-NEXT: v_readlane_b32 s21, v39, 12 271; GFX906-NEXT: ;;#ASMSTART 272; GFX906-NEXT: ; use s21 273; GFX906-NEXT: ;;#ASMEND 274; GFX906-NEXT: v_readlane_b32 s22, v39, 13 275; GFX906-NEXT: ;;#ASMSTART 276; GFX906-NEXT: ; use s22 277; GFX906-NEXT: ;;#ASMEND 278; GFX906-NEXT: v_readlane_b32 s23, v39, 14 279; GFX906-NEXT: ;;#ASMSTART 280; GFX906-NEXT: ; use s23 281; GFX906-NEXT: ;;#ASMEND 282; GFX906-NEXT: v_readlane_b32 s24, v39, 15 283; GFX906-NEXT: ;;#ASMSTART 284; GFX906-NEXT: ; use s24 285; GFX906-NEXT: ;;#ASMEND 286; GFX906-NEXT: v_readlane_b32 s25, v39, 16 287; GFX906-NEXT: ;;#ASMSTART 288; GFX906-NEXT: ; use s25 289; GFX906-NEXT: ;;#ASMEND 290; GFX906-NEXT: v_readlane_b32 s26, v39, 17 291; GFX906-NEXT: ;;#ASMSTART 292; GFX906-NEXT: ; use s26 293; GFX906-NEXT: ;;#ASMEND 294; GFX906-NEXT: v_readlane_b32 s27, v39, 18 295; GFX906-NEXT: ;;#ASMSTART 296; GFX906-NEXT: ; use s27 297; GFX906-NEXT: ;;#ASMEND 298; GFX906-NEXT: v_readlane_b32 s28, v39, 19 299; GFX906-NEXT: ;;#ASMSTART 300; GFX906-NEXT: ; use s28 301; GFX906-NEXT: ;;#ASMEND 302; GFX906-NEXT: v_readlane_b32 s29, v39, 20 303; GFX906-NEXT: ;;#ASMSTART 304; GFX906-NEXT: ; use s29 305; GFX906-NEXT: ;;#ASMEND 306; GFX906-NEXT: 
s_swappc_b64 s[30:31], s[16:17] 307; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload 308; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload 309; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload 310; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload 311; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload 312; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload 313; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload 314; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload 315; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload 316; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload 317; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload 318; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload 319; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload 320; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload 321; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload 322; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload 323; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload 324; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload 325; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload 326; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload 327; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload 328; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 
offset:96 ; 4-byte Folded Reload 329; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload 330; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload 331; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload 332; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload 333; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload 334; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload 335; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload 336; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload 337; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload 338; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload 339; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload 340; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload 341; GFX906-NEXT: v_readlane_b32 s31, v41, 1 342; GFX906-NEXT: v_readlane_b32 s30, v41, 0 343; GFX906-NEXT: s_mov_b32 s32, s33 344; GFX906-NEXT: v_readlane_b32 s4, v41, 4 345; GFX906-NEXT: v_readlane_b32 s34, v41, 2 346; GFX906-NEXT: v_readlane_b32 s35, v41, 3 347; GFX906-NEXT: s_waitcnt vmcnt(0) 348; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 349; GFX906-NEXT: s_waitcnt vmcnt(0) 350; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96 351; GFX906-NEXT: s_waitcnt vmcnt(0) 352; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80 353; GFX906-NEXT: s_waitcnt vmcnt(0) 354; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64 355; GFX906-NEXT: s_waitcnt vmcnt(0) 356; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48 357; GFX906-NEXT: s_waitcnt vmcnt(0) 358; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[10:13] 
offset:32 359; GFX906-NEXT: s_waitcnt vmcnt(0) 360; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16 361; GFX906-NEXT: s_waitcnt vmcnt(0) 362; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 363; GFX906-NEXT: s_waitcnt vmcnt(0) 364; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 365; GFX906-NEXT: s_xor_saveexec_b64 s[6:7], -1 366; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload 367; GFX906-NEXT: s_mov_b64 exec, -1 368; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload 369; GFX906-NEXT: s_mov_b64 exec, s[6:7] 370; GFX906-NEXT: s_mov_b32 s33, s4 371; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 372; GFX906-NEXT: s_setpc_b64 s[30:31] 373; 374; GFX908-LABEL: preserve_wwm_copy_dstreg: 375; GFX908: ; %bb.0: 376; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 377; GFX908-NEXT: s_mov_b32 s16, s33 378; GFX908-NEXT: s_mov_b32 s33, s32 379; GFX908-NEXT: s_xor_saveexec_b64 s[18:19], -1 380; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill 381; GFX908-NEXT: s_mov_b64 exec, s[18:19] 382; GFX908-NEXT: v_mov_b32_e32 v2, s16 383; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill 384; GFX908-NEXT: v_mov_b32_e32 v2, s34 385; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill 386; GFX908-NEXT: v_mov_b32_e32 v2, s35 387; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill 388; GFX908-NEXT: s_addk_i32 s32, 0x2c00 389; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 390; GFX908-NEXT: s_mov_b64 s[16:17], exec 391; GFX908-NEXT: s_mov_b64 exec, 1 392; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 393; GFX908-NEXT: v_writelane_b32 v2, s30, 0 394; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 395; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 
396; GFX908-NEXT: s_waitcnt vmcnt(0) 397; GFX908-NEXT: s_mov_b64 exec, s[16:17] 398; GFX908-NEXT: s_mov_b64 s[16:17], exec 399; GFX908-NEXT: s_mov_b64 exec, 1 400; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 401; GFX908-NEXT: v_writelane_b32 v2, s31, 0 402; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill 403; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 404; GFX908-NEXT: s_waitcnt vmcnt(0) 405; GFX908-NEXT: s_mov_b64 exec, s[16:17] 406; GFX908-NEXT: s_mov_b32 s21, s15 407; GFX908-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane 408; GFX908-NEXT: s_mov_b32 s22, s14 409; GFX908-NEXT: v_writelane_b32 v39, s21, 0 410; GFX908-NEXT: v_writelane_b32 v39, s22, 1 411; GFX908-NEXT: s_mov_b32 s23, s13 412; GFX908-NEXT: v_writelane_b32 v39, s23, 2 413; GFX908-NEXT: s_mov_b32 s24, s12 414; GFX908-NEXT: v_writelane_b32 v39, s24, 3 415; GFX908-NEXT: s_mov_b64 s[26:27], s[10:11] 416; GFX908-NEXT: v_writelane_b32 v39, s26, 4 417; GFX908-NEXT: v_writelane_b32 v39, s27, 5 418; GFX908-NEXT: v_writelane_b32 v39, s8, 6 419; GFX908-NEXT: v_writelane_b32 v39, s9, 7 420; GFX908-NEXT: v_writelane_b32 v39, s6, 8 421; GFX908-NEXT: v_writelane_b32 v39, s7, 9 422; GFX908-NEXT: v_writelane_b32 v39, s4, 10 423; GFX908-NEXT: v_mov_b32_e32 v32, v31 424; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill 425; GFX908-NEXT: s_nop 0 426; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill 427; GFX908-NEXT: v_writelane_b32 v39, s5, 11 428; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 429; GFX908-NEXT: s_mov_b64 exec, s[34:35] 430; GFX908-NEXT: ;;#ASMSTART 431; GFX908-NEXT: ; def v[0:31] 432; GFX908-NEXT: ;;#ASMEND 433; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill 434; GFX908-NEXT: s_nop 0 435; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill 436; GFX908-NEXT: buffer_store_dword v2, 
off, s[0:3], s33 offset:36 ; 4-byte Folded Spill 437; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill 438; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill 439; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill 440; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill 441; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill 442; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill 443; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill 444; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill 445; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill 446; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill 447; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill 448; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill 449; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill 450; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill 451; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill 452; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill 453; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill 454; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill 455; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill 456; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill 457; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill 458; GFX908-NEXT: buffer_store_dword 
v24, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill 459; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill 460; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill 461; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill 462; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill 463; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill 464; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill 465; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill 466; GFX908-NEXT: ;;#ASMSTART 467; GFX908-NEXT: ; def v40 468; GFX908-NEXT: ;;#ASMEND 469; GFX908-NEXT: ;;#ASMSTART 470; GFX908-NEXT: ; def s11 471; GFX908-NEXT: ;;#ASMEND 472; GFX908-NEXT: v_writelane_b32 v39, s11, 12 473; GFX908-NEXT: ;;#ASMSTART 474; GFX908-NEXT: ; def s12 475; GFX908-NEXT: ;;#ASMEND 476; GFX908-NEXT: v_writelane_b32 v39, s12, 13 477; GFX908-NEXT: ;;#ASMSTART 478; GFX908-NEXT: ; def s13 479; GFX908-NEXT: ;;#ASMEND 480; GFX908-NEXT: v_writelane_b32 v39, s13, 14 481; GFX908-NEXT: ;;#ASMSTART 482; GFX908-NEXT: ; def s14 483; GFX908-NEXT: ;;#ASMEND 484; GFX908-NEXT: v_writelane_b32 v39, s14, 15 485; GFX908-NEXT: ;;#ASMSTART 486; GFX908-NEXT: ; def s15 487; GFX908-NEXT: ;;#ASMEND 488; GFX908-NEXT: v_writelane_b32 v39, s15, 16 489; GFX908-NEXT: s_getpc_b64 s[10:11] 490; GFX908-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 491; GFX908-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 492; GFX908-NEXT: ;;#ASMSTART 493; GFX908-NEXT: ; def s16 494; GFX908-NEXT: ;;#ASMEND 495; GFX908-NEXT: v_writelane_b32 v39, s16, 17 496; GFX908-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 497; GFX908-NEXT: ;;#ASMSTART 498; GFX908-NEXT: ; def s17 499; GFX908-NEXT: ;;#ASMEND 500; GFX908-NEXT: v_writelane_b32 v39, s17, 18 501; GFX908-NEXT: ;;#ASMSTART 502; GFX908-NEXT: ; def s18 503; 
GFX908-NEXT: ;;#ASMEND 504; GFX908-NEXT: v_writelane_b32 v39, s18, 19 505; GFX908-NEXT: ;;#ASMSTART 506; GFX908-NEXT: ; def s19 507; GFX908-NEXT: ;;#ASMEND 508; GFX908-NEXT: v_writelane_b32 v39, s19, 20 509; GFX908-NEXT: ;;#ASMSTART 510; GFX908-NEXT: ; def s20 511; GFX908-NEXT: ;;#ASMEND 512; GFX908-NEXT: v_writelane_b32 v39, s20, 21 513; GFX908-NEXT: s_waitcnt lgkmcnt(0) 514; GFX908-NEXT: v_writelane_b32 v39, s10, 22 515; GFX908-NEXT: v_writelane_b32 v39, s11, 23 516; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 517; GFX908-NEXT: s_mov_b64 exec, s[34:35] 518; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 519; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill 520; GFX908-NEXT: s_mov_b64 exec, s[34:35] 521; GFX908-NEXT: v_readlane_b32 s16, v39, 22 522; GFX908-NEXT: s_mov_b32 s12, s24 523; GFX908-NEXT: s_mov_b32 s13, s23 524; GFX908-NEXT: s_mov_b32 s14, s22 525; GFX908-NEXT: v_mov_b32_e32 v31, v32 526; GFX908-NEXT: s_mov_b32 s15, s21 527; GFX908-NEXT: s_mov_b64 s[10:11], s[26:27] 528; GFX908-NEXT: v_readlane_b32 s17, v39, 23 529; GFX908-NEXT: v_mov_b32_e32 v40, v32 530; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] 531; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 532; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload 533; GFX908-NEXT: s_mov_b64 exec, s[34:35] 534; GFX908-NEXT: s_waitcnt vmcnt(0) 535; GFX908-NEXT: v_readlane_b32 s11, v39, 12 536; GFX908-NEXT: ;;#ASMSTART 537; GFX908-NEXT: ; use s11 538; GFX908-NEXT: ;;#ASMEND 539; GFX908-NEXT: v_readlane_b32 s12, v39, 13 540; GFX908-NEXT: ;;#ASMSTART 541; GFX908-NEXT: ; use s12 542; GFX908-NEXT: ;;#ASMEND 543; GFX908-NEXT: v_readlane_b32 s13, v39, 14 544; GFX908-NEXT: ;;#ASMSTART 545; GFX908-NEXT: ; use s13 546; GFX908-NEXT: ;;#ASMEND 547; GFX908-NEXT: v_readlane_b32 s14, v39, 15 548; GFX908-NEXT: ;;#ASMSTART 549; GFX908-NEXT: ; use s14 550; GFX908-NEXT: ;;#ASMEND 551; GFX908-NEXT: v_readlane_b32 s15, v39, 16 552; GFX908-NEXT: ;;#ASMSTART 553; 
GFX908-NEXT: ; use s15 554; GFX908-NEXT: ;;#ASMEND 555; GFX908-NEXT: v_readlane_b32 s16, v39, 17 556; GFX908-NEXT: ;;#ASMSTART 557; GFX908-NEXT: ; use s16 558; GFX908-NEXT: ;;#ASMEND 559; GFX908-NEXT: v_readlane_b32 s17, v39, 18 560; GFX908-NEXT: ;;#ASMSTART 561; GFX908-NEXT: ; use s17 562; GFX908-NEXT: ;;#ASMEND 563; GFX908-NEXT: v_readlane_b32 s18, v39, 19 564; GFX908-NEXT: ;;#ASMSTART 565; GFX908-NEXT: ; use s18 566; GFX908-NEXT: ;;#ASMEND 567; GFX908-NEXT: v_readlane_b32 s19, v39, 20 568; GFX908-NEXT: ;;#ASMSTART 569; GFX908-NEXT: ; use s19 570; GFX908-NEXT: ;;#ASMEND 571; GFX908-NEXT: v_readlane_b32 s20, v39, 21 572; GFX908-NEXT: ;;#ASMSTART 573; GFX908-NEXT: ; use s20 574; GFX908-NEXT: ;;#ASMEND 575; GFX908-NEXT: ;;#ASMSTART 576; GFX908-NEXT: ; def s21 577; GFX908-NEXT: ;;#ASMEND 578; GFX908-NEXT: v_writelane_b32 v39, s21, 12 579; GFX908-NEXT: ;;#ASMSTART 580; GFX908-NEXT: ; def s22 581; GFX908-NEXT: ;;#ASMEND 582; GFX908-NEXT: v_writelane_b32 v39, s22, 13 583; GFX908-NEXT: ;;#ASMSTART 584; GFX908-NEXT: ; def s23 585; GFX908-NEXT: ;;#ASMEND 586; GFX908-NEXT: v_writelane_b32 v39, s23, 14 587; GFX908-NEXT: ;;#ASMSTART 588; GFX908-NEXT: ; def s24 589; GFX908-NEXT: ;;#ASMEND 590; GFX908-NEXT: v_writelane_b32 v39, s24, 15 591; GFX908-NEXT: ;;#ASMSTART 592; GFX908-NEXT: ; def s25 593; GFX908-NEXT: ;;#ASMEND 594; GFX908-NEXT: v_writelane_b32 v39, s25, 16 595; GFX908-NEXT: ;;#ASMSTART 596; GFX908-NEXT: ; def s26 597; GFX908-NEXT: ;;#ASMEND 598; GFX908-NEXT: v_writelane_b32 v39, s26, 17 599; GFX908-NEXT: ;;#ASMSTART 600; GFX908-NEXT: ; def s27 601; GFX908-NEXT: ;;#ASMEND 602; GFX908-NEXT: v_writelane_b32 v39, s27, 18 603; GFX908-NEXT: ;;#ASMSTART 604; GFX908-NEXT: ; def s28 605; GFX908-NEXT: ;;#ASMEND 606; GFX908-NEXT: v_writelane_b32 v39, s28, 19 607; GFX908-NEXT: ;;#ASMSTART 608; GFX908-NEXT: ; def s29 609; GFX908-NEXT: ;;#ASMEND 610; GFX908-NEXT: v_writelane_b32 v39, s29, 20 611; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 612; GFX908-NEXT: buffer_store_dword v39, 
off, s[0:3], s33 offset:12 ; 4-byte Folded Spill 613; GFX908-NEXT: s_mov_b64 exec, s[34:35] 614; GFX908-NEXT: v_readlane_b32 s4, v39, 10 615; GFX908-NEXT: v_readlane_b32 s6, v39, 8 616; GFX908-NEXT: v_readlane_b32 s8, v39, 6 617; GFX908-NEXT: v_readlane_b32 s10, v39, 4 618; GFX908-NEXT: v_readlane_b32 s16, v39, 22 619; GFX908-NEXT: v_readlane_b32 s12, v39, 3 620; GFX908-NEXT: v_mov_b32_e32 v31, v40 621; GFX908-NEXT: v_readlane_b32 s13, v39, 2 622; GFX908-NEXT: v_readlane_b32 s14, v39, 1 623; GFX908-NEXT: v_readlane_b32 s15, v39, 0 624; GFX908-NEXT: v_readlane_b32 s5, v39, 11 625; GFX908-NEXT: v_readlane_b32 s7, v39, 9 626; GFX908-NEXT: v_readlane_b32 s9, v39, 7 627; GFX908-NEXT: v_readlane_b32 s11, v39, 5 628; GFX908-NEXT: v_readlane_b32 s17, v39, 23 629; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] 630; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 631; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload 632; GFX908-NEXT: s_mov_b64 exec, s[34:35] 633; GFX908-NEXT: s_waitcnt vmcnt(0) 634; GFX908-NEXT: v_readlane_b32 s4, v39, 10 635; GFX908-NEXT: v_readlane_b32 s6, v39, 8 636; GFX908-NEXT: v_readlane_b32 s8, v39, 6 637; GFX908-NEXT: v_readlane_b32 s10, v39, 4 638; GFX908-NEXT: v_readlane_b32 s16, v39, 22 639; GFX908-NEXT: v_readlane_b32 s5, v39, 11 640; GFX908-NEXT: v_readlane_b32 s7, v39, 9 641; GFX908-NEXT: v_readlane_b32 s9, v39, 7 642; GFX908-NEXT: v_readlane_b32 s11, v39, 5 643; GFX908-NEXT: v_readlane_b32 s12, v39, 3 644; GFX908-NEXT: v_readlane_b32 s13, v39, 2 645; GFX908-NEXT: v_readlane_b32 s14, v39, 1 646; GFX908-NEXT: v_readlane_b32 s15, v39, 0 647; GFX908-NEXT: v_mov_b32_e32 v31, v40 648; GFX908-NEXT: v_readlane_b32 s17, v39, 23 649; GFX908-NEXT: v_readlane_b32 s21, v39, 12 650; GFX908-NEXT: ;;#ASMSTART 651; GFX908-NEXT: ; use s21 652; GFX908-NEXT: ;;#ASMEND 653; GFX908-NEXT: v_readlane_b32 s22, v39, 13 654; GFX908-NEXT: ;;#ASMSTART 655; GFX908-NEXT: ; use s22 656; GFX908-NEXT: ;;#ASMEND 657; GFX908-NEXT: v_readlane_b32 
s23, v39, 14 658; GFX908-NEXT: ;;#ASMSTART 659; GFX908-NEXT: ; use s23 660; GFX908-NEXT: ;;#ASMEND 661; GFX908-NEXT: v_readlane_b32 s24, v39, 15 662; GFX908-NEXT: ;;#ASMSTART 663; GFX908-NEXT: ; use s24 664; GFX908-NEXT: ;;#ASMEND 665; GFX908-NEXT: v_readlane_b32 s25, v39, 16 666; GFX908-NEXT: ;;#ASMSTART 667; GFX908-NEXT: ; use s25 668; GFX908-NEXT: ;;#ASMEND 669; GFX908-NEXT: v_readlane_b32 s26, v39, 17 670; GFX908-NEXT: ;;#ASMSTART 671; GFX908-NEXT: ; use s26 672; GFX908-NEXT: ;;#ASMEND 673; GFX908-NEXT: v_readlane_b32 s27, v39, 18 674; GFX908-NEXT: ;;#ASMSTART 675; GFX908-NEXT: ; use s27 676; GFX908-NEXT: ;;#ASMEND 677; GFX908-NEXT: v_readlane_b32 s28, v39, 19 678; GFX908-NEXT: ;;#ASMSTART 679; GFX908-NEXT: ; use s28 680; GFX908-NEXT: ;;#ASMEND 681; GFX908-NEXT: v_readlane_b32 s29, v39, 20 682; GFX908-NEXT: ;;#ASMSTART 683; GFX908-NEXT: ; use s29 684; GFX908-NEXT: ;;#ASMEND 685; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] 686; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload 687; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload 688; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload 689; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload 690; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload 691; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload 692; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload 693; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload 694; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload 695; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload 696; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload 697; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:64 
; 4-byte Folded Reload 698; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload 699; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload 700; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload 701; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload 702; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload 703; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload 704; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload 705; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload 706; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload 707; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload 708; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload 709; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload 710; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload 711; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload 712; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload 713; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload 714; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload 715; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload 716; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload 717; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload 718; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload 719; GFX908-NEXT: buffer_load_dword v33, off, 
s[0:3], s33 offset:152 ; 4-byte Folded Reload 720; GFX908-NEXT: s_mov_b64 s[4:5], exec 721; GFX908-NEXT: s_waitcnt vmcnt(0) 722; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 723; GFX908-NEXT: s_waitcnt vmcnt(0) 724; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96 725; GFX908-NEXT: s_waitcnt vmcnt(0) 726; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80 727; GFX908-NEXT: s_waitcnt vmcnt(0) 728; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64 729; GFX908-NEXT: s_waitcnt vmcnt(0) 730; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48 731; GFX908-NEXT: s_waitcnt vmcnt(0) 732; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32 733; GFX908-NEXT: s_waitcnt vmcnt(0) 734; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16 735; GFX908-NEXT: s_waitcnt vmcnt(0) 736; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 737; GFX908-NEXT: s_waitcnt vmcnt(0) 738; GFX908-NEXT: s_mov_b64 exec, 1 739; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 740; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload 741; GFX908-NEXT: s_waitcnt vmcnt(0) 742; GFX908-NEXT: v_readlane_b32 s31, v0, 0 743; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 744; GFX908-NEXT: s_waitcnt vmcnt(0) 745; GFX908-NEXT: s_mov_b64 exec, s[4:5] 746; GFX908-NEXT: s_mov_b64 s[4:5], exec 747; GFX908-NEXT: s_mov_b64 exec, 1 748; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 749; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload 750; GFX908-NEXT: s_waitcnt vmcnt(0) 751; GFX908-NEXT: v_readlane_b32 s30, v0, 0 752; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 753; GFX908-NEXT: s_waitcnt vmcnt(0) 754; GFX908-NEXT: s_mov_b64 exec, s[4:5] 755; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload 756; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 757; GFX908-NEXT: 
s_mov_b32 s32, s33 758; GFX908-NEXT: s_waitcnt vmcnt(0) 759; GFX908-NEXT: v_readfirstlane_b32 s4, v0 760; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload 761; GFX908-NEXT: s_waitcnt vmcnt(0) 762; GFX908-NEXT: v_readfirstlane_b32 s34, v0 763; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload 764; GFX908-NEXT: s_waitcnt vmcnt(0) 765; GFX908-NEXT: v_readfirstlane_b32 s35, v0 766; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1 767; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload 768; GFX908-NEXT: s_mov_b64 exec, s[6:7] 769; GFX908-NEXT: s_mov_b32 s33, s4 770; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 771; GFX908-NEXT: s_setpc_b64 s[30:31] 772 %vreg0 = call <32 x float> asm sideeffect "; def $0", "=v"() 773 %v40 = call i32 asm sideeffect "; def $0","=${v40}"() 774 775 %s11 = call i32 asm sideeffect "; def $0","=${s11}"() 776 %s12 = call i32 asm sideeffect "; def $0","=${s12}"() 777 %s13 = call i32 asm sideeffect "; def $0","=${s13}"() 778 %s14 = call i32 asm sideeffect "; def $0","=${s14}"() 779 %s15 = call i32 asm sideeffect "; def $0","=${s15}"() 780 %s16 = call i32 asm sideeffect "; def $0","=${s16}"() 781 %s17 = call i32 asm sideeffect "; def $0","=${s17}"() 782 %s18 = call i32 asm sideeffect "; def $0","=${s18}"() 783 %s19 = call i32 asm sideeffect "; def $0","=${s19}"() 784 %s20 = call i32 asm sideeffect "; def $0","=${s20}"() 785 call void @foo() 786 call void asm sideeffect "; use $0","${s11}"(i32 %s11) 787 call void asm sideeffect "; use $0","${s12}"(i32 %s12) 788 call void asm sideeffect "; use $0","${s13}"(i32 %s13) 789 call void asm sideeffect "; use $0","${s14}"(i32 %s14) 790 call void asm sideeffect "; use $0","${s15}"(i32 %s15) 791 call void asm sideeffect "; use $0","${s16}"(i32 %s16) 792 call void asm sideeffect "; use $0","${s17}"(i32 %s17) 793 call void asm sideeffect "; use $0","${s18}"(i32 %s18) 794 call void asm sideeffect "; use 
$0","${s19}"(i32 %s19) 795 call void asm sideeffect "; use $0","${s20}"(i32 %s20) 796 797 %s21 = call i32 asm sideeffect "; def $0","=${s21}"() 798 %s22 = call i32 asm sideeffect "; def $0","=${s22}"() 799 %s23 = call i32 asm sideeffect "; def $0","=${s23}"() 800 %s24 = call i32 asm sideeffect "; def $0","=${s24}"() 801 %s25 = call i32 asm sideeffect "; def $0","=${s25}"() 802 %s26 = call i32 asm sideeffect "; def $0","=${s26}"() 803 %s27 = call i32 asm sideeffect "; def $0","=${s27}"() 804 %s28 = call i32 asm sideeffect "; def $0","=${s28}"() 805 %s29 = call i32 asm sideeffect "; def $0","=${s29}"() 806 call void @foo() 807 call void asm sideeffect "; use $0","${s21}"(i32 %s21) 808 call void asm sideeffect "; use $0","${s22}"(i32 %s22) 809 call void asm sideeffect "; use $0","${s23}"(i32 %s23) 810 call void asm sideeffect "; use $0","${s24}"(i32 %s24) 811 call void asm sideeffect "; use $0","${s25}"(i32 %s25) 812 call void asm sideeffect "; use $0","${s26}"(i32 %s26) 813 call void asm sideeffect "; use $0","${s27}"(i32 %s27) 814 call void asm sideeffect "; use $0","${s28}"(i32 %s28) 815 call void asm sideeffect "; use $0","${s29}"(i32 %s29) 816 817 call void @foo() 818 819 store volatile <32 x float> %vreg0, ptr %parg0 820 821 ret void 822} 823 824declare void @foo() 825 826attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"} 827 828!llvm.module.flags = !{!0} 829!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} 830