; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,MUBUF
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck %s -check-prefixes=GCN,FLATSCR

; Spill and reload of 2-, 3-, 4- and 5-element vectors of 32-bit integers and
; floats around an inline asm statement that clobbers v0-v7.
;
; Every function follows the same shape:
;   1. allocate two vectors in addrspace(5),
;   2. volatile-load the second one,
;   3. clobber v0-v7 with an empty inline asm so the loaded value has to be
;      spilled before the asm and reloaded after it,
;   4. volatile-store the value back to the second vector.
;
; The two llc invocations above differ only in -mattr=+enable-flat-scratch.
; Without it the checks expect one buffer_store_dword / buffer_load_dword per
; 32-bit element; with it they expect the widest scratch_store / scratch_load
; available (x2, x3, x4, or x4 plus a single dword for five elements).

; Two i32 elements: two 4-byte spills, or one 8-byte flat-scratch spill.
; GCN-LABEL: spill_v2i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v2i32() {
entry:
  %alloca = alloca <2 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <2 x i32>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <2 x i32>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <2 x i32>, ptr addrspace(5) %alloca, i32 1
  store volatile <2 x i32> %a, ptr addrspace(5) %outptr

  ret void
}

; Two float elements: same expectations as the i32 variant, but the IR really
; uses a float vector so the f32 path is exercised.
; GCN-LABEL: spill_v2f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v2f32() {
entry:
  %alloca = alloca <2 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <2 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <2 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <2 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <2 x float> %a, ptr addrspace(5) %outptr

  ret void
}

; Three i32 elements: three 4-byte spills, or one 12-byte flat-scratch spill.
; GCN-LABEL: spill_v3i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v3i32() {
entry:
  %alloca = alloca <3 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <3 x i32>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <3 x i32>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <3 x i32>, ptr addrspace(5) %alloca, i32 1
  store volatile <3 x i32> %a, ptr addrspace(5) %outptr

  ret void
}

; Three float elements: same expectations as the i32 variant, using a float
; vector in the IR.
; GCN-LABEL: spill_v3f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v3f32() {
entry:
  %alloca = alloca <3 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <3 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <3 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <3 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <3 x float> %a, ptr addrspace(5) %outptr

  ret void
}

; Four i32 elements: four 4-byte spills, or one 16-byte flat-scratch spill.
; GCN-LABEL: spill_v4i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v4i32() {
entry:
  %alloca = alloca <4 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <4 x i32>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <4 x i32>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <4 x i32>, ptr addrspace(5) %alloca, i32 1
  store volatile <4 x i32> %a, ptr addrspace(5) %outptr

  ret void
}

; Four float elements: same expectations as the i32 variant, using a float
; vector in the IR.
; GCN-LABEL: spill_v4f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v4f32() {
entry:
  %alloca = alloca <4 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <4 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <4 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <4 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <4 x float> %a, ptr addrspace(5) %outptr

  ret void
}

; Five i32 elements: five 4-byte spills, or a 16-byte plus a 4-byte
; flat-scratch spill. The fifth dword (offset 80) is checked in both
; configurations.
; GCN-LABEL: spill_v5i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:64 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v5i32() {
entry:
  %alloca = alloca <5 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <5 x i32>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <5 x i32>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <5 x i32>, ptr addrspace(5) %alloca, i32 1
  store volatile <5 x i32> %a, ptr addrspace(5) %outptr

  ret void
}

; Five float elements: same expectations as the i32 variant, using a float
; vector in the IR.
; GCN-LABEL: spill_v5f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:64 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v5f32() {
entry:
  %alloca = alloca <5 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <5 x float>, ptr addrspace(5) %alloca, i32 1
  %a = load volatile <5 x float>, ptr addrspace(5) %aptr

  ; Force %a to spill.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr <5 x float>, ptr addrspace(5) %alloca, i32 1
  store volatile <5 x float> %a, ptr addrspace(5) %outptr

  ret void
}