; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; GlobalISel selection tests for the llvm.amdgcn.writelane intrinsic.
;
; Function names follow test_writelane_<data>_<lane>_<vdst.in>, matching the
; operand order of the intrinsic call in each body:
;   s       - argument passed inreg (the checks show it arriving in an SGPR)
;   v       - plain argument (the checks show it arriving in a VGPR)
;   imm / k - constant operand
;   m0      - value produced in m0 by inline asm
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; All three operands are passed inreg. The checks show vdst.in being copied
; from s4 into v0 before the writelane.
define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i32 inreg %vdst.in) #0 {
; GFX7-LABEL: test_writelane_s_s_s:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 m0, s3
; GFX7-NEXT:    v_writelane_b32 v0, s2, m0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_s_s_s:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NEXT:    s_mov_b32 m0, s3
; GFX8-NEXT:    v_writelane_b32 v0, s2, m0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_s_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    v_writelane_b32 v0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; vdst.in is the constant 42; the checks show it materialized into v0 with
; v_mov_b32 before the writelane.
define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane) #0 {
; GFX7-LABEL: test_writelane_s_s_imm:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_mov_b32_e32 v0, 42
; GFX7-NEXT:    s_mov_b32 m0, s3
; GFX7-NEXT:    v_writelane_b32 v0, s2, m0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_s_s_imm:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, 42
; GFX8-NEXT:    s_mov_b32 m0, s3
; GFX8-NEXT:    v_writelane_b32 v0, s2, m0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_s_s_imm:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 42
; GFX10-NEXT:    v_writelane_b32 v0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 42)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Data is not an inline immediate (999): the checks show it materialized into
; an SGPR with s_movk_i32.
define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_k_s_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_movk_i32 s0, 0x3e7
; GFX7-NEXT:    s_mov_b32 m0, s2
; GFX7-NEXT:    v_writelane_b32 v0, s0, m0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_k_s_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_movk_i32 s0, 0x3e7
; GFX8-NEXT:    s_mov_b32 m0, s2
; GFX8-NEXT:    v_writelane_b32 v0, s0, m0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_k_s_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_movk_i32 s0, 0x3e7
; GFX10-NEXT:    v_writelane_b32 v0, s0, s2
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 999, i32 %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Data is an inline immediate (42) and is encoded directly as the source
; operand on every target.
define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_imm_s_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_writelane_b32 v0, 42, s2
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_imm_s_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_writelane_b32 v0, 42, s2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_imm_s_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_writelane_b32 v0, 42, s2
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 42, i32 %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Data is a subtarget-dependent inline immediate (the bit pattern of float
; 0x3FC45F3060000000, printed as 0.15915494 in the checks): gfx700 materializes
; it in an SGPR, while the other targets encode it inline.
define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_imminv2pi_s_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, 0x3e22f983
; GFX7-NEXT:    s_mov_b32 m0, s2
; GFX7-NEXT:    v_writelane_b32 v0, s0, m0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_imminv2pi_s_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_writelane_b32 v0, 0.15915494, s2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_imminv2pi_s_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_writelane_b32 v0, 0.15915494, s2
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 bitcast (float 0x3FC45F3060000000 to i32), i32 %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}


; Lane is an inline immediate (23) and is encoded directly on every target.
define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_s_imm_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_writelane_b32 v0, s2, 23
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_s_imm_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_writelane_b32 v0, s2, 23
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_s_imm_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_writelane_b32 v0, s2, 23
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 23, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Lane index (67) is larger than the wavesize. The gfx700 and gfx803 checks
; show lane 3 encoded as an immediate; the gfx1010/gfx1100 checks show 0x43
; materialized in an SGPR instead.
define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_s_k0_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_writelane_b32 v0, s2, 3
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_s_k0_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_writelane_b32 v0, s2, 3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_s_k0_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_movk_i32 s0, 0x43
; GFX10-NEXT:    v_writelane_b32 v0, s2, s0
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 67, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Lane index (32) is larger than the wavesize for wave32. Every run encodes 32
; directly as an immediate.
define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_s_k1_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_writelane_b32 v0, s2, 32
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_s_k1_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_writelane_b32 v0, s2, 32
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_s_k1_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_writelane_b32 v0, s2, 32
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 32, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; All operands arrive in VGPRs: the checks show data and lane being moved to
; SGPRs with v_readfirstlane_b32, and the result copied from v2 into v0.
define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_v_v_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_readfirstlane_b32 s1, v1
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    s_mov_b32 m0, s1
; GFX7-NEXT:    v_writelane_b32 v2, s0, m0
; GFX7-NEXT:    v_mov_b32_e32 v0, v2
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_v_v_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    s_mov_b32 m0, s1
; GFX8-NEXT:    v_writelane_b32 v2, s0, m0
; GFX8-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_v_v_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
; GFX10-NEXT:    v_writelane_b32 v2, s0, s1
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Data arrives in a VGPR (moved to an SGPR with v_readfirstlane_b32 in the
; checks); lane is passed inreg.
; NOTE(review): the call below also carries a call-site `inreg` on %lane,
; presumably copied from the function signature - confirm it is intentional.
define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_v_s_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    s_mov_b32 m0, s2
; GFX7-NEXT:    v_writelane_b32 v1, s0, m0
; GFX7-NEXT:    v_mov_b32_e32 v0, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_v_s_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    s_mov_b32 m0, s2
; GFX8-NEXT:    v_writelane_b32 v1, s0, m0
; GFX8-NEXT:    v_mov_b32_e32 v0, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_v_s_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    v_writelane_b32 v1, s0, s2
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 inreg %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Data is produced in m0 by inline asm.
; FIXME: This could theoretically use m0 directly as the data source,
; and another SGPR as the lane selector and avoid register swap.
define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_m0_s_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    ;;#ASMSTART
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ;;#ASMEND
; GFX7-NEXT:    s_mov_b32 s0, m0
; GFX7-NEXT:    s_mov_b32 m0, s2
; GFX7-NEXT:    v_writelane_b32 v0, s0, m0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_m0_s_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    ;;#ASMSTART
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ;;#ASMEND
; GFX8-NEXT:    s_mov_b32 s0, m0
; GFX8-NEXT:    s_mov_b32 m0, s2
; GFX8-NEXT:    v_writelane_b32 v0, s0, m0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_m0_s_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    s_mov_b32 m0, -1
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v0, m0, s2
; GFX10-NEXT:    ; return to shader part epilog
  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %lane, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; Lane selector is produced in m0 by inline asm; the checks show m0 used
; directly as the lane operand on every target.
define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0 {
; GFX7-LABEL: test_writelane_s_m0_v:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    ;;#ASMSTART
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ;;#ASMEND
; GFX7-NEXT:    v_writelane_b32 v0, s2, m0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: test_writelane_s_m0_v:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    ;;#ASMSTART
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ;;#ASMEND
; GFX8-NEXT:    v_writelane_b32 v0, s2, m0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_s_m0_v:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    s_mov_b32 m0, -1
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v0, s2, m0
; GFX10-NEXT:    ; return to shader part epilog
  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %m0, i32 %vdst.in)
  %writelane.cast = bitcast i32 %writelane to float
  ret float %writelane.cast
}

; The only intrinsic exercised by this file.
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { convergent nounwind readnone willreturn }