; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=ATTRIB %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-nsa-threshold=2 -verify-machineinstrs < %s | FileCheck -check-prefix=FORCE-2 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-nsa-threshold=3 -verify-machineinstrs < %s | FileCheck -check-prefix=FORCE-3 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-nsa-threshold=4 -verify-machineinstrs < %s | FileCheck -check-prefix=FORCE-4 %s

; Note: command line argument should override function attribute.

; Overview
; --------
; Each function below carries an "amdgpu-nsa-threshold" function attribute
; (2, 3 or 4 -- see the attribute groups at the end of the file) and issues one
; image sample with either two (2D) or three (3D) address operands.
;
; The expectations distinguish two spellings of the image_sample address
; operand:
;   * a bracketed register list such as [v1, v0] (the NSA, i.e. non-sequential
;     address, form), in which the registers are listed individually and, in
;     this test, out of numerical order;
;   * a contiguous register range such as v[1:2], which in this test is always
;     preceded by a v_mov_b32 copying v0 into the last register of the range.
; In every expectation below the bracketed form appears exactly when the number
; of address operands is greater than or equal to the threshold in effect.
;
; The llc invocation checked with the ATTRIB prefix passes no
; -amdgpu-nsa-threshold option, so the per-function attribute is the threshold
; in effect. The invocations checked with the FORCE-<N> prefixes pass
; -amdgpu-nsa-threshold=<N>; their expectations are identical for all functions
; of the same dimensionality, whatever attribute the function carries.
;
; The intrinsic calls pass the coordinates in a different order from the
; function parameters (%s, %t for parameters %t, %s; %s, %t, %r for parameters
; %r, %s, %t). Presumably this keeps the incoming VGPRs from already being in
; sequential order, so that the two forms differ -- TODO confirm.

; 2D sample (two address operands), attribute threshold 2.
define amdgpu_ps <4 x float> @sample_2d_nsa2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) #2 {
; ATTRIB-LABEL: sample_2d_nsa2:
; ATTRIB:       ; %bb.0: ; %main_body
; ATTRIB-NEXT:    s_mov_b32 s12, exec_lo
; ATTRIB-NEXT:    s_wqm_b32 exec_lo, exec_lo
; ATTRIB-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; ATTRIB-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; ATTRIB-NEXT:    image_sample v[0:3], [v1, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; ATTRIB-NEXT:    s_waitcnt vmcnt(0)
; ATTRIB-NEXT:    ; return to shader part epilog
;
; FORCE-2-LABEL: sample_2d_nsa2:
; FORCE-2:       ; %bb.0: ; %main_body
; FORCE-2-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-2-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-2-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-2-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-2-NEXT:    image_sample v[0:3], [v1, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-2-NEXT:    s_waitcnt vmcnt(0)
; FORCE-2-NEXT:    ; return to shader part epilog
;
; FORCE-3-LABEL: sample_2d_nsa2:
; FORCE-3:       ; %bb.0: ; %main_body
; FORCE-3-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-3-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-3-NEXT:    v_mov_b32_e32 v2, v0
; FORCE-3-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-3-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-3-NEXT:    s_waitcnt vmcnt(0)
; FORCE-3-NEXT:    ; return to shader part epilog
;
; FORCE-4-LABEL: sample_2d_nsa2:
; FORCE-4:       ; %bb.0: ; %main_body
; FORCE-4-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-4-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-4-NEXT:    v_mov_b32_e32 v2, v0
; FORCE-4-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-4-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-4-NEXT:    s_waitcnt vmcnt(0)
; FORCE-4-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample (three address operands), attribute threshold 2.
define amdgpu_ps <4 x float> @sample_3d_nsa2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) #2 {
; ATTRIB-LABEL: sample_3d_nsa2:
; ATTRIB:       ; %bb.0: ; %main_body
; ATTRIB-NEXT:    s_mov_b32 s12, exec_lo
; ATTRIB-NEXT:    s_wqm_b32 exec_lo, exec_lo
; ATTRIB-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; ATTRIB-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; ATTRIB-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; ATTRIB-NEXT:    s_waitcnt vmcnt(0)
; ATTRIB-NEXT:    ; return to shader part epilog
;
; FORCE-2-LABEL: sample_3d_nsa2:
; FORCE-2:       ; %bb.0: ; %main_body
; FORCE-2-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-2-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-2-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-2-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-2-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-2-NEXT:    s_waitcnt vmcnt(0)
; FORCE-2-NEXT:    ; return to shader part epilog
;
; FORCE-3-LABEL: sample_3d_nsa2:
; FORCE-3:       ; %bb.0: ; %main_body
; FORCE-3-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-3-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-3-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-3-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-3-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-3-NEXT:    s_waitcnt vmcnt(0)
; FORCE-3-NEXT:    ; return to shader part epilog
;
; FORCE-4-LABEL: sample_3d_nsa2:
; FORCE-4:       ; %bb.0: ; %main_body
; FORCE-4-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-4-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-4-NEXT:    v_mov_b32_e32 v3, v0
; FORCE-4-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-4-NEXT:    image_sample v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-4-NEXT:    s_waitcnt vmcnt(0)
; FORCE-4-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample (two address operands), attribute threshold 3.
define amdgpu_ps <4 x float> @sample_2d_nsa3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) #3 {
; ATTRIB-LABEL: sample_2d_nsa3:
; ATTRIB:       ; %bb.0: ; %main_body
; ATTRIB-NEXT:    s_mov_b32 s12, exec_lo
; ATTRIB-NEXT:    s_wqm_b32 exec_lo, exec_lo
; ATTRIB-NEXT:    v_mov_b32_e32 v2, v0
; ATTRIB-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; ATTRIB-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; ATTRIB-NEXT:    s_waitcnt vmcnt(0)
; ATTRIB-NEXT:    ; return to shader part epilog
;
; FORCE-2-LABEL: sample_2d_nsa3:
; FORCE-2:       ; %bb.0: ; %main_body
; FORCE-2-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-2-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-2-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-2-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-2-NEXT:    image_sample v[0:3], [v1, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-2-NEXT:    s_waitcnt vmcnt(0)
; FORCE-2-NEXT:    ; return to shader part epilog
;
; FORCE-3-LABEL: sample_2d_nsa3:
; FORCE-3:       ; %bb.0: ; %main_body
; FORCE-3-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-3-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-3-NEXT:    v_mov_b32_e32 v2, v0
; FORCE-3-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-3-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-3-NEXT:    s_waitcnt vmcnt(0)
; FORCE-3-NEXT:    ; return to shader part epilog
;
; FORCE-4-LABEL: sample_2d_nsa3:
; FORCE-4:       ; %bb.0: ; %main_body
; FORCE-4-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-4-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-4-NEXT:    v_mov_b32_e32 v2, v0
; FORCE-4-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-4-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-4-NEXT:    s_waitcnt vmcnt(0)
; FORCE-4-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample (three address operands), attribute threshold 3.
define amdgpu_ps <4 x float> @sample_3d_nsa3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) #3 {
; ATTRIB-LABEL: sample_3d_nsa3:
; ATTRIB:       ; %bb.0: ; %main_body
; ATTRIB-NEXT:    s_mov_b32 s12, exec_lo
; ATTRIB-NEXT:    s_wqm_b32 exec_lo, exec_lo
; ATTRIB-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; ATTRIB-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; ATTRIB-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; ATTRIB-NEXT:    s_waitcnt vmcnt(0)
; ATTRIB-NEXT:    ; return to shader part epilog
;
; FORCE-2-LABEL: sample_3d_nsa3:
; FORCE-2:       ; %bb.0: ; %main_body
; FORCE-2-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-2-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-2-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-2-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-2-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-2-NEXT:    s_waitcnt vmcnt(0)
; FORCE-2-NEXT:    ; return to shader part epilog
;
; FORCE-3-LABEL: sample_3d_nsa3:
; FORCE-3:       ; %bb.0: ; %main_body
; FORCE-3-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-3-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-3-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-3-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-3-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-3-NEXT:    s_waitcnt vmcnt(0)
; FORCE-3-NEXT:    ; return to shader part epilog
;
; FORCE-4-LABEL: sample_3d_nsa3:
; FORCE-4:       ; %bb.0: ; %main_body
; FORCE-4-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-4-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-4-NEXT:    v_mov_b32_e32 v3, v0
; FORCE-4-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-4-NEXT:    image_sample v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-4-NEXT:    s_waitcnt vmcnt(0)
; FORCE-4-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample (two address operands), attribute threshold 4.
define amdgpu_ps <4 x float> @sample_2d_nsa4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) #4 {
; ATTRIB-LABEL: sample_2d_nsa4:
; ATTRIB:       ; %bb.0: ; %main_body
; ATTRIB-NEXT:    s_mov_b32 s12, exec_lo
; ATTRIB-NEXT:    s_wqm_b32 exec_lo, exec_lo
; ATTRIB-NEXT:    v_mov_b32_e32 v2, v0
; ATTRIB-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; ATTRIB-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; ATTRIB-NEXT:    s_waitcnt vmcnt(0)
; ATTRIB-NEXT:    ; return to shader part epilog
;
; FORCE-2-LABEL: sample_2d_nsa4:
; FORCE-2:       ; %bb.0: ; %main_body
; FORCE-2-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-2-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-2-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-2-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-2-NEXT:    image_sample v[0:3], [v1, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-2-NEXT:    s_waitcnt vmcnt(0)
; FORCE-2-NEXT:    ; return to shader part epilog
;
; FORCE-3-LABEL: sample_2d_nsa4:
; FORCE-3:       ; %bb.0: ; %main_body
; FORCE-3-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-3-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-3-NEXT:    v_mov_b32_e32 v2, v0
; FORCE-3-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-3-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-3-NEXT:    s_waitcnt vmcnt(0)
; FORCE-3-NEXT:    ; return to shader part epilog
;
; FORCE-4-LABEL: sample_2d_nsa4:
; FORCE-4:       ; %bb.0: ; %main_body
; FORCE-4-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-4-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-4-NEXT:    v_mov_b32_e32 v2, v0
; FORCE-4-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-4-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; FORCE-4-NEXT:    s_waitcnt vmcnt(0)
; FORCE-4-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample (three address operands), attribute threshold 4.
define amdgpu_ps <4 x float> @sample_3d_nsa4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) #4 {
; ATTRIB-LABEL: sample_3d_nsa4:
; ATTRIB:       ; %bb.0: ; %main_body
; ATTRIB-NEXT:    s_mov_b32 s12, exec_lo
; ATTRIB-NEXT:    s_wqm_b32 exec_lo, exec_lo
; ATTRIB-NEXT:    v_mov_b32_e32 v3, v0
; ATTRIB-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; ATTRIB-NEXT:    image_sample v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; ATTRIB-NEXT:    s_waitcnt vmcnt(0)
; ATTRIB-NEXT:    ; return to shader part epilog
;
; FORCE-2-LABEL: sample_3d_nsa4:
; FORCE-2:       ; %bb.0: ; %main_body
; FORCE-2-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-2-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-2-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-2-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-2-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-2-NEXT:    s_waitcnt vmcnt(0)
; FORCE-2-NEXT:    ; return to shader part epilog
;
; FORCE-3-LABEL: sample_3d_nsa4:
; FORCE-3:       ; %bb.0: ; %main_body
; FORCE-3-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-3-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-3-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; FORCE-3-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-3-NEXT:    image_sample v[0:3], [v1, v2, v0], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-3-NEXT:    s_waitcnt vmcnt(0)
; FORCE-3-NEXT:    ; return to shader part epilog
;
; FORCE-4-LABEL: sample_3d_nsa4:
; FORCE-4:       ; %bb.0: ; %main_body
; FORCE-4-NEXT:    s_mov_b32 s12, exec_lo
; FORCE-4-NEXT:    s_wqm_b32 exec_lo, exec_lo
; FORCE-4-NEXT:    v_mov_b32_e32 v3, v0
; FORCE-4-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; FORCE-4-NEXT:    image_sample v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; FORCE-4-NEXT:    s_waitcnt vmcnt(0)
; FORCE-4-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Image-sample intrinsics used above: the 2D variant takes two float
; coordinates, the 3D variant takes three.
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute group #1 is applied to the intrinsic declarations. Groups #2, #3 and
; #4 are applied to the test functions and differ only in the value of the
; "amdgpu-nsa-threshold" string attribute (2, 3 and 4 respectively).
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readonly "amdgpu-nsa-threshold"="2" }
attributes #3 = { nounwind readonly "amdgpu-nsa-threshold"="3" }
attributes #4 = { nounwind readonly "amdgpu-nsa-threshold"="4" }