1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s 4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s 6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s 7; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s 8 9define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) { 10; GFX8-UNPACKED-LABEL: load_1d_f16_x: 11; GFX8-UNPACKED: ; %bb.0: 12; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 13; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 14; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 15; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 16; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 17; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 18; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 19; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 20; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16 21; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 22; GFX8-UNPACKED-NEXT: ; return to shader part epilog 23; 24; GFX8-PACKED-LABEL: load_1d_f16_x: 25; GFX8-PACKED: ; %bb.0: 26; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 27; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 28; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 29; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 30; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 31; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 32; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 33; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 34; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16 35; GFX8-PACKED-NEXT: s_waitcnt 
vmcnt(0) 36; GFX8-PACKED-NEXT: ; return to shader part epilog 37; 38; GFX9-LABEL: load_1d_f16_x: 39; GFX9: ; %bb.0: 40; GFX9-NEXT: s_mov_b32 s0, s2 41; GFX9-NEXT: s_mov_b32 s1, s3 42; GFX9-NEXT: s_mov_b32 s2, s4 43; GFX9-NEXT: s_mov_b32 s3, s5 44; GFX9-NEXT: s_mov_b32 s4, s6 45; GFX9-NEXT: s_mov_b32 s5, s7 46; GFX9-NEXT: s_mov_b32 s6, s8 47; GFX9-NEXT: s_mov_b32 s7, s9 48; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16 49; GFX9-NEXT: s_waitcnt vmcnt(0) 50; GFX9-NEXT: ; return to shader part epilog 51; 52; GFX10PLUS-LABEL: load_1d_f16_x: 53; GFX10PLUS: ; %bb.0: 54; GFX10PLUS-NEXT: s_mov_b32 s0, s2 55; GFX10PLUS-NEXT: s_mov_b32 s1, s3 56; GFX10PLUS-NEXT: s_mov_b32 s2, s4 57; GFX10PLUS-NEXT: s_mov_b32 s3, s5 58; GFX10PLUS-NEXT: s_mov_b32 s4, s6 59; GFX10PLUS-NEXT: s_mov_b32 s5, s7 60; GFX10PLUS-NEXT: s_mov_b32 s6, s8 61; GFX10PLUS-NEXT: s_mov_b32 s7, s9 62; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16 63; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 64; GFX10PLUS-NEXT: ; return to shader part epilog 65; 66; GFX12-LABEL: load_1d_f16_x: 67; GFX12: ; %bb.0: 68; GFX12-NEXT: s_mov_b32 s0, s2 69; GFX12-NEXT: s_mov_b32 s1, s3 70; GFX12-NEXT: s_mov_b32 s2, s4 71; GFX12-NEXT: s_mov_b32 s3, s5 72; GFX12-NEXT: s_mov_b32 s4, s6 73; GFX12-NEXT: s_mov_b32 s5, s7 74; GFX12-NEXT: s_mov_b32 s6, s8 75; GFX12-NEXT: s_mov_b32 s7, s9 76; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 77; GFX12-NEXT: s_wait_loadcnt 0x0 78; GFX12-NEXT: ; return to shader part epilog 79 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 80 ret half %v 81} 82 83define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) { 84; GFX8-UNPACKED-LABEL: load_1d_f16_y: 85; GFX8-UNPACKED: ; %bb.0: 86; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 87; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 88; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 89; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 90; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 91; 
GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 92; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 93; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 94; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16 95; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 96; GFX8-UNPACKED-NEXT: ; return to shader part epilog 97; 98; GFX8-PACKED-LABEL: load_1d_f16_y: 99; GFX8-PACKED: ; %bb.0: 100; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 101; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 102; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 103; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 104; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 105; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 106; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 107; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 108; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16 109; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 110; GFX8-PACKED-NEXT: ; return to shader part epilog 111; 112; GFX9-LABEL: load_1d_f16_y: 113; GFX9: ; %bb.0: 114; GFX9-NEXT: s_mov_b32 s0, s2 115; GFX9-NEXT: s_mov_b32 s1, s3 116; GFX9-NEXT: s_mov_b32 s2, s4 117; GFX9-NEXT: s_mov_b32 s3, s5 118; GFX9-NEXT: s_mov_b32 s4, s6 119; GFX9-NEXT: s_mov_b32 s5, s7 120; GFX9-NEXT: s_mov_b32 s6, s8 121; GFX9-NEXT: s_mov_b32 s7, s9 122; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16 123; GFX9-NEXT: s_waitcnt vmcnt(0) 124; GFX9-NEXT: ; return to shader part epilog 125; 126; GFX10PLUS-LABEL: load_1d_f16_y: 127; GFX10PLUS: ; %bb.0: 128; GFX10PLUS-NEXT: s_mov_b32 s0, s2 129; GFX10PLUS-NEXT: s_mov_b32 s1, s3 130; GFX10PLUS-NEXT: s_mov_b32 s2, s4 131; GFX10PLUS-NEXT: s_mov_b32 s3, s5 132; GFX10PLUS-NEXT: s_mov_b32 s4, s6 133; GFX10PLUS-NEXT: s_mov_b32 s5, s7 134; GFX10PLUS-NEXT: s_mov_b32 s6, s8 135; GFX10PLUS-NEXT: s_mov_b32 s7, s9 136; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16 137; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 138; GFX10PLUS-NEXT: ; return to shader part epilog 139; 140; GFX12-LABEL: load_1d_f16_y: 141; GFX12: ; %bb.0: 142; GFX12-NEXT: s_mov_b32 s0, s2 143; GFX12-NEXT: s_mov_b32 s1, s3 144; GFX12-NEXT: 
s_mov_b32 s2, s4 145; GFX12-NEXT: s_mov_b32 s3, s5 146; GFX12-NEXT: s_mov_b32 s4, s6 147; GFX12-NEXT: s_mov_b32 s5, s7 148; GFX12-NEXT: s_mov_b32 s6, s8 149; GFX12-NEXT: s_mov_b32 s7, s9 150; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D d16 151; GFX12-NEXT: s_wait_loadcnt 0x0 152; GFX12-NEXT: ; return to shader part epilog 153 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 154 ret half %v 155} 156 157define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) { 158; GFX8-UNPACKED-LABEL: load_1d_f16_z: 159; GFX8-UNPACKED: ; %bb.0: 160; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 161; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 162; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 163; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 164; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 165; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 166; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 167; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 168; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16 169; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 170; GFX8-UNPACKED-NEXT: ; return to shader part epilog 171; 172; GFX8-PACKED-LABEL: load_1d_f16_z: 173; GFX8-PACKED: ; %bb.0: 174; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 175; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 176; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 177; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 178; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 179; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 180; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 181; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 182; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16 183; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 184; GFX8-PACKED-NEXT: ; return to shader part epilog 185; 186; GFX9-LABEL: load_1d_f16_z: 187; GFX9: ; %bb.0: 188; GFX9-NEXT: s_mov_b32 s0, s2 189; GFX9-NEXT: s_mov_b32 s1, s3 190; GFX9-NEXT: s_mov_b32 s2, s4 191; GFX9-NEXT: s_mov_b32 s3, s5 192; GFX9-NEXT: s_mov_b32 s4, s6 193; GFX9-NEXT: s_mov_b32 s5, s7 194; GFX9-NEXT: s_mov_b32 s6, s8 195; GFX9-NEXT: s_mov_b32 s7, s9 
196; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16 197; GFX9-NEXT: s_waitcnt vmcnt(0) 198; GFX9-NEXT: ; return to shader part epilog 199; 200; GFX10PLUS-LABEL: load_1d_f16_z: 201; GFX10PLUS: ; %bb.0: 202; GFX10PLUS-NEXT: s_mov_b32 s0, s2 203; GFX10PLUS-NEXT: s_mov_b32 s1, s3 204; GFX10PLUS-NEXT: s_mov_b32 s2, s4 205; GFX10PLUS-NEXT: s_mov_b32 s3, s5 206; GFX10PLUS-NEXT: s_mov_b32 s4, s6 207; GFX10PLUS-NEXT: s_mov_b32 s5, s7 208; GFX10PLUS-NEXT: s_mov_b32 s6, s8 209; GFX10PLUS-NEXT: s_mov_b32 s7, s9 210; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16 211; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 212; GFX10PLUS-NEXT: ; return to shader part epilog 213; 214; GFX12-LABEL: load_1d_f16_z: 215; GFX12: ; %bb.0: 216; GFX12-NEXT: s_mov_b32 s0, s2 217; GFX12-NEXT: s_mov_b32 s1, s3 218; GFX12-NEXT: s_mov_b32 s2, s4 219; GFX12-NEXT: s_mov_b32 s3, s5 220; GFX12-NEXT: s_mov_b32 s4, s6 221; GFX12-NEXT: s_mov_b32 s5, s7 222; GFX12-NEXT: s_mov_b32 s6, s8 223; GFX12-NEXT: s_mov_b32 s7, s9 224; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D d16 225; GFX12-NEXT: s_wait_loadcnt 0x0 226; GFX12-NEXT: ; return to shader part epilog 227 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 228 ret half %v 229} 230 231define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) { 232; GFX8-UNPACKED-LABEL: load_1d_f16_w: 233; GFX8-UNPACKED: ; %bb.0: 234; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 235; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 236; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 237; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 238; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 239; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 240; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 241; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 242; GFX8-UNPACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16 243; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 244; GFX8-UNPACKED-NEXT: ; return to shader part epilog 245; 246; GFX8-PACKED-LABEL: 
load_1d_f16_w: 247; GFX8-PACKED: ; %bb.0: 248; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 249; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 250; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 251; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 252; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 253; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 254; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 255; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 256; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16 257; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 258; GFX8-PACKED-NEXT: ; return to shader part epilog 259; 260; GFX9-LABEL: load_1d_f16_w: 261; GFX9: ; %bb.0: 262; GFX9-NEXT: s_mov_b32 s0, s2 263; GFX9-NEXT: s_mov_b32 s1, s3 264; GFX9-NEXT: s_mov_b32 s2, s4 265; GFX9-NEXT: s_mov_b32 s3, s5 266; GFX9-NEXT: s_mov_b32 s4, s6 267; GFX9-NEXT: s_mov_b32 s5, s7 268; GFX9-NEXT: s_mov_b32 s6, s8 269; GFX9-NEXT: s_mov_b32 s7, s9 270; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16 271; GFX9-NEXT: s_waitcnt vmcnt(0) 272; GFX9-NEXT: ; return to shader part epilog 273; 274; GFX10PLUS-LABEL: load_1d_f16_w: 275; GFX10PLUS: ; %bb.0: 276; GFX10PLUS-NEXT: s_mov_b32 s0, s2 277; GFX10PLUS-NEXT: s_mov_b32 s1, s3 278; GFX10PLUS-NEXT: s_mov_b32 s2, s4 279; GFX10PLUS-NEXT: s_mov_b32 s3, s5 280; GFX10PLUS-NEXT: s_mov_b32 s4, s6 281; GFX10PLUS-NEXT: s_mov_b32 s5, s7 282; GFX10PLUS-NEXT: s_mov_b32 s6, s8 283; GFX10PLUS-NEXT: s_mov_b32 s7, s9 284; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16 285; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 286; GFX10PLUS-NEXT: ; return to shader part epilog 287; 288; GFX12-LABEL: load_1d_f16_w: 289; GFX12: ; %bb.0: 290; GFX12-NEXT: s_mov_b32 s0, s2 291; GFX12-NEXT: s_mov_b32 s1, s3 292; GFX12-NEXT: s_mov_b32 s2, s4 293; GFX12-NEXT: s_mov_b32 s3, s5 294; GFX12-NEXT: s_mov_b32 s4, s6 295; GFX12-NEXT: s_mov_b32 s5, s7 296; GFX12-NEXT: s_mov_b32 s6, s8 297; GFX12-NEXT: s_mov_b32 s7, s9 298; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D d16 299; GFX12-NEXT: s_wait_loadcnt 0x0 300; 
GFX12-NEXT: ; return to shader part epilog 301 %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 302 ret half %v 303} 304 305define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) { 306; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy: 307; GFX8-UNPACKED: ; %bb.0: 308; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 309; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 310; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 311; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 312; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 313; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 314; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 315; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 316; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16 317; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 318; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 319; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 320; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 321; GFX8-UNPACKED-NEXT: ; return to shader part epilog 322; 323; GFX8-PACKED-LABEL: load_1d_v2f16_xy: 324; GFX8-PACKED: ; %bb.0: 325; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 326; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 327; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 328; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 329; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 330; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 331; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 332; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 333; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16 334; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 335; GFX8-PACKED-NEXT: ; return to shader part epilog 336; 337; GFX9-LABEL: load_1d_v2f16_xy: 338; GFX9: ; %bb.0: 339; GFX9-NEXT: s_mov_b32 s0, s2 340; GFX9-NEXT: s_mov_b32 s1, s3 341; GFX9-NEXT: s_mov_b32 s2, s4 342; GFX9-NEXT: s_mov_b32 s3, s5 343; GFX9-NEXT: s_mov_b32 s4, s6 344; GFX9-NEXT: s_mov_b32 s5, s7 345; GFX9-NEXT: s_mov_b32 s6, s8 346; GFX9-NEXT: s_mov_b32 s7, s9 347; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x3 
unorm d16 348; GFX9-NEXT: s_waitcnt vmcnt(0) 349; GFX9-NEXT: ; return to shader part epilog 350; 351; GFX10PLUS-LABEL: load_1d_v2f16_xy: 352; GFX10PLUS: ; %bb.0: 353; GFX10PLUS-NEXT: s_mov_b32 s0, s2 354; GFX10PLUS-NEXT: s_mov_b32 s1, s3 355; GFX10PLUS-NEXT: s_mov_b32 s2, s4 356; GFX10PLUS-NEXT: s_mov_b32 s3, s5 357; GFX10PLUS-NEXT: s_mov_b32 s4, s6 358; GFX10PLUS-NEXT: s_mov_b32 s5, s7 359; GFX10PLUS-NEXT: s_mov_b32 s6, s8 360; GFX10PLUS-NEXT: s_mov_b32 s7, s9 361; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16 362; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 363; GFX10PLUS-NEXT: ; return to shader part epilog 364; 365; GFX12-LABEL: load_1d_v2f16_xy: 366; GFX12: ; %bb.0: 367; GFX12-NEXT: s_mov_b32 s0, s2 368; GFX12-NEXT: s_mov_b32 s1, s3 369; GFX12-NEXT: s_mov_b32 s2, s4 370; GFX12-NEXT: s_mov_b32 s3, s5 371; GFX12-NEXT: s_mov_b32 s4, s6 372; GFX12-NEXT: s_mov_b32 s5, s7 373; GFX12-NEXT: s_mov_b32 s6, s8 374; GFX12-NEXT: s_mov_b32 s7, s9 375; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D d16 376; GFX12-NEXT: s_wait_loadcnt 0x0 377; GFX12-NEXT: ; return to shader part epilog 378 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 379 ret <2 x half> %v 380} 381 382define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) { 383; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz: 384; GFX8-UNPACKED: ; %bb.0: 385; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 386; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 387; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 388; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 389; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 390; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 391; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 392; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 393; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16 394; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 395; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 396; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 397; 
GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 398; GFX8-UNPACKED-NEXT: ; return to shader part epilog 399; 400; GFX8-PACKED-LABEL: load_1d_v2f16_xz: 401; GFX8-PACKED: ; %bb.0: 402; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 403; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 404; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 405; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 406; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 407; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 408; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 409; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 410; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16 411; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 412; GFX8-PACKED-NEXT: ; return to shader part epilog 413; 414; GFX9-LABEL: load_1d_v2f16_xz: 415; GFX9: ; %bb.0: 416; GFX9-NEXT: s_mov_b32 s0, s2 417; GFX9-NEXT: s_mov_b32 s1, s3 418; GFX9-NEXT: s_mov_b32 s2, s4 419; GFX9-NEXT: s_mov_b32 s3, s5 420; GFX9-NEXT: s_mov_b32 s4, s6 421; GFX9-NEXT: s_mov_b32 s5, s7 422; GFX9-NEXT: s_mov_b32 s6, s8 423; GFX9-NEXT: s_mov_b32 s7, s9 424; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16 425; GFX9-NEXT: s_waitcnt vmcnt(0) 426; GFX9-NEXT: ; return to shader part epilog 427; 428; GFX10PLUS-LABEL: load_1d_v2f16_xz: 429; GFX10PLUS: ; %bb.0: 430; GFX10PLUS-NEXT: s_mov_b32 s0, s2 431; GFX10PLUS-NEXT: s_mov_b32 s1, s3 432; GFX10PLUS-NEXT: s_mov_b32 s2, s4 433; GFX10PLUS-NEXT: s_mov_b32 s3, s5 434; GFX10PLUS-NEXT: s_mov_b32 s4, s6 435; GFX10PLUS-NEXT: s_mov_b32 s5, s7 436; GFX10PLUS-NEXT: s_mov_b32 s6, s8 437; GFX10PLUS-NEXT: s_mov_b32 s7, s9 438; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16 439; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 440; GFX10PLUS-NEXT: ; return to shader part epilog 441; 442; GFX12-LABEL: load_1d_v2f16_xz: 443; GFX12: ; %bb.0: 444; GFX12-NEXT: s_mov_b32 s0, s2 445; GFX12-NEXT: s_mov_b32 s1, s3 446; GFX12-NEXT: s_mov_b32 s2, s4 447; GFX12-NEXT: s_mov_b32 s3, s5 448; GFX12-NEXT: s_mov_b32 s4, s6 449; GFX12-NEXT: 
s_mov_b32 s5, s7 450; GFX12-NEXT: s_mov_b32 s6, s8 451; GFX12-NEXT: s_mov_b32 s7, s9 452; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D d16 453; GFX12-NEXT: s_wait_loadcnt 0x0 454; GFX12-NEXT: ; return to shader part epilog 455 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 456 ret <2 x half> %v 457} 458 459define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) { 460; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw: 461; GFX8-UNPACKED: ; %bb.0: 462; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 463; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 464; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 465; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 466; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 467; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 468; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 469; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 470; GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16 471; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 472; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 473; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 474; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 475; GFX8-UNPACKED-NEXT: ; return to shader part epilog 476; 477; GFX8-PACKED-LABEL: load_1d_v2f16_xw: 478; GFX8-PACKED: ; %bb.0: 479; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 480; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 481; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 482; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 483; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 484; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 485; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 486; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 487; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16 488; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 489; GFX8-PACKED-NEXT: ; return to shader part epilog 490; 491; GFX9-LABEL: load_1d_v2f16_xw: 492; GFX9: ; %bb.0: 493; GFX9-NEXT: s_mov_b32 s0, s2 494; GFX9-NEXT: s_mov_b32 s1, s3 495; GFX9-NEXT: s_mov_b32 s2, s4 496; 
GFX9-NEXT: s_mov_b32 s3, s5 497; GFX9-NEXT: s_mov_b32 s4, s6 498; GFX9-NEXT: s_mov_b32 s5, s7 499; GFX9-NEXT: s_mov_b32 s6, s8 500; GFX9-NEXT: s_mov_b32 s7, s9 501; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16 502; GFX9-NEXT: s_waitcnt vmcnt(0) 503; GFX9-NEXT: ; return to shader part epilog 504; 505; GFX10PLUS-LABEL: load_1d_v2f16_xw: 506; GFX10PLUS: ; %bb.0: 507; GFX10PLUS-NEXT: s_mov_b32 s0, s2 508; GFX10PLUS-NEXT: s_mov_b32 s1, s3 509; GFX10PLUS-NEXT: s_mov_b32 s2, s4 510; GFX10PLUS-NEXT: s_mov_b32 s3, s5 511; GFX10PLUS-NEXT: s_mov_b32 s4, s6 512; GFX10PLUS-NEXT: s_mov_b32 s5, s7 513; GFX10PLUS-NEXT: s_mov_b32 s6, s8 514; GFX10PLUS-NEXT: s_mov_b32 s7, s9 515; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16 516; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 517; GFX10PLUS-NEXT: ; return to shader part epilog 518; 519; GFX12-LABEL: load_1d_v2f16_xw: 520; GFX12: ; %bb.0: 521; GFX12-NEXT: s_mov_b32 s0, s2 522; GFX12-NEXT: s_mov_b32 s1, s3 523; GFX12-NEXT: s_mov_b32 s2, s4 524; GFX12-NEXT: s_mov_b32 s3, s5 525; GFX12-NEXT: s_mov_b32 s4, s6 526; GFX12-NEXT: s_mov_b32 s5, s7 527; GFX12-NEXT: s_mov_b32 s6, s8 528; GFX12-NEXT: s_mov_b32 s7, s9 529; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D d16 530; GFX12-NEXT: s_wait_loadcnt 0x0 531; GFX12-NEXT: ; return to shader part epilog 532 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 533 ret <2 x half> %v 534} 535 536define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) { 537; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz: 538; GFX8-UNPACKED: ; %bb.0: 539; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 540; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 541; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 542; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 543; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 544; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 545; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 546; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 547; 
GFX8-UNPACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16 548; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 549; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 550; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 551; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 552; GFX8-UNPACKED-NEXT: ; return to shader part epilog 553; 554; GFX8-PACKED-LABEL: load_1d_v2f16_yz: 555; GFX8-PACKED: ; %bb.0: 556; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 557; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 558; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 559; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 560; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 561; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 562; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 563; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 564; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16 565; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 566; GFX8-PACKED-NEXT: ; return to shader part epilog 567; 568; GFX9-LABEL: load_1d_v2f16_yz: 569; GFX9: ; %bb.0: 570; GFX9-NEXT: s_mov_b32 s0, s2 571; GFX9-NEXT: s_mov_b32 s1, s3 572; GFX9-NEXT: s_mov_b32 s2, s4 573; GFX9-NEXT: s_mov_b32 s3, s5 574; GFX9-NEXT: s_mov_b32 s4, s6 575; GFX9-NEXT: s_mov_b32 s5, s7 576; GFX9-NEXT: s_mov_b32 s6, s8 577; GFX9-NEXT: s_mov_b32 s7, s9 578; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16 579; GFX9-NEXT: s_waitcnt vmcnt(0) 580; GFX9-NEXT: ; return to shader part epilog 581; 582; GFX10PLUS-LABEL: load_1d_v2f16_yz: 583; GFX10PLUS: ; %bb.0: 584; GFX10PLUS-NEXT: s_mov_b32 s0, s2 585; GFX10PLUS-NEXT: s_mov_b32 s1, s3 586; GFX10PLUS-NEXT: s_mov_b32 s2, s4 587; GFX10PLUS-NEXT: s_mov_b32 s3, s5 588; GFX10PLUS-NEXT: s_mov_b32 s4, s6 589; GFX10PLUS-NEXT: s_mov_b32 s5, s7 590; GFX10PLUS-NEXT: s_mov_b32 s6, s8 591; GFX10PLUS-NEXT: s_mov_b32 s7, s9 592; GFX10PLUS-NEXT: image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16 593; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 594; GFX10PLUS-NEXT: ; return to shader part epilog 595; 596; GFX12-LABEL: 
load_1d_v2f16_yz: 597; GFX12: ; %bb.0: 598; GFX12-NEXT: s_mov_b32 s0, s2 599; GFX12-NEXT: s_mov_b32 s1, s3 600; GFX12-NEXT: s_mov_b32 s2, s4 601; GFX12-NEXT: s_mov_b32 s3, s5 602; GFX12-NEXT: s_mov_b32 s4, s6 603; GFX12-NEXT: s_mov_b32 s5, s7 604; GFX12-NEXT: s_mov_b32 s6, s8 605; GFX12-NEXT: s_mov_b32 s7, s9 606; GFX12-NEXT: image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 607; GFX12-NEXT: s_wait_loadcnt 0x0 608; GFX12-NEXT: ; return to shader part epilog 609 %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 610 ret <2 x half> %v 611} 612 613define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { 614; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz: 615; GFX8-UNPACKED: ; %bb.0: 616; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 617; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 618; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 619; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 620; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 621; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 622; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 623; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 624; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16 625; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 626; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, 0xffff, v1 627; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v2 628; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 629; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 630; GFX8-UNPACKED-NEXT: ; return to shader part epilog 631; 632; GFX8-PACKED-LABEL: load_1d_v3f16_xyz: 633; GFX8-PACKED: ; %bb.0: 634; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 635; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 636; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 637; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 638; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 639; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 640; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 641; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 642; GFX8-PACKED-NEXT: image_load v[0:1], v0, 
s[0:7] dmask:0x7 unorm d16 643; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 644; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 645; GFX8-PACKED-NEXT: ; return to shader part epilog 646; 647; GFX9-LABEL: load_1d_v3f16_xyz: 648; GFX9: ; %bb.0: 649; GFX9-NEXT: s_mov_b32 s0, s2 650; GFX9-NEXT: s_mov_b32 s1, s3 651; GFX9-NEXT: s_mov_b32 s2, s4 652; GFX9-NEXT: s_mov_b32 s3, s5 653; GFX9-NEXT: s_mov_b32 s4, s6 654; GFX9-NEXT: s_mov_b32 s5, s7 655; GFX9-NEXT: s_mov_b32 s6, s8 656; GFX9-NEXT: s_mov_b32 s7, s9 657; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 658; GFX9-NEXT: s_waitcnt vmcnt(0) 659; GFX9-NEXT: ; return to shader part epilog 660; 661; GFX10PLUS-LABEL: load_1d_v3f16_xyz: 662; GFX10PLUS: ; %bb.0: 663; GFX10PLUS-NEXT: s_mov_b32 s0, s2 664; GFX10PLUS-NEXT: s_mov_b32 s1, s3 665; GFX10PLUS-NEXT: s_mov_b32 s2, s4 666; GFX10PLUS-NEXT: s_mov_b32 s3, s5 667; GFX10PLUS-NEXT: s_mov_b32 s4, s6 668; GFX10PLUS-NEXT: s_mov_b32 s5, s7 669; GFX10PLUS-NEXT: s_mov_b32 s6, s8 670; GFX10PLUS-NEXT: s_mov_b32 s7, s9 671; GFX10PLUS-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16 672; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 673; GFX10PLUS-NEXT: ; return to shader part epilog 674; 675; GFX12-LABEL: load_1d_v3f16_xyz: 676; GFX12: ; %bb.0: 677; GFX12-NEXT: s_mov_b32 s0, s2 678; GFX12-NEXT: s_mov_b32 s1, s3 679; GFX12-NEXT: s_mov_b32 s2, s4 680; GFX12-NEXT: s_mov_b32 s3, s5 681; GFX12-NEXT: s_mov_b32 s4, s6 682; GFX12-NEXT: s_mov_b32 s5, s7 683; GFX12-NEXT: s_mov_b32 s6, s8 684; GFX12-NEXT: s_mov_b32 s7, s9 685; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D d16 686; GFX12-NEXT: s_wait_loadcnt 0x0 687; GFX12-NEXT: ; return to shader part epilog 688 %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 689 ret <3 x half> %v 690} 691 692define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) { 693; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw: 694; GFX8-UNPACKED: ; 
%bb.0: 695; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 696; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 697; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 698; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 699; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 700; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 701; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 702; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 703; GFX8-UNPACKED-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16 704; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 705; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 706; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, 0xffff, v3 707; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 708; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v3, 16, v3 709; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 710; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 711; GFX8-UNPACKED-NEXT: ; return to shader part epilog 712; 713; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw: 714; GFX8-PACKED: ; %bb.0: 715; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 716; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 717; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 718; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 719; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 720; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 721; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 722; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 723; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16 724; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 725; GFX8-PACKED-NEXT: ; return to shader part epilog 726; 727; GFX9-LABEL: load_1d_v4f16_xyzw: 728; GFX9: ; %bb.0: 729; GFX9-NEXT: s_mov_b32 s0, s2 730; GFX9-NEXT: s_mov_b32 s1, s3 731; GFX9-NEXT: s_mov_b32 s2, s4 732; GFX9-NEXT: s_mov_b32 s3, s5 733; GFX9-NEXT: s_mov_b32 s4, s6 734; GFX9-NEXT: s_mov_b32 s5, s7 735; GFX9-NEXT: s_mov_b32 s6, s8 736; GFX9-NEXT: s_mov_b32 s7, s9 737; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16 738; GFX9-NEXT: s_waitcnt vmcnt(0) 739; GFX9-NEXT: ; return to shader part 
epilog 740; 741; GFX10PLUS-LABEL: load_1d_v4f16_xyzw: 742; GFX10PLUS: ; %bb.0: 743; GFX10PLUS-NEXT: s_mov_b32 s0, s2 744; GFX10PLUS-NEXT: s_mov_b32 s1, s3 745; GFX10PLUS-NEXT: s_mov_b32 s2, s4 746; GFX10PLUS-NEXT: s_mov_b32 s3, s5 747; GFX10PLUS-NEXT: s_mov_b32 s4, s6 748; GFX10PLUS-NEXT: s_mov_b32 s5, s7 749; GFX10PLUS-NEXT: s_mov_b32 s6, s8 750; GFX10PLUS-NEXT: s_mov_b32 s7, s9 751; GFX10PLUS-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16 752; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 753; GFX10PLUS-NEXT: ; return to shader part epilog 754; 755; GFX12-LABEL: load_1d_v4f16_xyzw: 756; GFX12: ; %bb.0: 757; GFX12-NEXT: s_mov_b32 s0, s2 758; GFX12-NEXT: s_mov_b32 s1, s3 759; GFX12-NEXT: s_mov_b32 s2, s4 760; GFX12-NEXT: s_mov_b32 s3, s5 761; GFX12-NEXT: s_mov_b32 s4, s6 762; GFX12-NEXT: s_mov_b32 s5, s7 763; GFX12-NEXT: s_mov_b32 s6, s8 764; GFX12-NEXT: s_mov_b32 s7, s9 765; GFX12-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16 766; GFX12-NEXT: s_wait_loadcnt 0x0 767; GFX12-NEXT: ; return to shader part epilog 768 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 769 ret <4 x half> %v 770} 771 772define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) { 773; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x: 774; GFX8-UNPACKED: ; %bb.0: 775; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v1, 0 776; GFX8-UNPACKED-NEXT: s_mov_b32 s0, s2 777; GFX8-UNPACKED-NEXT: s_mov_b32 s1, s3 778; GFX8-UNPACKED-NEXT: s_mov_b32 s2, s4 779; GFX8-UNPACKED-NEXT: s_mov_b32 s3, s5 780; GFX8-UNPACKED-NEXT: s_mov_b32 s4, s6 781; GFX8-UNPACKED-NEXT: s_mov_b32 s5, s7 782; GFX8-UNPACKED-NEXT: s_mov_b32 s6, s8 783; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 784; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v2, v1 785; GFX8-UNPACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16 786; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) 787; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v0, v2 788; GFX8-UNPACKED-NEXT: ; return 
to shader part epilog 789; 790; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x: 791; GFX8-PACKED: ; %bb.0: 792; GFX8-PACKED-NEXT: v_mov_b32_e32 v1, 0 793; GFX8-PACKED-NEXT: s_mov_b32 s0, s2 794; GFX8-PACKED-NEXT: s_mov_b32 s1, s3 795; GFX8-PACKED-NEXT: s_mov_b32 s2, s4 796; GFX8-PACKED-NEXT: s_mov_b32 s3, s5 797; GFX8-PACKED-NEXT: s_mov_b32 s4, s6 798; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 799; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 800; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 801; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, v1 802; GFX8-PACKED-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16 803; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) 804; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v2 805; GFX8-PACKED-NEXT: ; return to shader part epilog 806; 807; GFX9-LABEL: load_1d_f16_tfe_dmask_x: 808; GFX9: ; %bb.0: 809; GFX9-NEXT: v_mov_b32_e32 v1, 0 810; GFX9-NEXT: s_mov_b32 s0, s2 811; GFX9-NEXT: s_mov_b32 s1, s3 812; GFX9-NEXT: s_mov_b32 s2, s4 813; GFX9-NEXT: s_mov_b32 s3, s5 814; GFX9-NEXT: s_mov_b32 s4, s6 815; GFX9-NEXT: s_mov_b32 s5, s7 816; GFX9-NEXT: s_mov_b32 s6, s8 817; GFX9-NEXT: s_mov_b32 s7, s9 818; GFX9-NEXT: v_mov_b32_e32 v2, v1 819; GFX9-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16 820; GFX9-NEXT: s_waitcnt vmcnt(0) 821; GFX9-NEXT: v_mov_b32_e32 v0, v2 822; GFX9-NEXT: ; return to shader part epilog 823; 824; GFX10PLUS-LABEL: load_1d_f16_tfe_dmask_x: 825; GFX10PLUS: ; %bb.0: 826; GFX10PLUS-NEXT: v_mov_b32_e32 v1, 0 827; GFX10PLUS-NEXT: s_mov_b32 s0, s2 828; GFX10PLUS-NEXT: s_mov_b32 s1, s3 829; GFX10PLUS-NEXT: s_mov_b32 s2, s4 830; GFX10PLUS-NEXT: s_mov_b32 s3, s5 831; GFX10PLUS-NEXT: s_mov_b32 s4, s6 832; GFX10PLUS-NEXT: s_mov_b32 s5, s7 833; GFX10PLUS-NEXT: s_mov_b32 s6, s8 834; GFX10PLUS-NEXT: s_mov_b32 s7, s9 835; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v1 836; GFX10PLUS-NEXT: image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16 837; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 838; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v2 839; GFX10PLUS-NEXT: ; return to shader 
part epilog
;
; GFX12-LABEL: load_1d_f16_tfe_dmask_x:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_mov_b32_e32 v2, v1
; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v2
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { half, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; 1D d16 image load of <2 x half> through the struct-returning intrinsic
; (dmask immediate 3). Only element 1 of the returned { <2 x half>, i32 } is
; used: it is bitcast to float and returned. The checked output carries the
; tfe modifier on every target.
define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
; GFX10PLUS-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v2
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_mov_b32_e32 v2, v1
; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe d16
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v2
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; 1D d16 image load of <3 x half> through the struct-returning intrinsic
; (dmask immediate 7). Only element 1 of the returned { <3 x half>, i32 } is
; used: it is bitcast to float and returned. The checked output carries the
; tfe modifier on every target.
define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v4, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v4
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-PACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v1
; GFX10PLUS-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v3
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D tfe d16
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v3
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <3 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; 1D d16 image load of <4 x half> through the struct-returning intrinsic.
; Only element 1 of the returned { <4 x half>, i32 } is used: it is bitcast to
; float and returned.
; NOTE(review): the dmask immediate passed below is 16 (checked as dmask:0x10
; with a two-register destination), although the function name says xyzw and
; the sibling tests pass 1, 3 and 7 for x, xy and xyz. 15 (0xf) may have been
; intended -- confirm. If the immediate is changed, regenerate the assertions
; with utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
; GFX10PLUS-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v2
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_mov_b32_e32 v2, v1
; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D tfe d16
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v2
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <4 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Value-returning forms of the 1D image-load intrinsic (half .. <4 x half>).
declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Struct-returning forms ({ data, i32 }) called by the *_tfe_* tests above.
declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }