; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX68 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GFX68 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefix=NOPRT %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

; GlobalISel coverage for the llvm.amdgcn.image.load.1d intrinsic family.
; Each function below issues one call and returns (part of) its result; the
; expected assembly is checked for tahiti and fiji (shared prefix), gfx1010 and
; gfx1100 (shared prefix), gfx1010 with enable-prt-strict-null turned off, and
; gfx1200. The check lines are generated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; f32 result, first immediate operand 1 (dmask:0x1 in the expected output).
define amdgpu_ps float @load_1d_f32_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_f32_x:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_f32_x:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_f32_x:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; f32 result, first immediate operand 2 (dmask:0x2 in the expected output).
define amdgpu_ps float @load_1d_f32_y(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_f32_y:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_y:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_f32_y:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_f32_y:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; f32 result, first immediate operand 4 (dmask:0x4 in the expected output).
define amdgpu_ps float @load_1d_f32_z(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_f32_z:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_z:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_f32_z:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_f32_z:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; f32 result, first immediate operand 8 (dmask:0x8 in the expected output).
define amdgpu_ps float @load_1d_f32_w(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_f32_w:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_w:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_f32_w:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_f32_w:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; <2 x float> result, first immediate operand 3 (dmask:0x3 in the expected output).
define amdgpu_ps <2 x float> @load_1d_v2f32_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v2f32_xy:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v2f32_xy:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v2f32_xy:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; <2 x float> result, first immediate operand 5 (dmask:0x5 in the expected output).
define amdgpu_ps <2 x float> @load_1d_v2f32_xz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v2f32_xz:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v2f32_xz:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v2f32_xz:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; <2 x float> result, first immediate operand 9 (dmask:0x9 in the expected output).
define amdgpu_ps <2 x float> @load_1d_v2f32_xw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v2f32_xw:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_xw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v2f32_xw:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v2f32_xw:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; <2 x float> result, first immediate operand 6 (dmask:0x6 in the expected output).
define amdgpu_ps <2 x float> @load_1d_v2f32_yz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v2f32_yz:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_yz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v2f32_yz:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v2f32_yz:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; <3 x float> result, first immediate operand 7 (dmask:0x7 in the expected output).
define amdgpu_ps <3 x float> @load_1d_v3f32_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v3f32_xyz:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f32_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v3f32_xyz:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v3f32_xyz:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %v
}

; <4 x float> result, first immediate operand 15 (dmask:0xf in the expected output).
define amdgpu_ps <4 x float> @load_1d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v4f32_xyzw:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v4f32_xyzw:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v4f32_xyzw:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; TFE variant: the fourth operand is 1 and the call returns { float, i32 }.
; Only the i32 member is used; it is bitcast to float and returned. The
; expected image_load carries the tfe modifier.
define amdgpu_ps float @load_1d_f32_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_f32_tfe_dmask_x:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    v_mov_b32_e32 v1, 0
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    v_mov_b32_e32 v2, v1
; GFX68-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    v_mov_b32_e32 v0, v2
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_tfe_dmask_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_f32_tfe_dmask_x:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_f32_tfe_dmask_x:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_mov_b32_e32 v2, v1
; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v2
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { float, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; TFE variant returning { <2 x float>, i32 } with first operand 3; only the
; i32 member is returned (bitcast to float).
define amdgpu_ps float @load_1d_v2f32_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    v_mov_b32_e32 v1, 0
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    v_mov_b32_e32 v2, v1
; GFX68-NEXT:    v_mov_b32_e32 v3, v1
; GFX68-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    v_mov_b32_e32 v0, v3
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v3
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v2f32_tfe_dmask_xy:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    v_mov_b32_e32 v2, 0
; NOPRT-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_mov_b32_e32 v0, v2
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v2f32_tfe_dmask_xy:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v3
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <2 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; TFE variant returning { <3 x float>, i32 } with first operand 7; only the
; i32 member is returned (bitcast to float).
define amdgpu_ps float @load_1d_v3f32_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    v_mov_b32_e32 v1, 0
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    v_mov_b32_e32 v2, v1
; GFX68-NEXT:    v_mov_b32_e32 v3, v1
; GFX68-NEXT:    v_mov_b32_e32 v4, v1
; GFX68-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    v_mov_b32_e32 v0, v4
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v4, v1
; GFX10-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v4
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v3f32_tfe_dmask_xyz:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT:    v_mov_b32_e32 v4, v1
; GFX12-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v4
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <3 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; TFE variant returning { <4 x float>, i32 }; only the i32 member is returned
; (bitcast to float).
; NOTE(review): the first operand here is 16 (dmask:0x10 in the expected
; output, with a two-register destination), although the function name says
; xyzw and the non-TFE v4f32 test above passes 15. The generated checks match
; the IR as written - confirm whether 16 is intentional before changing it,
; and regenerate the assertions if it is changed.
define amdgpu_ps float @load_1d_v4f32_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    v_mov_b32_e32 v1, 0
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    v_mov_b32_e32 v2, v1
; GFX68-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    v_mov_b32_e32 v0, v2
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_mov_b32_e32 v2, v1
; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v2
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; TFE variant with a first operand of 0. Every expected image_load below still
; carries dmask:0x1 rather than 0.
define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
; GFX68-LABEL: load_1d_f32_tfe_dmask_0:
; GFX68:       ; %bb.0:
; GFX68-NEXT:    v_mov_b32_e32 v1, 0
; GFX68-NEXT:    s_mov_b32 s0, s2
; GFX68-NEXT:    s_mov_b32 s1, s3
; GFX68-NEXT:    s_mov_b32 s2, s4
; GFX68-NEXT:    s_mov_b32 s3, s5
; GFX68-NEXT:    s_mov_b32 s4, s6
; GFX68-NEXT:    s_mov_b32 s5, s7
; GFX68-NEXT:    s_mov_b32 s6, s8
; GFX68-NEXT:    s_mov_b32 s7, s9
; GFX68-NEXT:    v_mov_b32_e32 v2, v1
; GFX68-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
; GFX68-NEXT:    s_waitcnt vmcnt(0)
; GFX68-NEXT:    v_mov_b32_e32 v0, v2
; GFX68-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_f32_tfe_dmask_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: load_1d_f32_tfe_dmask_0:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    s_mov_b32 s0, s2
; NOPRT-NEXT:    s_mov_b32 s1, s3
; NOPRT-NEXT:    s_mov_b32 s2, s4
; NOPRT-NEXT:    s_mov_b32 s3, s5
; NOPRT-NEXT:    s_mov_b32 s4, s6
; NOPRT-NEXT:    s_mov_b32 s5, s7
; NOPRT-NEXT:    s_mov_b32 s6, s8
; NOPRT-NEXT:    s_mov_b32 s7, s9
; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_1d_f32_tfe_dmask_0:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_mov_b32_e32 v2, v1
; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, v2
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { float, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; Intrinsic declarations used by the plain (non-TFE) tests above.
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Struct-returning overloads used by the TFE tests above.
declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = {
nounwind readonly } 997