; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=amdgcn -mcpu=tahiti | FileCheck %s --check-prefixes=GFX6,GCN
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga | FileCheck %s --check-prefixes=GFX8,GCN
; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 | FileCheck %s --check-prefixes=GFX10,GCN
; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 | FileCheck %s --check-prefixes=GFX11,GCN
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,GCN

; Code generation tests for AMDGPU kernels that build small vectors (fully
; constant, or partly derived from a kernel argument) and store them through a
; global pointer. Each kernel is checked against every subtarget listed in the
; llc invocations above.

; Stores the constant <2 x i32> <5, 6> to %out.
define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
; GFX6-LABEL: build_vector2:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, 5
; GFX6-NEXT:    v_mov_b32_e32 v1, 6
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: build_vector2:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT:    v_mov_b32_e32 v0, 5
; GFX8-NEXT:    v_mov_b32_e32 v1, 6
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: build_vector2:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_mov_b32_e32 v0, 5
; GFX10-NEXT:    v_mov_b32_e32 v1, 6
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: build_vector2:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    v_mov_b32_e32 v0, 5
; GFX11-NEXT:    v_mov_b32_e32 v1, 6
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX940-LABEL: build_vector2:
; GFX940:       ; %bb.0: ; %entry
; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, 5
; GFX940-NEXT:    v_mov_b32_e32 v1, 6
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_endpgm
entry:
  store <2 x i32> <i32 5, i32 6>, ptr addrspace(1) %out
  ret void
}

; Stores the constant <4 x i32> <5, 6, 7, 8> to %out.
define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
; GFX6-LABEL: build_vector4:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, 5
; GFX6-NEXT:    v_mov_b32_e32 v1, 6
; GFX6-NEXT:    v_mov_b32_e32 v2, 7
; GFX6-NEXT:    v_mov_b32_e32 v3, 8
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: build_vector4:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT:    v_mov_b32_e32 v0, 5
; GFX8-NEXT:    v_mov_b32_e32 v1, 6
; GFX8-NEXT:    v_mov_b32_e32 v2, 7
; GFX8-NEXT:    v_mov_b32_e32 v3, 8
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v5, s1
; GFX8-NEXT:    v_mov_b32_e32 v4, s0
; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: build_vector4:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    v_mov_b32_e32 v0, 5
; GFX10-NEXT:    v_mov_b32_e32 v1, 6
; GFX10-NEXT:    v_mov_b32_e32 v2, 7
; GFX10-NEXT:    v_mov_b32_e32 v3, 8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: build_vector4:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    v_mov_b32_e32 v0, 5
; GFX11-NEXT:    v_mov_b32_e32 v1, 6
; GFX11-NEXT:    v_mov_b32_e32 v2, 7
; GFX11-NEXT:    v_mov_b32_e32 v3, 8
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX940-LABEL: build_vector4:
; GFX940:       ; %bb.0: ; %entry
; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, 5
; GFX940-NEXT:    v_mov_b32_e32 v1, 6
; GFX940-NEXT:    v_mov_b32_e32 v2, 7
; GFX940-NEXT:    v_mov_b32_e32 v3, 8
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_endpgm
entry:
  store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr addrspace(1) %out
  ret void
}

; Stores the constant <2 x i16> <5, 6> to %out. The checks below expect the
; two halves to be materialized as the single 32-bit immediate 0x60005.
define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
; GFX6-LABEL: build_vector_v2i16:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, 0x60005
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: build_vector_v2i16:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT:    v_mov_b32_e32 v2, 0x60005
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: build_vector_v2i16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x60005
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: build_vector_v2i16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    v_mov_b32_e32 v1, 0x60005
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX940-LABEL: build_vector_v2i16:
; GFX940:       ; %bb.0: ; %entry
; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    v_mov_b32_e32 v1, 0x60005
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_endpgm
entry:
  store <2 x i16> <i16 5, i16 6>, ptr addrspace(1) %out
  ret void
}

; Builds a <2 x i16> whose element 0 is the upper 16 bits of %a (lshr by 16,
; then trunc) and whose element 1 is the constant 5, and stores it to %out.
; The insertelement chain starts from poison rather than undef; both lanes are
; overwritten before the store, so the stored value is fully defined.
define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 %a) {
; GFX6-LABEL: build_vector_v2i16_trunc:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s6, s[4:5], 0xb
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_alignbit_b32 v0, 5, s6, 16
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: build_vector_v2i16_trunc:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_lshr_b32 s2, s2, 16
; GFX8-NEXT:    s_or_b32 s2, s2, 0x50000
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: build_vector_v2i16_trunc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshr_b32 s2, s2, 16
; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, 5
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: build_vector_v2i16_trunc:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_pack_hl_b32_b16 s2, s2, 5
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_mov_b32_e32 v1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX940-LABEL: build_vector_v2i16_trunc:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    s_lshr_b32 s2, s2, 16
; GFX940-NEXT:    s_pack_ll_b32_b16 s2, s2, 5
; GFX940-NEXT:    v_mov_b32_e32 v1, s2
; GFX940-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_endpgm
  %srl = lshr i32 %a, 16
  %trunc = trunc i32 %srl to i16
  %ins.0 = insertelement <2 x i16> poison, i16 %trunc, i32 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
  store <2 x i16> %ins.1, ptr addrspace(1) %out
  ret void
}

; Selects elements 0 and 2 of the <4 x i16> argument %in, zero-extends each to
; i32, shifts each left by 16, and stores the resulting <2 x i32> to %out.
define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) {
; GFX6-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, s0
; GFX6-NEXT:    s_mov_b32 s5, s1
; GFX6-NEXT:    s_lshl_b32 s0, s3, 16
; GFX6-NEXT:    s_lshl_b32 s1, s2, 16
; GFX6-NEXT:    v_mov_b32_e32 v0, s1
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_lshl_b32 s3, s3, 16
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
; GFX8-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT:    s_endpgm
;
; GFX10-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl_b32 s2, s2, 16
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl_b32 s2, s2, 16
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX940-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX940:       ; %bb.0: ; %entry
; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    s_lshl_b32 s3, s3, 16
; GFX940-NEXT:    s_lshl_b32 s2, s2, 16
; GFX940-NEXT:    v_mov_b32_e32 v0, s2
; GFX940-NEXT:    v_mov_b32_e32 v1, s3
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_endpgm
entry:
  %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %zextended = zext <2 x i16> %shuf to <2 x i32>
  %shifted = shl <2 x i32> %zextended, <i32 16, i32 16>
  store <2 x i32> %shifted, ptr addrspace(1) %out
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}